Add a conductor liveness check utility script

This script runs a liveness check on the configured conductor hostname
and will fail if the conductor is not online. Its intended purpose is to
be used as a kubernetes pod startup or liveness probe for the conductor
container.

Change-Id: I88288e0d7a1da4ec99f31c20771299cce2499bf0
Signed-off-by: Steve Baker <sbaker@redhat.com>
This commit is contained in:
Steve Baker 2025-12-03 10:17:46 +13:00
parent 7495f77258
commit 194b638c72

57
tools/live_check_conductor Executable file
View file

@ -0,0 +1,57 @@
#!/usr/bin/env python3
#
# Copyright 2025 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# This script runs a liveness check on the configured conductor hostname
# and exits with a non-zero status if the conductor is not online. It is
# intended to be used as a Kubernetes pod startup or liveness probe for the
# conductor container.
from ironic.common import config
from ironic.common import exception
import ironic.conf
# Shared configuration object from ironic.conf; main() reads CONF.host.
CONF = ironic.conf.CONF
# Parse configuration with an empty argument list rather than sys.argv.
# NOTE(review): this call sits before the DB API import below, presumably
# so the database layer sees parsed configuration -- confirm before
# reordering these lines.
config.parse_args([])
from ironic.db.sqlalchemy import api
import sys
def main():
    """Exit 0 if the configured conductor host is online, otherwise exit 1.

    Queries the database for the online conductors and checks whether
    ``CONF.host`` is among them. When it is not, a second lookup is made
    only to choose a more helpful message before failing. All status and
    error messages are written to stderr.

    :raises SystemExit: always; the exit code is 0 when ``CONF.host`` is
        reported online, and 1 when it is not or a database query fails.
    """
    dbapi = api.get_backend()

    # Attempt a database query
    try:
        conductors = dbapi.get_online_conductors()
    except Exception as e:
        print(f"Error querying database for online conductors: {e}",
              file=sys.stderr)
        sys.exit(1)

    # Check if host is online
    if CONF.host in conductors:
        print(f"{CONF.host} is online", file=sys.stderr)
        sys.exit(0)

    # Check why host is offline (old heartbeat vs no heartbeat)
    # NOTE(review): which records get_conductor() returns depends on its
    # default filters -- confirm against the DB API.
    try:
        dbapi.get_conductor(CONF.host)
    except exception.ConductorNotFound:
        print(f"{CONF.host} is offline", file=sys.stderr)
    except Exception as e:
        # A database failure here should be reported as cleanly as a
        # failure of the first query, not as a raw traceback.
        print(f"Error querying database for conductor {CONF.host}: {e}",
              file=sys.stderr)
    else:
        print(f"{CONF.host} stopped sending heartbeats", file=sys.stderr)

    # Every path that reaches this point means the host is not online.
    sys.exit(1)
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()