Make post-firmware-update reboot conditional on component

On most hardware platforms, each firmware component that can be updated
has different reboot requirements. In addition, some platforms are
particularly sensitive to reboots happening at the expected time.
This change attempts to make the reboot behavior in the
_execute_firmware_update method depend on the component being updated,
so that it also works for multi-component scenarios.

Assisted-By: Claude Code Sonnet 4.5
Change-Id: Ie4fe72406e3aedb8af246703f13f41e31866f58c
Signed-off-by: Jacob Anders <janders@redhat.com>
Signed-off-by: Iury Gregory Melo Ferreira <imelofer@redhat.com>
This commit is contained in:
Jacob Anders 2025-11-07 05:06:12 +10:00
parent 8200f0f5df
commit 8e19a8eae6
5 changed files with 1338 additions and 161 deletions

View file

@ -123,6 +123,28 @@ opts = [
'BMC firmware updates may need extended time to handle '
'BMC transitional states during the firmware update '
'process.')),
cfg.IntOpt('firmware_update_reboot_delay',
min=0,
default=300,
help=_('Default wait time (in seconds) for component-specific '
'firmware update operations. Used for: BIOS firmware '
'update wait before reboot, BMC firmware version check '
'timeout, and NIC firmware task completion timeout.')),
cfg.IntOpt('firmware_update_bmc_version_check_interval',
min=0,
default=30,
help=_('Interval (in seconds) for checking BMC firmware '
'version after BMC firmware update. Used to verify '
'if BMC firmware has been successfully applied.')),
cfg.IntOpt('firmware_update_nic_starting_wait',
min=0,
default=30,
help=_('Time (in seconds) to wait for a NIC firmware update '
'task to progress beyond the STARTING state before '
'triggering a reboot. Some NICs need a reboot to '
'start applying firmware, while others can begin '
'immediately. This timeout helps determine which '
'behavior the hardware exhibits.')),
cfg.StrOpt('firmware_source',
choices=[('http', _('If firmware source URL is also HTTP, then '
'serve from original location, otherwise '

View file

@ -18,6 +18,7 @@ from oslo_log import log
from oslo_utils import timeutils
import sushy
from ironic.common import async_steps
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import metrics_utils
@ -220,35 +221,298 @@ class RedfishFirmware(base.FirmwareInterface):
{'node_uuid': node.uuid, 'settings': settings})
self._execute_firmware_update(node, update_service, settings)
fw_upd = settings[0]
# Store updated settings and save
node.set_driver_internal_info('redfish_fw_updates', settings)
node.save()
# Return wait state to keep the step active and let polling handle
# the monitoring and eventual completion/reboot
return async_steps.get_return_state(node)
def _setup_bmc_update_monitoring(self, node, fw_upd):
"""Set up monitoring for BMC firmware update.
BMC updates do not reboot immediately. Instead, we check the BMC
version periodically. If the version changed, we continue without
reboot. If timeout expires without version change, we trigger a reboot.
:param node: the Ironic node object
:param fw_upd: firmware update settings dict
"""
# Record current BMC version before update
try:
system = redfish_utils.get_system(node)
manager = redfish_utils.get_manager(node, system)
current_bmc_version = manager.firmware_version
node.set_driver_internal_info(
'bmc_fw_version_before_update', current_bmc_version)
LOG.debug('BMC version before update for node %(node)s: '
'%(version)s',
{'node': node.uuid, 'version': current_bmc_version})
except Exception as e:
LOG.warning('Could not read BMC version before update for '
'node %(node)s: %(error)s',
{'node': node.uuid, 'error': e})
node.set_driver_internal_info(
'bmc_fw_check_start_time',
str(timeutils.utcnow().isoformat()))
LOG.info('BMC firmware update for node %(node)s. '
'Monitoring BMC version instead of immediate reboot.',
{'node': node.uuid})
# Use wait_interval or default reboot delay
wait_interval = fw_upd.get('wait')
if wait_interval is None:
wait_interval = CONF.redfish.firmware_update_reboot_delay
fw_upd['wait'] = wait_interval
# Set wait_start_time so polling can detect when task monitor
# becomes unresponsive and transition to version checking
fw_upd['wait_start_time'] = str(timeutils.utcnow().isoformat())
# Mark this as a BMC update so we can handle timeouts properly
fw_upd['component_type'] = redfish_utils.BMC
# Check if BMC firmware is being updated - BMC may need extended
# timeout after firmware update as it transitions through states
has_bmc_update = any(
setting.get('component', '') == redfish_utils.BMC
for setting in settings
)
# Use extended timeout for BMC updates to handle transitional states
if has_bmc_update and wait_interval is None:
reboot_timeout = CONF.redfish.firmware_update_bmc_timeout
LOG.info('BMC firmware update detected, using extended reboot '
'timeout of %(timeout)s seconds for node %(node)s to '
'handle BMC transitional states',
{'timeout': reboot_timeout, 'node': node.uuid})
else:
reboot_timeout = wait_interval
# BMC: Set async flags without immediate reboot
deploy_utils.set_async_step_flags(
node,
reboot=True,
skip_current_step=True,
reboot=False,
polling=True
)
return deploy_utils.reboot_to_finish_step(task, timeout=reboot_timeout,
disable_ramdisk=True)
def _setup_nic_update_monitoring(self, node):
    """Prepare the async step flags for a NIC firmware update.

    NIC firmware behavior varies by hardware: some NICs start updating
    immediately, others need a reboot before the update begins. No power
    action is issued here; this method only logs that the task state
    will be monitored and sets the async step flags with both reboot
    and polling enabled.

    :param node: the Ironic node object
    """
    log_args = {'node': node.uuid}
    LOG.info('NIC firmware update for node %(node)s. Will monitor '
             'task state to determine if reboot is needed.',
             log_args)

    # Reboot is flagged as enabled up front; whether a reboot actually
    # happens is decided later, based on what the hardware needs.
    async_flags = {'reboot': True, 'polling': True}
    deploy_utils.set_async_step_flags(node, **async_flags)
def _setup_bios_update_monitoring(self, node):
    """Prepare the async step flags for a BIOS firmware update.

    A BIOS update needs a reboot to be applied, and that reboot is meant
    to happen once the update task has begun rather than after it
    completes. This method does not reboot by itself; it logs the intent
    and sets the async step flags with reboot and polling enabled.

    :param node: the Ironic node object
    """
    log_args = {'node': node.uuid}
    LOG.info('BIOS firmware update for node %(node)s. Will reboot '
             'when update task starts.',
             log_args)

    # Both flags are enabled so the step stays active while polling.
    async_flags = {'reboot': True, 'polling': True}
    deploy_utils.set_async_step_flags(node, **async_flags)
def _setup_default_update_monitoring(self, node, fw_upd):
    """Prepare monitoring for a component of unknown type.

    Logs a warning naming the unrecognized component, makes sure the
    update settings carry a ``wait`` value, and sets the async step
    flags with reboot and polling enabled.

    :param node: the Ironic node object
    :param fw_upd: firmware update settings dict; its ``wait`` key is
        filled in when it is missing or None
    """
    LOG.warning(
        'Unknown component type %(component)s for node %(node)s. '
        'Using default firmware update behavior.',
        {'component': fw_upd.get('component', ''), 'node': node.uuid})

    if fw_upd.get('wait') is None:
        # A per-node driver_info value takes priority over the
        # configuration option.
        node_wait = node.driver_info.get(
            'firmware_update_unresponsive_bmc_wait')
        fw_upd['wait'] = (
            node_wait
            or CONF.redfish.firmware_update_wait_unresponsive_bmc)

    async_flags = {'reboot': True, 'polling': True}
    deploy_utils.set_async_step_flags(node, **async_flags)
def _get_current_bmc_version(self, node):
    """Read the firmware version currently reported by the BMC.

    The BMC may be temporarily unreachable after a firmware update, so
    Redfish and sushy errors raised while looking up the system, the
    manager or the version are logged at debug level and reported as
    "version unavailable" instead of being propagated.

    :param node: the Ironic node object
    :returns: the BMC firmware version, or None if it could not be read
    """
    expected_errors = (exception.RedfishError,
                       exception.RedfishConnectionError,
                       sushy.exceptions.SushyError)
    version = None
    try:
        system = redfish_utils.get_system(node)
        version = redfish_utils.get_manager(node, system).firmware_version
    except expected_errors as e:
        # Unresponsiveness is expected here, so this is not treated as
        # a failure of the update.
        LOG.debug('BMC temporarily unresponsive for node %(node)s: '
                  '%(error)s', {'node': node.uuid, 'error': e})
    return version
def _handle_bmc_update_completion(self, task, update_service,
                                  settings, current_update):
    """Handle BMC firmware update completion with version checking.

    A BMC update is not followed by an immediate reboot. Each call
    compares the current BMC firmware version against the version
    recorded under ``bmc_fw_version_before_update``:

    * version changed: the update is done, continue without reboot;
    * check timeout reached and the version is readable but unchanged:
      continue without reboot;
    * check timeout reached and the version cannot be compared (BMC
      unresponsive or no recorded version): request a reboot, then
      continue;
    * otherwise: schedule another check after
      ``CONF.redfish.firmware_update_bmc_version_check_interval``.

    :param task: a TaskManager instance
    :param update_service: the sushy firmware update service
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    """
    node = task.node

    # The helper returns None while the BMC is unresponsive, which is
    # expected right after a firmware update.
    current_version = self._get_current_bmc_version(node)
    version_before = node.driver_internal_info.get(
        'bmc_fw_version_before_update')
    versions_known = (current_version is not None
                      and version_before is not None)

    if versions_known and current_version != version_before:
        LOG.info(
            'BMC firmware version for node %(node)s changed from '
            '%(old)s to %(new)s. Update complete. Continuing without '
            'reboot.',
            {'node': node.uuid, 'old': version_before,
             'new': current_version})
        node.del_driver_internal_info('bmc_fw_check_start_time')
        node.del_driver_internal_info('bmc_fw_version_before_update')
        node.save()
        self._continue_updates(task, update_service, settings)
        return

    check_start_time = node.driver_internal_info.get(
        'bmc_fw_check_start_time')
    if check_start_time:
        check_start = timeutils.parse_isotime(check_start_time)
        # total_seconds() is the full elapsed duration. The previous
        # ``timedelta.seconds`` is only the seconds component: it wraps
        # after one day and is ~86399 for a slightly negative delta
        # (clock skew), which would trigger a false timeout.
        elapsed_seconds = (
            timeutils.utcnow(True) - check_start).total_seconds()

        timeout = current_update.get('wait')
        if timeout is None:
            # Covers both a missing key and an explicit None value.
            timeout = CONF.redfish.firmware_update_reboot_delay

        if elapsed_seconds >= timeout:
            if versions_known:
                # Reaching this point with known versions means they
                # are equal (a change returned above): skip the reboot.
                LOG.info(
                    'BMC firmware version for node %(node)s did not '
                    'change (still %(version)s). Update appears to be '
                    'a no-op or does not require reboot. Continuing '
                    'without reboot.',
                    {'node': node.uuid, 'version': current_version})
            else:
                # The version could not be compared: reboot to apply.
                LOG.warning(
                    'BMC firmware version check timeout expired for '
                    'node %(node)s after %(elapsed)s seconds. '
                    'Will reboot to complete firmware update.',
                    {'node': node.uuid,
                     'elapsed': int(elapsed_seconds)})
                node.set_driver_internal_info(
                    'firmware_reboot_requested', True)
                # Enable the reboot flag now that a reboot is wanted.
                deploy_utils.set_async_step_flags(
                    node,
                    reboot=True,
                    polling=True
                )
            node.del_driver_internal_info('bmc_fw_check_start_time')
            node.del_driver_internal_info('bmc_fw_version_before_update')
            node.save()
            self._continue_updates(task, update_service, settings)
            return

    # Neither finished nor timed out: schedule the next version check.
    wait_interval = (
        CONF.redfish.firmware_update_bmc_version_check_interval)
    current_update['wait'] = wait_interval
    current_update['wait_start_time'] = str(
        timeutils.utcnow().isoformat())
    current_update['bmc_version_checking'] = True
    node.set_driver_internal_info('redfish_fw_updates', settings)
    node.save()
    LOG.debug('BMC firmware version check continuing for node %(node)s. '
              'Will check again in %(interval)s seconds.',
              {'node': node.uuid, 'interval': wait_interval})
def _handle_nic_update_completion(self, task, update_service, settings,
                                  current_update):
    """Finish a NIC firmware update and move on to the next update.

    When ``nic_needs_post_completion_reboot`` is set on the current
    update, a reboot is requested through the
    ``firmware_reboot_requested`` driver internal info entry. Otherwise
    the reboot is taken to have happened while the update was starting.
    In both cases the NIC bookkeeping keys are dropped from the current
    update before continuing.

    :param task: a TaskManager instance
    :param update_service: the sushy firmware update service
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    """
    node = task.node
    log_args = {'node': node.uuid}

    if current_update.get('nic_needs_post_completion_reboot', False):
        LOG.info(
            'NIC firmware update task completed for node '
            '%(node)s. Reboot required to apply update.',
            log_args)
        node.set_driver_internal_info(
            'firmware_reboot_requested', True)
        current_update.pop('nic_needs_post_completion_reboot', None)
    else:
        LOG.info(
            'NIC firmware update task completed for node '
            '%(node)s. Reboot already occurred during update '
            'start.', log_args)

    # These two keys are cleared on both paths.
    for stale_key in ('nic_starting_timestamp', 'nic_reboot_triggered'):
        current_update.pop(stale_key, None)

    self._continue_updates(task, update_service, settings)
def _execute_firmware_update(self, node, update_service, settings):
"""Executes the next firmware update to the node
@ -261,6 +525,8 @@ class RedfishFirmware(base.FirmwareInterface):
to be executed.
"""
fw_upd = settings[0]
# Store power timeout to use on reboot operations
fw_upd['power_timeout'] = CONF.redfish.firmware_update_reboot_delay
# NOTE(janders) try to get the collection of Systems on the BMC
# to determine if there may be more than one System
try:
@ -299,27 +565,9 @@ class RedfishFirmware(base.FirmwareInterface):
{'node': node.uuid, 'error': e.message})
raise exception.RedfishError(error=e)
# NOTE(iurygregory): In case we are doing firmware updates we need to
# account for unresponsive BMC, in this case we wait for a set of
# minutes before proceeding to the power actions.
# In case the node has firmware_update_unresponsive_bmc_wait set we
# give priority over the configuration option.
wait_unres_bmc = (
node.driver_info.get('firmware_update_unresponsive_bmc_wait')
or CONF.redfish.firmware_update_wait_unresponsive_bmc
)
LOG.debug('BMC firmware update in progress. Waiting %(wait_time)s '
'seconds before proceeding to reboot the node %(node_uuid)s '
'to complete the step', {'node_uuid': node.uuid,
'wait_time': wait_unres_bmc})
# Store task monitor URI for periodic task polling
# TODO(iurygregory): Improve the logic here to identify if the BMC
# is back, so we don't have to unconditionally wait.
# The wait_unres_bmc will be the maximum time to wait.
time.sleep(wait_unres_bmc)
LOG.debug('Wait completed. Proceeding to reboot the node '
'%(node_uuid)s to complete the step.',
{'node_uuid': node.uuid})
# NOTE(janders): Component-specific wait/reboot behavior is now
# handled by the update() method and periodic polling, not here
fw_upd['task_monitor'] = task_monitor.task_monitor_uri
node.set_driver_internal_info('redfish_fw_updates', settings)
@ -332,6 +580,19 @@ class RedfishFirmware(base.FirmwareInterface):
fw_clean.append(cleanup)
node.set_driver_internal_info('firmware_cleanup', fw_clean)
component = fw_upd.get('component', '')
component_type = redfish_utils.get_component_type(component)
if component_type == redfish_utils.BMC:
self._setup_bmc_update_monitoring(node, fw_upd)
elif component_type == redfish_utils.NIC:
self._setup_nic_update_monitoring(node)
elif component_type == redfish_utils.BIOS:
self._setup_bios_update_monitoring(node)
else:
self._setup_default_update_monitoring(node, fw_upd)
def _validate_resources_stability(self, node):
"""Validate that BMC resources are consistently available.
@ -458,10 +719,27 @@ class RedfishFirmware(base.FirmwareInterface):
return
if len(settings) == 1:
# Last firmware update - check if reboot is needed
reboot_requested = node.driver_internal_info.get(
'firmware_reboot_requested', False)
self._clear_updates(node)
LOG.info('Firmware updates completed for node %(node)s',
{'node': node.uuid})
# If reboot was requested (e.g., for BMC timeout or NIC
# completion), trigger the reboot before notifying conductor
if reboot_requested:
LOG.info('Rebooting node %(node)s to apply firmware updates',
{'node': node.uuid})
manager_utils.node_power_action(task, states.REBOOT)
LOG.debug('Validating BMC responsiveness before resuming '
'conductor operations for node %(node)s',
{'node': node.uuid})
self._validate_resources_stability(node)
if task.node.clean_step:
manager_utils.notify_conductor_resume_clean(task)
elif task.node.service_step:
@ -470,12 +748,40 @@ class RedfishFirmware(base.FirmwareInterface):
manager_utils.notify_conductor_resume_deploy(task)
else:
# Validate BMC resources are stable before continuing next update
LOG.info('Validating BMC responsiveness before continuing '
'to next firmware update for node %(node)s',
{'node': node.uuid})
self._validate_resources_stability(node)
settings.pop(0)
self._execute_firmware_update(node,
update_service,
settings)
node.save()
manager_utils.node_power_action(task, states.REBOOT)
# Only reboot if the component code requested it.
if task.node.clean_step:
reboot_field = async_steps.CLEANING_REBOOT
elif task.node.deploy_step:
reboot_field = async_steps.DEPLOYMENT_REBOOT
elif task.node.service_step:
reboot_field = async_steps.SERVICING_REBOOT
else:
reboot_field = None
# Default to reboot=True for backwards compatibility.
should_reboot = (node.driver_internal_info.get(reboot_field, True)
if reboot_field else True)
if should_reboot:
power_timeout = settings[0].get('power_timeout', 0)
manager_utils.node_power_action(task, states.REBOOT,
power_timeout)
else:
LOG.debug('Component requested no immediate reboot for node '
'%(node)s. Continuing with async polling.',
{'node': node.uuid})
def _clear_updates(self, node):
"""Clears firmware updates artifacts
@ -490,6 +796,7 @@ class RedfishFirmware(base.FirmwareInterface):
firmware_utils.cleanup(node)
node.del_driver_internal_info('redfish_fw_updates')
node.del_driver_internal_info('firmware_cleanup')
node.del_driver_internal_info('firmware_reboot_requested')
node.save()
@METRICS.timer('RedfishFirmware._query_update_failed')
@ -528,6 +835,329 @@ class RedfishFirmware(base.FirmwareInterface):
"""Periodic job to check firmware update tasks."""
self._check_node_redfish_firmware_update(task)
def _handle_task_completion(self, task, sushy_task, messages,
                            update_service, settings, current_update):
    """Act on the final state of a firmware update task.

    A task counts as successful when its state is Completed and its
    status is OK or Warning. Failures clear the pending updates and are
    reported through the error handler matching the node's current
    clean, deploy or service step. Successes are dispatched to the
    component-specific completion handling.

    :param task: a TaskManager instance
    :param sushy_task: the sushy task object
    :param messages: list of task messages
    :param update_service: the sushy firmware update service
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    """
    node = task.node
    succeeded = (
        sushy_task.task_state == sushy.TASK_STATE_COMPLETED
        and sushy_task.task_status in (sushy.HEALTH_OK,
                                       sushy.HEALTH_WARNING))

    if not succeeded:
        error_msg = (_('Firmware update failed for node %(node)s, '
                       'firmware %(firmware_image)s. '
                       'Error: %(errors)s') %
                     {'node': node.uuid,
                      'firmware_image': current_update['url'],
                      'errors': ", ".join(messages)})
        self._clear_updates(node)
        if task.node.clean_step:
            manager_utils.cleaning_error_handler(task, error_msg)
        elif task.node.deploy_step:
            manager_utils.deploying_error_handler(task, error_msg)
        elif task.node.service_step:
            manager_utils.servicing_error_handler(task, error_msg)
        return

    LOG.info('Firmware update task completed for node %(node)s, '
             'firmware %(firmware_image)s: %(messages)s.',
             {'node': node.uuid,
              'firmware_image': current_update['url'],
              'messages': ", ".join(messages)})

    component_type = redfish_utils.get_component_type(
        current_update.get('component', ''))

    if component_type == redfish_utils.BMC:
        # BMC: version checking takes the place of an immediate reboot.
        self._handle_bmc_update_completion(
            task, update_service, settings, current_update)
        return

    if component_type == redfish_utils.NIC:
        # NIC: the handler decides whether a reboot is still needed.
        self._handle_nic_update_completion(
            task, update_service, settings, current_update)
        return

    if component_type == redfish_utils.BIOS:
        # BIOS: the reboot was triggered when the task started, so only
        # the trigger marker has to be dropped before continuing.
        LOG.info('BIOS firmware update task completed for node '
                 '%(node)s. System was already rebooted. '
                 'Proceeding with continuation.',
                 {'node': node.uuid})
        current_update.pop('bios_reboot_triggered', None)

    # BIOS and unknown component types both continue from here.
    self._continue_updates(task, update_service, settings)
def _handle_nic_task_starting(self, task, task_monitor, settings,
                              current_update):
    """Handle a NIC firmware update task that is still processing.

    NIC firmware behavior varies by hardware:

    - some NICs need a reboot to START applying (the task stays at
      Starting);
    - some NICs start immediately but need a reboot to APPLY (the task
      goes to Running and a reboot is needed after completion).

    The task is given ``CONF.redfish.firmware_update_nic_starting_wait``
    seconds to leave the Starting state. If it is still Starting after
    that, the node is rebooted once. If it reaches Running, the update
    is marked as needing a reboot after completion.

    :param task: a TaskManager instance
    :param task_monitor: the sushy task monitor
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    :returns: True if a reboot was triggered and polling should stop,
        False to continue polling
    """
    node = task.node

    # Upgrade the lock first since driver_internal_info may be modified.
    task.upgrade_lock()

    try:
        sushy_task = task_monitor.get_task()
        task_state = sushy_task.task_state

        LOG.debug('NIC update task state for node %(node)s: %(state)s',
                  {'node': node.uuid, 'state': task_state})

        if task_state == sushy.TASK_STATE_RUNNING:
            LOG.debug('NIC update task for node %(node)s is running. '
                      'Will wait for completion then reboot.',
                      {'node': node.uuid})
            # Past the starting phase: drop its bookkeeping and record
            # that a reboot is needed once the task completes.
            current_update.pop('nic_starting_timestamp', None)
            current_update.pop('nic_reboot_triggered', None)
            current_update['nic_needs_post_completion_reboot'] = True
            node.set_driver_internal_info('redfish_fw_updates', settings)
            node.save()
            return False

        if task_state == sushy.TASK_STATE_STARTING:
            if current_update.get('nic_reboot_triggered'):
                LOG.debug('NIC firmware update for node %(node)s: '
                          'reboot already triggered, waiting for task '
                          'to progress.', {'node': node.uuid})
                return False

            starting_time = current_update.get('nic_starting_timestamp')
            if not starting_time:
                # First time STARTING is seen: record when it began.
                current_update['nic_starting_timestamp'] = str(
                    timeutils.utcnow().isoformat())
                node.set_driver_internal_info(
                    'redfish_fw_updates', settings)
                node.save()
                LOG.debug('NIC firmware update task for node %(node)s '
                          'is in STARTING state. Waiting to determine if '
                          'reboot is needed to start update.',
                          {'node': node.uuid})
                return False

            start_time = timeutils.parse_isotime(starting_time)
            # total_seconds() is the full elapsed duration. The previous
            # ``timedelta.seconds`` wraps after one day and is ~86399
            # for a slightly negative delta (clock skew), which would
            # trigger a premature reboot.
            elapsed_seconds = (
                timeutils.utcnow(True) - start_time).total_seconds()
            nic_starting_wait = (
                CONF.redfish.firmware_update_nic_starting_wait)

            if elapsed_seconds < nic_starting_wait:
                LOG.debug('NIC update for node %(node)s still in '
                          'STARTING after %(elapsed)s seconds. '
                          'Waiting...',
                          {'node': node.uuid,
                           'elapsed': int(elapsed_seconds)})
                return False

            LOG.info('NIC firmware update task for node %(node)s '
                     'remained in STARTING state for %(wait)s+ seconds. '
                     'Hardware requires reboot to start update. '
                     'Triggering reboot.',
                     {'node': node.uuid, 'wait': nic_starting_wait})

            # Persist the marker before rebooting so that later polls
            # do not reboot again.
            current_update['nic_reboot_triggered'] = True
            current_update.pop('nic_starting_timestamp', None)
            node.set_driver_internal_info('redfish_fw_updates', settings)
            node.save()

            power_timeout = current_update.get('power_timeout', 0)
            manager_utils.node_power_action(task, states.REBOOT,
                                            power_timeout)

            LOG.info('Reboot initiated for node %(node)s to start '
                     'NIC firmware update', {'node': node.uuid})
            return True

    except Exception as e:
        # Deliberately broad: any failure while inspecting the task is
        # treated as transient and retried on the next poll.
        LOG.warning('Unable to check NIC task state for node '
                    '%(node)s: %(error)s. Will retry.',
                    {'node': node.uuid, 'error': e})
        return False

    # Any other task state: nothing to do yet, keep polling.
    return False
def _handle_bios_task_starting(self, task, task_monitor, settings,
                               current_update):
    """Handle a BIOS firmware update task that is still processing.

    BIOS updates need a reboot to apply the firmware, so the node is
    rebooted as soon as the task is seen in the Starting, Running or
    Pending state rather than after task completion. The reboot is
    issued at most once per update, tracked with the
    ``bios_reboot_triggered`` key of the current update.

    :param task: a TaskManager instance
    :param task_monitor: the sushy task monitor
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    :returns: True if a reboot was triggered by this call, False in
        every other case
    """
    node = task.node

    if current_update.get('bios_reboot_triggered'):
        # Already rebooted for this update; just keep polling.
        return False

    # Upgrade the lock first since driver_internal_info may be modified.
    task.upgrade_lock()

    try:
        task_state = task_monitor.get_task().task_state

        LOG.debug('BIOS update task state for node %(node)s: '
                  '%(state)s',
                  {'node': node.uuid,
                   'state': task_state})

        # Only these states are treated as "the task has started".
        started_states = (sushy.TASK_STATE_STARTING,
                          sushy.TASK_STATE_RUNNING,
                          sushy.TASK_STATE_PENDING)
        if task_state in started_states:
            LOG.info('BIOS firmware update task has started for '
                     'node %(node)s (state: %(state)s). '
                     'Triggering reboot to apply update.',
                     {'node': node.uuid,
                      'state': task_state})

            # Persist the marker before rebooting so that later polls
            # do not reboot again.
            current_update['bios_reboot_triggered'] = True
            node.set_driver_internal_info(
                'redfish_fw_updates', settings)
            node.save()

            power_timeout = current_update.get('power_timeout', 0)
            manager_utils.node_power_action(task, states.REBOOT,
                                            power_timeout)

            LOG.info('Reboot initiated for node %(node)s to apply '
                     'BIOS firmware update',
                     {'node': node.uuid})
            return True

    except Exception as e:
        # Deliberately broad: any failure while inspecting the task is
        # treated as transient and retried on the next poll.
        LOG.warning('Unable to check BIOS task state for node '
                    '%(node)s: %(error)s. Will retry.',
                    {'node': node.uuid, 'error': e})

    # Reached both after an error and when the task has not started
    # yet, so the documented boolean is returned on every path.
    return False
def _handle_wait_completion(self, task, update_service, settings,
                            current_update):
    """Decide what to do once the wait for the current update expired.

    Three cases are distinguished:

    * the update is in BMC version-checking mode: clear the marker and
      run another version check;
    * the update is a BMC update whose wait expired: keep monitoring if
      the task monitor still reports processing, otherwise switch to
      version checking;
    * anything else: continue with the updates, first requesting a
      reboot if this is the last update and its component type is
      unknown.

    :param task: a TaskManager instance
    :param update_service: the sushy firmware update service
    :param settings: firmware update settings
    :param current_update: the current firmware update being processed
    """
    node = task.node

    # Upgrade the lock first since driver_internal_info may be modified.
    task.upgrade_lock()

    if current_update.get('bmc_version_checking'):
        current_update.pop('bmc_version_checking', None)
        node.set_driver_internal_info(
            'redfish_fw_updates', settings)
        node.save()
        self._handle_bmc_update_completion(
            task, update_service, settings, current_update)
        return

    is_bmc_update = (
        current_update.get('component_type') == redfish_utils.BMC)
    if not is_bmc_update:
        # Regular wait completion. BIOS components reboot when their
        # task starts, so they are not expected on this path.
        if len(settings) == 1:
            component_type = redfish_utils.get_component_type(
                current_update.get('component', ''))
            if component_type is None:
                # Unknown component on the last update: a reboot may
                # be needed, so request one.
                node.set_driver_internal_info(
                    'firmware_reboot_requested', True)
                node.save()
        self._continue_updates(task, update_service, settings)
        return

    # BMC update whose wait expired: find out whether the task is still
    # being processed before switching to version checking.
    task_still_running = False
    try:
        monitor = redfish_utils.get_task_monitor(
            node, current_update['task_monitor'])
        if monitor.is_processing:
            task_still_running = True
            LOG.debug('BMC firmware update wait expired but task '
                      ' still processing for node %(node)s. '
                      'Continuing to monitor task completion.',
                      {'node': node.uuid})
    except exception.RedfishConnectionError as e:
        LOG.debug('Unable to communicate with task monitor for node '
                  '%(node)s during wait completion: %(error)s. '
                  'BMC may be resetting, will transition to version '
                  'checking.', {'node': node.uuid, 'error': e})
    except exception.RedfishError as e:
        LOG.debug('Task monitor unavailable for node %(node)s: '
                  '%(error)s. Task may have completed, transitioning '
                  'to version checking.',
                  {'node': node.uuid, 'error': e})

    if task_still_running:
        # Keep monitoring task completion; no version checking yet.
        node.set_driver_internal_info('redfish_fw_updates', settings)
        node.save()
        return

    # Task completed, was deleted, or the BMC is unavailable.
    LOG.info('BMC firmware update wait expired for node %(node)s. '
             'Task completed or unavailable. Transitioning to version '
             'checking mode.',
             {'node': node.uuid})
    self._handle_bmc_update_completion(
        task, update_service, settings, current_update)
@METRICS.timer('RedfishFirmware._check_node_redfish_firmware_update')
def _check_node_redfish_firmware_update(self, task):
"""Check the progress of running firmware update on a node."""
@ -563,7 +1193,9 @@ class RedfishFirmware(base.FirmwareInterface):
current_update.pop('wait', None)
current_update.pop('wait_start_time', None)
self._continue_updates(task, update_service, settings)
# Handle wait completion
self._handle_wait_completion(
task, update_service, settings, current_update)
else:
LOG.debug('Continuing to wait after firmware update '
'%(firmware_image)s on node %(node)s. '
@ -595,6 +1227,27 @@ class RedfishFirmware(base.FirmwareInterface):
self._continue_updates(task, update_service, settings)
return
# Special handling for BIOS and NIC updates
component = current_update.get('component', '')
component_type = redfish_utils.get_component_type(component)
if task_monitor.is_processing and component_type == redfish_utils.BIOS:
# For BIOS, check if task has reached STARTING state
# and trigger reboot immediately
if self._handle_bios_task_starting(task, task_monitor, settings,
current_update):
return # Reboot triggered, done
# Task is still processing, keep polling
return
if task_monitor.is_processing and component_type == redfish_utils.NIC:
# For NIC, wait 30s to see if hardware needs reboot
if self._handle_nic_task_starting(task, task_monitor, settings,
current_update):
return # Reboot triggered, done
# Task is still processing (or waiting), keep polling
return
if not task_monitor.is_processing:
# The last response does not necessarily contain a Task,
# so get it
@ -607,38 +1260,17 @@ class RedfishFirmware(base.FirmwareInterface):
sushy_task.parse_messages()
if sushy_task.messages is not None:
messages = [m.message for m in sushy_task.messages]
for m in sushy_task.messages:
msg = m.message
if not msg or msg.lower() in ['unknown', 'unknown error']:
msg = m.message_id
if msg:
messages.append(msg)
task.upgrade_lock()
if (sushy_task.task_state == sushy.TASK_STATE_COMPLETED
and sushy_task.task_status in
[sushy.HEALTH_OK, sushy.HEALTH_WARNING]):
LOG.info('Firmware update task completed for node %(node)s, '
'firmware %(firmware_image)s: %(messages)s. '
'Starting BMC response validation.',
{'node': node.uuid,
'firmware_image': current_update['url'],
'messages': ", ".join(messages)})
# Validate BMC resources are consistently available
self._validate_resources_stability(node)
self._continue_updates(task, update_service, settings)
else:
error_msg = (_('Firmware update failed for node %(node)s, '
'firmware %(firmware_image)s. '
'Error: %(errors)s') %
{'node': node.uuid,
'firmware_image': current_update['url'],
'errors': ", ".join(messages)})
self._clear_updates(node)
if task.node.clean_step:
manager_utils.cleaning_error_handler(task, error_msg)
elif task.node.deploy_step:
manager_utils.deploying_error_handler(task, error_msg)
elif task.node.service_step:
manager_utils.servicing_error_handler(task, error_msg)
self._handle_task_completion(task, sushy_task, messages,
update_service, settings,
current_update)
else:
LOG.debug('Firmware update in progress for node %(node)s, '
'firmware %(firmware_image)s.',

View file

@ -94,11 +94,35 @@ BMC = 'bmc'
# NIC components are not named by one fixed string: their settings name is
# this prefix followed by an adapter identifier (e.g. 'nic:BCM57414'), so
# they are recognised with a prefix match.
NIC_COMPONENT_PREFIX = "nic:"
"Prefix for NIC Firmware Components"
# Component type reported for any component name carrying the prefix above.
NIC = "nic"
"NIC Firmware Component type"
# Components that are matched by exact name.
FIRMWARE_COMPONENTS = [BIOS, BMC]
"""Firmware Components available to update"""
def get_component_type(component):
    """Determine the type of firmware component.

    Note: This helper exists primarily to handle NIC components which use
    a prefix pattern (e.g., 'nic:BCM57414', 'nic:adapter1') rather than
    exact string matches. This centralizes the component type detection
    logic that is used in multiple places (update initiation, task
    monitoring, completion handling) and provides a single source of truth
    for component type classification.

    :param component: The component name from settings. Anything that is
        not a string (for example ``None`` when the key is missing) is
        treated as an unknown component.
    :returns: One of 'bios', 'bmc', 'nic', or None
    """
    # A missing or malformed component must classify as "unknown" rather
    # than blow up with AttributeError on the prefix check below.
    if not isinstance(component, str):
        return None
    if component in (BIOS, BMC):
        return component
    if component.startswith(NIC_COMPONENT_PREFIX):
        return NIC
    return None
def parse_driver_info(node):
"""Parse the information required for Ironic to connect to Redfish.

View file

@ -50,6 +50,41 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
self.node = obj_utils.create_test_node(
self.context, driver='redfish', driver_info=INFO_DICT)
def _mock_exc_fwup_side_effect(self, firmware_interface, node,
                               update_service, settings_list):
    """Side effect standing in for a mocked _execute_firmware_update.

    Tests that patch ``_execute_firmware_update`` (to avoid JSON
    serialization issues) still need its two visible effects on the first
    entry of ``settings_list``: a ``task_monitor`` URI is recorded on it,
    and the component-specific monitoring setup method is invoked.

    :param firmware_interface: The firmware interface instance; with
        autospec it arrives as the patched method's ``self`` and is the
        object whose ``_setup_*_update_monitoring`` method gets called
    :param node: The node being updated
    :param update_service: The update service (not used by this helper)
    :param settings_list: The settings list; its first entry is modified
        in place
    """
    first_update = settings_list[0]
    first_update['task_monitor'] = '/redfish/v1/TaskService/Tasks/1'
    component_name = first_update.get('component', '')

    # Run the real setup method so driver_internal_info ends up in the
    # same state the un-mocked code path would leave it in.
    if component_name == redfish_utils.BMC:
        firmware_interface._setup_bmc_update_monitoring(node, first_update)
        return
    if component_name.startswith(redfish_utils.NIC_COMPONENT_PREFIX):
        firmware_interface._setup_nic_update_monitoring(node)
        return
    if component_name == redfish_utils.BIOS:
        firmware_interface._setup_bios_update_monitoring(node)
        return
    # Anything unrecognised (including a missing component) falls back to
    # the default monitoring setup.
    firmware_interface._setup_default_update_monitoring(node, first_update)
def test_get_properties(self):
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
@ -837,12 +872,13 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
interface._continue_updates.assert_not_called()
@mock.patch.object(redfish_fw.RedfishFirmware,
'_validate_resources_stability', autospec=True)
'_handle_bmc_update_completion', autospec=True)
@mock.patch.object(redfish_fw, 'LOG', autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
@mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
def test__check_node_firmware_update_done(self, tm_mock, get_us_mock,
log_mock, validate_mock):
log_mock,
bmc_completion_mock):
task_mock = mock.Mock()
task_mock.task_state = sushy.TASK_STATE_COMPLETED
task_mock.task_status = sushy.HEALTH_OK
@ -859,19 +895,23 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
task.upgrade_lock.assert_called_once_with()
info_calls = [
mock.call('Firmware update task completed for node %(node)s, '
'firmware %(firmware_image)s: %(messages)s. '
'Starting BMC response validation.',
'firmware %(firmware_image)s: %(messages)s.',
{'node': self.node.uuid,
'firmware_image': 'https://bmc/v1.0.1',
'messages': 'Firmware update done'})]
log_mock.info.assert_has_calls(info_calls)
validate_mock.assert_called_once()
# NOTE(iurygregory): _validate_resources_stability is now called
# in _continue_updates before power operations, not in
# _handle_task_completion
interface._continue_updates.assert_called_once_with(
task, get_us_mock.return_value,
# BMC updates now go through _handle_bmc_update_completion
bmc_completion_mock.assert_called_once_with(
interface, task, get_us_mock.return_value,
[{'component': 'bmc', 'url': 'https://bmc/v1.0.1',
'task_monitor': '/task/1'}]
'task_monitor': '/task/1'}],
{'component': 'bmc', 'url': 'https://bmc/v1.0.1',
'task_monitor': '/task/1'}
)
@mock.patch.object(firmware_utils, 'download_to_temp', autospec=True)
@ -964,14 +1004,19 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
]
log_mock.debug.assert_has_calls(debug_call)
@mock.patch.object(redfish_fw.RedfishFirmware,
'_validate_resources_stability', autospec=True)
@mock.patch.object(redfish_fw, 'LOG', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
autospec=True)
def test_continue_updates_last(self, cond_resume_clean_mock, log_mock):
def test_continue_updates_last(self, cond_resume_clean_mock, log_mock,
validate_mock):
self._generate_new_driver_internal_info(['bmc'])
task = self._test_continue_updates()
cond_resume_clean_mock.assert_called_once_with(task)
# Verify BMC validation was called before resuming conductor
validate_mock.assert_called_once()
info_call = [
mock.call('Firmware updates completed for node %(node)s',
@ -979,15 +1024,19 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
]
log_mock.info.assert_has_calls(info_call)
@mock.patch.object(redfish_fw.RedfishFirmware,
'_validate_resources_stability', autospec=True)
@mock.patch.object(redfish_fw, 'LOG', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_service',
autospec=True)
def test_continue_updates_last_service(self, cond_resume_service_mock,
log_mock):
log_mock, validate_mock):
self._generate_new_driver_internal_info_service(['bmc'])
task = self._test_continue_updates()
cond_resume_service_mock.assert_called_once_with(task)
# Verify BMC validation was called before resuming conductor
validate_mock.assert_called_once()
info_call = [
mock.call('Firmware updates completed for node %(node)s',
@ -995,12 +1044,15 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
]
log_mock.info.assert_has_calls(info_call)
@mock.patch.object(redfish_fw.RedfishFirmware,
'_validate_resources_stability', autospec=True)
@mock.patch.object(redfish_fw, 'LOG', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(redfish_utils, 'get_system_collection', autospec=True)
def test_continue_updates_more_updates(self, get_system_collection_mock,
node_power_action_mock,
log_mock):
log_mock,
validate_mock):
cfg.CONF.set_override('firmware_update_wait_unresponsive_bmc', 0,
'redfish')
self._generate_new_driver_internal_info(['bmc', 'bios'])
@ -1028,12 +1080,18 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
log_mock.debug.assert_has_calls(debug_calls)
self.assertEqual(
[{'component': 'bios', 'url': 'https://bios/v1.0.1',
'task_monitor': '/task/2'}],
'task_monitor': '/task/2', 'power_timeout': 300}],
task.node.driver_internal_info['redfish_fw_updates'])
update_service_mock.simple_update.assert_called_once_with(
'https://bios/v1.0.1')
task.node.save.assert_called_once_with()
node_power_action_mock.assert_called_once_with(task, states.REBOOT)
# NOTE(iurygregory): node.save() is called twice:
# 1. Inside _execute_firmware_update via setup methods
# 2. In _continue_updates after _execute_firmware_update returns
self.assertEqual(task.node.save.call_count, 2)
# Verify BMC validation was called before continuing to next update
validate_mock.assert_called_once_with(firmware, task.node)
node_power_action_mock.assert_called_once_with(task, states.REBOOT,
300)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(redfish_utils, 'get_system_collection', autospec=True)
@ -1089,12 +1147,15 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
update_service_mock.simple_update.assert_called_once_with(
'https://bios/v1.0.1', targets=[mock.ANY])
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_utils, 'get_manager', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(redfish_utils, 'get_system_collection', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
def test__execute_firmware_update_unresponsive_bmc(self, sleep_mock,
def test__execute_firmware_update_unresponsive_bmc(self,
get_sys_collec_mock,
system_mock):
system_mock,
manager_mock,
set_async_mock):
cfg.CONF.set_override('firmware_update_wait_unresponsive_bmc', 1,
'redfish')
self._generate_new_driver_internal_info(['bmc'])
@ -1106,27 +1167,31 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
system_collection_mock.get_members.return_value = resp_obj['Members']
get_sys_collec_mock.return_value = system_collection_mock
# Mock BMC version reading for setup
mock_manager = mock.Mock()
mock_manager.firmware_version = '1.0.0'
manager_mock.return_value = mock_manager
task_monitor_mock = mock.Mock()
task_monitor_mock.task_monitor_uri = '/task/2'
update_service_mock = mock.Mock()
update_service_mock.simple_update.return_value = task_monitor_mock
firmware = redfish_fw.RedfishFirmware()
settings = [{'component': 'bmc', 'url': 'https://bmc/v1.2.3'}]
firmware._execute_firmware_update(self.node, update_service_mock,
settings)
update_service_mock.simple_update.assert_called_once_with(
'https://bmc/v1.2.3')
sleep_mock.assert_called_once_with(
CONF.redfish.firmware_update_wait_unresponsive_bmc)
# Verify BMC monitoring setup was called (internally by _execute)
set_async_mock.assert_called_once_with(
self.node, reboot=False, polling=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_utils, 'get_manager', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(redfish_utils, 'get_system_collection', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
def test__execute_firmware_update_unresponsive_bmc_node_override(
self, sleep_mock, get_sys_collec_mock, system_mock):
self, get_sys_collec_mock, system_mock, manager_mock,
set_async_mock):
self._generate_new_driver_internal_info(['bmc'])
# Set a specific value for firmware_update_unresponsive_bmc_wait for
# the node
@ -1135,12 +1200,10 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
d_info['firmware_update_unresponsive_bmc_wait'] = 1
self.node.driver_info = d_info
self.node.save()
self.assertNotEqual(
CONF.redfish.firmware_update_wait_unresponsive_bmc,
self.node.driver_info.get('firmware_update_unresponsive_bmc_wait')
)
with open(
'ironic/tests/json_samples/systems_collection_single.json'
) as f:
@ -1149,21 +1212,24 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
system_collection_mock.get_members.return_value = resp_obj['Members']
get_sys_collec_mock.return_value = system_collection_mock
# Mock BMC version reading for setup
mock_manager = mock.Mock()
mock_manager.firmware_version = '1.0.0'
manager_mock.return_value = mock_manager
task_monitor_mock = mock.Mock()
task_monitor_mock.task_monitor_uri = '/task/2'
update_service_mock = mock.Mock()
update_service_mock.simple_update.return_value = task_monitor_mock
firmware = redfish_fw.RedfishFirmware()
settings = [{'component': 'bmc', 'url': 'https://bmc/v1.2.3'}]
firmware._execute_firmware_update(self.node, update_service_mock,
settings)
update_service_mock.simple_update.assert_called_once_with(
'https://bmc/v1.2.3')
sleep_mock.assert_called_once_with(
self.node.driver_info.get('firmware_update_unresponsive_bmc_wait')
)
# Verify BMC monitoring setup was called (internally by _execute)
set_async_mock.assert_called_once_with(
self.node, reboot=False, polling=True)
def test__validate_resources_stability_success(self):
"""Test successful BMC resource validation with consecutive success."""
@ -1380,7 +1446,8 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
shared=True) as task:
with mock.patch.object(redfish_utils, 'get_system',
autospec=True) as system_mock, \
mock.patch.object(time, 'time', autospec=True) as time_mock:
mock.patch.object(time, 'time', autospec=True) as time_mock, \
mock.patch.object(time, 'sleep', autospec=True):
# Mock BadRequestError from sushy with proper arguments
mock_response = mock.Mock()
@ -1396,8 +1463,8 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
firmware._validate_resources_stability,
task.node)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(redfish_utils, 'get_manager', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@ -1405,21 +1472,43 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
def test_update_bmc_uses_configured_timeout(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
"""Test BMC firmware update uses configured timeout."""
mock_get_system,
mock_get_manager):
"""Test BMC firmware update sets up version checking."""
settings = [{'component': 'bmc', 'url': 'http://bmc/v1.0.0'}]
# Mock system
mock_system = mock.Mock()
mock_get_system.return_value = mock_system
# Mock BMC version reading
mock_manager = mock.Mock()
mock_manager.firmware_version = '1.0.0'
mock_get_manager.return_value = mock_manager
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify configured timeout is used for BMC update
mock_reboot_to_finish.assert_called_once_with(
task, timeout=CONF.redfish.firmware_update_bmc_timeout,
disable_ramdisk=True)
# BMC uses version checking, not immediate reboot
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=False,
polling=True
)
# Verify BMC version check tracking is set up
info = task.node.driver_internal_info
self.assertIn('bmc_fw_check_start_time', info)
self.assertIn('bmc_fw_version_before_update', info)
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(redfish_utils, 'get_manager', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@ -1427,106 +1516,506 @@ class RedfishFirmwareTestCase(db_base.DbTestCase):
def test_update_bmc_uses_bmc_constant(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
mock_get_system,
mock_get_manager):
"""Test BMC firmware update detection works with BMC constant."""
settings = [{'component': redfish_utils.BMC,
'url': 'http://bmc/v1.0.0'}]
# Mock system
mock_system = mock.Mock()
mock_get_system.return_value = mock_system
# Mock BMC version reading
mock_manager = mock.Mock()
mock_manager.firmware_version = '1.0.0'
mock_get_manager.return_value = mock_manager
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify configured timeout is used
mock_reboot_to_finish.assert_called_once_with(
task, timeout=CONF.redfish.firmware_update_bmc_timeout,
disable_ramdisk=True)
# BMC uses version checking, not immediate reboot
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=False,
polling=True
)
# Verify BMC version check tracking is set up
info = task.node.driver_internal_info
self.assertIn('bmc_fw_check_start_time', info)
self.assertIn('bmc_fw_version_before_update', info)
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_non_bmc_uses_wait_parameter(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
"""Test non-BMC firmware update uses wait parameter."""
mock_set_async_flags):
"""Test non-BMC firmware update with wait parameter (obsolete)."""
# NOTE: This test is kept for historical reference but the wait
# parameter on BIOS updates is no longer used as BIOS reboots
# immediately when task starts rather than waiting
settings = [{'component': 'bios', 'url': 'http://bios/v1.0.0',
'wait': 120}]
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify wait parameter is used for non-BMC update
mock_reboot_to_finish.assert_called_once_with(
task, timeout=120, disable_ramdisk=True)
# Verify reboot=True is set for BIOS
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=True,
polling=True
)
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_non_bmc_no_wait_parameter(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
"""Test non-BMC firmware update without wait parameter uses None."""
mock_set_async_flags):
"""Test non-BMC firmware update without wait parameter."""
settings = [{'component': 'bios', 'url': 'http://bios/v1.0.0'}]
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify None timeout is used for non-BMC without wait parameter
mock_reboot_to_finish.assert_called_once_with(
task, timeout=None, disable_ramdisk=True)
# Verify reboot=True is set for BIOS
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=True,
polling=True
)
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_mixed_components_with_bmc(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
"""Test mixed component update with BMC and explicit wait uses wait."""
mock_set_async_flags):
"""Test mixed component update with BIOS and BMC."""
settings = [
{'component': 'bios', 'url': 'http://bios/v1.0.0', 'wait': 120},
{'component': 'bmc', 'url': 'http://bmc/v1.0.0', 'wait': 60}
]
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify explicit wait parameter takes precedence over BMC timeout
mock_reboot_to_finish.assert_called_once_with(
task, timeout=120,
disable_ramdisk=True)
# First component is BIOS, so reboot=True
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=True,
polling=True
)
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_bmc_with_explicit_wait(self, mock_get_update_service,
mock_execute_fw_update,
mock_set_async_flags,
mock_reboot_to_finish):
"""Test BMC update with explicit wait uses wait, not BMC timeout."""
mock_set_async_flags):
"""Test BMC update with explicit wait."""
settings = [{'component': 'bmc', 'url': 'http://bmc/v1.0.0',
'wait': 90}]
# add task_monitor to the side effect
mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.firmware.update(task, settings)
task.node.service_step = {'step': 'update',
'interface': 'firmware'}
result = task.driver.firmware.update(task, settings)
# Verify explicit wait parameter takes precedence over BMC timeout
mock_reboot_to_finish.assert_called_once_with(
task, timeout=90, disable_ramdisk=True)
# BMC uses version checking, not immediate reboot
mock_set_async_flags.assert_called_once_with(
task.node,
reboot=False,
polling=True
)
# Verify wait time is stored
info = task.node.driver_internal_info
fw_updates = info['redfish_fw_updates']
self.assertEqual(90, fw_updates[0]['wait'])
self.assertEqual(states.SERVICEWAIT, result)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_utils, 'get_manager', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
                   autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_bmc_no_immediate_reboot(self, mock_get_update_service,
                                        mock_execute_fw_update,
                                        mock_get_system,
                                        mock_get_manager,
                                        mock_set_async_flags):
    """A BMC update keeps polling and does not ask for a reboot up front."""
    # The side-effect helper records the task monitor and runs the BMC
    # monitoring setup in place of the mocked _execute_firmware_update.
    mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
    mock_get_system.return_value = mock.Mock()
    # Firmware version the mocked manager reports before the update.
    mock_get_manager.return_value = mock.Mock(firmware_version='1.0.0')
    bmc_settings = [{'component': 'bmc', 'url': 'http://bmc/v1.0.0'}]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        task.node.service_step = {'step': 'update',
                                  'interface': 'firmware'}

        step_result = task.driver.firmware.update(task, bmc_settings)

        # Async flags: polling on, reboot off.
        mock_set_async_flags.assert_called_once_with(
            task.node, reboot=False, polling=True)
        # The step stays active by returning the wait state.
        self.assertEqual(states.SERVICEWAIT, step_result)
        # Both BMC version-check tracking fields were recorded.
        recorded = task.node.driver_internal_info
        for tracking_key in ('bmc_fw_check_start_time',
                             'bmc_fw_version_before_update'):
            self.assertIn(tracking_key, recorded)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
                   autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_nic_no_immediate_reboot(self, mock_get_update_service,
                                        mock_execute_fw_update,
                                        mock_set_async_flags):
    """A NIC update sets the reboot flag and returns the wait state."""
    # The side-effect helper records the task monitor and runs the NIC
    # monitoring setup in place of the mocked _execute_firmware_update.
    mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
    nic_settings = [{'component': 'nic:BCM57414',
                     'url': 'http://nic/v1.0.0'}]
    active_step = {'step': 'update', 'interface': 'firmware'}

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        task.node.service_step = active_step

        outcome = task.driver.firmware.update(task, nic_settings)

        # The step stays active by returning the wait state.
        self.assertEqual(states.SERVICEWAIT, outcome)
        # reboot=True is flagged for NIC updates (the reboot itself is
        # conditional).
        mock_set_async_flags.assert_called_once_with(
            task.node, reboot=True, polling=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_execute_firmware_update',
                   autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_update_bios_sets_reboot_flag(self, mock_get_update_service,
                                      mock_execute_fw_update,
                                      mock_set_async_flags):
    """A BIOS update flags a reboot and returns the wait state."""
    # The side-effect helper records the task monitor and runs the BIOS
    # monitoring setup in place of the mocked _execute_firmware_update.
    mock_execute_fw_update.side_effect = self._mock_exc_fwup_side_effect
    bios_settings = [{'component': 'bios', 'url': 'http://bios/v1.0.0'}]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # Make the node look like it is running a service step.
        task.node.service_step = {'step': 'update',
                                  'interface': 'firmware'}

        outcome = task.driver.firmware.update(task, bios_settings)

        # The step stays active by returning the wait state.
        self.assertEqual(states.SERVICEWAIT, outcome)
        # BIOS updates request reboot=True with polling enabled.
        mock_set_async_flags.assert_called_once_with(
            task.node, reboot=True, polling=True)
@mock.patch.object(timeutils, 'utcnow', autospec=True)
@mock.patch.object(timeutils, 'parse_isotime', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_continue_updates',
                   autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware,
                   '_get_current_bmc_version', autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_bmc_version_check_timeout_sets_reboot_flag(
        self, mock_get_update_service, mock_get_bmc_version,
        mock_set_async_flags, mock_continue_updates,
        mock_parse_isotime, mock_utcnow):
    """Timing out the BMC version check requests a reboot and moves on."""
    import datetime

    check_started = datetime.datetime(2025, 1, 1, 0, 0, 0,
                                      tzinfo=datetime.timezone.utc)
    mock_parse_isotime.return_value = check_started
    # "Now" is 301 seconds after the recorded start of the version check.
    # NOTE(review): presumably just past the timeout the handler applies
    # (the update carries 'wait': 300) - confirm against the driver.
    mock_utcnow.return_value = (
        check_started + datetime.timedelta(seconds=301))
    # No version can be read back: the BMC is unresponsive.
    mock_get_bmc_version.return_value = None

    bmc_update = {'component': 'bmc', 'url': 'http://bmc/v1.0.0',
                  'wait': 300, 'task_monitor': '/tasks/1'}
    pending_updates = [bmc_update]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # Node state of an in-progress BMC version check.
        node = task.node
        node.set_driver_internal_info('redfish_fw_updates',
                                      pending_updates)
        node.set_driver_internal_info('bmc_fw_check_start_time',
                                      '2025-01-01T00:00:00.000000')

        handler_owner = redfish_fw.RedfishFirmware()
        handler_owner._handle_bmc_update_completion(
            task, mock_get_update_service.return_value,
            pending_updates, bmc_update)

        # The reboot request is recorded on the node...
        recorded = task.node.driver_internal_info
        self.assertTrue(recorded.get('firmware_reboot_requested'))
        # ...the async flags are refreshed with reboot=True...
        mock_set_async_flags.assert_called_once_with(
            task.node, reboot=True, polling=True)
        # ...and processing continues with the remaining updates.
        mock_continue_updates.assert_called_once()
@mock.patch.object(redfish_fw.RedfishFirmware, '_continue_updates',
                   autospec=True)
@mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware,
                   '_validate_resources_stability', autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
@mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
def test_nic_completion_sets_reboot_flag(
        self, mock_get_task_monitor, mock_get_update_service,
        mock_validate_resources, mock_set_async_flags,
        mock_continue_updates):
    """A completed NIC task that needs a reboot records the request."""
    # A Redfish task that finished successfully and carries no messages.
    finished_task = mock.Mock(task_state=sushy.TASK_STATE_COMPLETED,
                              task_status=sushy.HEALTH_OK,
                              messages=[])
    monitor = mock.Mock(is_processing=False)
    monitor.get_task.return_value = finished_task
    mock_get_task_monitor.return_value = monitor

    # nic_needs_post_completion_reboot simulates hardware that started
    # the update immediately but needs a reboot once it has completed.
    nic_update = {'component': 'nic:BCM57414',
                  'url': 'http://nic/v1.0.0',
                  'task_monitor': '/tasks/1',
                  'nic_needs_post_completion_reboot': True}

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # Node state of an in-progress NIC update.
        task.node.set_driver_internal_info('redfish_fw_updates',
                                           [nic_update])

        checker = redfish_fw.RedfishFirmware()
        checker._check_node_redfish_firmware_update(task)

        # The reboot request is recorded on the node.
        recorded = task.node.driver_internal_info
        self.assertTrue(recorded.get('firmware_reboot_requested'))
        # Processing continues with the remaining updates.
        mock_continue_updates.assert_called_once()
@mock.patch.object(redfish_fw.RedfishFirmware,
                   '_validate_resources_stability', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
                   autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_clear_updates',
                   autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_final_update_with_reboot_flag_triggers_reboot(
        self, mock_get_update_service, mock_clear_updates,
        mock_power_action, mock_resume_clean, validate_mock):
    """The last update reboots the node when a reboot was requested."""
    last_update = [{'component': 'bmc', 'url': 'http://bmc/v1.0.0',
                    'task_monitor': '/tasks/1'}]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        node = task.node
        # The node is in cleaning, on its final update, and an earlier
        # stage has already asked for a reboot.
        node.clean_step = {'step': 'update', 'interface': 'firmware'}
        node.set_driver_internal_info('redfish_fw_updates', last_update)
        node.set_driver_internal_info('firmware_reboot_requested', True)

        redfish_fw.RedfishFirmware()._continue_updates(
            task, mock_get_update_service.return_value, last_update)

        # The requested reboot is issued exactly once.
        mock_power_action.assert_called_once_with(task, states.REBOOT)
        # BMC resource validation ran once.
        validate_mock.assert_called_once()
        # Cleaning is handed back to the conductor.
        mock_resume_clean.assert_called_once_with(task)
@mock.patch.object(redfish_fw.RedfishFirmware,
                   '_validate_resources_stability', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
                   autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(redfish_fw.RedfishFirmware, '_clear_updates',
                   autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
def test_final_update_without_reboot_flag_no_reboot(
        self, mock_get_update_service, mock_clear_updates,
        mock_power_action, mock_resume_clean, validate_mock):
    """The last update does not reboot when no reboot was requested."""
    last_update = [{'component': 'bmc', 'url': 'http://bmc/v1.0.0',
                    'task_monitor': '/tasks/1'}]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        node = task.node
        # The node is in cleaning, on its final update;
        # firmware_reboot_requested is deliberately left unset.
        node.clean_step = {'step': 'update', 'interface': 'firmware'}
        node.set_driver_internal_info('redfish_fw_updates', last_update)

        redfish_fw.RedfishFirmware()._continue_updates(
            task, mock_get_update_service.return_value, last_update)

        # No power action of any kind is issued.
        mock_power_action.assert_not_called()
        # BMC resource validation still ran once.
        validate_mock.assert_called_once()
        # Cleaning is still handed back to the conductor.
        mock_resume_clean.assert_called_once_with(task)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
@mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
def test_bios_reboot_on_task_starting(
        self, mock_get_task_monitor, mock_get_update_service,
        mock_power_action):
    """A BIOS update whose task is in STARTING state triggers a reboot.

    The task monitor reports the update as still processing and its
    task state as ``sushy.TASK_STATE_STARTING``. After the periodic
    check runs, a single reboot is expected and the stored update
    entry must carry a truthy ``bios_reboot_triggered`` marker.
    """
    bios_updates = [
        {'component': 'bios', 'url': 'http://bios/v1.0.1',
         'task_monitor': '/tasks/1'},
    ]
    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # Node is mid-way through a BIOS firmware clean step.
        task.node.clean_step = {'step': 'update',
                                'interface': 'firmware'}
        task.node.set_driver_internal_info('redfish_fw_updates',
                                           bios_updates)

        # Monitor says "still processing", with the task in STARTING.
        starting_task = mock.Mock(task_state=sushy.TASK_STATE_STARTING)
        monitor = mock.Mock(is_processing=True)
        monitor.get_task.return_value = starting_task
        mock_get_task_monitor.return_value = monitor

        redfish_fw.RedfishFirmware()._check_node_redfish_firmware_update(
            task)

        # Exactly one reboot, issued with the extra positional 0.
        mock_power_action.assert_called_once_with(
            task, states.REBOOT, 0)
        # The stored entry is marked so later checks can see that the
        # reboot was already triggered.
        stored_updates = task.node.driver_internal_info[
            'redfish_fw_updates']
        first_update = stored_updates[0]
        self.assertTrue(first_update.get('bios_reboot_triggered'))
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(redfish_utils, 'get_update_service', autospec=True)
@mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
def test_bios_no_repeated_reboot_after_flag_set(
        self, mock_get_task_monitor, mock_get_update_service,
        mock_power_action):
    """A BIOS update already marked as rebooted is not rebooted again.

    The stored update entry starts with ``bios_reboot_triggered`` set
    to True and the task monitor still reports the update as
    processing. The periodic check must not issue any power action.
    """
    bios_updates = [
        {'component': 'bios', 'url': 'http://bios/v1.0.1',
         'task_monitor': '/tasks/1',
         'bios_reboot_triggered': True},
    ]
    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        # Node is mid-way through a BIOS firmware clean step and the
        # reboot marker is already present on the stored entry.
        task.node.clean_step = {'step': 'update',
                                'interface': 'firmware'}
        task.node.set_driver_internal_info('redfish_fw_updates',
                                           bios_updates)

        # Monitor says the update is still processing.
        mock_get_task_monitor.return_value = mock.Mock(
            is_processing=True)

        redfish_fw.RedfishFirmware()._check_node_redfish_firmware_update(
            task)

        # No second reboot was requested.
        mock_power_action.assert_not_called()

View file

@ -0,0 +1,10 @@
---
fixes:
- |
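    Each firmware component might have different reboot requirements
    depending on the hardware. This change adds three new configuration
    options to help operators configure the reboot behavior for firmware
    updates.
    ``[redfish]firmware_update_reboot_delay``: default wait time in seconds
    for component-specific firmware update operations (BIOS firmware update
    wait before reboot, BMC firmware version check timeout, and NIC
    firmware task completion timeout).
    ``[redfish]firmware_update_bmc_version_check_interval``: interval in
    seconds for checking the BMC firmware version after a BMC firmware
    update.
    ``[redfish]firmware_update_nic_starting_wait``: time in seconds to wait
    for a NIC firmware update task to progress beyond the STARTING state
    before triggering a reboot.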