mirror of
https://git.freebsd.org/src.git
synced 2026-01-16 23:02:24 +00:00
zfsd: don't try to fix an OFFLINE condition
If the system administrator does "zpool offline", he's doing it for a reason. zfsd shouldn't consider an offline disk to be an event that requires automatic healing. Don't online it in response to a GEOM event, and don't try to activate a hotspare to take over from it. MFC after: 2 weeks Sponsored by: ConnectWise
This commit is contained in:
parent
9a1b330335
commit
a13ddd6210
6 changed files with 215 additions and 2 deletions
|
|
@ -299,6 +299,15 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
|
|||
PoolGUIDString().c_str(), VdevGUIDString().c_str());
|
||||
return (/*consumed*/false);
|
||||
}
|
||||
if (VdevState() == VDEV_STATE_OFFLINE) {
|
||||
/*
|
||||
* OFFLINE is an administrative decision. No need for zfsd to
|
||||
* do anything.
|
||||
*/
|
||||
syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
|
||||
PoolGUIDString().c_str(), VdevGUIDString().c_str());
|
||||
return (/*consumed*/false);
|
||||
}
|
||||
|
||||
if (vdev != NULL
|
||||
&& ( vdev->PoolGUID() == m_poolGUID
|
||||
|
|
@ -401,7 +410,8 @@ CaseFile::ReEvaluate(const ZfsEvent &event)
|
|||
return (/*consumed*/true);
|
||||
} else if (event.Value("type") == "sysevent.fs.zfs.config_sync") {
|
||||
RefreshVdevState();
|
||||
if (VdevState() < VDEV_STATE_HEALTHY)
|
||||
if (VdevState() < VDEV_STATE_HEALTHY &&
|
||||
VdevState() != VDEV_STATE_OFFLINE)
|
||||
consumed = ActivateSpare();
|
||||
}
|
||||
|
||||
|
|
@ -694,6 +704,11 @@ CaseFile::CloseIfSolved()
|
|||
switch (VdevState()) {
|
||||
case VDEV_STATE_HEALTHY:
|
||||
/* No need to keep cases for healthy vdevs */
|
||||
case VDEV_STATE_OFFLINE:
|
||||
/*
|
||||
* Offline is a deliberate administrative action. zfsd
|
||||
* doesn't need to do anything for this state.
|
||||
*/
|
||||
Close();
|
||||
return (true);
|
||||
case VDEV_STATE_REMOVED:
|
||||
|
|
@ -710,7 +725,6 @@ CaseFile::CloseIfSolved()
|
|||
*/
|
||||
case VDEV_STATE_UNKNOWN:
|
||||
case VDEV_STATE_CLOSED:
|
||||
case VDEV_STATE_OFFLINE:
|
||||
/*
|
||||
* Keep open? This may not be the correct behavior,
|
||||
* but it's what we've always done
|
||||
|
|
|
|||
|
|
@ -355,6 +355,13 @@ ZfsEvent::Process() const
|
|||
|
||||
Vdev vdev(zpl.front(), vdevConfig);
|
||||
caseFile = &CaseFile::Create(vdev);
|
||||
if (caseFile->VdevState() == VDEV_STATE_OFFLINE) {
|
||||
/*
|
||||
* An administrator did this deliberately. It's not considered
|
||||
* an error that zfsd must fix.
|
||||
*/
|
||||
return (false);
|
||||
}
|
||||
if (caseFile->ReEvaluate(*this) == false) {
|
||||
stringstream msg;
|
||||
int priority = LOG_INFO;
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ ${PACKAGE}FILES+= zfsd_hotspare_006_pos.ksh
|
|||
${PACKAGE}FILES+= zfsd_hotspare_007_pos.ksh
|
||||
${PACKAGE}FILES+= zfsd_hotspare_008_neg.ksh
|
||||
${PACKAGE}FILES+= zfsd_import_001_pos.ksh
|
||||
${PACKAGE}FILES+= zfsd_offline_001_neg.ksh
|
||||
${PACKAGE}FILES+= zfsd_offline_002_neg.ksh
|
||||
${PACKAGE}FILES+= zfsd_replace_001_pos.ksh
|
||||
${PACKAGE}FILES+= zfsd_replace_002_pos.ksh
|
||||
${PACKAGE}FILES+= zfsd_replace_003_pos.ksh
|
||||
|
|
|
|||
64
tests/sys/cddl/zfs/tests/zfsd/zfsd_offline_001_neg.ksh
Normal file
64
tests/sys/cddl/zfs/tests/zfsd/zfsd_offline_001_neg.ksh
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
#!/usr/local/bin/ksh93 -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2025 ConnectWise. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
|
||||
. $STF_SUITE/tests/hotspare/hotspare.kshlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
$ZPOOL status $TESTPOOL
|
||||
if poolexists $TESTPOOL ; then
|
||||
destroy_pool $TESTPOOL
|
||||
fi
|
||||
|
||||
partition_cleanup
|
||||
}
|
||||
|
||||
function verify_assertion
|
||||
{
|
||||
log_must $ZPOOL offline $TESTPOOL $FAULT_DISK
|
||||
|
||||
# Wait a few seconds before verifying the state
|
||||
$SLEEP 10
|
||||
log_must check_state $TESTPOOL "$FAULT_DISK" "OFFLINE"
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "ZFSD will not automatically reactivate a disk which has been administratively offlined"
|
||||
|
||||
ensure_zfsd_running
|
||||
|
||||
typeset FAULT_DISK=$DISK0
|
||||
typeset POOLDEVS="$DISK0 $DISK1 $DISK2"
|
||||
set -A MY_KEYWORDS mirror raidz1
|
||||
for keyword in "${MY_KEYWORDS[@]}" ; do
|
||||
log_must create_pool $TESTPOOL $keyword $POOLDEVS
|
||||
verify_assertion
|
||||
|
||||
destroy_pool "$TESTPOOL"
|
||||
done
|
||||
66
tests/sys/cddl/zfs/tests/zfsd/zfsd_offline_002_neg.ksh
Normal file
66
tests/sys/cddl/zfs/tests/zfsd/zfsd_offline_002_neg.ksh
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/local/bin/ksh93 -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2025 ConnectWise. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
|
||||
. $STF_SUITE/tests/hotspare/hotspare.kshlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
$ZPOOL status $TESTPOOL
|
||||
if poolexists $TESTPOOL ; then
|
||||
destroy_pool $TESTPOOL
|
||||
fi
|
||||
|
||||
partition_cleanup
|
||||
}
|
||||
|
||||
function verify_assertion
|
||||
{
|
||||
log_must $ZPOOL offline $TESTPOOL $FAULT_DISK
|
||||
|
||||
# Wait a few seconds before verifying the state
|
||||
$SLEEP 10
|
||||
log_must check_state $TESTPOOL "$FAULT_DISK" "OFFLINE"
|
||||
log_must check_state $TESTPOOL "$SPARE_DISK" "AVAIL"
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "ZFSD will not automatically activate a spare when a disk has been administratively offlined"
|
||||
|
||||
ensure_zfsd_running
|
||||
|
||||
typeset FAULT_DISK=$DISK0
|
||||
typeset SPARE_DISK=$DISK3
|
||||
typeset POOLDEVS="$DISK0 $DISK1 $DISK2"
|
||||
set -A MY_KEYWORDS mirror raidz1
|
||||
for keyword in "${MY_KEYWORDS[@]}" ; do
|
||||
log_must create_pool $TESTPOOL $keyword $POOLDEVS spare $SPARE_DISK
|
||||
verify_assertion
|
||||
|
||||
destroy_pool "$TESTPOOL"
|
||||
done
|
||||
|
|
@ -483,6 +483,64 @@ zfsd_autoreplace_003_pos_cleanup()
|
|||
ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
|
||||
}
|
||||
|
||||
atf_test_case zfsd_offline_001_neg cleanup
|
||||
zfsd_offline_001_neg_head()
|
||||
{
|
||||
atf_set "descr" "ZFSD will not automatically reactivate a disk which has been administratively offlined"
|
||||
atf_set "require.progs" "ksh93 zpool zfs"
|
||||
}
|
||||
zfsd_offline_001_neg_body()
|
||||
{
|
||||
. $(atf_get_srcdir)/../../include/default.cfg
|
||||
. $(atf_get_srcdir)/../hotspare/hotspare.cfg
|
||||
. $(atf_get_srcdir)/zfsd.cfg
|
||||
|
||||
verify_disk_count "$DISKS" 3
|
||||
verify_zfsd_running
|
||||
ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed"
|
||||
ksh93 $(atf_get_srcdir)/zfsd_offline_001_neg.ksh
|
||||
if [[ $? != 0 ]]; then
|
||||
save_artifacts
|
||||
atf_fail "Testcase failed"
|
||||
fi
|
||||
}
|
||||
zfsd_offline_001_neg_cleanup()
|
||||
{
|
||||
. $(atf_get_srcdir)/../../include/default.cfg
|
||||
. $(atf_get_srcdir)/zfsd.cfg
|
||||
|
||||
ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
|
||||
}
|
||||
|
||||
atf_test_case zfsd_offline_002_neg cleanup
|
||||
zfsd_offline_002_neg_head()
|
||||
{
|
||||
atf_set "descr" "ZFSD will not automatically activate a spare when a disk has been administratively offlined"
|
||||
atf_set "require.progs" "ksh93 zpool zfs"
|
||||
}
|
||||
zfsd_offline_002_neg_body()
|
||||
{
|
||||
. $(atf_get_srcdir)/../../include/default.cfg
|
||||
. $(atf_get_srcdir)/../hotspare/hotspare.cfg
|
||||
. $(atf_get_srcdir)/zfsd.cfg
|
||||
|
||||
verify_disk_count "$DISKS" 4
|
||||
verify_zfsd_running
|
||||
ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed"
|
||||
ksh93 $(atf_get_srcdir)/zfsd_offline_002_neg.ksh
|
||||
if [[ $? != 0 ]]; then
|
||||
save_artifacts
|
||||
atf_fail "Testcase failed"
|
||||
fi
|
||||
}
|
||||
zfsd_offline_002_neg_cleanup()
|
||||
{
|
||||
. $(atf_get_srcdir)/../../include/default.cfg
|
||||
. $(atf_get_srcdir)/zfsd.cfg
|
||||
|
||||
ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
|
||||
}
|
||||
|
||||
atf_test_case zfsd_replace_001_pos cleanup
|
||||
zfsd_replace_001_pos_head()
|
||||
{
|
||||
|
|
@ -676,6 +734,8 @@ atf_init_test_cases()
|
|||
atf_add_test_case zfsd_autoreplace_001_neg
|
||||
atf_add_test_case zfsd_autoreplace_002_pos
|
||||
atf_add_test_case zfsd_autoreplace_003_pos
|
||||
atf_add_test_case zfsd_offline_001_neg
|
||||
atf_add_test_case zfsd_offline_002_neg
|
||||
atf_add_test_case zfsd_replace_001_pos
|
||||
atf_add_test_case zfsd_replace_002_pos
|
||||
atf_add_test_case zfsd_replace_003_pos
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue