Skip to content

Add knob to disable slow io notifications #17477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
VDEV_PROP_SLOW_IO_REPORTING,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ struct vdev {
uint64_t vdev_checksum_t;
uint64_t vdev_io_n;
uint64_t vdev_io_t;
boolean_t vdev_slow_io_reporting;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
};
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -5930,7 +5930,8 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_NUM_PROPS' value='52'/>
<enumerator name='VDEV_PROP_SLOW_IO_REPORTING' value='52'/>
<enumerator name='VDEV_NUM_PROPS' value='53'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
Expand Down
5 changes: 3 additions & 2 deletions man/man7/vdevprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ section, below.
Every vdev has a set of properties that export statistics about the vdev
as well as control various behaviors.
Properties are not inherited from top-level vdevs, with the exception of
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
checksum_n, checksum_t, io_n, io_t, slow_io_reporting, slow_io_n, and
slow_io_t.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should here and below be slow_io_reporting instead of slow_io_downgrade?

.Pp
The values of numeric properties can be specified using human-readable suffixes
.Po for example,
Expand Down Expand Up @@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations.
.Pp
The following native properties can be used to change the behavior of a vdev.
.Bl -tag -width "allocating"
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_reporting , slow_io_n ,
.It Sy slow_io_t
Tune the fault management daemon by specifying checksum/io thresholds of <N>
errors in <T> seconds, respectively.
These properties can be set on leaf and top-level vdevs.
Expand Down
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);
zprop_register_index(VDEV_PROP_SLOW_IO_REPORTING, "slow_io_reporting",
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
"SLOW_IO_REPORTING", boolean_table, sfeatures);

/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
Expand Down
86 changes: 61 additions & 25 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,32 +427,53 @@ vdev_get_nparity(vdev_t *vd)
}

/*
 * Select the MOS ZAP object used for this vdev's properties.
 *
 * The first non-zero object among vdev_root_zap, vdev_top_zap and
 * vdev_leaf_zap (in that order) is stored in *objid.
 *
 * Returns 0 on success, or EINVAL when all three are zero, in which case
 * *objid is left untouched.
 */
static int
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
{
	if (vd->vdev_root_zap != 0) {
		*objid = vd->vdev_root_zap;
	} else if (vd->vdev_top_zap != 0) {
		*objid = vd->vdev_top_zap;
	} else if (vd->vdev_leaf_zap != 0) {
		*objid = vd->vdev_leaf_zap;
	} else {
		return (EINVAL);
	}

	return (0);
}

/*
 * Look up the numeric vdev property `prop` in the vdev's property ZAP.
 *
 * On success *value holds the stored number and 0 is returned.  If the
 * property has no entry, *value is set to the property's default and
 * ENOENT is returned so callers can tell a default from a stored value.
 * EINVAL is returned, with *value untouched, when the vdev has no ZAP
 * object; any other zap_lookup() error is passed through unchanged.
 */
static int
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
{
	objset_t *mos = vd->vdev_spa->spa_meta_objset;
	uint64_t zapobj;
	int rc;

	if (vdev_prop_get_objid(vd, &zapobj) != 0)
		return (EINVAL);

	rc = zap_lookup(mos, zapobj, vdev_prop_to_name(prop),
	    sizeof (uint64_t), 1, value);
	if (rc != ENOENT)
		return (rc);

	/* Nothing stored for this property: report the default instead. */
	*value = vdev_prop_default_numeric(prop);
	return (ENOENT);
}

/*
 * Boolean flavour of vdev_prop_get_int().
 *
 * On success (0) or when the default was substituted (ENOENT), *bvalue is
 * set from the low bit of the numeric value.  On any other error *bvalue
 * is left untouched, so a caller that pre-loaded a default keeps it.
 *
 * The return value is whatever vdev_prop_get_int() returned.
 */
static int
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
{
	int err;
	uint64_t ivalue;

	err = vdev_prop_get_int(vd, prop, &ivalue);

	/*
	 * vdev_prop_get_int() can fail (e.g. EINVAL when the vdev has no
	 * ZAP object) before it ever writes ivalue, so only consume it when
	 * it is known to have been filled in.
	 *
	 * NOTE(review): open review question - should this be "!= 0"
	 * rather than "& 1"?  "& 1" is kept because vdev_prop_set() stores
	 * the in-core flag the same way.
	 */
	if (err == 0 || err == ENOENT)
		*bvalue = ivalue & 1;

	return (err);
}

/*
* Get the number of data disks for a top-level vdev.
*/
Expand Down Expand Up @@ -713,8 +734,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
*/
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);

vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);

vd->vdev_slow_io_reporting = vdev_prop_default_numeric(
VDEV_PROP_SLOW_IO_REPORTING);
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);

Expand Down Expand Up @@ -3852,6 +3877,11 @@ vdev_load(vdev_t *vd)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);

error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_REPORTING,
&vd->vdev_slow_io_reporting);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
&vd->vdev_slow_io_n);
if (error && error != ENOENT)
Expand Down Expand Up @@ -5899,15 +5929,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
/*
* Set vdev property values in the vdev props mos object.
*/
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
panic("unexpected vdev type");
}

mutex_enter(&spa->spa_props_lock);

Expand Down Expand Up @@ -6084,6 +6107,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
case VDEV_PROP_SLOW_IO_REPORTING:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_slow_io_reporting = intval & 1;
break;
case VDEV_PROP_SLOW_IO_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
Expand Down Expand Up @@ -6125,6 +6155,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
nvlist_t *nvprops = NULL;
uint64_t intval = 0;
boolean_t boolval = 0;
char *strval = NULL;
const char *propname = NULL;
vdev_prop_t prop;
Expand All @@ -6138,15 +6169,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);

if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
return (SET_ERROR(EINVAL));
}
ASSERT(objid != 0);

mutex_enter(&spa->spa_props_lock);
Expand Down Expand Up @@ -6455,6 +6479,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
case VDEV_PROP_SLOW_IO_REPORTING:
err = vdev_prop_get_bool(vd, prop, &boolval);
if (err && err != ENOENT)
break;

src = ZPROP_SRC_LOCAL;
if (boolval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;

vdev_prop_add_list(outnvl, propname, NULL,
boolval, src);
break;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/zfs_fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
case VDEV_PROP_SLOW_IO_REPORTING:
propval = vd->vdev_slow_io_reporting;
break;
case VDEV_PROP_SLOW_IO_N:
propval = vd->vdev_slow_io_n;
break;
Expand Down
9 changes: 6 additions & 3 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -5414,9 +5414,12 @@ zio_done(zio_t *zio)
zio->io_vd->vdev_stat.vs_slow_ios++;
mutex_exit(&zio->io_vd->vdev_stat_lock);

(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
zio, 0);
if (zio->io_vd->vdev_slow_io_reporting) {
(void) zfs_ereport_post(
FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd,
&zio->io_bookmark, zio, 0);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ typeset -a properties=(
checksum_t
io_n
io_t
slow_io_reporting
slow_io_n
slow_io_t
trim_support
Expand Down
44 changes: 42 additions & 2 deletions tests/zfs-tests/tests/functional/events/zed_slow_io.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#
# Copyright (c) 2023, Klara Inc.
# Copyright (c) 2025, Mariusz Zaborski <oshogbo@FreeBSD.org>
#

# DESCRIPTION:
Expand Down Expand Up @@ -140,8 +141,8 @@ function slow_io_degrade
{
do_setup

zpool set slow_io_n=5 $TESTPOOL $VDEV
zpool set slow_io_t=60 $TESTPOOL $VDEV
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV

start_slow_io
for i in {1..16}; do
Expand Down Expand Up @@ -193,6 +194,44 @@ function slow_io_no_degrade
do_clean
}

# Set slow_io_n/slow_io_t to 5 events in 60 seconds, turn slow_io_reporting
# off, then generate more than 5 slow I/Os.
# No delay ereports should be posted and the vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_reporting=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll for up to 60 seconds to confirm that no zfs.delay ereport is
	# generated.  A single event is already a failure, so stop polling
	# as soon as one shows up instead of sleeping out the full minute.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		# Every dot is escaped so only the literal event class matches.
		events=$(zpool events | grep "ereport\.fs\.zfs\.delay" | wc -l)
		[[ $events -ne 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[ $events -eq "0" ] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}

log_assert "Test ZED slow io configurability"
log_onexit cleanup

Expand All @@ -202,5 +241,6 @@ log_must zed_start
default_degrade
slow_io_degrade
slow_io_no_degrade
slow_io_degrade_disabled

log_pass "Test ZED slow io configurability"
Loading