Skip to content

Commit 69f024a

Browse files
authored
Configure zed's diagnosis engine with vdev properties
Introduce four new vdev properties: checksum_n, checksum_t, io_n, and io_t. These properties can be used for configuring the thresholds of zed's diagnosis engine and are interpreted as <N> events in <T> seconds. When this property is set to a non-default value on a top-level vdev, those thresholds will also apply to its leaf vdevs. This behavior can be overridden by explicitly setting the property on the leaf vdev. Note that these properties do not persist across vdev replacement. For this reason, it is advisable to set the property on the top-level vdev instead of the leaf vdev. The default values for zed's diagnosis engine (10 events, 600 seconds) remain unchanged. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Rob Wing <rob.wing@klarasystems.com> Sponsored-by: Seagate Technology LLC Closes #13805
1 parent f091db9 commit 69f024a

File tree

15 files changed

+618
-24
lines changed

15 files changed

+618
-24
lines changed

cmd/zed/agents/zfs_diagnosis.c

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@
3939
#include "zfs_agents.h"
4040
#include "fmd_api.h"
4141

42+
/*
43+
* Default values for the serd engine when processing checksum or io errors. The
44+
* semantics are N <events> in T <seconds>.
45+
*/
46+
#define DEFAULT_CHECKSUM_N 10 /* events */
47+
#define DEFAULT_CHECKSUM_T 600 /* seconds */
48+
#define DEFAULT_IO_N 10 /* events */
49+
#define DEFAULT_IO_T 600 /* seconds */
50+
4251
/*
4352
* Our serd engines are named 'zfs_<pool_guid>_<vdev_guid>_{checksum,io}'. This
4453
* #define reserves enough space for two 64-bit hex values plus the length of
@@ -448,6 +457,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
448457
zfs_case_t *zcp, *dcp;
449458
int32_t pool_state;
450459
uint64_t ena, pool_guid, vdev_guid;
460+
uint64_t checksum_n, checksum_t;
461+
uint64_t io_n, io_t;
451462
er_timeval_t pool_load;
452463
er_timeval_t er_when;
453464
nvlist_t *detector;
@@ -784,11 +795,21 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
784795
if (fmd_nvl_class_match(hdl, nvl,
785796
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO))) {
786797
if (zcp->zc_data.zc_serd_io[0] == '\0') {
798+
if (nvlist_lookup_uint64(nvl,
799+
FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N,
800+
&io_n) != 0) {
801+
io_n = DEFAULT_IO_N;
802+
}
803+
if (nvlist_lookup_uint64(nvl,
804+
FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T,
805+
&io_t) != 0) {
806+
io_t = DEFAULT_IO_T;
807+
}
787808
zfs_serd_name(zcp->zc_data.zc_serd_io,
788809
pool_guid, vdev_guid, "io");
789810
fmd_serd_create(hdl, zcp->zc_data.zc_serd_io,
790-
fmd_prop_get_int32(hdl, "io_N"),
791-
fmd_prop_get_int64(hdl, "io_T"));
811+
io_n,
812+
SEC2NSEC(io_t));
792813
zfs_case_serialize(zcp);
793814
}
794815
if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep))
@@ -813,12 +834,23 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
813834
}
814835

815836
if (zcp->zc_data.zc_serd_checksum[0] == '\0') {
837+
if (nvlist_lookup_uint64(nvl,
838+
FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N,
839+
&checksum_n) != 0) {
840+
checksum_n = DEFAULT_CHECKSUM_N;
841+
}
842+
if (nvlist_lookup_uint64(nvl,
843+
FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T,
844+
&checksum_t) != 0) {
845+
checksum_t = DEFAULT_CHECKSUM_T;
846+
}
847+
816848
zfs_serd_name(zcp->zc_data.zc_serd_checksum,
817849
pool_guid, vdev_guid, "checksum");
818850
fmd_serd_create(hdl,
819851
zcp->zc_data.zc_serd_checksum,
820-
fmd_prop_get_int32(hdl, "checksum_N"),
821-
fmd_prop_get_int64(hdl, "checksum_T"));
852+
checksum_n,
853+
SEC2NSEC(checksum_t));
822854
zfs_case_serialize(zcp);
823855
}
824856
if (fmd_serd_record(hdl,

include/sys/fm/fs/zfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ extern "C" {
7878
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS "vdev_read_errors"
7979
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors"
8080
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors"
81+
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N "vdev_cksum_n"
82+
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T "vdev_cksum_t"
83+
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N "vdev_io_n"
84+
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t"
8185
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays"
8286
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
8387
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"

include/sys/fs/zfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,10 @@ typedef enum {
356356
VDEV_PROP_REMOVING,
357357
VDEV_PROP_ALLOCATING,
358358
VDEV_PROP_FAILFAST,
359+
VDEV_PROP_CHECKSUM_N,
360+
VDEV_PROP_CHECKSUM_T,
361+
VDEV_PROP_IO_N,
362+
VDEV_PROP_IO_T,
359363
VDEV_NUM_PROPS
360364
} vdev_prop_t;
361365

include/sys/vdev_impl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,14 @@ struct vdev {
469469
zfs_ratelimit_t vdev_delay_rl;
470470
zfs_ratelimit_t vdev_deadman_rl;
471471
zfs_ratelimit_t vdev_checksum_rl;
472+
473+
/*
474+
* Checksum and IO thresholds for tuning ZED
475+
*/
476+
uint64_t vdev_checksum_n;
477+
uint64_t vdev_checksum_t;
478+
uint64_t vdev_io_n;
479+
uint64_t vdev_io_t;
472480
};
473481

474482
#define VDEV_PAD_SIZE (8 << 10)

lib/libzfs/libzfs.abi

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3223,7 +3223,11 @@
32233223
<enumerator name='VDEV_PROP_REMOVING' value='39'/>
32243224
<enumerator name='VDEV_PROP_ALLOCATING' value='40'/>
32253225
<enumerator name='VDEV_PROP_FAILFAST' value='41'/>
3226-
<enumerator name='VDEV_NUM_PROPS' value='42'/>
3226+
<enumerator name='VDEV_PROP_CHECKSUM_N' value='42'/>
3227+
<enumerator name='VDEV_PROP_CHECKSUM_T' value='43'/>
3228+
<enumerator name='VDEV_PROP_IO_N' value='44'/>
3229+
<enumerator name='VDEV_PROP_IO_T' value='45'/>
3230+
<enumerator name='VDEV_NUM_PROPS' value='46'/>
32273231
</enum-decl>
32283232
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
32293233
<enum-decl name='vdev_state' id='21566197'>

lib/libzfs/libzfs_pool.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5002,6 +5002,17 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
50025002
(u_longlong_t)intval);
50035003
}
50045004
break;
5005+
case VDEV_PROP_CHECKSUM_N:
5006+
case VDEV_PROP_CHECKSUM_T:
5007+
case VDEV_PROP_IO_N:
5008+
case VDEV_PROP_IO_T:
5009+
if (intval == UINT64_MAX) {
5010+
(void) strlcpy(buf, "-", len);
5011+
} else {
5012+
(void) snprintf(buf, len, "%llu",
5013+
(u_longlong_t)intval);
5014+
}
5015+
break;
50055016
case VDEV_PROP_FRAGMENTATION:
50065017
if (intval == UINT64_MAX) {
50075018
(void) strlcpy(buf, "-", len);

lib/libzfs/libzfs_util.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,6 +1681,18 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
16811681
*ivalp = UINT64_MAX;
16821682
}
16831683

1684+
/*
1685+
* Special handling for "checksum_*=none" and "io_*=none". In this
1686+
* case the value is not 0 but UINT64_MAX.
1687+
*/
1688+
if ((type & ZFS_TYPE_VDEV) && isnone &&
1689+
(prop == VDEV_PROP_CHECKSUM_N ||
1690+
prop == VDEV_PROP_CHECKSUM_T ||
1691+
prop == VDEV_PROP_IO_N ||
1692+
prop == VDEV_PROP_IO_T)) {
1693+
*ivalp = UINT64_MAX;
1694+
}
1695+
16841696
/*
16851697
* Special handling for setting 'refreservation' to 'auto'. Use
16861698
* UINT64_MAX to tell the caller to use zfs_fix_auto_resv().

man/man7/vdevprops.7

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ section, below.
4343
.Ss Native Properties
4444
Every vdev has a set of properties that export statistics about the vdev
4545
as well as control various behaviors.
46-
Properties are NOT inherited from top-level vdevs.
46+
Properties are not inherited from top-level vdevs, with the exception of
47+
checksum_n, checksum_t, io_n, and io_t.
4748
.Pp
4849
The values of numeric properties can be specified using human-readable suffixes
4950
.Po for example,
@@ -114,9 +115,19 @@ The cumulative size of all operations of each type performed by this vdev
114115
If this device is currently being removed from the pool
115116
.El
116117
.Pp
117-
The following native properties can be used to change the behavior of a ZFS
118-
dataset.
118+
The following native properties can be used to change the behavior of a vdev.
119119
.Bl -tag -width "allocating"
120+
.It Sy checksum_n , checksum_t , io_n , io_t
121+
Tune the fault management daemon by specifying checksum/io thresholds of <N>
122+
errors in <T> seconds, respectively.
123+
These properties can be set on leaf and top-level vdevs.
124+
When the property is set on the leaf and top-level vdev, the value of the leaf
125+
vdev will be used.
126+
If the property is only set on the top-level vdev, this value will be used.
127+
The values of these properties do not persist across vdev replacement.
128+
For this reason, it is advisable to set the property on the top-level vdev -
129+
not on the leaf vdev itself.
130+
The default values are 10 errors in 600 seconds.
120131
.It Sy comment
121132
A text comment up to 8192 characters long
122133
.It Sy bootsize

module/zcommon/zpool_prop.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,18 @@ vdev_prop_init(void)
410410
sfeatures);
411411

412412
/* default numeric properties */
413+
zprop_register_number(VDEV_PROP_CHECKSUM_N, "checksum_n", UINT64_MAX,
414+
PROP_DEFAULT, ZFS_TYPE_VDEV, "<events>", "CKSUM_N", B_FALSE,
415+
sfeatures);
416+
zprop_register_number(VDEV_PROP_CHECKSUM_T, "checksum_t", UINT64_MAX,
417+
PROP_DEFAULT, ZFS_TYPE_VDEV, "<seconds>", "CKSUM_T", B_FALSE,
418+
sfeatures);
419+
zprop_register_number(VDEV_PROP_IO_N, "io_n", UINT64_MAX,
420+
PROP_DEFAULT, ZFS_TYPE_VDEV, "<events>", "IO_N", B_FALSE,
421+
sfeatures);
422+
zprop_register_number(VDEV_PROP_IO_T, "io_t", UINT64_MAX,
423+
PROP_DEFAULT, ZFS_TYPE_VDEV, "<seconds>", "IO_T", B_FALSE,
424+
sfeatures);
413425

414426
/* default index (boolean) properties */
415427
zprop_register_index(VDEV_PROP_REMOVING, "removing", 0,

module/zfs/vdev.c

Lines changed: 122 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,31 @@ vdev_get_nparity(vdev_t *vd)
389389
return (nparity);
390390
}
391391

392+
static int
393+
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
394+
{
395+
spa_t *spa = vd->vdev_spa;
396+
objset_t *mos = spa->spa_meta_objset;
397+
uint64_t objid;
398+
int err;
399+
400+
if (vd->vdev_top_zap != 0) {
401+
objid = vd->vdev_top_zap;
402+
} else if (vd->vdev_leaf_zap != 0) {
403+
objid = vd->vdev_leaf_zap;
404+
} else {
405+
return (EINVAL);
406+
}
407+
408+
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
409+
sizeof (uint64_t), 1, value);
410+
411+
if (err == ENOENT)
412+
*value = vdev_prop_default_numeric(prop);
413+
414+
return (err);
415+
}
416+
392417
/*
393418
* Get the number of data disks for a top-level vdev.
394419
*/
@@ -642,6 +667,14 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
642667
zfs_ratelimit_init(&vd->vdev_checksum_rl,
643668
&zfs_checksum_events_per_second, 1);
644669

670+
/*
671+
* Default Thresholds for tuning ZED
672+
*/
673+
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
674+
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
675+
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
676+
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
677+
645678
list_link_init(&vd->vdev_config_dirty_node);
646679
list_link_init(&vd->vdev_state_dirty_node);
647680
list_link_init(&vd->vdev_initialize_node);
@@ -3597,6 +3630,39 @@ vdev_load(vdev_t *vd)
35973630
}
35983631
}
35993632

3633+
if (vd->vdev_top_zap != 0 || vd->vdev_leaf_zap != 0) {
3634+
uint64_t zapobj;
3635+
3636+
if (vd->vdev_top_zap != 0)
3637+
zapobj = vd->vdev_top_zap;
3638+
else
3639+
zapobj = vd->vdev_leaf_zap;
3640+
3641+
error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_N,
3642+
&vd->vdev_checksum_n);
3643+
if (error && error != ENOENT)
3644+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3645+
"failed [error=%d]", (u_longlong_t)zapobj, error);
3646+
3647+
error = vdev_prop_get_int(vd, VDEV_PROP_CHECKSUM_T,
3648+
&vd->vdev_checksum_t);
3649+
if (error && error != ENOENT)
3650+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3651+
"failed [error=%d]", (u_longlong_t)zapobj, error);
3652+
3653+
error = vdev_prop_get_int(vd, VDEV_PROP_IO_N,
3654+
&vd->vdev_io_n);
3655+
if (error && error != ENOENT)
3656+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3657+
"failed [error=%d]", (u_longlong_t)zapobj, error);
3658+
3659+
error = vdev_prop_get_int(vd, VDEV_PROP_IO_T,
3660+
&vd->vdev_io_t);
3661+
if (error && error != ENOENT)
3662+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3663+
"failed [error=%d]", (u_longlong_t)zapobj, error);
3664+
}
3665+
36003666
/*
36013667
* If this is a top-level vdev, initialize its metaslabs.
36023668
*/
@@ -5736,6 +5802,34 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
57365802
}
57375803
vd->vdev_failfast = intval & 1;
57385804
break;
5805+
case VDEV_PROP_CHECKSUM_N:
5806+
if (nvpair_value_uint64(elem, &intval) != 0) {
5807+
error = EINVAL;
5808+
break;
5809+
}
5810+
vd->vdev_checksum_n = intval;
5811+
break;
5812+
case VDEV_PROP_CHECKSUM_T:
5813+
if (nvpair_value_uint64(elem, &intval) != 0) {
5814+
error = EINVAL;
5815+
break;
5816+
}
5817+
vd->vdev_checksum_t = intval;
5818+
break;
5819+
case VDEV_PROP_IO_N:
5820+
if (nvpair_value_uint64(elem, &intval) != 0) {
5821+
error = EINVAL;
5822+
break;
5823+
}
5824+
vd->vdev_io_n = intval;
5825+
break;
5826+
case VDEV_PROP_IO_T:
5827+
if (nvpair_value_uint64(elem, &intval) != 0) {
5828+
error = EINVAL;
5829+
break;
5830+
}
5831+
vd->vdev_io_t = intval;
5832+
break;
57395833
default:
57405834
/* Most processing is done in vdev_props_set_sync */
57415835
break;
@@ -6025,28 +6119,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
60256119
continue;
60266120
/* Numeric Properties */
60276121
case VDEV_PROP_ALLOCATING:
6028-
src = ZPROP_SRC_LOCAL;
6029-
strval = NULL;
6030-
6031-
err = zap_lookup(mos, objid, nvpair_name(elem),
6032-
sizeof (uint64_t), 1, &intval);
6033-
if (err == ENOENT) {
6034-
intval =
6035-
vdev_prop_default_numeric(prop);
6036-
err = 0;
6037-
} else if (err)
6038-
break;
6039-
if (intval == vdev_prop_default_numeric(prop))
6040-
src = ZPROP_SRC_DEFAULT;
6041-
60426122
/* Leaf vdevs cannot have this property */
60436123
if (vd->vdev_mg == NULL &&
60446124
vd->vdev_top != NULL) {
60456125
src = ZPROP_SRC_NONE;
60466126
intval = ZPROP_BOOLEAN_NA;
6127+
} else {
6128+
err = vdev_prop_get_int(vd, prop,
6129+
&intval);
6130+
if (err && err != ENOENT)
6131+
break;
6132+
6133+
if (intval ==
6134+
vdev_prop_default_numeric(prop))
6135+
src = ZPROP_SRC_DEFAULT;
6136+
else
6137+
src = ZPROP_SRC_LOCAL;
60476138
}
60486139

6049-
vdev_prop_add_list(outnvl, propname, strval,
6140+
vdev_prop_add_list(outnvl, propname, NULL,
60506141
intval, src);
60516142
break;
60526143
case VDEV_PROP_FAILFAST:
@@ -6068,6 +6159,22 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
60686159
vdev_prop_add_list(outnvl, propname, strval,
60696160
intval, src);
60706161
break;
6162+
case VDEV_PROP_CHECKSUM_N:
6163+
case VDEV_PROP_CHECKSUM_T:
6164+
case VDEV_PROP_IO_N:
6165+
case VDEV_PROP_IO_T:
6166+
err = vdev_prop_get_int(vd, prop, &intval);
6167+
if (err && err != ENOENT)
6168+
break;
6169+
6170+
if (intval == vdev_prop_default_numeric(prop))
6171+
src = ZPROP_SRC_DEFAULT;
6172+
else
6173+
src = ZPROP_SRC_LOCAL;
6174+
6175+
vdev_prop_add_list(outnvl, propname, NULL,
6176+
intval, src);
6177+
break;
60716178
/* Text Properties */
60726179
case VDEV_PROP_COMMENT:
60736180
/* Exists in the ZAP below */

0 commit comments

Comments
 (0)