Skip to content

zio: add separate pipeline stages for logical IO #17388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions include/sys/zio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

/*
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2024, Klara Inc.
* Copyright (c) 2024, 2025, Klara, Inc.
*/

#ifndef _ZIO_IMPL_H
Expand Down Expand Up @@ -156,14 +156,17 @@ enum zio_stage {

ZIO_STAGE_READY = 1 << 20, /* RWFCXT */

ZIO_STAGE_VDEV_IO_START = 1 << 21, /* RW--XT */
ZIO_STAGE_VDEV_IO_DONE = 1 << 22, /* RW--XT */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--XT */
ZIO_STAGE_LOGICAL_IO_START = 1 << 21, /* RW---- */
ZIO_STAGE_LOGICAL_IO_DONE = 1 << 22, /* RW---- */

ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */
ZIO_STAGE_DIO_CHECKSUM_VERIFY = 1 << 25, /* -W---- */
ZIO_STAGE_VDEV_IO_START = 1 << 23, /* RW--XT */
ZIO_STAGE_VDEV_IO_DONE = 1 << 24, /* RW--XT */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 25, /* RW--XT */

ZIO_STAGE_DONE = 1 << 26 /* RWFCXT */
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 26, /* R----- */
ZIO_STAGE_DIO_CHECKSUM_VERIFY = 1 << 27, /* -W---- */

ZIO_STAGE_DONE = 1 << 28 /* RWFCXT */
};

#define ZIO_ROOT_PIPELINE \
Expand All @@ -181,24 +184,30 @@ enum zio_stage {
ZIO_STAGE_VDEV_IO_DONE | \
ZIO_STAGE_VDEV_IO_ASSESS)

/*
 * Mask covering both logical IO stages: LOGICAL_IO_START and
 * LOGICAL_IO_DONE.
 */
#define ZIO_LOGICAL_IO_STAGES \
	(ZIO_STAGE_LOGICAL_IO_START | ZIO_STAGE_LOGICAL_IO_DONE)

/* The vdev IO stages combined with the DONE stage. */
#define ZIO_VDEV_CHILD_PIPELINE (ZIO_VDEV_IO_STAGES | ZIO_STAGE_DONE)

#define ZIO_READ_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_CHECKSUM_VERIFY)

#define ZIO_READ_PHYS_PIPELINE \
ZIO_READ_COMMON_STAGES
(ZIO_READ_COMMON_STAGES | \
ZIO_VDEV_IO_STAGES)

/*
 * Read pipeline: the common read stages, both logical IO stages, and
 * READ_BP_INIT.
 */
#define ZIO_READ_PIPELINE \
	(ZIO_READ_COMMON_STAGES | ZIO_LOGICAL_IO_STAGES | \
	ZIO_STAGE_READ_BP_INIT)

#define ZIO_DDT_CHILD_READ_PIPELINE \
ZIO_READ_COMMON_STAGES
(ZIO_READ_COMMON_STAGES | \
ZIO_LOGICAL_IO_STAGES)

#define ZIO_DDT_READ_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
Expand All @@ -208,21 +217,23 @@ enum zio_stage {

#define ZIO_WRITE_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_CHECKSUM_GENERATE)

#define ZIO_WRITE_PHYS_PIPELINE \
ZIO_WRITE_COMMON_STAGES
(ZIO_WRITE_COMMON_STAGES | \
ZIO_VDEV_IO_STAGES)

/*
 * Rewrite pipeline: the common write stages and both logical IO stages,
 * plus WRITE_BP_INIT, WRITE_COMPRESS and ENCRYPT.
 */
#define ZIO_REWRITE_PIPELINE \
	(ZIO_WRITE_COMMON_STAGES | ZIO_LOGICAL_IO_STAGES | \
	ZIO_STAGE_WRITE_BP_INIT | ZIO_STAGE_WRITE_COMPRESS | \
	ZIO_STAGE_ENCRYPT)

#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_LOGICAL_IO_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_ENCRYPT | \
Expand All @@ -235,7 +246,7 @@ enum zio_stage {

#define ZIO_DDT_CHILD_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_LOGICAL_IO_STAGES | \
ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)

Expand Down Expand Up @@ -280,7 +291,7 @@ enum zio_stage {
#define ZIO_BLOCKING_STAGES \
(ZIO_STAGE_DVA_ALLOCATE | \
ZIO_STAGE_DVA_CLAIM | \
ZIO_STAGE_VDEV_IO_START)
ZIO_STAGE_LOGICAL_IO_START)

extern void zio_inject_init(void);
extern void zio_inject_fini(void);
Expand Down
17 changes: 10 additions & 7 deletions man/man8/zpool-events.8
Original file line number Diff line number Diff line change
Expand Up @@ -426,16 +426,19 @@ ZIO_STAGE_DVA_ALLOCATE:0x00020000:-W----
ZIO_STAGE_DVA_FREE:0x00040000:--F---
ZIO_STAGE_DVA_CLAIM:0x00080000:---C--

ZIO_STAGE_READY:0x00100000:RWFCIT
ZIO_STAGE_READY:0x00100000:RWFCXT

ZIO_STAGE_VDEV_IO_START:0x00200000:RW--XT
ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--XT
ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--XT
ZIO_STAGE_LOGICAL_IO_START:0x00200000:RW----
ZIO_STAGE_LOGICAL_IO_DONE:0x00400000:RW----

ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R-----
ZIO_STAGE_DIO_CHECKSUM_VERIFY:0x02000000:-W----
ZIO_STAGE_VDEV_IO_START:0x00800000:RW--XT
ZIO_STAGE_VDEV_IO_DONE:0x01000000:RW--XT
ZIO_STAGE_VDEV_IO_ASSESS:0x02000000:RW--XT

ZIO_STAGE_DONE:0x04000000:RWFCXT
ZIO_STAGE_CHECKSUM_VERIFY:0x04000000:R-----
ZIO_STAGE_DIO_CHECKSUM_VERIFY:0x08000000:-W----

ZIO_STAGE_DONE:0x10000000:RWFCXT
.TE
.
.Sh I/O FLAGS
Expand Down
2 changes: 2 additions & 0 deletions module/zcommon/zfs_valstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ _VALSTR_BITFIELD_IMPL(zio_stage,
{ 'D', "DF", "DVA_FREE" },
{ 'D', "DC", "DVA_CLAIM" },
{ 'R', "R ", "READY" },
{ 'L', "LS", "LOGICAL_IO_START" },
{ 'L', "LD", "LOGICAL_IO_DONE" },
{ 'V', "VS", "VDEV_IO_START" },
{ 'V', "VD", "VDEV_IO_DONE" },
{ 'V', "VA", "VDEV_IO_ASSESS" },
Expand Down
16 changes: 13 additions & 3 deletions module/zfs/vdev_mirror.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

/*
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
*/

#include <sys/zfs_context.h>
Expand Down Expand Up @@ -270,7 +271,15 @@ vdev_mirror_map_init(zio_t *zio)
vdev_t *vd = zio->io_vd;
int c;

if (vd == NULL) {
ASSERT3P(vd, !=, NULL);

if (vd == zio->io_spa->spa_root_vdev) {
/*
* Special case for "root" IO handling. We create a mirror map
* that points to multiple locations within the same top-level
* vdev, rather than the same location on multiple vdevs.
*/

dva_t *dva = zio->io_bp->blk_dva;
spa_t *spa = zio->io_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
Expand Down Expand Up @@ -746,7 +755,7 @@ vdev_mirror_io_done(zio_t *zio)
/*
* Always require at least one good copy.
*
* For ditto blocks (io_vd == NULL), require
* For ditto blocks (root vdev), require
* all copies to be good.
*
* XXX -- for replacing vdevs, there's no great answer.
Expand All @@ -757,7 +766,8 @@ vdev_mirror_io_done(zio_t *zio)
* to be able to detach it -- which requires all
* writes to the old device to have succeeded.
*/
if (good_copies == 0 || zio->io_vd == NULL)
if (good_copies == 0 ||
zio->io_vd == zio->io_spa->spa_root_vdev)
zio->io_error = vdev_mirror_worst_error(mm);
}
return;
Expand Down
25 changes: 23 additions & 2 deletions module/zfs/vdev_root.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

/*
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
*/

#include <sys/zfs_context.h>
Expand Down Expand Up @@ -142,6 +143,26 @@ vdev_root_state_change(vdev_t *vd, int faulted, int degraded)
}
}

/*
 * "Root" IO just hands off to vdev_mirror, because handling multiple DVAs in
 * a single BP can be thought of as just another kind of mirror.
 *
 * IO start entry point for the root vdev. It checks the preconditions
 * asserted below and then delegates the zio, unmodified, to the mirror
 * vdev's io_start operation. Nothing else is done here.
 *
 * NOTE(review): this relies on the mirror code recognising that zio->io_vd
 * is the root vdev and building its map from the BP's DVAs -- confirm
 * against vdev_mirror_map_init().
 */
static void
vdev_root_io_start(zio_t *zio)
{
/* The zio must arrive with no error recorded on itself ... */
ASSERT3U(zio->io_error, ==, 0);
/* ... and with no error recorded for its vdev children. */
ASSERT3U(zio->io_child_error[ZIO_CHILD_VDEV], ==, 0);
/* Only reads and writes are expected to be issued to the root vdev. */
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);

/* Delegate to the mirror implementation. */
vdev_mirror_ops.vdev_op_io_start(zio);
}

/*
 * IO done entry point for the root vdev. It performs no work of its own
 * and passes the zio straight to the mirror vdev's io_done operation.
 */
static void
vdev_root_io_done(zio_t *zio)
{
vdev_mirror_ops.vdev_op_io_done(zio);
}

vdev_ops_t vdev_root_ops = {
.vdev_op_init = NULL,
.vdev_op_fini = NULL,
Expand All @@ -151,8 +172,8 @@ vdev_ops_t vdev_root_ops = {
.vdev_op_asize_to_psize = vdev_default_psize,
.vdev_op_min_asize = vdev_default_min_asize,
.vdev_op_min_alloc = NULL,
.vdev_op_io_start = NULL, /* not applicable to the root */
.vdev_op_io_done = NULL, /* not applicable to the root */
.vdev_op_io_start = vdev_root_io_start,
.vdev_op_io_done = vdev_root_io_done,
.vdev_op_state_change = vdev_root_state_change,
.vdev_op_need_resilver = NULL,
.vdev_op_hold = NULL,
Expand Down
Loading
Loading