Skip to content

Commit

Permalink
md/raid5: Keep a reference to last stripe_head for batch
Browse files Browse the repository at this point in the history
When batching, every stripe head has to find the previous stripe head to
add to the batch list. This involves taking the hash lock, which is
highly contended during IO.

Instead of finding the previous stripe_head each time, store a
reference to the previous stripe_head in a pointer so that it doesn't
require taking the contended lock another time.

The reference to the previous stripe must be released before scheduling
and waiting for work to get done. Otherwise, the held reference can stall
raid5_activate_delayed() and cause a deadlock.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
lsgunth authored and axboe committed Aug 2, 2022
1 parent 0a2d169 commit 3312e6c
Showing 1 changed file with 40 additions and 12 deletions.
52 changes: 40 additions & 12 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,8 @@ static bool stripe_can_batch(struct stripe_head *sh)
}

/* we only do back search */
static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
static void stripe_add_to_batch_list(struct r5conf *conf,
struct stripe_head *sh, struct stripe_head *last_sh)
{
struct stripe_head *head;
sector_t head_sector, tmp_sec;
Expand All @@ -856,15 +857,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
return;
head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);

hash = stripe_hash_locks_hash(conf, head_sector);
spin_lock_irq(conf->hash_locks + hash);
head = find_get_stripe(conf, head_sector, conf->generation, hash);
spin_unlock_irq(conf->hash_locks + hash);

if (!head)
return;
if (!stripe_can_batch(head))
goto out;
if (last_sh && head_sector == last_sh->sector) {
head = last_sh;
atomic_inc(&head->count);
} else {
hash = stripe_hash_locks_hash(conf, head_sector);
spin_lock_irq(conf->hash_locks + hash);
head = find_get_stripe(conf, head_sector, conf->generation,
hash);
spin_unlock_irq(conf->hash_locks + hash);
if (!head)
return;
if (!stripe_can_batch(head))
goto out;
}

lock_two_stripes(head, sh);
/* clear_batch_ready clear the flag */
Expand Down Expand Up @@ -5795,6 +5801,8 @@ enum stripe_result {
};

/*
 * State kept while a request is being turned into stripe_heads;
 * make_stripe_request() reads and updates it through its ctx pointer.
 */
struct stripe_request_ctx {
/*
 * a reference to the last stripe_head for batching; whoever stores a
 * stripe_head here takes a count on it (atomic_inc(&sh->count)) and the
 * previous value must be dropped with raid5_release_stripe() before it
 * is replaced, before sleeping in schedule(), and when the request is
 * finished
 */
struct stripe_head *batch_last;
/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
bool do_flush;
};
Expand Down Expand Up @@ -5889,8 +5897,13 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
goto out_release;
}

if (stripe_can_batch(sh))
stripe_add_to_batch_list(conf, sh);
if (stripe_can_batch(sh)) {
stripe_add_to_batch_list(conf, sh, ctx->batch_last);
if (ctx->batch_last)
raid5_release_stripe(ctx->batch_last);
atomic_inc(&sh->count);
ctx->batch_last = sh;
}

if (ctx->do_flush) {
set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
Expand Down Expand Up @@ -5985,6 +5998,18 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
continue;

if (res == STRIPE_SCHEDULE_AND_RETRY) {
/*
* Must release the reference to batch_last before
* scheduling and waiting for work to be done,
* otherwise the batch_last stripe head could prevent
* raid5_activate_delayed() from making progress
* and thus deadlocking.
*/
if (ctx.batch_last) {
raid5_release_stripe(ctx.batch_last);
ctx.batch_last = NULL;
}

schedule();
prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE);
Expand All @@ -5996,6 +6021,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)

finish_wait(&conf->wait_for_overlap, &w);

if (ctx.batch_last)
raid5_release_stripe(ctx.batch_last);

if (rw == WRITE)
md_write_end(mddev);
bio_endio(bi);
Expand Down

0 comments on commit 3312e6c

Please sign in to comment.