Skip to content

Commit

Permalink
Merge tag 'md-6.9-20240306' of https://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/song/md into for-6.9/block

Pull MD atomic queue limits changes from Song.

* tag 'md-6.9-20240306' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  block: remove disk_stack_limits
  md: remove mddev->queue
  md: don't initialize queue limits
  md/raid10: use the atomic queue limit update APIs
  md/raid5: use the atomic queue limit update APIs
  md/raid1: use the atomic queue limit update APIs
  md/raid0: use the atomic queue limit update APIs
  md: add queue limit helpers
  md: add a mddev_is_dm helper
  md: add a mddev_add_trace_msg helper
  md: add a mddev_trace_remap helper
  • Loading branch information
axboe committed Mar 6, 2024
2 parents 34a2cf3 + dd27a84 commit d37977f
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 242 deletions.
24 changes: 0 additions & 24 deletions block/blk-settings.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,30 +916,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
}
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);

/**
 * disk_stack_limits - adjust queue limits for stacked drivers
 * @disk: MD/DM gendisk (top)
 * @bdev: the underlying block device (bottom)
 * @offset: offset to beginning of data within component device, in bytes
 *
 * Description:
 *    Merges the limits for a top level gendisk and a bottom level
 *    block_device.
 */
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
		sector_t offset)
{
	struct request_queue *t = disk->queue;

	/* offset >> 9 converts the byte offset to 512-byte sectors */
	if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
			get_start_sect(bdev) + (offset >> 9)) < 0)
		pr_notice("%s: Warning: Device %pg is misaligned\n",
			disk->disk_name, bdev);

	disk_update_readahead(disk);
}
EXPORT_SYMBOL(disk_stack_limits);

/**
* blk_queue_update_dma_pad - update pad mask
* @q: the request queue for the device
Expand Down
9 changes: 3 additions & 6 deletions drivers/md/md-bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1046,9 +1046,8 @@ void md_bitmap_unplug(struct bitmap *bitmap)
if (dirty || need_write) {
if (!writing) {
md_bitmap_wait_writes(bitmap);
if (bitmap->mddev->queue)
blk_add_trace_msg(bitmap->mddev->queue,
"md bitmap_unplug");
mddev_add_trace_msg(bitmap->mddev,
"md bitmap_unplug");
}
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
filemap_write_page(bitmap, i, false);
Expand Down Expand Up @@ -1319,9 +1318,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
}
bitmap->allclean = 1;

if (bitmap->mddev->queue)
blk_add_trace_msg(bitmap->mddev->queue,
"md bitmap_daemon_work");
mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");

/* Any file-page which is PENDING now needs to be written.
* So set NEEDWRITE now, then after we make any last-minute changes
Expand Down
89 changes: 64 additions & 25 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
Expand Down Expand Up @@ -2411,7 +2410,7 @@ int md_integrity_register(struct mddev *mddev)

if (list_empty(&mddev->disks))
return 0; /* nothing to do */
if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
return 0; /* shouldn't register, or already is */
rdev_for_each(rdev, mddev) {
/* skip spares and non-functional disks */
Expand Down Expand Up @@ -2464,7 +2463,7 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
struct blk_integrity *bi_mddev;

if (!mddev->gendisk)
if (mddev_is_dm(mddev))
return 0;

bi_mddev = blk_get_integrity(mddev->gendisk);
Expand Down Expand Up @@ -2857,8 +2856,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev), mddev->in_sync);

if (mddev->queue)
blk_add_trace_msg(mddev->queue, "md md_update_sb");
mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
md_bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
Expand Down Expand Up @@ -4166,7 +4164,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
del_timer_sync(&mddev->safemode_timer);
}
blk_set_stacking_limits(&mddev->queue->limits);
pers->run(mddev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (!mddev->thread)
Expand Down Expand Up @@ -5753,6 +5750,51 @@ static const struct kobj_type md_ktype = {

int mdp_major = 0;

/* stack the limit for all rdevs into lim */
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
{
struct md_rdev *rdev;

rdev_for_each(rdev, mddev) {
queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
}
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);

/* apply the extra stacking limits from a new rdev into mddev */
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
struct queue_limits lim;

if (mddev_is_dm(mddev))
return 0;

lim = queue_limits_start_update(mddev->gendisk->queue);
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
return queue_limits_commit_update(mddev->gendisk->queue, &lim);
}
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);

/* update the optimal I/O size after a reshape */
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes)
{
struct queue_limits lim;

if (mddev_is_dm(mddev))
return;

/* don't bother updating io_opt if we can't suspend the array */
if (mddev_suspend(mddev, false) < 0)
return;
lim = queue_limits_start_update(mddev->gendisk->queue);
lim.io_opt = lim.io_min * nr_stripes;
queue_limits_commit_update(mddev->gendisk->queue, &lim);
mddev_resume(mddev);
}
EXPORT_SYMBOL_GPL(mddev_update_io_opt);

static void mddev_delayed_delete(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
Expand Down Expand Up @@ -5835,9 +5877,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
disk->fops = &md_fops;
disk->private_data = mddev;

mddev->queue = disk->queue;
blk_set_stacking_limits(&mddev->queue->limits);
blk_queue_write_cache(mddev->queue, true, true);
blk_queue_write_cache(disk->queue, true, true);
disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk;
error = add_disk(disk);
Expand Down Expand Up @@ -5979,7 +6019,7 @@ int md_run(struct mddev *mddev)
invalidate_bdev(rdev->bdev);
if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
mddev->ro = MD_RDONLY;
if (mddev->gendisk)
if (!mddev_is_dm(mddev))
set_disk_ro(mddev->gendisk, 1);
}

Expand Down Expand Up @@ -6141,7 +6181,8 @@ int md_run(struct mddev *mddev)
}
}

if (mddev->queue) {
if (!mddev_is_dm(mddev)) {
struct request_queue *q = mddev->gendisk->queue;
bool nonrot = true;

rdev_for_each(rdev, mddev) {
Expand All @@ -6153,14 +6194,14 @@ int md_run(struct mddev *mddev)
if (mddev->degraded)
nonrot = false;
if (nonrot)
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
else
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);

/* Set the NOWAIT flags if all underlying devices support it */
if (nowait)
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
Expand Down Expand Up @@ -6406,8 +6447,10 @@ static void mddev_detach(struct mddev *mddev)
mddev->pers->quiesce(mddev, 0);
}
md_unregister_thread(mddev, &mddev->thread);
if (mddev->queue)
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/

/* the unplug fn references 'conf' */
if (!mddev_is_dm(mddev))
blk_sync_queue(mddev->gendisk->queue);
}

static void __md_stop(struct mddev *mddev)
Expand Down Expand Up @@ -7125,7 +7168,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
if (!bdev_nowait(rdev->bdev)) {
pr_info("%s: Disabling nowait because %pg does not support nowait\n",
mdname(mddev), rdev->bdev);
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
}
/*
* Kick recovery, maybe this spare has to be added to the
Expand Down Expand Up @@ -7362,10 +7405,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (!rv) {
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
else if (mddev->queue) {
else if (!mddev_is_dm(mddev))
set_capacity_and_notify(mddev->gendisk,
mddev->array_sectors);
}
}
return rv;
}
Expand Down Expand Up @@ -8686,10 +8728,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,

bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(discard_bio,
disk_devt(mddev->gendisk),
bio->bi_iter.bi_sector);
mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
submit_bio_noacct(discard_bio);
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
Expand Down Expand Up @@ -9182,7 +9221,7 @@ void md_do_sync(struct md_thread *thread)
mddev->delta_disks > 0 &&
mddev->pers->finish_reshape &&
mddev->pers->size &&
mddev->queue) {
!mddev_is_dm(mddev)) {
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
Expand Down
28 changes: 26 additions & 2 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <trace/events/block.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)
Expand Down Expand Up @@ -479,7 +480,6 @@ struct mddev {
struct timer_list safemode_timer;
struct percpu_ref writes_pending;
int sync_checkers; /* # of threads checking writes_pending */
struct request_queue *queue; /* for plugging ... */

struct bitmap *bitmap; /* the bitmap for the device */
struct {
Expand Down Expand Up @@ -868,7 +868,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
{
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
!bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
mddev->queue->limits.max_write_zeroes_sectors = 0;
mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0;
}

static inline int mddev_suspend_and_lock(struct mddev *mddev)
Expand Down Expand Up @@ -907,7 +907,31 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);

extern const struct block_device_operations md_fops;

/*
* MD devices can be used undeneath by DM, in which case ->gendisk is NULL.
*/
static inline bool mddev_is_dm(struct mddev *mddev)
{
return !mddev->gendisk;
}

/*
 * Record a block-trace remap event for a bio redirected by MD.  Skipped
 * when MD runs underneath device-mapper: there is no MD gendisk whose
 * device number could be reported.
 */
static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
		sector_t sector)
{
	if (mddev_is_dm(mddev))
		return;
	trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
}

/*
 * Emit a blktrace message on the array's own queue via blk_add_trace_msg().
 * Kept as a macro (not an inline function) so the printf-style format and
 * variadic arguments expand at the call site; a no-op when MD sits
 * underneath device-mapper and therefore has no gendisk/queue of its own.
 */
#define mddev_add_trace_msg(mddev, fmt, args...) \
do { \
	if (!mddev_is_dm(mddev)) \
		blk_add_trace_msg((mddev)->gendisk->queue, fmt, ##args); \
} while (0)

#endif /* _MD_MD_H */
42 changes: 22 additions & 20 deletions drivers/md/raid0.c
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,19 @@ static void raid0_free(struct mddev *mddev, void *priv)
free_conf(mddev, conf);
}

/*
 * Program the array queue limits for raid0: limit hardware and
 * write-zeroes I/O to one chunk, advertise one chunk as the minimum and
 * a full stripe (chunk * raid_disks) as the optimal I/O size, then stack
 * in the limits of all member devices and commit the result.
 */
static int raid0_set_limits(struct mddev *mddev)
{
	unsigned int chunk_sectors = mddev->chunk_sectors;
	struct queue_limits lim;

	blk_set_stacking_limits(&lim);
	lim.max_hw_sectors = chunk_sectors;
	lim.max_write_zeroes_sectors = chunk_sectors;
	lim.io_min = chunk_sectors << 9;
	lim.io_opt = lim.io_min * mddev->raid_disks;
	mddev_stack_rdev_limits(mddev, &lim);
	return queue_limits_set(mddev->gendisk->queue, &lim);
}

static int raid0_run(struct mddev *mddev)
{
struct r0conf *conf;
Expand All @@ -399,20 +412,10 @@ static int raid0_run(struct mddev *mddev)
mddev->private = conf;
}
conf = mddev->private;
if (mddev->queue) {
struct md_rdev *rdev;

blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);

blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
blk_queue_io_opt(mddev->queue,
(mddev->chunk_sectors << 9) * mddev->raid_disks);

rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
}
if (!mddev_is_dm(mddev)) {
ret = raid0_set_limits(mddev);
if (ret)
goto out_free_conf;
}

/* calculate array device size */
Expand All @@ -426,8 +429,10 @@ static int raid0_run(struct mddev *mddev)

ret = md_integrity_register(mddev);
if (ret)
free_conf(mddev, conf);

goto out_free_conf;
return 0;
out_free_conf:
free_conf(mddev, conf);
return ret;
}

Expand Down Expand Up @@ -578,10 +583,7 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
bio_set_dev(bio, tmp_dev->bdev);
bio->bi_iter.bi_sector = sector + zone->dev_start +
tmp_dev->data_offset;

if (mddev->gendisk)
trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
bio_sector);
mddev_trace_remap(mddev, bio, bio_sector);
mddev_check_write_zeroes(mddev, bio);
submit_bio_noacct(bio);
}
Expand Down
Loading

0 comments on commit d37977f

Please sign in to comment.