Skip to content

Commit d23977f

Browse files
shroffniaxboe
authored andcommitted
block: remove q->sysfs_lock for attributes which don't need it
There're few sysfs attributes in block layer which don't really need acquiring q->sysfs_lock while accessing it. The reason being, reading/ writing a value from/to such attributes are either atomic or could be easily protected using READ_ONCE()/WRITE_ONCE(). Moreover, sysfs attributes are inherently protected with sysfs/kernfs internal locking. So this change help segregate all existing sysfs attributes for which we could avoid acquiring q->sysfs_lock. For all read-only attributes we removed the q->sysfs_lock from show method of such attributes. In case attribute is read/write then we removed the q->sysfs_lock from both show and store methods of these attributes. We audited all block sysfs attributes and found following list of attributes which shouldn't require q->sysfs_lock protection: 1. io_poll: Write to this attribute is ignored. So, we don't need q->sysfs_lock. 2. io_poll_delay: Write to this attribute is NOP, so we don't need q->sysfs_lock. 3. io_timeout: Write to this attribute updates q->rq_timeout and read of this attribute returns the value stored in q->rq_timeout Moreover, the q->rq_timeout is set only once when we init the queue (under blk_mq_ init_allocated_queue()) even before disk is added. So that means that we don't need to protect it with q->sysfs_lock. As this attribute is not directly correlated with anything else simply using READ_ONCE/WRITE_ONCE should be enough. 4. nomerges: Write to this attribute file updates two q->flags : QUEUE_FLAG_ NOMERGES and QUEUE_FLAG_NOXMERGES. These flags are accessed during bio-merge which anyways doesn't run with q->sysfs_lock held. Moreover, the q->flags are updated/accessed with bitops which are atomic. So, protecting it with q->sysfs_lock is not necessary. 5. rq_affinity: Write to this attribute file makes atomic updates to q->flags: QUEUE_FLAG_SAME_COMP and QUEUE_FLAG_SAME_FORCE. These flags are also accessed from blk_mq_complete_need_ipi() using test_bit macro. As read/write to q->flags uses bitops which are atomic, protecting it with q->stsys_lock is not necessary. 6. nr_zones: Write to this attribute happens in the driver probe method (except nvme) before disk is added and outside of q->sysfs_lock or any other lock. Moreover nr_zones is defined as "unsigned int" and so reading this attribute, even when it's simultaneously being updated on other cpu, should not return torn value on any architecture supported by linux. So we can avoid using q->sysfs_lock or any other lock/ protection while reading this attribute. 7. discard_zeroes_data: Reading of this attribute always returns 0, so we don't require holding q->sysfs_lock. 8. write_same_max_bytes Reading of this attribute always returns 0, so we don't require holding q->sysfs_lock. Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com> Link: https://lore.kernel.org/r/20250304102551.2533767-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent b07a889 commit d23977f

File tree

2 files changed

+29
-54
lines changed

2 files changed

+29
-54
lines changed

block/blk-settings.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
2323
{
24-
q->rq_timeout = timeout;
24+
WRITE_ONCE(q->rq_timeout, timeout);
2525
}
2626
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
2727

block/blk-sysfs.c

Lines changed: 28 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,7 @@ QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
172172
#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
173173
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
174174
{ \
175-
ssize_t ret; \
176-
\
177-
mutex_lock(&disk->queue->sysfs_lock); \
178-
ret = sysfs_emit(page, "%d\n", _val); \
179-
mutex_unlock(&disk->queue->sysfs_lock); \
180-
return ret; \
175+
return sysfs_emit(page, "%d\n", _val); \
181176
}
182177

183178
/* deprecated fields */
@@ -266,17 +261,11 @@ QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
266261

267262
static ssize_t queue_poll_show(struct gendisk *disk, char *page)
268263
{
269-
ssize_t ret;
264+
if (queue_is_mq(disk->queue))
265+
return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
270266

271-
mutex_lock(&disk->queue->sysfs_lock);
272-
if (queue_is_mq(disk->queue)) {
273-
ret = sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
274-
} else {
275-
ret = sysfs_emit(page, "%u\n",
267+
return sysfs_emit(page, "%u\n",
276268
!!(disk->queue->limits.features & BLK_FEAT_POLL));
277-
}
278-
mutex_unlock(&disk->queue->sysfs_lock);
279-
return ret;
280269
}
281270

282271
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
@@ -288,12 +277,7 @@ static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
288277

289278
static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
290279
{
291-
ssize_t ret;
292-
293-
mutex_lock(&disk->queue->sysfs_lock);
294-
ret = queue_var_show(disk_nr_zones(disk), page);
295-
mutex_unlock(&disk->queue->sysfs_lock);
296-
return ret;
280+
return queue_var_show(disk_nr_zones(disk), page);
297281
}
298282

299283
static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
@@ -320,13 +304,8 @@ static int queue_iostats_passthrough_store(struct gendisk *disk,
320304

321305
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
322306
{
323-
ssize_t ret;
324-
325-
mutex_lock(&disk->queue->sysfs_lock);
326-
ret = queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
307+
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
327308
blk_queue_noxmerges(disk->queue), page);
328-
mutex_unlock(&disk->queue->sysfs_lock);
329-
return ret;
330309
}
331310

332311
static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
@@ -340,7 +319,6 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
340319
if (ret < 0)
341320
return ret;
342321

343-
mutex_lock(&q->sysfs_lock);
344322
memflags = blk_mq_freeze_queue(q);
345323
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
346324
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@@ -349,22 +327,16 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
349327
else if (nm)
350328
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
351329
blk_mq_unfreeze_queue(q, memflags);
352-
mutex_unlock(&q->sysfs_lock);
353330

354331
return ret;
355332
}
356333

357334
static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
358335
{
359-
ssize_t ret;
360-
bool set, force;
336+
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
337+
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
361338

362-
mutex_lock(&disk->queue->sysfs_lock);
363-
set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
364-
force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
365-
ret = queue_var_show(set << force, page);
366-
mutex_unlock(&disk->queue->sysfs_lock);
367-
return ret;
339+
return queue_var_show(set << force, page);
368340
}
369341

370342
static ssize_t
@@ -380,7 +352,12 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
380352
if (ret < 0)
381353
return ret;
382354

383-
mutex_lock(&q->sysfs_lock);
355+
/*
356+
* Here we update two queue flags each using atomic bitops, although
357+
* updating two flags isn't atomic it should be harmless as those flags
358+
* are accessed individually using atomic test_bit operation. So we
359+
* don't grab any lock while updating these flags.
360+
*/
384361
memflags = blk_mq_freeze_queue(q);
385362
if (val == 2) {
386363
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@@ -393,7 +370,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
393370
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
394371
}
395372
blk_mq_unfreeze_queue(q, memflags);
396-
mutex_unlock(&q->sysfs_lock);
397373
#endif
398374
return ret;
399375
}
@@ -411,30 +387,23 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
411387
ssize_t ret = count;
412388
struct request_queue *q = disk->queue;
413389

414-
mutex_lock(&q->sysfs_lock);
415390
memflags = blk_mq_freeze_queue(q);
416391
if (!(q->limits.features & BLK_FEAT_POLL)) {
417392
ret = -EINVAL;
418393
goto out;
419394
}
395+
420396
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
421397
pr_info_ratelimited("please use driver specific parameters instead.\n");
422398
out:
423399
blk_mq_unfreeze_queue(q, memflags);
424-
mutex_unlock(&q->sysfs_lock);
425-
426400
return ret;
427401
}
428402

429403
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
430404
{
431-
ssize_t ret;
432-
433-
mutex_lock(&disk->queue->sysfs_lock);
434-
ret = sysfs_emit(page, "%u\n",
435-
jiffies_to_msecs(disk->queue->rq_timeout));
436-
mutex_unlock(&disk->queue->sysfs_lock);
437-
return ret;
405+
return sysfs_emit(page, "%u\n",
406+
jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
438407
}
439408

440409
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
@@ -448,11 +417,9 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
448417
if (err || val == 0)
449418
return -EINVAL;
450419

451-
mutex_lock(&q->sysfs_lock);
452420
memflags = blk_mq_freeze_queue(q);
453421
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
454422
blk_mq_unfreeze_queue(q, memflags);
455-
mutex_unlock(&q->sysfs_lock);
456423

457424
return count;
458425
}
@@ -706,6 +673,10 @@ static struct attribute *queue_attrs[] = {
706673
* Attributes which are protected with q->sysfs_lock.
707674
*/
708675
&queue_ra_entry.attr,
676+
677+
/*
678+
* Attributes which don't require locking.
679+
*/
709680
&queue_discard_zeroes_data_entry.attr,
710681
&queue_write_same_max_entry.attr,
711682
&queue_nr_zones_entry.attr,
@@ -723,11 +694,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
723694
*/
724695
&queue_requests_entry.attr,
725696
&elv_iosched_entry.attr,
726-
&queue_rq_affinity_entry.attr,
727-
&queue_io_timeout_entry.attr,
728697
#ifdef CONFIG_BLK_WBT
729698
&queue_wb_lat_entry.attr,
730699
#endif
700+
/*
701+
* Attributes which don't require locking.
702+
*/
703+
&queue_rq_affinity_entry.attr,
704+
&queue_io_timeout_entry.attr,
705+
731706
NULL,
732707
};
733708

0 commit comments

Comments
 (0)