
Commit 73c1010

Author: Jens Axboe
block: initial patch for on-stack per-task plugging
This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler.

The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event.

The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Parent: a488e74
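For readers new to the API, here is a minimal sketch of how a caller drives the explicit plugging scheme. blk_start_plug()/blk_finish_plug() are the entry points declared in the include/linux/blkdev.h hunk below; the batching helper itself is invented for illustration and is not part of this commit.

#include <linux/blkdev.h>
#include <linux/fs.h>		/* submit_bio() */

/*
 * Illustrative caller, not from this commit: batch several bios under
 * one on-stack plug so they reach the IO scheduler in a single unplug
 * instead of taking the queue lock once per bio.
 */
static void submit_read_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);			/* installs &plug in current->plug */
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);	/* parked on the plug list */
	blk_finish_plug(&plug);			/* flushes everything to the queue(s) */
}

If the task blocks between start and finish, the schedule() hook added in kernel/sched.c below flushes the plug automatically; that is the "auto-unplug if we hit a schedule event" behaviour the message describes.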

10 files changed, 344 insertions(+), 101 deletions(-)

block/blk-core.c (+271 -98)

Large diffs are not rendered by default.
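Since the blk-core.c hunk is not rendered, here is a hedged reconstruction of what the two entry points plausibly look like, inferred only from the struct blk_plug fields and inline helpers visible in the include/linux/blkdev.h hunk below. The PLUG_MAGIC value and the exact bodies are assumptions, not the committed code.

#include <linux/blkdev.h>
#include <linux/list.h>
#include <linux/sched.h>

#define PLUG_MAGIC	0x91827364	/* hypothetical sanity value */

/* Hedged sketch; the real code is in the unrendered +271/-98 hunk above. */
void blk_start_plug(struct blk_plug *plug)
{
	plug->magic = PLUG_MAGIC;	/* catch use of a stale stack plug */
	INIT_LIST_HEAD(&plug->list);	/* requests park here */
	plug->should_sort = 0;		/* set once requests span more than one queue */

	/* track only the outermost plug for auto-unplug at schedule() */
	if (!current->plug)
		current->plug = plug;
}

void blk_finish_plug(struct blk_plug *plug)
{
	__blk_flush_plug(current, plug);	/* dispatch the parked requests */
	if (current->plug == plug)
		current->plug = NULL;		/* the stack frame is going away */
}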

block/blk-flush.c (+1 -2)

@@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q)
 static void flush_data_end_io(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
-	bool was_empty = elv_queue_empty(q);
 
 	/* after populating an empty queue, kick it to avoid stall */
-	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
+	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
 		__blk_run_queue(q);
 }
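The dropped was_empty test presumably follows from the new model: IO no longer dribbles into an implicitly plugged empty queue, so "the queue was empty before this completion" stops being the right trigger for a kick; the return value of blk_flush_complete_seq() alone now decides whether the flush machinery queued work that needs __blk_run_queue().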

block/elevator.c (+5 -1)

@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
 {
 	int ret = ELEVATOR_NO_MERGE;
 
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 		       int plug)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
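Two related changes here: elv_try_merge() loses its static inline so blk-core.c can call it directly (see the new declaration in include/linux/elevator.h below), presumably to merge incoming bios into requests still parked on a plug list; and the two BUG_ON()s assert the REQ_ON_PLUG invariant, i.e. a request still sitting on some task's plug must never reach the elevator's dispatch or add paths, so the flag has to be cleared by the time a plug is flushed.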

include/linux/blk_types.h (+2 -0)

@@ -152,6 +152,7 @@ enum rq_flag_bits {
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
+	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -193,5 +194,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */

include/linux/blkdev.h (+42 -0)

@@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+struct blk_plug {
+	unsigned long magic;
+	struct list_head list;
+	unsigned int should_sort;
+};
+
+extern void blk_start_plug(struct blk_plug *);
+extern void blk_finish_plug(struct blk_plug *);
+extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	if (unlikely(plug))
+		__blk_flush_plug(tsk, plug);
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	return plug && !list_empty(&plug->list);
+}
+
 /*
  * tag stuff
  */
@@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void)
 	return 0;
 }
 
+static inline void blk_start_plug(struct list_head *list)
+{
+}
+
+static inline void blk_finish_plug(struct list_head *list)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+	return false;
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
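One wrinkle visible in the hunk itself: the !CONFIG_BLOCK stubs for blk_start_plug()/blk_finish_plug() take a struct list_head * where the real declarations above take a struct blk_plug *. The empty bodies never touch the argument, but any caller passing a struct blk_plug * would not match these prototypes when CONFIG_BLOCK is unset; this looks like an oversight in this initial version of the patch.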

include/linux/elevator.h (+1 -0)

@@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int, int);
 extern void elv_insert(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);

include/linux/sched.h (+6 -0)

@@ -99,6 +99,7 @@ struct robust_list_head;
 struct bio_list;
 struct fs_struct;
 struct perf_event_context;
+struct blk_plug;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1429,6 +1430,11 @@ struct task_struct {
 	/* stacked block device info */
 	struct bio_list *bio_list;
 
+#ifdef CONFIG_BLOCK
+	/* stack plugging */
+	struct blk_plug *plug;
+#endif
+
 	/* VM state */
 	struct reclaim_state *reclaim_state;
 

kernel/exit.c (+1 -0)

@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");

kernel/fork.c (+3 -0)

@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT

kernel/sched.c (+12 -0)

@@ -3978,6 +3978,16 @@ asmlinkage void __sched schedule(void)
 		switch_count = &prev->nvcsw;
 	}
 
+	/*
+	 * If we are going to sleep and we have plugged IO queued, make
+	 * sure to submit it to avoid deadlocks.
+	 */
+	if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+		raw_spin_unlock(&rq->lock);
+		blk_flush_plug(prev);
+		raw_spin_lock(&rq->lock);
+	}
+
 	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	schedule();
 	current->in_iowait = 0;
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	blk_flush_plug(current);
 	current->in_iowait = 1;
 	ret = schedule_timeout(timeout);
 	current->in_iowait = 0;
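This hook is what makes holding a plug across blocking calls safe. A hedged illustration of the deadlock it prevents; the helper and its arguments are invented for the example:

#include <linux/blkdev.h>
#include <linux/fs.h>		/* submit_bio() */
#include <linux/pagemap.h>	/* wait_on_page_locked() */

/* Illustrative only: the task sleeps on IO still parked on its own plug. */
static void read_and_wait(struct bio *bio, struct page *page)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	submit_bio(READ, bio);		/* parked on plug.list, not yet dispatched */

	/*
	 * Sleeping here enters schedule(); prev->state != TASK_RUNNING and
	 * the plug list is non-empty, so the hunk above drops rq->lock and
	 * calls blk_flush_plug(prev), actually issuing the read. Without
	 * that, the completion that unlocks the page would never be issued
	 * and the task would sleep forever.
	 */
	wait_on_page_locked(page);

	blk_finish_plug(&plug);
}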
