block: change the request allocation/congestion logic to be sync/async based

This makes sure that we never wait on async IO for sync requests, instead
of doing the split on writes vs reads.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Jens Axboe authored and torvalds committed Apr 6, 2009
1 parent 0221c81 commit 1faa16d
Showing 6 changed files with 102 additions and 84 deletions.
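As the commit message says, request-list accounting, queue-full flags, and congestion flags are now split by sync vs. async instead of read vs. write, so synchronous I/O (reads and sync writes) never sleeps on request slots held by background async writes. Below is a minimal userspace sketch of the classification rule only; the flag bits and names are stand-ins, not the kernel's REQ_RW/REQ_RW_SYNC definitions.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in flag bits; the kernel's REQ_RW and REQ_RW_SYNC use different values. */
#define MY_REQ_WRITE (1u << 0)	/* data direction is write */
#define MY_REQ_SYNC  (1u << 1)	/* the submitter is waiting on this request */

/* Mirrors the shape of rw_is_sync(): reads and sync writes count as sync. */
static bool my_rw_is_sync(unsigned int rw_flags)
{
	return !(rw_flags & MY_REQ_WRITE) || (rw_flags & MY_REQ_SYNC);
}

int main(void)
{
	printf("read       -> %s\n", my_rw_is_sync(0) ? "sync" : "async");
	printf("write      -> %s\n", my_rw_is_sync(MY_REQ_WRITE) ? "sync" : "async");
	printf("sync write -> %s\n",
	       my_rw_is_sync(MY_REQ_WRITE | MY_REQ_SYNC) ? "sync" : "async");
	return 0;
}

A plain write is the only asynchronous case; everything a task actually waits for stays in the sync class, which is what the per-class pools in the diff below protect.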
70 changes: 35 additions & 35 deletions block/blk-core.c
@@ -484,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q)
{
struct request_list *rl = &q->rq;

-rl->count[READ] = rl->count[WRITE] = 0;
-rl->starved[READ] = rl->starved[WRITE] = 0;
+rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
+rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
rl->elvpriv = 0;
-init_waitqueue_head(&rl->wait[READ]);
-init_waitqueue_head(&rl->wait[WRITE]);
+init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
+init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
mempool_free_slab, request_cachep, q->node);
@@ -699,37 +699,37 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
ioc->last_waited = jiffies;
}

-static void __freed_request(struct request_queue *q, int rw)
+static void __freed_request(struct request_queue *q, int sync)
{
struct request_list *rl = &q->rq;

-if (rl->count[rw] < queue_congestion_off_threshold(q))
-blk_clear_queue_congested(q, rw);
+if (rl->count[sync] < queue_congestion_off_threshold(q))
+blk_clear_queue_congested(q, sync);

-if (rl->count[rw] + 1 <= q->nr_requests) {
-if (waitqueue_active(&rl->wait[rw]))
-wake_up(&rl->wait[rw]);
+if (rl->count[sync] + 1 <= q->nr_requests) {
+if (waitqueue_active(&rl->wait[sync]))
+wake_up(&rl->wait[sync]);

-blk_clear_queue_full(q, rw);
+blk_clear_queue_full(q, sync);
}
}

/*
* A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock.
*/
-static void freed_request(struct request_queue *q, int rw, int priv)
+static void freed_request(struct request_queue *q, int sync, int priv)
{
struct request_list *rl = &q->rq;

-rl->count[rw]--;
+rl->count[sync]--;
if (priv)
rl->elvpriv--;

-__freed_request(q, rw);
+__freed_request(q, sync);

-if (unlikely(rl->starved[rw ^ 1]))
-__freed_request(q, rw ^ 1);
+if (unlikely(rl->starved[sync ^ 1]))
+__freed_request(q, sync ^ 1);
}

/*
@@ -743,25 +743,25 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
struct request *rq = NULL;
struct request_list *rl = &q->rq;
struct io_context *ioc = NULL;
-const int rw = rw_flags & 0x01;
+const bool is_sync = rw_is_sync(rw_flags) != 0;
int may_queue, priv;

may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO)
goto rq_starved;

-if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
-if (rl->count[rw]+1 >= q->nr_requests) {
+if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
+if (rl->count[is_sync]+1 >= q->nr_requests) {
ioc = current_io_context(GFP_ATOMIC, q->node);
/*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
* This process will be allowed to complete a batch of
* requests, others will be blocked.
*/
-if (!blk_queue_full(q, rw)) {
+if (!blk_queue_full(q, is_sync)) {
ioc_set_batching(q, ioc);
-blk_set_queue_full(q, rw);
+blk_set_queue_full(q, is_sync);
} else {
if (may_queue != ELV_MQUEUE_MUST
&& !ioc_batching(q, ioc)) {
@@ -774,19 +774,19 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
}
}
}
-blk_set_queue_congested(q, rw);
+blk_set_queue_congested(q, is_sync);
}

/*
* Only allow batching queuers to allocate up to 50% over the defined
* limit of requests, otherwise we could have thousands of requests
* allocated with any setting of ->nr_requests
*/
-if (rl->count[rw] >= (3 * q->nr_requests / 2))
+if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
goto out;

-rl->count[rw]++;
-rl->starved[rw] = 0;
+rl->count[is_sync]++;
+rl->starved[is_sync] = 0;

priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
if (priv)
@@ -804,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* wait queue, but this is pretty rare.
*/
spin_lock_irq(q->queue_lock);
-freed_request(q, rw, priv);
+freed_request(q, is_sync, priv);

/*
* in the very unlikely event that allocation failed and no
@@ -814,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* rq mempool into READ and WRITE
*/
rq_starved:
-if (unlikely(rl->count[rw] == 0))
-rl->starved[rw] = 1;
+if (unlikely(rl->count[is_sync] == 0))
+rl->starved[is_sync] = 1;

goto out;
}
@@ -829,7 +829,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;

-trace_block_getrq(q, bio, rw);
+trace_block_getrq(q, bio, rw_flags & 1);
out:
return rq;
}
@@ -843,7 +843,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct bio *bio)
{
-const int rw = rw_flags & 0x01;
+const bool is_sync = rw_is_sync(rw_flags) != 0;
struct request *rq;

rq = get_request(q, rw_flags, bio, GFP_NOIO);
@@ -852,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct io_context *ioc;
struct request_list *rl = &q->rq;

-prepare_to_wait_exclusive(&rl->wait[rw], &wait,
+prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
TASK_UNINTERRUPTIBLE);

-trace_block_sleeprq(q, bio, rw);
+trace_block_sleeprq(q, bio, rw_flags & 1);

__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@@ -871,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
ioc_set_batching(q, ioc);

spin_lock_irq(q->queue_lock);
-finish_wait(&rl->wait[rw], &wait);
+finish_wait(&rl->wait[is_sync], &wait);

rq = get_request(q, rw_flags, bio, GFP_NOIO);
};
@@ -1070,14 +1070,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
* it didn't come out of our reserved rq pools
*/
if (req->cmd_flags & REQ_ALLOCED) {
-int rw = rq_data_dir(req);
+int is_sync = rq_is_sync(req) != 0;
int priv = req->cmd_flags & REQ_ELVPRIV;

BUG_ON(!list_empty(&req->queuelist));
BUG_ON(!hlist_unhashed(&req->hash));

blk_free_request(q, req);
-freed_request(q, rw, priv);
+freed_request(q, is_sync, priv);
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
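The blk-core.c changes above come down to indexing count[], starved[], and wait[] by is_sync and, when a request is freed, kicking the opposite class if it had starved while this one held all the slots. A compressed userspace model of that bookkeeping follows; the types, limits, and wake_waiters() stub are made up for illustration, and there is no locking or real wait-queue handling.

#include <stdbool.h>
#include <stdio.h>

enum { ASYNC = 0, SYNC = 1 };	/* stands in for BLK_RW_ASYNC / BLK_RW_SYNC */

struct req_list_model {
	int count[2];		/* requests currently allocated per class */
	bool starved[2];	/* an allocation failed while count was 0 */
	int nr_requests;	/* soft per-class limit */
};

static void wake_waiters(int sync)
{
	printf("wake %s waiters\n", sync ? "sync" : "async");
}

/* Rough shape of freed_request(): account, wake own class, then the starved peer. */
static void freed_one(struct req_list_model *rl, int sync)
{
	rl->count[sync]--;
	if (rl->count[sync] + 1 <= rl->nr_requests)
		wake_waiters(sync);
	if (rl->starved[sync ^ 1]) {
		rl->starved[sync ^ 1] = false;	/* simplified; the kernel clears it on the next successful allocation */
		wake_waiters(sync ^ 1);
	}
}

int main(void)
{
	struct req_list_model rl = { .count = { 0, 128 }, .nr_requests = 128 };

	rl.starved[ASYNC] = true;	/* the async side failed to allocate earlier */
	freed_one(&rl, SYNC);		/* freeing a sync request wakes both classes */
	return 0;
}

The cross-class wake-up matters because both classes draw from the same mempool: if one class consumed every entry, a starved waiter on the other side would otherwise never be woken.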
40 changes: 20 additions & 20 deletions block/blk-sysfs.c
@@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
q->nr_requests = nr;
blk_queue_congestion_threshold(q);

-if (rl->count[READ] >= queue_congestion_on_threshold(q))
-blk_set_queue_congested(q, READ);
-else if (rl->count[READ] < queue_congestion_off_threshold(q))
-blk_clear_queue_congested(q, READ);
-
-if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
-blk_set_queue_congested(q, WRITE);
-else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
-blk_clear_queue_congested(q, WRITE);
-
-if (rl->count[READ] >= q->nr_requests) {
-blk_set_queue_full(q, READ);
-} else if (rl->count[READ]+1 <= q->nr_requests) {
-blk_clear_queue_full(q, READ);
-wake_up(&rl->wait[READ]);
+if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+blk_set_queue_congested(q, BLK_RW_SYNC);
+else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+blk_set_queue_congested(q, BLK_RW_ASYNC);
+else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+blk_set_queue_full(q, BLK_RW_SYNC);
+} else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
+blk_clear_queue_full(q, BLK_RW_SYNC);
+wake_up(&rl->wait[BLK_RW_SYNC]);
}

-if (rl->count[WRITE] >= q->nr_requests) {
-blk_set_queue_full(q, WRITE);
-} else if (rl->count[WRITE]+1 <= q->nr_requests) {
-blk_clear_queue_full(q, WRITE);
-wake_up(&rl->wait[WRITE]);
+if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+blk_set_queue_full(q, BLK_RW_ASYNC);
+} else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
+blk_clear_queue_full(q, BLK_RW_ASYNC);
+wake_up(&rl->wait[BLK_RW_ASYNC]);
}
spin_unlock_irq(q->queue_lock);
return ret;
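queue_requests_store() above re-evaluates each class against the congestion on/off thresholds and the nr_requests limit whenever the sysfs value changes. The sketch below shows only the hysteresis pattern, with made-up threshold arithmetic; the kernel derives its real values in blk_queue_congestion_threshold(), which is called above but is not part of this diff.

#include <stdbool.h>
#include <stdio.h>

struct class_state {
	int count;		/* requests currently allocated in this class */
	bool congested;
};

/*
 * Congestion latches on near the limit and only clears once usage drops
 * somewhat below it, so the flag does not flap around a single boundary.
 */
static void update_congestion(struct class_state *c, int nr_requests)
{
	int on  = nr_requests - nr_requests / 8;	/* illustrative threshold */
	int off = nr_requests - nr_requests / 4;	/* illustrative threshold */

	if (c->count >= on)
		c->congested = true;
	else if (c->count < off)
		c->congested = false;
}

int main(void)
{
	struct class_state sync_class = { .count = 120 };

	update_congestion(&sync_class, 128);	/* 120 >= 112: set */
	printf("congested=%d\n", sync_class.congested);

	sync_class.count = 100;
	update_congestion(&sync_class, 128);	/* between 96 and 112: unchanged */
	printf("congested=%d\n", sync_class.congested);

	sync_class.count = 90;
	update_congestion(&sync_class, 128);	/* 90 < 96: cleared */
	printf("congested=%d\n", sync_class.congested);
	return 0;
}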
2 changes: 1 addition & 1 deletion block/elevator.c
@@ -677,7 +677,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
}

if (unplug_it && blk_queue_plugged(q)) {
-int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
- q->in_flight;

if (nrq >= q->unplug_thresh)
12 changes: 6 additions & 6 deletions include/linux/backing-dev.h
@@ -24,8 +24,8 @@ struct dentry;
*/
enum bdi_state {
BDI_pdflush, /* A pdflush thread is working this device */
-BDI_write_congested, /* The write queue is getting full */
-BDI_read_congested, /* The read queue is getting full */
+BDI_async_congested, /* The async (write) queue is getting full */
+BDI_sync_congested, /* The sync queue is getting full */
BDI_unused, /* Available bits start here */
};

@@ -215,18 +215,18 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
-return bdi_congested(bdi, 1 << BDI_read_congested);
+return bdi_congested(bdi, 1 << BDI_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
-return bdi_congested(bdi, 1 << BDI_write_congested);
+return bdi_congested(bdi, 1 << BDI_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
-return bdi_congested(bdi, (1 << BDI_read_congested)|
-(1 << BDI_write_congested));
+return bdi_congested(bdi, (1 << BDI_sync_congested) |
+(1 << BDI_async_congested));
}

void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
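The backing-dev changes above reduce read/write congestion queries to bit tests against BDI_sync_congested and BDI_async_congested, with bdi_rw_congested() simply ORing the two masks. A standalone model of that bit test follows; the struct and enum here are simplified stand-ins for backing_dev_info and bdi_state, and the bit positions are not the kernel's.

#include <stdio.h>

enum model_bdi_state {
	MODEL_BDI_async_congested,	/* the async (write) queue is getting full */
	MODEL_BDI_sync_congested,	/* the sync queue is getting full */
};

struct model_bdi {
	unsigned long state;		/* bitmask of model_bdi_state bits */
};

static int model_bdi_congested(const struct model_bdi *bdi, int bdi_bits)
{
	return (bdi->state & bdi_bits) != 0;
}

int main(void)
{
	struct model_bdi bdi = { .state = 1UL << MODEL_BDI_async_congested };
	int rw_bits = (1 << MODEL_BDI_sync_congested) |
		      (1 << MODEL_BDI_async_congested);

	printf("sync congested:  %d\n",
	       model_bdi_congested(&bdi, 1 << MODEL_BDI_sync_congested));	/* 0 */
	printf("async congested: %d\n",
	       model_bdi_congested(&bdi, 1 << MODEL_BDI_async_congested));	/* 1 */
	printf("any congested:   %d\n", model_bdi_congested(&bdi, rw_bits));	/* 1 */
	return 0;
}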
52 changes: 35 additions & 17 deletions include/linux/blkdev.h
@@ -38,6 +38,10 @@ struct request;
typedef void (rq_end_io_fn)(struct request *, int);

struct request_list {
+/*
+* count[], starved[], and wait[] are indexed by
+* BLK_RW_SYNC/BLK_RW_ASYNC
+*/
int count[2];
int starved[2];
int elvpriv;
@@ -66,6 +70,11 @@ enum rq_cmd_type_bits {
REQ_TYPE_ATA_PC,
};

+enum {
+BLK_RW_ASYNC = 0,
+BLK_RW_SYNC = 1,
+};

/*
* For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
* sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
@@ -103,7 +112,7 @@ enum rq_flag_bits {
__REQ_QUIET, /* don't worry about errors */
__REQ_PREEMPT, /* set for "ide_preempt" requests */
__REQ_ORDERED_COLOR, /* is before or after barrier */
-__REQ_RW_SYNC, /* request is sync (O_DIRECT) */
+__REQ_RW_SYNC, /* request is sync (sync write or read) */
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_RW_META, /* metadata io request */
__REQ_COPY_USER, /* contains copies of user pages */
@@ -438,8 +447,8 @@ struct request_queue
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
-#define QUEUE_FLAG_READFULL 3 /* read queue has been filled */
-#define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */
+#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
+#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
@@ -611,32 +620,41 @@ enum {
#define rq_data_dir(rq) ((rq)->cmd_flags & 1)

/*
-* We regard a request as sync, if it's a READ or a SYNC write.
+* We regard a request as sync, if either a read or a sync write
*/
-#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+static inline bool rw_is_sync(unsigned int rw_flags)
+{
+return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
+}
+
+static inline bool rq_is_sync(struct request *rq)
+{
+return rw_is_sync(rq->cmd_flags);
+}

#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)

-static inline int blk_queue_full(struct request_queue *q, int rw)
+static inline int blk_queue_full(struct request_queue *q, int sync)
{
-if (rw == READ)
-return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
-return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+if (sync)
+return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
+return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
}

-static inline void blk_set_queue_full(struct request_queue *q, int rw)
+static inline void blk_set_queue_full(struct request_queue *q, int sync)
{
-if (rw == READ)
-queue_flag_set(QUEUE_FLAG_READFULL, q);
+if (sync)
+queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
else
-queue_flag_set(QUEUE_FLAG_WRITEFULL, q);
+queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
}

-static inline void blk_clear_queue_full(struct request_queue *q, int rw)
+static inline void blk_clear_queue_full(struct request_queue *q, int sync)
{
-if (rw == READ)
-queue_flag_clear(QUEUE_FLAG_READFULL, q);
+if (sync)
+queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
else
-queue_flag_clear(QUEUE_FLAG_WRITEFULL, q);
+queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
}

