Skip to content

Commit 82c2294

Browse files
David Jeffery authored and axboe committed
blk-mq: avoid double ->queue_rq() because of early timeout
David Jeffery found one double ->queue_rq() issue, so far it can be triggered in VM use case because of long vmexit latency or preempt latency of vCPU pthread or long page fault in vCPU pthread, then block IO req could be timed out before queuing the request to hardware but after calling blk_mq_start_request() during ->queue_rq(), then timeout handler may handle it by requeue, then double ->queue_rq() is caused, and kernel panic. So far, it is driver's responsibility to cover the race between timeout and completion, so it seems supposed to be solved in driver in theory, given driver has enough knowledge. But it is really one common problem, lots of driver could have similar issue, and could be hard to fix all affected drivers, even it isn't easy for driver to handle the race. So David suggests this patch by draining in-progress ->queue_rq() for solving this issue. Cc: Stefan Hajnoczi <stefanha@redhat.com> Cc: Keith Busch <kbusch@kernel.org> Cc: virtualization@lists.linux-foundation.org Cc: Bart Van Assche <bvanassche@acm.org> Signed-off-by: David Jeffery <djeffery@redhat.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Link: https://lore.kernel.org/r/20221026051957.358818-1-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 9546531 commit 82c2294

File tree

1 file changed

+44
-12
lines changed

1 file changed

+44
-12
lines changed

block/blk-mq.c

Lines changed: 44 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1523,7 +1523,13 @@ static void blk_mq_rq_timed_out(struct request *req)
15231523
blk_add_timer(req);
15241524
}
15251525

1526-
static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
1526+
struct blk_expired_data {
1527+
bool has_timedout_rq;
1528+
unsigned long next;
1529+
unsigned long timeout_start;
1530+
};
1531+
1532+
static bool blk_mq_req_expired(struct request *rq, struct blk_expired_data *expired)
15271533
{
15281534
unsigned long deadline;
15291535

@@ -1533,13 +1539,13 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
15331539
return false;
15341540

15351541
deadline = READ_ONCE(rq->deadline);
1536-
if (time_after_eq(jiffies, deadline))
1542+
if (time_after_eq(expired->timeout_start, deadline))
15371543
return true;
15381544

1539-
if (*next == 0)
1540-
*next = deadline;
1541-
else if (time_after(*next, deadline))
1542-
*next = deadline;
1545+
if (expired->next == 0)
1546+
expired->next = deadline;
1547+
else if (time_after(expired->next, deadline))
1548+
expired->next = deadline;
15431549
return false;
15441550
}
15451551

@@ -1555,7 +1561,7 @@ void blk_mq_put_rq_ref(struct request *rq)
15551561

15561562
static bool blk_mq_check_expired(struct request *rq, void *priv)
15571563
{
1558-
unsigned long *next = priv;
1564+
struct blk_expired_data *expired = priv;
15591565

15601566
/*
15611567
* blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
@@ -1564,7 +1570,18 @@ static bool blk_mq_check_expired(struct request *rq, void *priv)
15641570
* it was completed and reallocated as a new request after returning
15651571
* from blk_mq_check_expired().
15661572
*/
1567-
if (blk_mq_req_expired(rq, next))
1573+
if (blk_mq_req_expired(rq, expired)) {
1574+
expired->has_timedout_rq = true;
1575+
return false;
1576+
}
1577+
return true;
1578+
}
1579+
1580+
static bool blk_mq_handle_expired(struct request *rq, void *priv)
1581+
{
1582+
struct blk_expired_data *expired = priv;
1583+
1584+
if (blk_mq_req_expired(rq, expired))
15681585
blk_mq_rq_timed_out(rq);
15691586
return true;
15701587
}
@@ -1573,7 +1590,9 @@ static void blk_mq_timeout_work(struct work_struct *work)
15731590
{
15741591
struct request_queue *q =
15751592
container_of(work, struct request_queue, timeout_work);
1576-
unsigned long next = 0;
1593+
struct blk_expired_data expired = {
1594+
.timeout_start = jiffies,
1595+
};
15771596
struct blk_mq_hw_ctx *hctx;
15781597
unsigned long i;
15791598

@@ -1593,10 +1612,23 @@ static void blk_mq_timeout_work(struct work_struct *work)
15931612
if (!percpu_ref_tryget(&q->q_usage_counter))
15941613
return;
15951614

1596-
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
1615+
/* check if there is any timed-out request */
1616+
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &expired);
1617+
if (expired.has_timedout_rq) {
1618+
/*
1619+
* Before walking tags, we must ensure any submit started
1620+
* before the current time has finished. Since the submit
1621+
* uses srcu or rcu, wait for a synchronization point to
1622+
* ensure all running submits have finished
1623+
*/
1624+
blk_mq_wait_quiesce_done(q);
1625+
1626+
expired.next = 0;
1627+
blk_mq_queue_tag_busy_iter(q, blk_mq_handle_expired, &expired);
1628+
}
15971629

1598-
if (next != 0) {
1599-
mod_timer(&q->timeout, next);
1630+
if (expired.next != 0) {
1631+
mod_timer(&q->timeout, expired.next);
16001632
} else {
16011633
/*
16021634
* Request timeouts are handled as a forward rolling timer. If

0 commit comments

Comments
 (0)