io_uring: merge iopoll and normal completion paths
io_do_iopoll() and io_submit_flush_completions() are pretty similar:
both fill CQEs and then free a list of requests. Don't duplicate that
work; make iopoll use __io_submit_flush_completions(), which also helps
with inlining and other optimisations.

For that, we first need to find all completed iopoll requests, splice
them off the iopoll list, and then pass the resulting list down. This
adds one extra list traversal, which should be fine as the requests
will stay hot in cache.

CQ locking is already conditional; introduce ->lockless_cq and skip
locking for IOPOLL, as it's protected by ->uring_lock.

We also add a wakeup optimisation for IOPOLL to __io_cq_unlock_post(),
so it works just like io_cqring_ev_posted_iopoll().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/3840473f5e8a960de35b77292026691880f6bdbc.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
isilence authored and axboe committed Aug 24, 2023
1 parent 54927ba commit ec26c22
Showing 4 changed files with 19 additions and 26 deletions.
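
For orientation before the per-file hunks, here is a condensed view of the completion-posting helpers as they end up after this patch. It restates the io_uring.c hunks below (not verbatim kernel code), with comments spelling out the reasoning from the commit message:

        static inline void __io_cq_lock(struct io_ring_ctx *ctx)
        {
                /* lockless_cq rings (task_complete or IOPOLL) post CQEs without the lock */
                if (!ctx->lockless_cq)
                        spin_lock(&ctx->completion_lock);
        }

        static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
        {
                io_commit_cqring(ctx);
                if (!ctx->task_complete) {
                        if (!ctx->lockless_cq)
                                spin_unlock(&ctx->completion_lock);
                        /* IOPOLL rings only need to wake up if it's also SQPOLL */
                        if (!ctx->syscall_iopoll)
                                io_cqring_wake(ctx);
                }
                io_commit_cqring_flush(ctx);
        }
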
1 change: 1 addition & 0 deletions include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_ring_ctx {
         unsigned int    has_evfd: 1;
         /* all CQEs should be posted only by the submitter task */
         unsigned int    task_complete: 1;
+        unsigned int    lockless_cq: 1;
         unsigned int    syscall_iopoll: 1;
         unsigned int    poll_activated: 1;
         unsigned int    drain_disabled: 1;

18 changes: 12 additions & 6 deletions io_uring/io_uring.c
@@ -147,7 +147,6 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                          bool cancel_all);
 
 static void io_queue_sqe(struct io_kiocb *req);
-static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
 
 struct kmem_cache *req_cachep;
 
@@ -616,7 +615,7 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 
 static inline void __io_cq_lock(struct io_ring_ctx *ctx)
 {
-        if (!ctx->task_complete)
+        if (!ctx->lockless_cq)
                 spin_lock(&ctx->completion_lock);
 }
 
@@ -630,8 +629,11 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
         io_commit_cqring(ctx);
         if (!ctx->task_complete) {
-                spin_unlock(&ctx->completion_lock);
-                io_cqring_wake(ctx);
+                if (!ctx->lockless_cq)
+                        spin_unlock(&ctx->completion_lock);
+                /* IOPOLL rings only need to wake up if it's also SQPOLL */
+                if (!ctx->syscall_iopoll)
+                        io_cqring_wake(ctx);
         }
         io_commit_cqring_flush(ctx);
 }

@@ -1485,7 +1487,8 @@ void io_queue_next(struct io_kiocb *req)
                 io_req_task_queue(nxt);
 }
 
-void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
+static void io_free_batch_list(struct io_ring_ctx *ctx,
+                               struct io_wq_work_node *node)
         __must_hold(&ctx->uring_lock)
 {
         do {

@@ -1522,7 +1525,7 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
         } while (node);
 }
 
-static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
+void __io_submit_flush_completions(struct io_ring_ctx *ctx)
         __must_hold(&ctx->uring_lock)
 {
         struct io_submit_state *state = &ctx->submit_state;

@@ -3836,6 +3839,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
             !(ctx->flags & IORING_SETUP_SQPOLL))
                 ctx->task_complete = true;
 
+        if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
+                ctx->lockless_cq = true;
+
         /*
          * lazy poll_wq activation relies on ->task_complete for synchronisation
          * purposes, see io_activate_pollwq()

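Restating the io_uring_create() hunk above with the reasoning spelled out in comments (the condition is identical to the diff; only the comment is added here):

        /*
         * CQEs can be posted without ->completion_lock when either:
         *  - ->task_complete is set: only the submitter task ever posts CQEs, or
         *  - IORING_SETUP_IOPOLL is set: completions are reaped under ->uring_lock.
         */
        if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
                ctx->lockless_cq = true;
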
2 changes: 1 addition & 1 deletion io_uring/io_uring.h
@@ -72,7 +72,7 @@ int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
 int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
-void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
+void __io_submit_flush_completions(struct io_ring_ctx *ctx);
 int io_req_prep_async(struct io_kiocb *req);
 
 struct io_wq_work *io_wq_free_work(struct io_wq_work *work);

24 changes: 5 additions & 19 deletions io_uring/rw.c
@@ -983,13 +983,6 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
         return ret;
 }
 
-static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
-{
-        if (ctx->flags & IORING_SETUP_SQPOLL)
-                io_cqring_wake(ctx);
-        io_commit_cqring_flush(ctx);
-}
-
 void io_rw_fail(struct io_kiocb *req)
 {
         int res;

@@ -1060,24 +1053,17 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
                 if (!smp_load_acquire(&req->iopoll_completed))
                         break;
                 nr_events++;
-                if (unlikely(req->flags & REQ_F_CQE_SKIP))
-                        continue;
-
                 req->cqe.flags = io_put_kbuf(req, 0);
-                if (unlikely(!io_fill_cqe_req(ctx, req))) {
-                        spin_lock(&ctx->completion_lock);
-                        io_req_cqe_overflow(req);
-                        spin_unlock(&ctx->completion_lock);
-                }
         }
 
         if (unlikely(!nr_events))
                 return 0;
 
-        io_commit_cqring(ctx);
-        io_cqring_ev_posted_iopoll(ctx);
         pos = start ? start->next : ctx->iopoll_list.first;
         wq_list_cut(&ctx->iopoll_list, prev, start);
-        io_free_batch_list(ctx, pos);
+
+        if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
+                return 0;
+        ctx->submit_state.compl_reqs.first = pos;
+        __io_submit_flush_completions(ctx);
         return nr_events;
 }
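
Net effect of this hunk: io_do_iopoll() no longer posts CQEs or frees requests itself. It only marks completions, cuts the completed span off ->iopoll_list, seeds ->submit_state.compl_reqs with it, and lets __io_submit_flush_completions() do the CQE filling (including the REQ_F_CQE_SKIP and overflow handling previously open-coded here) and the batched freeing via io_free_batch_list().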
