Skip to content

Commit

Permalink
aio-posix: add io_uring fd monitoring implementation
Browse files Browse the repository at this point in the history
The recent Linux io_uring API has several advantages over ppoll(2) and
epoll(7).  Details are given in the source code.

Add an io_uring implementation and make it the default on Linux.
Performance is the same as with epoll(7) but later patches add
optimizations that take advantage of io_uring.

It is necessary to change how aio_set_fd_handler() deals with deleting
AioHandlers since removing monitored file descriptors is asynchronous in
io_uring.  fdmon_io_uring_remove() marks the AioHandler deleted and
aio_set_fd_handler() will let it handle deletion in that case.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Link: https://lore.kernel.org/r/20200305170806.1313245-6-stefanha@redhat.com
Message-Id: <20200305170806.1313245-6-stefanha@redhat.com>
  • Loading branch information
stefanhaRH committed Mar 9, 2020
1 parent b321051 commit 73fd282
Show file tree
Hide file tree
Showing 6 changed files with 376 additions and 5 deletions.
5 changes: 5 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -4093,6 +4093,11 @@ if test "$linux_io_uring" != "no" ; then
linux_io_uring_cflags=$($pkg_config --cflags liburing)
linux_io_uring_libs=$($pkg_config --libs liburing)
linux_io_uring=yes

# io_uring is used in libqemuutil.a where per-file -libs variables are not
# seen by programs linking the archive. It's not ideal, but just add the
# library dependency globally.
LIBS="$linux_io_uring_libs $LIBS"
else
if test "$linux_io_uring" = "yes" ; then
feature_not_found "linux io_uring" "Install liburing devel"
Expand Down
9 changes: 9 additions & 0 deletions include/block/aio.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
Expand Down Expand Up @@ -96,6 +99,8 @@ struct BHListSlice {
QSIMPLEQ_ENTRY(BHListSlice) next;
};

/* Head type for a singly-linked list (QSLIST) whose elements are AioHandler */
typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;

struct AioContext {
GSource source;

Expand Down Expand Up @@ -181,6 +186,10 @@ struct AioContext {
* locking.
*/
struct LuringState *linux_io_uring;

/* State for file descriptor monitoring using Linux io_uring */
struct io_uring fdmon_io_uring;
AioHandlerSList submit_list;
#endif

/* TimerLists for calling timers - one per clock type. Has its own
Expand Down
1 change: 1 addition & 0 deletions util/Makefile.objs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ util-obj-$(call lnot,$(CONFIG_ATOMIC64)) += atomic64.o
util-obj-$(CONFIG_POSIX) += aio-posix.o
util-obj-$(CONFIG_POSIX) += fdmon-poll.o
util-obj-$(CONFIG_EPOLL_CREATE1) += fdmon-epoll.o
util-obj-$(CONFIG_LINUX_IO_URING) += fdmon-io_uring.o
util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
Expand Down
20 changes: 16 additions & 4 deletions util/aio-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,16 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
g_source_remove_poll(&ctx->source, &node->pfd);
}

node->pfd.revents = 0;

/* If the fd monitor has already marked it deleted, leave it alone */
if (QLIST_IS_INSERTED(node, node_deleted)) {
return false;
}

/* If a read is in progress, just mark the node as deleted */
if (qemu_lockcnt_count(&ctx->list_lock)) {
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
node->pfd.revents = 0;
return false;
}
/* Otherwise, delete it for real. We can't just mark it as
Expand Down Expand Up @@ -126,9 +132,6 @@ void aio_set_fd_handler(AioContext *ctx,

QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
}
if (node) {
deleted = aio_remove_fd_handler(ctx, node);
}

/* No need to order poll_disable_cnt writes against other updates;
* the counter is only used to avoid wasting time and latency on
Expand All @@ -140,6 +143,9 @@ void aio_set_fd_handler(AioContext *ctx,
atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

ctx->fdmon_ops->update(ctx, node, new_node);
if (node) {
deleted = aio_remove_fd_handler(ctx, node);
}
qemu_lockcnt_unlock(&ctx->list_lock);
aio_notify(ctx);

Expand Down Expand Up @@ -565,11 +571,17 @@ void aio_context_setup(AioContext *ctx)
ctx->fdmon_ops = &fdmon_poll_ops;
ctx->epollfd = -1;

/* Use the fastest fd monitoring implementation if available */
if (fdmon_io_uring_setup(ctx)) {
return;
}

fdmon_epoll_setup(ctx);
}

/*
 * Tear down the fd monitoring state of @ctx.
 *
 * Both teardown hooks are called unconditionally: first the io_uring one,
 * then the epoll one.  fdmon_io_uring_destroy() is an empty inline stub when
 * CONFIG_LINUX_IO_URING is not defined.
 */
void aio_context_destroy(AioContext *ctx)
{
fdmon_io_uring_destroy(ctx);
fdmon_epoll_disable(ctx);
}

Expand Down
20 changes: 19 additions & 1 deletion util/aio-posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ struct AioHandler {
IOHandler *io_poll_begin;
IOHandler *io_poll_end;
void *opaque;
bool is_external;
QLIST_ENTRY(AioHandler) node;
QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
QLIST_ENTRY(AioHandler) node_deleted;
#ifdef CONFIG_LINUX_IO_URING
QSLIST_ENTRY(AioHandler) node_submitted;
unsigned flags; /* see fdmon-io_uring.c */
#endif
bool is_external;
};

/* Add a handler to a ready list */
Expand Down Expand Up @@ -58,4 +62,18 @@ static inline void fdmon_epoll_disable(AioContext *ctx)
}
#endif /* !CONFIG_EPOLL_CREATE1 */

#ifdef CONFIG_LINUX_IO_URING
bool fdmon_io_uring_setup(AioContext *ctx);
void fdmon_io_uring_destroy(AioContext *ctx);
#else
/*
 * Fallback for builds without CONFIG_LINUX_IO_URING: io_uring fd monitoring
 * is never available, so setup always reports failure and @ctx is untouched.
 */
static inline bool fdmon_io_uring_setup(AioContext *ctx)
{
    (void)ctx; /* intentionally unused */

    return false;
}

/*
 * Fallback for builds without CONFIG_LINUX_IO_URING: there is no io_uring
 * state to release, so this does nothing.
 */
static inline void fdmon_io_uring_destroy(AioContext *ctx)
{
    (void)ctx; /* intentionally unused */
}
#endif /* !CONFIG_LINUX_IO_URING */

#endif /* AIO_POSIX_H */
Loading

0 comments on commit 73fd282

Please sign in to comment.