
Commit a8edbb4

Chenliang Li authored and axboe committed
io_uring/rsrc: enable multi-hugepage buffer coalescing
Add support for checking and coalescing multi-hugepage-backed fixed buffers. The coalescing optimizes both the time and the space consumed by mapping and storing multi-hugepage fixed buffers.

A coalescable multi-hugepage buffer must fully cover its folios (except possibly the first and last one), and those folios must all have the same size. These requirements make later processing easier, and io_import_fixed needs same-sized chunks for fast iov_iter adjustment.

Signed-off-by: Chenliang Li <cliang01.li@samsung.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20240731090133.4106-3-cliang01.li@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 3d6106a commit a8edbb4
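
For context, here is a minimal userspace sketch of the case this patch targets: registering a fixed buffer backed by more than one hugepage. It is illustrative only and not part of the commit; it assumes liburing is installed, free 2MB hugepages are available, and names such as BUF_SIZE are chosen just for the example.

/*
 * Register a fixed buffer backed by two 2MB hugepages. With this patch
 * the kernel can coalesce such a buffer into one bvec entry per folio
 * instead of one per 4KB page.
 */
#include <liburing.h>
#include <sys/mman.h>
#include <stdio.h>

#define BUF_SIZE	(4UL * 1024 * 1024)	/* two 2MB hugepages */

int main(void)
{
	struct io_uring ring;
	struct iovec iov;
	void *buf;
	int ret;

	buf = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %d\n", ret);
		return 1;
	}

	iov.iov_base = buf;
	iov.iov_len = BUF_SIZE;
	/* Reaches io_sqe_buffer_register() and, with this patch,
	 * io_try_coalesce_buffer() for the hugepage-backed range. */
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret < 0)
		fprintf(stderr, "register_buffers: %d\n", ret);
	else
		io_uring_unregister_buffers(&ring);

	io_uring_queue_exit(&ring);
	munmap(buf, BUF_SIZE);
	return ret < 0;
}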

File tree

2 files changed: 110 additions & 32 deletions


io_uring/rsrc.c

Lines changed: 102 additions & 32 deletions
@@ -849,6 +849,98 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	return ret;
 }
 
+static bool io_do_coalesce_buffer(struct page ***pages, int *nr_pages,
+				struct io_imu_folio_data *data, int nr_folios)
+{
+	struct page **page_array = *pages, **new_array = NULL;
+	int nr_pages_left = *nr_pages, i, j;
+
+	/* Store head pages only*/
+	new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
+					GFP_KERNEL);
+	if (!new_array)
+		return false;
+
+	new_array[0] = compound_head(page_array[0]);
+	/*
+	 * The pages are bound to the folio, it doesn't
+	 * actually unpin them but drops all but one reference,
+	 * which is usually put down by io_buffer_unmap().
+	 * Note, needs a better helper.
+	 */
+	if (data->nr_pages_head > 1)
+		unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
+
+	j = data->nr_pages_head;
+	nr_pages_left -= data->nr_pages_head;
+	for (i = 1; i < nr_folios; i++) {
+		unsigned int nr_unpin;
+
+		new_array[i] = page_array[j];
+		nr_unpin = min_t(unsigned int, nr_pages_left - 1,
+					data->nr_pages_mid - 1);
+		if (nr_unpin)
+			unpin_user_pages(&page_array[j+1], nr_unpin);
+		j += data->nr_pages_mid;
+		nr_pages_left -= data->nr_pages_mid;
+	}
+	kvfree(page_array);
+	*pages = new_array;
+	*nr_pages = nr_folios;
+	return true;
+}
+
+static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
+				struct io_imu_folio_data *data)
+{
+	struct page **page_array = *pages;
+	struct folio *folio = page_folio(page_array[0]);
+	unsigned int count = 1, nr_folios = 1;
+	int i;
+
+	if (*nr_pages <= 1)
+		return false;
+
+	data->nr_pages_mid = folio_nr_pages(folio);
+	if (data->nr_pages_mid == 1)
+		return false;
+
+	data->folio_shift = folio_shift(folio);
+	/*
+	 * Check if pages are contiguous inside a folio, and all folios have
+	 * the same page count except for the head and tail.
+	 */
+	for (i = 1; i < *nr_pages; i++) {
+		if (page_folio(page_array[i]) == folio &&
+		    page_array[i] == page_array[i-1] + 1) {
+			count++;
+			continue;
+		}
+
+		if (nr_folios == 1) {
+			if (folio_page_idx(folio, page_array[i-1]) !=
+			    data->nr_pages_mid - 1)
+				return false;
+
+			data->nr_pages_head = count;
+		} else if (count != data->nr_pages_mid) {
+			return false;
+		}
+
+		folio = page_folio(page_array[i]);
+		if (folio_size(folio) != (1UL << data->folio_shift) ||
+		    folio_page_idx(folio, page_array[i]) != 0)
+			return false;
+
+		count = 1;
+		nr_folios++;
+	}
+	if (nr_folios == 1)
+		data->nr_pages_head = count;
+
+	return io_do_coalesce_buffer(pages, nr_pages, data, nr_folios);
+}
+
 static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 				  struct io_mapped_ubuf **pimu,
 				  struct page **last_hpage)
@@ -858,7 +950,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	unsigned long off;
 	size_t size;
 	int ret, nr_pages, i;
-	struct folio *folio = NULL;
+	struct io_imu_folio_data data;
+	bool coalesced;
 
 	*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
 	if (!iov->iov_base)
@@ -873,31 +966,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		goto done;
 	}
 
-	/* If it's a huge page, try to coalesce them into a single bvec entry */
-	if (nr_pages > 1) {
-		folio = page_folio(pages[0]);
-		for (i = 1; i < nr_pages; i++) {
-			/*
-			 * Pages must be consecutive and on the same folio for
-			 * this to work
-			 */
-			if (page_folio(pages[i]) != folio ||
-			    pages[i] != pages[i - 1] + 1) {
-				folio = NULL;
-				break;
-			}
-		}
-		if (folio) {
-			/*
-			 * The pages are bound to the folio, it doesn't
-			 * actually unpin them but drops all but one reference,
-			 * which is usually put down by io_buffer_unmap().
-			 * Note, needs a better helper.
-			 */
-			unpin_user_pages(&pages[1], nr_pages - 1);
-			nr_pages = 1;
-		}
-	}
+	/* If it's huge page(s), try to coalesce them into fewer bvec entries */
+	coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
 
 	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
 	if (!imu)
@@ -909,25 +979,25 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		goto done;
 	}
 
-	off = (unsigned long) iov->iov_base & ~PAGE_MASK;
 	size = iov->iov_len;
 	/* store original address for later verification */
 	imu->ubuf = (unsigned long) iov->iov_base;
 	imu->ubuf_end = imu->ubuf + iov->iov_len;
 	imu->nr_bvecs = nr_pages;
 	imu->folio_shift = PAGE_SHIFT;
 	imu->folio_mask = PAGE_MASK;
+	if (coalesced) {
+		imu->folio_shift = data.folio_shift;
+		imu->folio_mask = ~((1UL << data.folio_shift) - 1);
+	}
+	off = (unsigned long) iov->iov_base & ~imu->folio_mask;
 	*pimu = imu;
 	ret = 0;
 
-	if (folio) {
-		bvec_set_page(&imu->bvec[0], pages[0], size, off);
-		goto done;
-	}
 	for (i = 0; i < nr_pages; i++) {
 		size_t vec_len;
 
-		vec_len = min_t(size_t, size, PAGE_SIZE - off);
+		vec_len = min_t(size_t, size, (1UL << imu->folio_shift) - off);
 		bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
 		off = 0;
 		size -= vec_len;
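
As a rough worked example of the new layout (assumed numbers, not taken from the commit): a fixed buffer that starts 8KB into a 2MB folio and spans 4MB + 8KB is backed by three 2MB folios, so io_try_coalesce_buffer() would record folio_shift = 21, nr_pages_head = 510 (the partially covered head folio) and nr_pages_mid = 512. After coalescing, nr_pages drops from 1026 to 3 and the register loop above builds one bvec per folio: bvec[0] covers 2MB - 8KB at offset 8KB, bvec[1] covers a full 2MB, and bvec[2] covers the remaining 16KB.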

io_uring/rsrc.h

Lines changed: 8 additions & 0 deletions
@@ -52,6 +52,14 @@ struct io_mapped_ubuf {
 	struct bio_vec bvec[] __counted_by(nr_bvecs);
 };
 
+struct io_imu_folio_data {
+	/* Head folio can be partially included in the fixed buf */
+	unsigned int	nr_pages_head;
+	/* For non-head/tail folios, has to be fully included */
+	unsigned int	nr_pages_mid;
+	unsigned int	folio_shift;
+};
+
 void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
 void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *ref_node);
 struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
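
The commit message's point about same-sized chunks refers to how a fixed-buffer import can then seek into a registered buffer without walking every bvec. Below is a self-contained sketch of that arithmetic with assumed values and names; it illustrates the idea only and is not the kernel's io_import_fixed code.

/*
 * With every non-head segment covering exactly (1 << folio_shift) bytes,
 * the segment holding a given buffer offset can be computed directly.
 */
#include <stdio.h>

int main(void)
{
	unsigned folio_shift = 21;			/* 2MB folios */
	unsigned long head_len = (1UL << folio_shift) - 8192;	/* short head segment */
	unsigned long offset = 3UL * 1024 * 1024;	/* offset into the buffer */
	unsigned long seg, seg_off;

	if (offset < head_len) {
		seg = 0;
		seg_off = offset;
	} else {
		/* skip the (possibly short) head segment, then divide */
		offset -= head_len;
		seg = 1 + (offset >> folio_shift);
		seg_off = offset & ((1UL << folio_shift) - 1);
	}
	printf("segment %lu, offset within segment %lu\n", seg, seg_off);
	return 0;
}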
