 #include <linux/hugetlb.h>
 #include <linux/compat.h>
 #include <linux/io_uring.h>
+#include <linux/io_uring/cmd.h>
 
 #include <uapi/linux/io_uring.h>
 
@@ -101,17 +102,23 @@ static int io_buffer_validate(struct iovec *iov)
 	return 0;
 }
 
-static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
+static void io_release_ubuf(void *priv)
 {
-	struct io_mapped_ubuf *imu = node->buf;
+	struct io_mapped_ubuf *imu = priv;
 	unsigned int i;
 
-	if (!refcount_dec_and_test(&imu->refs))
-		return;
 	for (i = 0; i < imu->nr_bvecs; i++)
 		unpin_user_page(imu->bvec[i].bv_page);
+}
+
+static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+{
+	if (!refcount_dec_and_test(&imu->refs))
+		return;
+
 	if (imu->acct_pages)
 		io_unaccount_mem(ctx, imu->acct_pages);
+	imu->release(imu->priv);
 	kvfree(imu);
 }
 
@@ -451,7 +458,7 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 		break;
 	case IORING_RSRC_BUFFER:
 		if (node->buf)
-			io_buffer_unmap(ctx, node);
+			io_buffer_unmap(ctx, node->buf);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -761,6 +768,10 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	imu->len = iov->iov_len;
 	imu->nr_bvecs = nr_pages;
 	imu->folio_shift = PAGE_SHIFT;
+	imu->release = io_release_ubuf;
+	imu->priv = imu;
+	imu->is_kbuf = false;
+	imu->dir = IO_IMU_DEST | IO_IMU_SOURCE;
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
 	refcount_set(&imu->refs, 1);
@@ -857,6 +868,95 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
+int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
+			    void (*release)(void *), unsigned int index,
+			    unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
+	struct io_rsrc_data *data = &ctx->buf_table;
+	struct req_iterator rq_iter;
+	struct io_mapped_ubuf *imu;
+	struct io_rsrc_node *node;
+	struct bio_vec bv, *bvec;
+	u16 nr_bvecs;
+	int ret = 0;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	if (index >= data->nr) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	index = array_index_nospec(index, data->nr);
+
+	if (data->nodes[index]) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+	if (!node) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	nr_bvecs = blk_rq_nr_phys_segments(rq);
+	imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+	if (!imu) {
+		kfree(node);
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	imu->ubuf = 0;
+	imu->len = blk_rq_bytes(rq);
+	imu->acct_pages = 0;
+	imu->folio_shift = PAGE_SHIFT;
+	imu->nr_bvecs = nr_bvecs;
+	refcount_set(&imu->refs, 1);
+	imu->release = release;
+	imu->priv = rq;
+	imu->is_kbuf = true;
+
+	if (op_is_write(req_op(rq)))
+		imu->dir = IO_IMU_SOURCE;
+	else
+		imu->dir = IO_IMU_DEST;
+
+	bvec = imu->bvec;
+	rq_for_each_bvec(bv, rq, rq_iter)
+		*bvec++ = bv;
+
+	node->buf = imu;
+	data->nodes[index] = node;
+unlock:
+	io_ring_submit_unlock(ctx, issue_flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(io_buffer_register_bvec);
+
+void io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
+			       unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
+	struct io_rsrc_data *data = &ctx->buf_table;
+	struct io_rsrc_node *node;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	if (index >= data->nr)
+		goto unlock;
+	index = array_index_nospec(index, data->nr);
+
+	node = data->nodes[index];
+	if (!node || !node->buf->is_kbuf)
+		goto unlock;
+
+	io_put_rsrc_node(ctx, node);
+	data->nodes[index] = NULL;
+unlock:
+	io_ring_submit_unlock(ctx, issue_flags);
+}
+EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec);
+
 static int io_import_fixed(int ddir, struct iov_iter *iter,
 			   struct io_mapped_ubuf *imu,
 			   u64 buf_addr, size_t len)
@@ -871,6 +971,8 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
 	/* not inside the mapped region */
 	if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
 		return -EFAULT;
+	if (!(imu->dir & (1 << ddir)))
+		return -EFAULT;
 
 	/*
 	 * Might not be a start of buffer, set size appropriately
@@ -883,8 +985,8 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
 		/*
 		 * Don't use iov_iter_advance() here, as it's really slow for
 		 * using the latter parts of a big fixed buffer - it iterates
-		 * over each segment manually. We can cheat a bit here, because
-		 * we know that:
+		 * over each segment manually. We can cheat a bit here for user
+		 * registered nodes, because we know that:
 		 *
 		 * 1) it's a BVEC iter, we set it up
 		 * 2) all bvecs are the same in size, except potentially the
@@ -898,8 +1000,15 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
 		 */
 		const struct bio_vec *bvec = imu->bvec;
 
+		/*
+		 * Kernel buffer bvecs, on the other hand, don't necessarily
+		 * have the size property of user registered ones, so we have
+		 * to use the slow iter advance.
+		 */
 		if (offset < bvec->bv_len) {
 			iter->iov_offset = offset;
+		} else if (imu->is_kbuf) {
+			iov_iter_advance(iter, offset);
 		} else {
 			unsigned long seg_skip;
 
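As a usage illustration only, not part of this patch: a minimal sketch of how a driver's ->uring_cmd() handler might lend a request's pages to io_uring through the two new exports. The my_drv_* names and the fixed-buffer index 0 are assumptions made for the example; only io_buffer_register_bvec() and io_buffer_unregister_bvec() come from the diff above, and the release callback receives whatever was stored in imu->priv, i.e. the struct request registered here.

/*
 * Hypothetical driver-side usage of io_buffer_register_bvec() /
 * io_buffer_unregister_bvec(). Everything prefixed my_drv_ is made up
 * for illustration.
 */
#include <linux/blk-mq.h>
#include <linux/io_uring/cmd.h>

/* Invoked by io_uring once the final fixed-buffer reference is dropped. */
static void my_drv_release_rq(void *priv)
{
	struct request *rq = priv;

	/* The bvecs are no longer referenced; the request may complete. */
	blk_mq_end_request(rq, BLK_STS_OK);
}

/* Expose @rq's bvecs at fixed-buffer index 0 of the issuing ring. */
static int my_drv_lend_request(struct io_uring_cmd *cmd, struct request *rq,
			       unsigned int issue_flags)
{
	int ret;

	ret = io_buffer_register_bvec(cmd, rq, my_drv_release_rq, 0,
				      issue_flags);
	if (ret)
		return ret;

	/*
	 * Userspace can now target index 0 with fixed-buffer opcodes;
	 * io_import_fixed() limits the allowed direction based on
	 * req_op(rq). When the driver is done lending the buffer, drop
	 * the table slot; once all in-flight users release their imu
	 * references, my_drv_release_rq() is called.
	 */
	io_buffer_unregister_bvec(cmd, 0, issue_flags);
	return 0;
}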