1111
1212#include <linux/fs.h>
1313#include <linux/io_uring/cmd.h>
14+ #include <linux/page-flags.h>
1415
1516static bool __read_mostly enable_uring ;
1617module_param (enable_uring , bool , 0644 );
1718MODULE_PARM_DESC (enable_uring ,
1819 "Enable userspace communication through io-uring" );
1920
2021#define FUSE_URING_IOV_SEGS 2 /* header and payload */
22+ #define FUSE_RING_HEADER_PG 0
23+ #define FUSE_RING_PAYLOAD_PG 1
2124
2225/* redfs only to allow patch backports */
2326#define IO_URING_F_TASK_DEAD (1 << 13)
@@ -155,6 +158,21 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
155158 }
156159}
157160
161+ /*
162+ * Copy from memmap.c, should be exported
163+ */
164+ static void io_pages_free (struct page * * * pages , int npages )
165+ {
166+ struct page * * page_array = * pages ;
167+
168+ if (!page_array )
169+ return ;
170+
171+ unpin_user_pages (page_array , npages );
172+ kvfree (page_array );
173+ * pages = NULL ;
174+ }
175+
158176void fuse_uring_destruct (struct fuse_conn * fc )
159177{
160178 struct fuse_ring * ring = fc -> ring ;
@@ -178,6 +196,9 @@ void fuse_uring_destruct(struct fuse_conn *fc)
178196 list_for_each_entry_safe (ent , next , & queue -> ent_released ,
179197 list ) {
180198 list_del_init (& ent -> list );
199+ io_pages_free (& ent -> header_pages , ent -> nr_header_pages );
200+ io_pages_free (& ent -> payload_pages ,
201+ ent -> nr_payload_pages );
181202 kfree (ent );
182203 }
183204
@@ -569,13 +590,67 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
569590 fuse_copy_init (& cs , 0 , & iter );
570591 cs .is_uring = 1 ;
571592 cs .req = req ;
593+ if (ent -> payload_pages )
594+ cs .ring .pages = ent -> payload_pages ;
572595
573596 return fuse_copy_out_args (& cs , args , ring_in_out .payload_sz );
574597}
575598
576- /*
577- * Copy data from the req to the ring buffer
578- */
599+ /*
600+ * Copy data from the req to the ring buffer
601+ * In order to be able to write into the ring buffer from the application,
602+ * i.e. to avoid io_uring_cmd_complete_in_task(), the header needs to be
603+ * pinned as well.
604+ */
605+ static int fuse_uring_args_to_ring_pages (struct fuse_ring * ring ,
606+ struct fuse_req * req ,
607+ struct fuse_ring_ent * ent ,
608+ struct fuse_uring_req_header * headers )
609+ {
610+ struct fuse_copy_state cs ;
611+ struct fuse_args * args = req -> args ;
612+ struct fuse_in_arg * in_args = args -> in_args ;
613+ int num_args = args -> in_numargs ;
614+ int err ;
615+
616+ struct fuse_uring_ent_in_out ent_in_out = {
617+ .flags = 0 ,
618+ .commit_id = req -> in .h .unique ,
619+ };
620+
621+ fuse_copy_init (& cs , 1 , NULL );
622+ cs .is_uring = 1 ;
623+ cs .req = req ;
624+ cs .ring .pages = ent -> payload_pages ;
625+
626+ if (num_args > 0 ) {
627+ /*
628+ * Expectation is that the first argument is the per op header.
629+ * Some op code have that as zero size.
630+ */
631+ if (args -> in_args [0 ].size > 0 ) {
632+ memcpy (& headers -> op_in , in_args -> value , in_args -> size );
633+ }
634+ in_args ++ ;
635+ num_args -- ;
636+ }
637+
638+ /* copy the payload */
639+ err = fuse_copy_args (& cs , num_args , args -> in_pages ,
640+ (struct fuse_arg * )in_args , 0 );
641+ if (err ) {
642+ pr_info_ratelimited ("%s fuse_copy_args failed\n" , __func__ );
643+ return err ;
644+ }
645+
646+ ent_in_out .payload_sz = cs .ring .copied_sz ;
647+ memcpy (& headers -> ring_ent_in_out , & ent_in_out , sizeof (ent_in_out ));
648+ return err ;
649+ }
650+
651+ /*
652+ * Copy data from the req to the ring buffer
653+ */
579654static int fuse_uring_args_to_ring (struct fuse_ring * ring , struct fuse_req * req ,
580655 struct fuse_ring_ent * ent )
581656{
@@ -599,6 +674,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
599674 fuse_copy_init (& cs , 1 , & iter );
600675 cs .is_uring = 1 ;
601676 cs .req = req ;
677+ if (ent -> payload_pages )
678+ cs .ring .pages = ent -> payload_pages ;
602679
603680 if (num_args > 0 ) {
604681 /*
@@ -638,6 +715,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
638715 struct fuse_ring_queue * queue = ent -> queue ;
639716 struct fuse_ring * ring = queue -> ring ;
640717 int err ;
718+ struct fuse_uring_req_header * headers = NULL ;
641719
642720 err = - EIO ;
643721 if (WARN_ON (ent -> state != FRRS_FUSE_REQ )) {
@@ -650,22 +728,29 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
650728 if (WARN_ON (req -> in .h .unique == 0 ))
651729 return err ;
652730
653- /* copy the request */
654- err = fuse_uring_args_to_ring (ring , req , ent );
655- if (unlikely (err )) {
656- pr_info_ratelimited ("Copy to ring failed: %d\n" , err );
657- return err ;
658- }
659-
660731 /* copy fuse_in_header */
661- err = copy_to_user (& ent -> headers -> in_out , & req -> in .h ,
662- sizeof (req -> in .h ));
663- if (err ) {
664- err = - EFAULT ;
665- return err ;
732+ if (ent -> header_pages ) {
733+ headers = kmap_local_page (
734+ ent -> header_pages [FUSE_RING_HEADER_PG ]);
735+
736+ memcpy (& headers -> in_out , & req -> in .h , sizeof (req -> in .h ));
737+
738+ err = fuse_uring_args_to_ring_pages (ring , req , ent , headers );
739+ kunmap_local (headers );
740+ } else {
741+ /* copy the request */
742+ err = fuse_uring_args_to_ring (ring , req , ent );
743+ if (unlikely (err )) {
744+ pr_info_ratelimited ("Copy to ring failed: %d\n" , err );
745+ return err ;
746+ }
747+ err = copy_to_user (& ent -> headers -> in_out , & req -> in .h ,
748+ sizeof (req -> in .h ));
749+ if (err )
750+ err = - EFAULT ;
666751 }
667752
668- return 0 ;
753+ return err ;
669754}
670755
671756static int fuse_uring_prepare_send (struct fuse_ring_ent * ent ,
@@ -979,6 +1064,45 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
9791064 }
9801065}
9811066
1067+ /*
1068+ * Copy from memmap.c, should be exported there
1069+ */
1070+ static struct page * * io_pin_pages (unsigned long uaddr , unsigned long len ,
1071+ int * npages )
1072+ {
1073+ unsigned long start , end , nr_pages ;
1074+ struct page * * pages ;
1075+ int ret ;
1076+
1077+ end = (uaddr + len + PAGE_SIZE - 1 ) >> PAGE_SHIFT ;
1078+ start = uaddr >> PAGE_SHIFT ;
1079+ nr_pages = end - start ;
1080+ if (WARN_ON_ONCE (!nr_pages ))
1081+ return ERR_PTR (- EINVAL );
1082+
1083+ pages = kvmalloc_array (nr_pages , sizeof (struct page * ), GFP_KERNEL );
1084+ if (!pages )
1085+ return ERR_PTR (- ENOMEM );
1086+
1087+ ret = pin_user_pages_fast (uaddr , nr_pages , FOLL_WRITE | FOLL_LONGTERM ,
1088+ pages );
1089+ /* success, mapped all pages */
1090+ if (ret == nr_pages ) {
1091+ * npages = nr_pages ;
1092+ return pages ;
1093+ }
1094+
1095+ /* partial map, or didn't map anything */
1096+ if (ret >= 0 ) {
1097+ /* if we did partial map, release any pages we did get */
1098+ if (ret )
1099+ unpin_user_pages (pages , ret );
1100+ ret = - EFAULT ;
1101+ }
1102+ kvfree (pages );
1103+ return ERR_PTR (ret );
1104+ }
1105+
9821106/*
9831107 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
9841108 * the payload
@@ -1005,6 +1129,59 @@ static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
10051129 return 0 ;
10061130}
10071131
1132+ static int fuse_uring_pin_pages (struct fuse_ring_ent * ent )
1133+ {
1134+ struct fuse_ring * ring = ent -> queue -> ring ;
1135+ int err ;
1136+
1137+ /*
1138+ * This needs to do locked memory accounting, for now privileged servers
1139+ * only.
1140+ */
1141+ if (!capable (CAP_SYS_ADMIN ))
1142+ return 0 ;
1143+
1144+ /* Pin header pages */
1145+ if (!PAGE_ALIGNED (ent -> headers )) {
1146+ pr_info_ratelimited ("ent->headers is not page-aligned: %p\n" ,
1147+ ent -> headers );
1148+ return - EINVAL ;
1149+ }
1150+
1151+ ent -> header_pages = io_pin_pages ((unsigned long )ent -> headers ,
1152+ sizeof (struct fuse_uring_req_header ),
1153+ & ent -> nr_header_pages );
1154+ if (IS_ERR (ent -> header_pages )) {
1155+ err = PTR_ERR (ent -> header_pages );
1156+ pr_info_ratelimited ("Failed to pin header pages, err=%d\n" ,
1157+ err );
1158+ ent -> header_pages = NULL ;
1159+ return err ;
1160+ }
1161+
1162+ if (ent -> nr_header_pages != 1 ) {
1163+ pr_info_ratelimited ("Header pages not pinned as one page\n" );
1164+ io_pages_free (& ent -> header_pages , ent -> nr_header_pages );
1165+ ent -> header_pages = NULL ;
1166+ return - EINVAL ;
1167+ }
1168+
1169+ /* Pin payload pages */
1170+ ent -> payload_pages = io_pin_pages ((unsigned long )ent -> payload ,
1171+ ring -> max_payload_sz ,
1172+ & ent -> nr_payload_pages );
1173+ if (IS_ERR (ent -> payload_pages )) {
1174+ err = PTR_ERR (ent -> payload_pages );
1175+ pr_info_ratelimited ("Failed to pin payload pages, err=%d\n" ,
1176+ err );
1177+ io_pages_free (& ent -> header_pages , ent -> nr_header_pages );
1178+ ent -> payload_pages = NULL ;
1179+ return err ;
1180+ }
1181+
1182+ return 0 ;
1183+ }
1184+
10081185static struct fuse_ring_ent *
10091186fuse_uring_create_ring_ent (struct io_uring_cmd * cmd ,
10101187 struct fuse_ring_queue * queue )
@@ -1046,6 +1223,12 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
10461223 ent -> headers = iov [0 ].iov_base ;
10471224 ent -> payload = iov [1 ].iov_base ;
10481225
1226+ err = fuse_uring_pin_pages (ent );
1227+ if (err ) {
1228+ kfree (ent );
1229+ return ERR_PTR (err );
1230+ }
1231+
10491232 atomic_inc (& ring -> queue_refs );
10501233 return ent ;
10511234}
0 commit comments