Skip to content

Commit dd3bb14

Browse files
author
Miklos Szeredi
committed
fuse: support splice() writing to fuse device
Allow userspace filesystem implementations to use splice() to write to the fuse device. The semantics of using splice() are:

 1) buffer the message header and data in a temporary pipe
 2) with a *single* splice() call move the message from the temporary pipe to the fuse device

The READ reply message has the most interesting use for this, since now the data from an arbitrary file descriptor (which could be a regular file, a block device or a socket) can be transferred into the fuse device without having to go through a userspace buffer. It will also allow zero copy moving of pages.

One caveat is that the protocol on the fuse device requires the length of the whole message to be written into the header. But the length of the data transferred into the temporary pipe may not be known in advance. The current library implementation works around this by using vmsplice to write the header and modifying the header after splicing the data into the pipe (error handling omitted):

	struct fuse_out_header out;

	iov.iov_base = &out;
	iov.iov_len = sizeof(struct fuse_out_header);
	vmsplice(pip[1], &iov, 1, 0);
	len = splice(input_fd, input_offset, pip[1], NULL, len, 0);
	/* retrospectively modify the header: */
	out.len = len + sizeof(struct fuse_out_header);
	splice(pip[0], NULL, fuse_chan_fd(req->ch), NULL, out.len, flags);

This works since vmsplice only saves a pointer to the data; it does not copy the data itself.

Since pipes are currently limited to 16 pages and messages need to be spliced atomically, the length of the data is limited to 15 pages (or 60kB for 4k pages).

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
1 parent b5dd328 commit dd3bb14

File tree

2 files changed

+148
-32
lines changed

2 files changed

+148
-32
lines changed

fs/fuse/dev.c

Lines changed: 144 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/pagemap.h>
1717
#include <linux/file.h>
1818
#include <linux/slab.h>
19+
#include <linux/pipe_fs_i.h>
1920

2021
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
2122

@@ -498,6 +499,9 @@ struct fuse_copy_state {
498499
int write;
499500
struct fuse_req *req;
500501
const struct iovec *iov;
502+
struct pipe_buffer *pipebufs;
503+
struct pipe_buffer *currbuf;
504+
struct pipe_inode_info *pipe;
501505
unsigned long nr_segs;
502506
unsigned long seglen;
503507
unsigned long addr;
@@ -522,7 +526,14 @@ static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
522526
/* Unmap and put previous page of userspace buffer */
523527
static void fuse_copy_finish(struct fuse_copy_state *cs)
524528
{
525-
if (cs->mapaddr) {
529+
if (cs->currbuf) {
530+
struct pipe_buffer *buf = cs->currbuf;
531+
532+
buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
533+
534+
cs->currbuf = NULL;
535+
cs->mapaddr = NULL;
536+
} else if (cs->mapaddr) {
526537
kunmap_atomic(cs->mapaddr, KM_USER0);
527538
if (cs->write) {
528539
flush_dcache_page(cs->pg);
@@ -544,23 +555,39 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
544555

545556
unlock_request(cs->fc, cs->req);
546557
fuse_copy_finish(cs);
547-
if (!cs->seglen) {
558+
if (cs->pipebufs) {
559+
struct pipe_buffer *buf = cs->pipebufs;
560+
561+
err = buf->ops->confirm(cs->pipe, buf);
562+
if (err)
563+
return err;
564+
548565
BUG_ON(!cs->nr_segs);
549-
cs->seglen = cs->iov[0].iov_len;
550-
cs->addr = (unsigned long) cs->iov[0].iov_base;
551-
cs->iov++;
566+
cs->currbuf = buf;
567+
cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
568+
cs->len = buf->len;
569+
cs->buf = cs->mapaddr + buf->offset;
570+
cs->pipebufs++;
552571
cs->nr_segs--;
572+
} else {
573+
if (!cs->seglen) {
574+
BUG_ON(!cs->nr_segs);
575+
cs->seglen = cs->iov[0].iov_len;
576+
cs->addr = (unsigned long) cs->iov[0].iov_base;
577+
cs->iov++;
578+
cs->nr_segs--;
579+
}
580+
err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
581+
if (err < 0)
582+
return err;
583+
BUG_ON(err != 1);
584+
offset = cs->addr % PAGE_SIZE;
585+
cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
586+
cs->buf = cs->mapaddr + offset;
587+
cs->len = min(PAGE_SIZE - offset, cs->seglen);
588+
cs->seglen -= cs->len;
589+
cs->addr += cs->len;
553590
}
554-
err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
555-
if (err < 0)
556-
return err;
557-
BUG_ON(err != 1);
558-
offset = cs->addr % PAGE_SIZE;
559-
cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
560-
cs->buf = cs->mapaddr + offset;
561-
cs->len = min(PAGE_SIZE - offset, cs->seglen);
562-
cs->seglen -= cs->len;
563-
cs->addr += cs->len;
564591

565592
return lock_request(cs->fc, cs->req);
566593
}
@@ -984,23 +1011,17 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
9841011
* it from the list and copy the rest of the buffer to the request.
9851012
* The request is finished by calling request_end()
9861013
*/
987-
static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
988-
unsigned long nr_segs, loff_t pos)
1014+
static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1015+
struct fuse_copy_state *cs, size_t nbytes)
9891016
{
9901017
int err;
991-
size_t nbytes = iov_length(iov, nr_segs);
9921018
struct fuse_req *req;
9931019
struct fuse_out_header oh;
994-
struct fuse_copy_state cs;
995-
struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
996-
if (!fc)
997-
return -EPERM;
9981020

999-
fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
10001021
if (nbytes < sizeof(struct fuse_out_header))
10011022
return -EINVAL;
10021023

1003-
err = fuse_copy_one(&cs, &oh, sizeof(oh));
1024+
err = fuse_copy_one(cs, &oh, sizeof(oh));
10041025
if (err)
10051026
goto err_finish;
10061027

@@ -1013,7 +1034,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
10131034
* and error contains notification code.
10141035
*/
10151036
if (!oh.unique) {
1016-
err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
1037+
err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
10171038
return err ? err : nbytes;
10181039
}
10191040

@@ -1032,7 +1053,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
10321053

10331054
if (req->aborted) {
10341055
spin_unlock(&fc->lock);
1035-
fuse_copy_finish(&cs);
1056+
fuse_copy_finish(cs);
10361057
spin_lock(&fc->lock);
10371058
request_end(fc, req);
10381059
return -ENOENT;
@@ -1049,19 +1070,19 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
10491070
queue_interrupt(fc, req);
10501071

10511072
spin_unlock(&fc->lock);
1052-
fuse_copy_finish(&cs);
1073+
fuse_copy_finish(cs);
10531074
return nbytes;
10541075
}
10551076

10561077
req->state = FUSE_REQ_WRITING;
10571078
list_move(&req->list, &fc->io);
10581079
req->out.h = oh;
10591080
req->locked = 1;
1060-
cs.req = req;
1081+
cs->req = req;
10611082
spin_unlock(&fc->lock);
10621083

1063-
err = copy_out_args(&cs, &req->out, nbytes);
1064-
fuse_copy_finish(&cs);
1084+
err = copy_out_args(cs, &req->out, nbytes);
1085+
fuse_copy_finish(cs);
10651086

10661087
spin_lock(&fc->lock);
10671088
req->locked = 0;
@@ -1077,10 +1098,101 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
10771098
err_unlock:
10781099
spin_unlock(&fc->lock);
10791100
err_finish:
1080-
fuse_copy_finish(&cs);
1101+
fuse_copy_finish(cs);
10811102
return err;
10821103
}
10831104

1105+
static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1106+
unsigned long nr_segs, loff_t pos)
1107+
{
1108+
struct fuse_copy_state cs;
1109+
struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1110+
if (!fc)
1111+
return -EPERM;
1112+
1113+
fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
1114+
1115+
return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1116+
}
1117+
1118+
static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1119+
struct file *out, loff_t *ppos,
1120+
size_t len, unsigned int flags)
1121+
{
1122+
unsigned nbuf;
1123+
unsigned idx;
1124+
struct pipe_buffer *bufs;
1125+
struct fuse_copy_state cs;
1126+
struct fuse_conn *fc;
1127+
size_t rem;
1128+
ssize_t ret;
1129+
1130+
fc = fuse_get_conn(out);
1131+
if (!fc)
1132+
return -EPERM;
1133+
1134+
bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
1135+
if (!bufs)
1136+
return -ENOMEM;
1137+
1138+
pipe_lock(pipe);
1139+
nbuf = 0;
1140+
rem = 0;
1141+
for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1142+
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1143+
1144+
ret = -EINVAL;
1145+
if (rem < len) {
1146+
pipe_unlock(pipe);
1147+
goto out;
1148+
}
1149+
1150+
rem = len;
1151+
while (rem) {
1152+
struct pipe_buffer *ibuf;
1153+
struct pipe_buffer *obuf;
1154+
1155+
BUG_ON(nbuf >= pipe->buffers);
1156+
BUG_ON(!pipe->nrbufs);
1157+
ibuf = &pipe->bufs[pipe->curbuf];
1158+
obuf = &bufs[nbuf];
1159+
1160+
if (rem >= ibuf->len) {
1161+
*obuf = *ibuf;
1162+
ibuf->ops = NULL;
1163+
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1164+
pipe->nrbufs--;
1165+
} else {
1166+
ibuf->ops->get(pipe, ibuf);
1167+
*obuf = *ibuf;
1168+
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1169+
obuf->len = rem;
1170+
ibuf->offset += obuf->len;
1171+
ibuf->len -= obuf->len;
1172+
}
1173+
nbuf++;
1174+
rem -= obuf->len;
1175+
}
1176+
pipe_unlock(pipe);
1177+
1178+
memset(&cs, 0, sizeof(struct fuse_copy_state));
1179+
cs.fc = fc;
1180+
cs.write = 0;
1181+
cs.pipebufs = bufs;
1182+
cs.nr_segs = nbuf;
1183+
cs.pipe = pipe;
1184+
1185+
ret = fuse_dev_do_write(fc, &cs, len);
1186+
1187+
for (idx = 0; idx < nbuf; idx++) {
1188+
struct pipe_buffer *buf = &bufs[idx];
1189+
buf->ops->release(pipe, buf);
1190+
}
1191+
out:
1192+
kfree(bufs);
1193+
return ret;
1194+
}
1195+
10841196
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
10851197
{
10861198
unsigned mask = POLLOUT | POLLWRNORM;
@@ -1224,6 +1336,7 @@ const struct file_operations fuse_dev_operations = {
12241336
.aio_read = fuse_dev_read,
12251337
.write = do_sync_write,
12261338
.aio_write = fuse_dev_write,
1339+
.splice_write = fuse_dev_splice_write,
12271340
.poll = fuse_dev_poll,
12281341
.release = fuse_dev_release,
12291342
.fasync = fuse_dev_fasync,

include/linux/fuse.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
* 7.13
3535
* - make max number of background requests and congestion threshold
3636
* tunables
37+
*
38+
* 7.14
39+
* - add splice support to fuse device
3740
*/
3841

3942
#ifndef _LINUX_FUSE_H
@@ -65,7 +68,7 @@
6568
#define FUSE_KERNEL_VERSION 7
6669

6770
/** Minor version number of this interface */
68-
#define FUSE_KERNEL_MINOR_VERSION 13
71+
#define FUSE_KERNEL_MINOR_VERSION 14
6972

7073
/** The node ID of the root inode */
7174
#define FUSE_ROOT_ID 1

0 commit comments

Comments
 (0)