Skip to content

Commit bd194b1

Browse files
dhowells authored and axboe committed
shmem: Implement splice-read
The new filemap_splice_read() has an implicit expectation via filemap_get_pages() that ->read_folio() exists if ->readahead() doesn't fully populate the pagecache of the file it is reading from[1], potentially leading to a jump to NULL if this doesn't exist. shmem, however (and by extension tmpfs, ramfs and rootfs), doesn't have ->read_folio(). Work around this by equipping shmem with its own splice-read implementation, based on filemap_splice_read(), but able to paste in zero_page when there's a page missing. Signed-off-by: David Howells <dhowells@redhat.com> cc: Daniel Golle <daniel@makrotopia.org> cc: Guenter Roeck <groeck7@gmail.com> cc: Christoph Hellwig <hch@lst.de> cc: Jens Axboe <axboe@kernel.dk> cc: Al Viro <viro@zeniv.linux.org.uk> cc: John Hubbard <jhubbard@nvidia.com> cc: David Hildenbrand <david@redhat.com> cc: Matthew Wilcox <willy@infradead.org> cc: Hugh Dickins <hughd@google.com> cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/Y+pdHFFTk1TTEBsO@makrotopia.org/ [1] Link: https://lore.kernel.org/r/20230522135018.2742245-10-dhowells@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent b85930a commit bd194b1

File tree

1 file changed

+133
-1
lines changed

1 file changed

+133
-1
lines changed

mm/shmem.c

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2731,6 +2731,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
27312731
return retval ? retval : error;
27322732
}
27332733

2734+
/*
 * ->get() for pipe buffers set up by splice_zeropage_into_pipe().
 *
 * Does no work and unconditionally reports success.  In this file the
 * only page attached to these ops is ZERO_PAGE(0); presumably no
 * per-buffer page reference is needed for it (NOTE(review): confirm).
 */
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	/* Nothing to pin: always succeed. */
	return true;
}
2739+
2740+
/*
 * ->release() for pipe buffers set up by splice_zeropage_into_pipe().
 *
 * Intentionally empty: the matching ->get() takes nothing, so there is
 * nothing to drop here.
 */
static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	/* No-op by design. */
}
2744+
2745+
/*
 * ->try_steal() for pipe buffers set up by splice_zeropage_into_pipe().
 *
 * Always refuses by returning false, so a consumer is never told that it
 * may take ownership of the page behind the buffer.
 */
static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	/* The page may not be stolen. */
	return false;
}
2750+
2751+
static const struct pipe_buf_operations zero_pipe_buf_ops = {
2752+
.release = zero_pipe_buf_release,
2753+
.try_steal = zero_pipe_buf_try_steal,
2754+
.get = zero_pipe_buf_get,
2755+
};
2756+
2757+
static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
2758+
loff_t fpos, size_t size)
2759+
{
2760+
size_t offset = fpos & ~PAGE_MASK;
2761+
2762+
size = min_t(size_t, size, PAGE_SIZE - offset);
2763+
2764+
if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
2765+
struct pipe_buffer *buf = pipe_head_buf(pipe);
2766+
2767+
*buf = (struct pipe_buffer) {
2768+
.ops = &zero_pipe_buf_ops,
2769+
.page = ZERO_PAGE(0),
2770+
.offset = offset,
2771+
.len = size,
2772+
};
2773+
pipe->head++;
2774+
}
2775+
2776+
return size;
2777+
}
2778+
2779+
/*
 * Splice data from a shmem file into a pipe.
 *
 * @in:    file to read from
 * @ppos:  in/out file position; advanced by the number of bytes spliced
 * @pipe:  destination pipe
 * @len:   maximum number of bytes to splice
 * @flags: splice flags (not used by this implementation)
 *
 * Folios present in the file are spliced with splice_folio_into_pipe();
 * where shmem_get_folio(SGP_READ) reports success without returning a
 * folio, a reference to the zero page is spliced instead.  The amount
 * spliced per step is clamped to the current i_size, for both cases.
 *
 * Returns the number of bytes spliced if any were, otherwise 0 or a
 * negative error code (-EIO for a hwpoisoned folio, or whatever
 * shmem_get_folio() returned other than -EINVAL).
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio = NULL;
	size_t total_spliced = 0, used, npages, n, part;
	loff_t isize;
	int error = 0;

	/* Work out how much data we can actually add into the pipe */
	used = pipe_occupancy(pipe->head, pipe->tail);
	npages = max_t(ssize_t, pipe->max_usage - used, 0);
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		if (*ppos >= i_size_read(inode))
			break;

		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
		if (error) {
			/*
			 * -EINVAL is not reported to the caller; presumably it
			 * means the index is beyond EOF (NOTE(review): confirm
			 * against shmem_get_folio()).
			 */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			/*
			 * NOTE(review): this may only cover the head page of a
			 * large folio - confirm whether the has_hwpoisoned flag
			 * needs checking as well.
			 */
			if (folio_test_hwpoison(folio)) {
				error = -EIO;
				break;	/* folio is put after the loop */
			}
		}

		/*
		 * i_size must be checked after we know the pages are Uptodate.
		 *
		 * Checking i_size after the lookup allows us to calculate
		 * the correct value for "part", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		part = min_t(loff_t, isize - *ppos, len);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_folio(folio);
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can
			 * now splice it into the pipe.
			 */
			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
			folio_put(folio);
			folio = NULL;
		} else {
			/*
			 * No folio here: supply zeroes, but only up to "part"
			 * so that nothing is spliced beyond i_size.
			 */
			n = splice_zeropage_into_pipe(pipe, *ppos, part);
		}

		if (!n)
			break;
		len -= n;
		total_spliced += n;
		*ppos += n;
		in->f_ra.prev_pos = *ppos;
		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
			break;

		cond_resched();
	} while (len);

	/* Drop the reference still held if we broke out with a folio. */
	if (folio)
		folio_put(folio);

	file_accessed(in);
	return total_spliced ? total_spliced : error;
}
2865+
27342866
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
27352867
{
27362868
struct address_space *mapping = file->f_mapping;
@@ -3971,7 +4103,7 @@ static const struct file_operations shmem_file_operations = {
39714103
.read_iter = shmem_file_read_iter,
39724104
.write_iter = generic_file_write_iter,
39734105
.fsync = noop_fsync,
3974-
.splice_read = generic_file_splice_read,
4106+
.splice_read = shmem_file_splice_read,
39754107
.splice_write = iter_file_splice_write,
39764108
.fallocate = shmem_fallocate,
39774109
#endif

0 commit comments

Comments
 (0)