Skip to content

Commit

Permalink
Merge tag 'ovl-fixes-4.19-rc4' of git://git.kernel.org/pub/scm/linux/…
Browse files Browse the repository at this point in the history
…kernel/git/mszeredi/vfs

Pull overlayfs fixes from Miklos Szeredi:
 "This fixes a regression in the recent file stacking update, reported
  and fixed by Amir Goldstein. The fix is fairly trivial, but involves
  adding a fadvise() f_op and the associated churn in the vfs. As
  discussed on -fsdevel, there are other possible uses for this method,
  than allowing proper stacking for overlays.

  And there's one other fix for a syzkaller detected oops"

* tag 'ovl-fixes-4.19-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: fix oopses in ovl_fill_super() failure paths
  ovl: add ovl_fadvise()
  vfs: implement readahead(2) using POSIX_FADV_WILLNEED
  vfs: add the fadvise() file operation
  Documentation/filesystems: update documentation of file_operations
  ovl: fix GPF in swapfile_activate of file from overlayfs over xfs
  ovl: respect FIEMAP_FLAG_SYNC flag
  • Loading branch information
torvalds committed Sep 14, 2018
2 parents 4d8d9f5 + 8c25741 commit 48751b5
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 80 deletions.
21 changes: 19 additions & 2 deletions Documentation/filesystems/vfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ struct file_operations
----------------------

This describes how the VFS can manipulate an open file. As of kernel
4.1, the following members are defined:
4.18, the following members are defined:

struct file_operations {
struct module *owner;
Expand All @@ -858,11 +858,11 @@ struct file_operations {
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*mremap)(struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
Expand All @@ -882,6 +882,10 @@ struct file_operations {
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
int (*fadvise)(struct file *, loff_t, loff_t, int);
};

Again, all methods are called without any locks being held, unless
Expand All @@ -899,6 +903,9 @@ otherwise noted.

iterate: called when the VFS needs to read the directory contents

iterate_shared: called when the VFS needs to read the directory contents
when filesystem supports concurrent dir iterators

poll: called by the VFS when a process wants to check if there is
activity on this file and (optionally) go to sleep until there
is activity. Called by the select(2) and poll(2) system calls
Expand Down Expand Up @@ -951,6 +958,16 @@ otherwise noted.

fallocate: called by the VFS to preallocate blocks or punch a hole.

copy_file_range: called by the copy_file_range(2) system call.

clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
FICLONE commands.

dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
command.

fadvise: possibly called by the fadvise64() system call.

Note that the file operations are implemented by the specific
filesystem in which the inode resides. When opening a device node
(character or block special) most filesystems will call special
Expand Down
23 changes: 20 additions & 3 deletions fs/overlayfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,6 @@ static int ovl_open(struct inode *inode, struct file *file)
if (IS_ERR(realfile))
return PTR_ERR(realfile);

/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
file->f_mapping = realfile->f_mapping;

file->private_data = realfile;

return 0;
Expand Down Expand Up @@ -334,6 +331,25 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
return ret;
}

static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
struct fd real;
const struct cred *old_cred;
int ret;

ret = ovl_real_fdget(file, &real);
if (ret)
return ret;

old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_fadvise(real.file, offset, len, advice);
revert_creds(old_cred);

fdput(real);

return ret;
}

static long ovl_real_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
Expand Down Expand Up @@ -502,6 +518,7 @@ const struct file_operations ovl_file_operations = {
.fsync = ovl_fsync,
.mmap = ovl_mmap,
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
.unlocked_ioctl = ovl_ioctl,
.compat_ioctl = ovl_compat_ioctl,

Expand Down
10 changes: 10 additions & 0 deletions fs/overlayfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,10 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -EOPNOTSUPP;

old_cred = ovl_override_creds(inode->i_sb);

if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
filemap_write_and_wait(realinode->i_mapping);

err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
revert_creds(old_cred);

Expand Down Expand Up @@ -500,6 +504,11 @@ static const struct inode_operations ovl_special_inode_operations = {
.update_time = ovl_update_time,
};

const struct address_space_operations ovl_aops = {
/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
.direct_IO = noop_direct_IO,
};

/*
* It is possible to stack overlayfs instance on top of another
* overlayfs instance as lower layer. We need to annonate the
Expand Down Expand Up @@ -571,6 +580,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
inode->i_fop = &ovl_file_operations;
inode->i_mapping->a_ops = &ovl_aops;
break;

case S_IFDIR:
Expand Down
26 changes: 14 additions & 12 deletions fs/overlayfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -982,16 +982,6 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
if (err)
goto out;

err = -EBUSY;
if (ovl_inuse_trylock(upperpath->dentry)) {
ofs->upperdir_locked = true;
} else if (ofs->config.index) {
pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
goto out;
} else {
pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}

upper_mnt = clone_private_mount(upperpath);
err = PTR_ERR(upper_mnt);
if (IS_ERR(upper_mnt)) {
Expand All @@ -1002,6 +992,17 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
/* Don't inherit atime flags */
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
ofs->upper_mnt = upper_mnt;

err = -EBUSY;
if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
ofs->upperdir_locked = true;
} else if (ofs->config.index) {
pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
goto out;
} else {
pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}

err = 0;
out:
return err;
Expand Down Expand Up @@ -1101,8 +1102,10 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
goto out;
}

ofs->workbasedir = dget(workpath.dentry);

err = -EBUSY;
if (ovl_inuse_trylock(workpath.dentry)) {
if (ovl_inuse_trylock(ofs->workbasedir)) {
ofs->workdir_locked = true;
} else if (ofs->config.index) {
pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
Expand All @@ -1111,7 +1114,6 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}

ofs->workbasedir = dget(workpath.dentry);
err = ovl_make_workdir(ofs, &workpath);
if (err)
goto out;
Expand Down
5 changes: 5 additions & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1763,6 +1763,7 @@ struct file_operations {
u64);
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
u64);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;

struct inode_operations {
Expand Down Expand Up @@ -3459,4 +3460,8 @@ static inline bool dir_relax_shared(struct inode *inode)
extern bool path_noexec(const struct path *path);
extern void inode_nohighmem(struct inode *inode);

/* mm/fadvise.c */
extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
int advice);

#endif /* _LINUX_FS_H */
3 changes: 1 addition & 2 deletions mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o
endif

obj-y := filemap.o mempool.o oom_kill.o \
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
Expand All @@ -49,7 +49,6 @@ else
obj-y += bootmem.o
endif

obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o
ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
endif
Expand Down
81 changes: 48 additions & 33 deletions mm/fadvise.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,32 +27,24 @@
* deactivate the pages and clear PG_Referenced.
*/

int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
static int generic_fadvise(struct file *file, loff_t offset, loff_t len,
int advice)
{
struct fd f = fdget(fd);
struct inode *inode;
struct address_space *mapping;
struct backing_dev_info *bdi;
loff_t endbyte; /* inclusive */
pgoff_t start_index;
pgoff_t end_index;
unsigned long nrpages;
int ret = 0;

if (!f.file)
return -EBADF;

inode = file_inode(f.file);
if (S_ISFIFO(inode->i_mode)) {
ret = -ESPIPE;
goto out;
}
inode = file_inode(file);
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;

mapping = f.file->f_mapping;
if (!mapping || len < 0) {
ret = -EINVAL;
goto out;
}
mapping = file->f_mapping;
if (!mapping || len < 0)
return -EINVAL;

bdi = inode_to_bdi(mapping->host);

Expand All @@ -67,9 +59,9 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
/* no bad return value, but ignore advice */
break;
default:
ret = -EINVAL;
return -EINVAL;
}
goto out;
return 0;
}

/*
Expand All @@ -85,21 +77,21 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)

switch (advice) {
case POSIX_FADV_NORMAL:
f.file->f_ra.ra_pages = bdi->ra_pages;
spin_lock(&f.file->f_lock);
f.file->f_mode &= ~FMODE_RANDOM;
spin_unlock(&f.file->f_lock);
file->f_ra.ra_pages = bdi->ra_pages;
spin_lock(&file->f_lock);
file->f_mode &= ~FMODE_RANDOM;
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_RANDOM:
spin_lock(&f.file->f_lock);
f.file->f_mode |= FMODE_RANDOM;
spin_unlock(&f.file->f_lock);
spin_lock(&file->f_lock);
file->f_mode |= FMODE_RANDOM;
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_SEQUENTIAL:
f.file->f_ra.ra_pages = bdi->ra_pages * 2;
spin_lock(&f.file->f_lock);
f.file->f_mode &= ~FMODE_RANDOM;
spin_unlock(&f.file->f_lock);
file->f_ra.ra_pages = bdi->ra_pages * 2;
spin_lock(&file->f_lock);
file->f_mode &= ~FMODE_RANDOM;
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_WILLNEED:
/* First and last PARTIAL page! */
Expand All @@ -115,8 +107,7 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
* Ignore return value because fadvise() shall return
* success even if filesystem can't retrieve a hint,
*/
force_page_cache_readahead(mapping, f.file, start_index,
nrpages);
force_page_cache_readahead(mapping, file, start_index, nrpages);
break;
case POSIX_FADV_NOREUSE:
break;
Expand Down Expand Up @@ -183,9 +174,32 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
}
break;
default:
ret = -EINVAL;
return -EINVAL;
}
out:
return 0;
}

int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
if (file->f_op->fadvise)
return file->f_op->fadvise(file, offset, len, advice);

return generic_fadvise(file, offset, len, advice);
}
EXPORT_SYMBOL(vfs_fadvise);

#ifdef CONFIG_ADVISE_SYSCALLS

int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
struct fd f = fdget(fd);
int ret;

if (!f.file)
return -EBADF;

ret = vfs_fadvise(f.file, offset, len, advice);

fdput(f);
return ret;
}
Expand All @@ -203,3 +217,4 @@ SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
}

#endif
#endif
Loading

0 comments on commit 48751b5

Please sign in to comment.