Skip to content

Commit

Permalink
Merge tag 'pull-fixes.pathwalk-rcu-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Browse files Browse the repository at this point in the history

Pull RCU pathwalk fixes from Al Viro:
 "We still have some races in filesystem methods when exposed to RCU
  pathwalk. This series is a result of code audit (the second round of
  it) and it should deal with most of that stuff.

  Still pending: ntfs3 ->d_hash()/->d_compare() and ceph_d_revalidate().
  Up to maintainers (a note for NTFS folks - when documentation says
  that a method may not block, it *does* imply that blocking allocations
  are to be avoided. Really)"

[ More explanations for people who aren't familiar with the vagaries of
  RCU path walking: most of it is hidden from filesystems, but if a
  filesystem actively participates in the low-level path walking it
  needs to make sure the fields involved in that walk are RCU-safe.

  That "actively participate in low-level path walking" includes things
  like having its own ->d_hash()/->d_compare() routines, or having its
  own directory permission function that doesn't just use the common
  helpers.  Having a ->d_revalidate() function will also have this issue.

  Note that instead of making everything RCU safe you can also choose to
  abort the RCU pathwalk if your operation cannot be done safely under
  RCU, but that obviously comes with a performance penalty. One common
  pattern is to allow the simple cases under RCU, and abort only if you
  need to do something more complicated.

  So not everything needs to be RCU-safe, and things like the inode etc.
  that the VFS itself maintains obviously already are. But these fixes
  tend to be about properly RCU-delaying things like ->s_fs_info that
  are maintained by the filesystem and that could potentially be
  released too early.   - Linus ]

* tag 'pull-fixes.pathwalk-rcu-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ext4_get_link(): fix breakage in RCU mode
  cifs_get_link(): bail out in unsafe case
  fuse: fix UAF in rcu pathwalks
  procfs: make freeing proc_fs_info rcu-delayed
  procfs: move dropping pde and pid from ->evict_inode() to ->free_inode()
  nfs: fix UAF on pathwalk running into umount
  nfs: make nfs_set_verifier() safe for use in RCU pathwalk
  afs: fix __afs_break_callback() / afs_drop_open_mmap() race
  hfsplus: switch to rcu-delayed unloading of nls and freeing ->s_fs_info
  exfat: move freeing sbi, upcase table and dropping nls into rcu-delayed helper
  affs: free affs_sb_info with kfree_rcu()
  rcu pathwalk: prevent bogus hard errors from may_lookup()
  fs/super.c: don't drop ->s_user_ns until we free struct super_block itself
  • Loading branch information
torvalds committed Feb 25, 2024
2 parents 9b24349 + 9fa8e28 commit 66a97c2
Show file tree
Hide file tree
Showing 22 changed files with 88 additions and 63 deletions.
1 change: 1 addition & 0 deletions fs/affs/affs.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct affs_sb_info {
int work_queued; /* non-zero delayed work is queued */
struct delayed_work sb_work; /* superblock flush delayed work */
spinlock_t work_lock; /* protects sb_work and work_queued */
struct rcu_head rcu;
};

#define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
Expand Down
2 changes: 1 addition & 1 deletion fs/affs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ static void affs_kill_sb(struct super_block *sb)
affs_brelse(sbi->s_root_bh);
kfree(sbi->s_prefix);
mutex_destroy(&sbi->s_bmlock);
kfree(sbi);
kfree_rcu(sbi, rcu);
}
}

Expand Down
8 changes: 6 additions & 2 deletions fs/afs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,13 +417,17 @@ static void afs_add_open_mmap(struct afs_vnode *vnode)

static void afs_drop_open_mmap(struct afs_vnode *vnode)
{
if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1))
return;

down_write(&vnode->volume->open_mmaps_lock);

if (atomic_read(&vnode->cb_nr_mmap) == 0)
read_seqlock_excl(&vnode->cb_lock);
// the only place where ->cb_nr_mmap may hit 0
// see __afs_break_callback() for the other side...
if (atomic_dec_and_test(&vnode->cb_nr_mmap))
list_del_init(&vnode->cb_mmap_link);
read_sequnlock_excl(&vnode->cb_lock);

up_write(&vnode->volume->open_mmaps_lock);
flush_work(&vnode->cb_work);
Expand Down
1 change: 1 addition & 0 deletions fs/exfat/exfat_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ struct exfat_sb_info {

spinlock_t inode_hash_lock;
struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
struct rcu_head rcu;
};

#define EXFAT_CACHE_VALID 0
Expand Down
14 changes: 4 additions & 10 deletions fs/exfat/nls.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,6 @@ static int exfat_load_upcase_table(struct super_block *sb,
unsigned int sect_size = sb->s_blocksize;
unsigned int i, index = 0;
u32 chksum = 0;
int ret;
unsigned char skip = false;
unsigned short *upcase_table;

Expand All @@ -673,8 +672,7 @@ static int exfat_load_upcase_table(struct super_block *sb,
if (!bh) {
exfat_err(sb, "failed to read sector(0x%llx)",
(unsigned long long)sector);
ret = -EIO;
goto free_table;
return -EIO;
}
sector++;
for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
Expand All @@ -701,15 +699,12 @@ static int exfat_load_upcase_table(struct super_block *sb,

exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)",
index, chksum, utbl_checksum);
ret = -EINVAL;
free_table:
exfat_free_upcase_table(sbi);
return ret;
return -EINVAL;
}

static int exfat_load_default_upcase_table(struct super_block *sb)
{
int i, ret = -EIO;
int i;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
unsigned char skip = false;
unsigned short uni = 0, *upcase_table;
Expand Down Expand Up @@ -740,8 +735,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb)
return 0;

/* FATAL error: default upcase table has error */
exfat_free_upcase_table(sbi);
return ret;
return -EIO;
}

int exfat_create_upcase_table(struct super_block *sb)
Expand Down
20 changes: 11 additions & 9 deletions fs/exfat/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ static void exfat_put_super(struct super_block *sb)
exfat_free_bitmap(sbi);
brelse(sbi->boot_bh);
mutex_unlock(&sbi->s_lock);

unload_nls(sbi->nls_io);
exfat_free_upcase_table(sbi);
}

static int exfat_sync_fs(struct super_block *sb, int wait)
Expand Down Expand Up @@ -600,7 +597,7 @@ static int __exfat_fill_super(struct super_block *sb)
ret = exfat_load_bitmap(sb);
if (ret) {
exfat_err(sb, "failed to load alloc-bitmap");
goto free_upcase_table;
goto free_bh;
}

ret = exfat_count_used_clusters(sb, &sbi->used_clusters);
Expand All @@ -613,8 +610,6 @@ static int __exfat_fill_super(struct super_block *sb)

free_alloc_bitmap:
exfat_free_bitmap(sbi);
free_upcase_table:
exfat_free_upcase_table(sbi);
free_bh:
brelse(sbi->boot_bh);
return ret;
Expand Down Expand Up @@ -701,12 +696,10 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_root = NULL;

free_table:
exfat_free_upcase_table(sbi);
exfat_free_bitmap(sbi);
brelse(sbi->boot_bh);

check_nls_io:
unload_nls(sbi->nls_io);
return err;
}

Expand Down Expand Up @@ -771,13 +764,22 @@ static int exfat_init_fs_context(struct fs_context *fc)
return 0;
}

/*
 * RCU callback for exfat: tears down the per-superblock info once it has
 * been queued with call_rcu() (see exfat_kill_sb()).
 *
 * @p: the rcu_head embedded in struct exfat_sb_info as ->rcu.
 *
 * Drops the I/O NLS table, frees the upcase table and finally frees the
 * exfat_sb_info itself; all three are deferred together so that nothing
 * still walking under RCU sees them vanish early.
 */
static void delayed_free(struct rcu_head *p)
{
/* recover the containing sb_info from its embedded rcu_head */
struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);

unload_nls(sbi->nls_io);
exfat_free_upcase_table(sbi);
/* must come last: sbi is not usable after this call */
exfat_free_sbi(sbi);
}

static void exfat_kill_sb(struct super_block *sb)
{
struct exfat_sb_info *sbi = sb->s_fs_info;

kill_block_super(sb);
if (sbi)
exfat_free_sbi(sbi);
call_rcu(&sbi->rcu, delayed_free);
}

static struct file_system_type exfat_fs_type = {
Expand Down
8 changes: 5 additions & 3 deletions fs/ext4/symlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,

if (!dentry) {
bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
if (IS_ERR(bh))
return ERR_CAST(bh);
if (!bh || !ext4_buffer_uptodate(bh))
if (IS_ERR(bh) || !bh)
return ERR_PTR(-ECHILD);
if (!ext4_buffer_uptodate(bh)) {
brelse(bh);
return ERR_PTR(-ECHILD);
}
} else {
bh = ext4_bread(NULL, inode, 0, 0);
if (IS_ERR(bh))
Expand Down
3 changes: 1 addition & 2 deletions fs/fuse/cuse.c
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,7 @@ static int cuse_send_init(struct cuse_conn *cc)

static void cuse_fc_release(struct fuse_conn *fc)
{
struct cuse_conn *cc = fc_to_cc(fc);
kfree_rcu(cc, fc.rcu);
kfree(fc_to_cc(fc));
}

/**
Expand Down
1 change: 1 addition & 0 deletions fs/fuse/fuse_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,7 @@ struct fuse_mount {

/* Entry on fc->mounts */
struct list_head fc_entry;
struct rcu_head rcu;
};

static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
Expand Down
15 changes: 11 additions & 4 deletions fs/fuse/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,14 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

/*
 * RCU callback for a fuse connection, queued by fuse_conn_put() via
 * call_rcu() when the last reference is dropped.
 *
 * @p: the rcu_head embedded in struct fuse_conn as ->rcu.
 *
 * Drops the reference on fc->user_ns and then hands the connection to its
 * ->release() hook. The hooks visible in this change (fuse_free_conn(),
 * cuse_fc_release()) kfree() the containing object, so fc must not be
 * touched after the call.
 */
static void delayed_release(struct rcu_head *p)
{
/* recover the containing fuse_conn from its embedded rcu_head */
struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);

put_user_ns(fc->user_ns);
fc->release(fc);
}

void fuse_conn_put(struct fuse_conn *fc)
{
if (refcount_dec_and_test(&fc->count)) {
Expand All @@ -941,13 +949,12 @@ void fuse_conn_put(struct fuse_conn *fc)
if (fiq->ops->release)
fiq->ops->release(fiq);
put_pid_ns(fc->pid_ns);
put_user_ns(fc->user_ns);
bucket = rcu_dereference_protected(fc->curr_bucket, 1);
if (bucket) {
WARN_ON(atomic_read(&bucket->count) != 1);
kfree(bucket);
}
fc->release(fc);
call_rcu(&fc->rcu, delayed_release);
}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
Expand Down Expand Up @@ -1366,7 +1373,7 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
void fuse_free_conn(struct fuse_conn *fc)
{
WARN_ON(!list_empty(&fc->devices));
kfree_rcu(fc, rcu);
kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

Expand Down Expand Up @@ -1902,7 +1909,7 @@ static void fuse_sb_destroy(struct super_block *sb)
void fuse_mount_destroy(struct fuse_mount *fm)
{
fuse_conn_put(fm->fc);
kfree(fm);
kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);

Expand Down
1 change: 1 addition & 0 deletions fs/hfsplus/hfsplus_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ struct hfsplus_sb_info {
int work_queued; /* non-zero delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */
spinlock_t work_lock; /* protects sync_work and work_queued */
struct rcu_head rcu;
};

#define HFSPLUS_SB_WRITEBACKUP 0
Expand Down
12 changes: 9 additions & 3 deletions fs/hfsplus/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,14 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb)
spin_unlock(&sbi->work_lock);
}

/*
 * RCU callback for hfsplus: queued by hfsplus_put_super() via call_rcu().
 *
 * @p: the rcu_head embedded in struct hfsplus_sb_info as ->rcu.
 *
 * Unloads the NLS table and frees the hfsplus_sb_info; both are
 * RCU-delayed rather than done directly at put_super time.
 */
static void delayed_free(struct rcu_head *p)
{
/* recover the containing sb_info from its embedded rcu_head */
struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu);

unload_nls(sbi->nls);
kfree(sbi);
}

static void hfsplus_put_super(struct super_block *sb)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
Expand All @@ -302,9 +310,7 @@ static void hfsplus_put_super(struct super_block *sb)
hfs_btree_close(sbi->ext_tree);
kfree(sbi->s_vhdr_buf);
kfree(sbi->s_backup_vhdr_buf);
unload_nls(sbi->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
call_rcu(&sbi->rcu, delayed_free);
}

static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
Expand Down
6 changes: 5 additions & 1 deletion fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1717,7 +1717,11 @@ static inline int may_lookup(struct mnt_idmap *idmap,
{
if (nd->flags & LOOKUP_RCU) {
int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD || !try_to_unlazy(nd))
if (!err) // success, keep going
return 0;
if (!try_to_unlazy(nd))
return -ECHILD; // redo it all non-lazy
if (err != -ECHILD) // hard error
return err;
}
return inode_permission(idmap, nd->inode, MAY_EXEC);
Expand Down
13 changes: 10 additions & 3 deletions fs/nfs/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp)
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
kfree(clp);
kfree_rcu(clp, rcu);
}
EXPORT_SYMBOL_GPL(nfs_free_client);

Expand Down Expand Up @@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void)
}
EXPORT_SYMBOL_GPL(nfs_alloc_server);

/*
 * RCU callback for an nfs_server, queued by nfs_free_server() via
 * call_rcu().
 *
 * @p: the rcu_head embedded in struct nfs_server as ->rcu.
 *
 * Frees the server's I/O statistics and then the nfs_server itself; the
 * rest of the teardown stays in nfs_free_server().
 */
static void delayed_free(struct rcu_head *p)
{
/* recover the containing nfs_server from its embedded rcu_head */
struct nfs_server *server = container_of(p, struct nfs_server, rcu);

nfs_free_iostats(server->io_stats);
kfree(server);
}

/*
* Free up a server record
*/
Expand All @@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server)

ida_destroy(&server->lockowner_id);
ida_destroy(&server->openowner_id);
nfs_free_iostats(server->io_stats);
put_cred(server->cred);
kfree(server);
nfs_release_automount_timer();
call_rcu(&server->rcu, delayed_free);
}
EXPORT_SYMBOL_GPL(nfs_free_server);

Expand Down
4 changes: 2 additions & 2 deletions fs/nfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1431,9 +1431,9 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry)
static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
{
struct inode *inode = d_inode(dentry);
struct inode *dir = d_inode(dentry->d_parent);
struct inode *dir = d_inode_rcu(dentry->d_parent);

if (!nfs_verify_change_attribute(dir, verf))
if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
nfs_set_verifier_delegated(&verf);
Expand Down
2 changes: 0 additions & 2 deletions fs/proc/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -1878,8 +1878,6 @@ void proc_pid_evict_inode(struct proc_inode *ei)
hlist_del_init_rcu(&ei->sibling_inodes);
spin_unlock(&pid->lock);
}

put_pid(pid);
}

struct inode *proc_pid_make_inode(struct super_block *sb,
Expand Down
19 changes: 8 additions & 11 deletions fs/proc/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,15 @@

static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
struct proc_inode *ei = PROC_I(inode);

truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);

/* Stop tracking associated processes */
if (ei->pid) {
if (ei->pid)
proc_pid_evict_inode(ei);
ei->pid = NULL;
}

/* Let go of any associated proc directory entry */
de = ei->pde;
if (de) {
pde_put(de);
ei->pde = NULL;
}

head = ei->sysctl;
if (head) {
Expand Down Expand Up @@ -80,6 +70,13 @@ static struct inode *proc_alloc_inode(struct super_block *sb)

/*
 * Final free of a procfs inode: drops the references the proc_inode holds
 * on its struct pid and proc_dir_entry (when set), then returns the
 * proc_inode to proc_inode_cachep.
 *
 * The pid/pde references are dropped here rather than in
 * proc_evict_inode(), so ei->pid and ei->pde stay valid until the inode
 * itself is freed.
 */
static void proc_free_inode(struct inode *inode)
{
struct proc_inode *ei = PROC_I(inode);

/* Drop the reference on the associated struct pid, if any */
if (ei->pid)
put_pid(ei->pid);
/* Let go of any associated proc directory entry */
if (ei->pde)
pde_put(ei->pde);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}

Expand Down
2 changes: 1 addition & 1 deletion fs/proc/root.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb)

kill_anon_super(sb);
put_pid_ns(fs_info->pid_ns);
kfree(fs_info);
kfree_rcu(fs_info, rcu);
}

static struct file_system_type proc_fs_type = {
Expand Down
Loading

0 comments on commit 66a97c2

Please sign in to comment.