Skip to content

Commit 4f9020f

Browse files
committed
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs fixes from Al Viro:
 "Assorted fixes that sat in -next for a while, all over the place"

* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  aio: Fix locking in aio_poll()
  exec: Fix mem leak in kernel_read_file
  copy_mount_string: Limit string length to PATH_MAX
  cgroup: saner refcounting for cgroup_root
  fix cgroup_do_mount() handling of failure exits
2 parents 736706b + d3d6a18 commit 4f9020f

File tree

7 files changed: 44 additions (+44) and 65 deletions (-65).

fs/aio.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
16661666
struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
16671667
struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
16681668
__poll_t mask = key_to_poll(key);
1669+
unsigned long flags;
16691670

16701671
req->woken = true;
16711672

@@ -1674,10 +1675,15 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
16741675
if (!(mask & req->events))
16751676
return 0;
16761677

1677-
/* try to complete the iocb inline if we can: */
1678-
if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
1678+
/*
1679+
* Try to complete the iocb inline if we can. Use
1680+
* irqsave/irqrestore because not all filesystems (e.g. fuse)
1681+
* call this function with IRQs disabled and because IRQs
1682+
* have to be disabled before ctx_lock is obtained.
1683+
*/
1684+
if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
16791685
list_del(&iocb->ki_list);
1680-
spin_unlock(&iocb->ki_ctx->ctx_lock);
1686+
spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
16811687

16821688
list_del_init(&req->wait.entry);
16831689
aio_poll_complete(iocb, mask);

fs/exec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
932932
bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
933933
if (bytes < 0) {
934934
ret = bytes;
935-
goto out;
935+
goto out_free;
936936
}
937937

938938
if (bytes == 0)

fs/kernfs/mount.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
196196
return dentry;
197197

198198
knparent = find_next_ancestor(kn, NULL);
199-
if (WARN_ON(!knparent))
199+
if (WARN_ON(!knparent)) {
200+
dput(dentry);
200201
return ERR_PTR(-EINVAL);
202+
}
201203

202204
do {
203205
struct dentry *dtmp;
@@ -206,8 +208,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
206208
if (kn == knparent)
207209
return dentry;
208210
kntmp = find_next_ancestor(kn, knparent);
209-
if (WARN_ON(!kntmp))
211+
if (WARN_ON(!kntmp)) {
212+
dput(dentry);
210213
return ERR_PTR(-EINVAL);
214+
}
211215
dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
212216
strlen(kntmp->name));
213217
dput(dentry);

fs/namespace.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2744,7 +2744,7 @@ void *copy_mount_options(const void __user * data)
27442744

27452745
char *copy_mount_string(const void __user *data)
27462746
{
2747-
return data ? strndup_user(data, PAGE_SIZE) : NULL;
2747+
return data ? strndup_user(data, PATH_MAX) : NULL;
27482748
}
27492749

27502750
/*

kernel/cgroup/cgroup-internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
198198

199199
void cgroup_free_root(struct cgroup_root *root);
200200
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
201-
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
201+
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
202202
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
203203
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
204204
struct cgroup_root *root, unsigned long magic,

kernel/cgroup/cgroup-v1.c

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
11161116
void *data, unsigned long magic,
11171117
struct cgroup_namespace *ns)
11181118
{
1119-
struct super_block *pinned_sb = NULL;
11201119
struct cgroup_sb_opts opts;
11211120
struct cgroup_root *root;
11221121
struct cgroup_subsys *ss;
11231122
struct dentry *dentry;
11241123
int i, ret;
1125-
bool new_root = false;
11261124

11271125
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
11281126

@@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
11841182
if (root->flags ^ opts.flags)
11851183
pr_warn("new mount options do not match the existing superblock, will be ignored\n");
11861184

1187-
/*
1188-
* We want to reuse @root whose lifetime is governed by its
1189-
* ->cgrp. Let's check whether @root is alive and keep it
1190-
* that way. As cgroup_kill_sb() can happen anytime, we
1191-
* want to block it by pinning the sb so that @root doesn't
1192-
* get killed before mount is complete.
1193-
*
1194-
* With the sb pinned, tryget_live can reliably indicate
1195-
* whether @root can be reused. If it's being killed,
1196-
* drain it. We can use wait_queue for the wait but this
1197-
* path is super cold. Let's just sleep a bit and retry.
1198-
*/
1199-
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
1200-
if (IS_ERR(pinned_sb) ||
1201-
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1202-
mutex_unlock(&cgroup_mutex);
1203-
if (!IS_ERR_OR_NULL(pinned_sb))
1204-
deactivate_super(pinned_sb);
1205-
msleep(10);
1206-
ret = restart_syscall();
1207-
goto out_free;
1208-
}
1209-
12101185
ret = 0;
12111186
goto out_unlock;
12121187
}
@@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
12321207
ret = -ENOMEM;
12331208
goto out_unlock;
12341209
}
1235-
new_root = true;
12361210

12371211
init_cgroup_root(root, &opts);
12381212

1239-
ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
1213+
ret = cgroup_setup_root(root, opts.subsys_mask);
12401214
if (ret)
12411215
cgroup_free_root(root);
12421216

12431217
out_unlock:
1218+
if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1219+
mutex_unlock(&cgroup_mutex);
1220+
msleep(10);
1221+
ret = restart_syscall();
1222+
goto out_free;
1223+
}
12441224
mutex_unlock(&cgroup_mutex);
12451225
out_free:
12461226
kfree(opts.release_agent);
@@ -1252,25 +1232,13 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
12521232
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
12531233
CGROUP_SUPER_MAGIC, ns);
12541234

1255-
/*
1256-
* There's a race window after we release cgroup_mutex and before
1257-
* allocating a superblock. Make sure a concurrent process won't
1258-
* be able to re-use the root during this window by delaying the
1259-
* initialization of root refcnt.
1260-
*/
1261-
if (new_root) {
1262-
mutex_lock(&cgroup_mutex);
1263-
percpu_ref_reinit(&root->cgrp.self.refcnt);
1264-
mutex_unlock(&cgroup_mutex);
1235+
if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
1236+
struct super_block *sb = dentry->d_sb;
1237+
dput(dentry);
1238+
deactivate_locked_super(sb);
1239+
msleep(10);
1240+
dentry = ERR_PTR(restart_syscall());
12651241
}
1266-
1267-
/*
1268-
* If @pinned_sb, we're reusing an existing root and holding an
1269-
* extra ref on its sb. Mount is complete. Put the extra ref.
1270-
*/
1271-
if (pinned_sb)
1272-
deactivate_super(pinned_sb);
1273-
12741242
return dentry;
12751243
}
12761244

kernel/cgroup/cgroup.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
19271927
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
19281928
}
19291929

1930-
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
1930+
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
19311931
{
19321932
LIST_HEAD(tmp_links);
19331933
struct cgroup *root_cgrp = &root->cgrp;
@@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
19441944
root_cgrp->ancestor_ids[0] = ret;
19451945

19461946
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
1947-
ref_flags, GFP_KERNEL);
1947+
0, GFP_KERNEL);
19481948
if (ret)
19491949
goto out;
19501950

@@ -2033,7 +2033,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
20332033
struct cgroup_namespace *ns)
20342034
{
20352035
struct dentry *dentry;
2036-
bool new_sb;
2036+
bool new_sb = false;
20372037

20382038
dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
20392039

@@ -2043,6 +2043,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
20432043
*/
20442044
if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
20452045
struct dentry *nsdentry;
2046+
struct super_block *sb = dentry->d_sb;
20462047
struct cgroup *cgrp;
20472048

20482049
mutex_lock(&cgroup_mutex);
@@ -2053,12 +2054,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
20532054
spin_unlock_irq(&css_set_lock);
20542055
mutex_unlock(&cgroup_mutex);
20552056

2056-
nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
2057+
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
20572058
dput(dentry);
2059+
if (IS_ERR(nsdentry))
2060+
deactivate_locked_super(sb);
20582061
dentry = nsdentry;
20592062
}
20602063

2061-
if (IS_ERR(dentry) || !new_sb)
2064+
if (!new_sb)
20622065
cgroup_put(&root->cgrp);
20632066

20642067
return dentry;
@@ -2118,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb)
21182121
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
21192122

21202123
/*
2121-
* If @root doesn't have any mounts or children, start killing it.
2124+
* If @root doesn't have any children, start killing it.
21222125
* This prevents new mounts by disabling percpu_ref_tryget_live().
21232126
* cgroup_mount() may wait for @root's release.
21242127
*
21252128
* And don't kill the default root.
21262129
*/
2127-
if (!list_empty(&root->cgrp.self.children) ||
2128-
root == &cgrp_dfl_root)
2129-
cgroup_put(&root->cgrp);
2130-
else
2130+
if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
2131+
!percpu_ref_is_dying(&root->cgrp.self.refcnt))
21312132
percpu_ref_kill(&root->cgrp.self.refcnt);
2132-
2133+
cgroup_put(&root->cgrp);
21332134
kernfs_kill_sb(sb);
21342135
}
21352136

@@ -5399,7 +5400,7 @@ int __init cgroup_init(void)
53995400
hash_add(css_set_table, &init_css_set.hlist,
54005401
css_set_hash(init_css_set.subsys));
54015402

5402-
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
5403+
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
54035404

54045405
mutex_unlock(&cgroup_mutex);
54055406

0 commit comments

Comments
 (0)