Commit 493a4be

Al Viro authored and committed
don't have mounts pin their parents
Simplify the rules for mount refcounts. Current rules include:

	* being a namespace root => +1
	* being someone's child => +1
	* being someone's child => +1 to parent's refcount, unless you've
	  already been through umount_tree().

The last part is not needed at all. It makes for more places where we need
to decrement refcounts, and it creates an asymmetry between the situations
for something that has never been a part of a namespace and something that
left one, both for no good reason.

If a mount's refcount has additions from its children, we know that

	* it is someone's child itself (and will remain so until
	  umount_tree(), at which point the contributions from children
	  will disappear), or
	* it is the root of a namespace (and will remain such until it
	  either becomes someone's child in another namespace or goes
	  through umount_tree()), or
	* it is the root of some tree copy, and is currently pinned by the
	  caller of copy_tree() (and remains such until it either gets into
	  a namespace or goes to umount_tree()).

In all cases we already have contribution(s) to the refcount that will last
as long as the contribution from children remains. In other words, the
lifetime is not affected by refcount contributions from children. They
might be useful for "is it busy" checks, but those are actually no harder
to express without them.

NB: the propagate_mount_busy() part is an equivalent transformation, ugly
as it is; the current logic is actually wrong and may give false negatives,
but fixing that is for a separate patch (probably earlier in the queue).

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
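As a rough illustration of the "no harder to express" point above, here is a
minimal standalone sketch (toy types and names, not the kernel code) of a
per-mount busy check under the new rules, where the subtree root held by the
caller is allowed two references and every other mount in the subtree exactly
one:

/* Toy model only -- "struct mnt", "tree_busy" and the counts are illustrative
 * stand-ins; in the kernel the walk is next_mnt() under mount_lock and the
 * counts come from mnt_get_count(). */
#include <stdbool.h>
#include <stdio.h>

struct mnt {
        int count;              /* simplified refcount */
        struct mnt *next;       /* next mount in the subtree, NULL at the end */
};

static bool tree_busy(const struct mnt *root)
{
        for (const struct mnt *p = root; p; p = p->next) {
                /* root: +1 for being attached, +1 for the caller's reference;
                 * everything below it: just the +1 for being attached */
                if (p->count > (p == root ? 2 : 1))
                        return true;
        }
        return false;
}

int main(void)
{
        struct mnt leaf = { .count = 1, .next = NULL };
        struct mnt root = { .count = 2, .next = &leaf };

        printf("busy: %d\n", tree_busy(&root));  /* 0: nothing extra pinned */
        leaf.count++;                            /* something extra holds the leaf */
        printf("busy: %d\n", tree_busy(&root));  /* 1: subtree is busy */
        return 0;
}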
1 parent d72c773 commit 493a4be

File tree

2 files changed: +26 −54 lines changed


fs/namespace.c

Lines changed: 9 additions & 22 deletions
@@ -1075,7 +1075,6 @@ void mnt_set_mountpoint(struct mount *mnt,
                         struct mountpoint *mp,
                         struct mount *child_mnt)
 {
-        mnt_add_count(mnt, 1);  /* essentially, that's mntget */
         child_mnt->mnt_mountpoint = mp->m_dentry;
         child_mnt->mnt_parent = mnt;
         child_mnt->mnt_mp = mp;
@@ -1118,7 +1117,6 @@ static void attach_mnt(struct mount *mnt, struct mount *parent,
 void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
         struct mountpoint *old_mp = mnt->mnt_mp;
-        struct mount *old_parent = mnt->mnt_parent;
 
         list_del_init(&mnt->mnt_child);
         hlist_del_init(&mnt->mnt_mp_list);
@@ -1127,7 +1125,6 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
         attach_mnt(mnt, parent, mp);
 
         maybe_free_mountpoint(old_mp, &ex_mountpoints);
-        mnt_add_count(old_parent, -1);
 }
 
 static inline struct mount *node_to_mount(struct rb_node *node)
@@ -1652,23 +1649,19 @@ const struct seq_operations mounts_op = {
 int may_umount_tree(struct vfsmount *m)
 {
         struct mount *mnt = real_mount(m);
-        int actual_refs = 0;
-        int minimum_refs = 0;
-        struct mount *p;
-        BUG_ON(!m);
+        bool busy = false;
 
         /* write lock needed for mnt_get_count */
         lock_mount_hash();
-        for (p = mnt; p; p = next_mnt(p, mnt)) {
-                actual_refs += mnt_get_count(p);
-                minimum_refs += 2;
+        for (struct mount *p = mnt; p; p = next_mnt(p, mnt)) {
+                if (mnt_get_count(p) > (p == mnt ? 2 : 1)) {
+                        busy = true;
+                        break;
+                }
         }
         unlock_mount_hash();
 
-        if (actual_refs > minimum_refs)
-                return 0;
-
-        return 1;
+        return !busy;
 }
 
 EXPORT_SYMBOL(may_umount_tree);
@@ -1869,7 +1862,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 
                 disconnect = disconnect_mount(p, how);
                 if (mnt_has_parent(p)) {
-                        mnt_add_count(p->mnt_parent, -1);
                         if (!disconnect) {
                                 /* Don't forget about p */
                                 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
@@ -1946,7 +1938,7 @@ static int do_umount(struct mount *mnt, int flags)
                  * all race cases, but it's a slowpath.
                  */
                 lock_mount_hash();
-                if (mnt_get_count(mnt) != 2) {
+                if (!list_empty(&mnt->mnt_mounts) || mnt_get_count(mnt) != 2) {
                         unlock_mount_hash();
                         return -EBUSY;
                 }
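Why the added list_empty() test in the hunk above: under the old rules every
submount also held a reference on its parent, so a mount with children could
never read exactly 2 here; with that contribution gone, children have to be
checked explicitly. A back-of-the-envelope sketch (toy arithmetic, assuming
the expected count of 2 is one reference for being attached plus one for the
caller -- not the full kernel accounting):

/* Toy arithmetic only -- illustrates why "count != 2" alone no longer
 * rejects a mount that still has submounts. */
#include <assert.h>
#include <stdbool.h>

static int expected_count(int nr_children, bool children_pin_parent)
{
        int count = 1 /* attached */ + 1 /* caller's reference */;

        if (children_pin_parent)        /* old rules */
                count += nr_children;
        return count;
}

int main(void)
{
        /* Old rules: a submount shows up in the parent's count. */
        assert(expected_count(1, true) != 2);
        /* New rules: it does not, hence the explicit list_empty() check. */
        assert(expected_count(1, false) == 2);
        return 0;
}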
@@ -3683,9 +3675,7 @@ static int do_move_mount(struct path *old_path,
 out:
         unlock_mount(&mp);
         if (!err) {
-                if (!is_anon_ns(ns)) {
-                        mntput_no_expire(parent);
-                } else {
+                if (is_anon_ns(ns)) {
                         /* Make sure we notice when we leak mounts. */
                         VFS_WARN_ON_ONCE(!mnt_ns_empty(ns));
                         free_mnt_ns(ns);
@@ -4753,7 +4743,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         /* mount new_root on / */
         attach_mnt(new_mnt, root_parent, root_mnt->mnt_mp);
         umount_mnt(root_mnt);
-        mnt_add_count(root_parent, -1);
         /* mount old root on put_old */
         attach_mnt(root_mnt, old_mnt, old_mp.mp);
         touch_mnt_namespace(current->nsproxy->mnt_ns);
@@ -4766,8 +4755,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         error = 0;
 out4:
         unlock_mount(&old_mp);
-        if (!error)
-                mntput_no_expire(ex_parent);
 out3:
         path_put(&root);
 out2:

fs/pnode.c

Lines changed: 17 additions & 32 deletions
@@ -332,21 +332,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
         return ret;
 }
 
-static struct mount *find_topper(struct mount *mnt)
-{
-        /* If there is exactly one mount covering mnt completely return it. */
-        struct mount *child;
-
-        if (!list_is_singular(&mnt->mnt_mounts))
-                return NULL;
-
-        child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
-        if (child->mnt_mountpoint != mnt->mnt.mnt_root)
-                return NULL;
-
-        return child;
-}
-
 /*
  * return true if the refcount is greater than count
  */
@@ -404,12 +389,8 @@ bool propagation_would_overmount(const struct mount *from,
  */
 int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
-        struct mount *m, *child, *topper;
         struct mount *parent = mnt->mnt_parent;
 
-        if (mnt == parent)
-                return do_refcount_check(mnt, refcnt);
-
         /*
          * quickly check if the current mount can be unmounted.
          * If not, we don't have to go checking for all other
@@ -418,23 +399,27 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
         if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
                 return 1;
 
-        for (m = propagation_next(parent, parent); m;
+        if (mnt == parent)
+                return 0;
+
+        for (struct mount *m = propagation_next(parent, parent); m;
              m = propagation_next(m, parent)) {
-                int count = 1;
-                child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
-                if (!child)
-                        continue;
+                struct list_head *head;
+                struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
 
-                /* Is there exactly one mount on the child that covers
-                 * it completely whose reference should be ignored?
-                 */
-                topper = find_topper(child);
-                if (topper)
-                        count += 1;
-                else if (!list_empty(&child->mnt_mounts))
+                if (!child)
                         continue;
 
-                if (do_refcount_check(child, count))
+                head = &child->mnt_mounts;
+                if (!list_empty(head)) {
+                        /*
+                         * a mount that covers child completely wouldn't prevent
+                         * it being pulled out; any other would.
+                         */
+                        if (!list_is_singular(head) || !child->overmount)
+                                continue;
+                }
+                if (do_refcount_check(child, 1))
                         return 1;
         }
         return 0;
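The removed find_topper() bookkeeping and the new predicate in the hunk above
are meant to be equivalent: under the old rules a submount that completely
covered the child also pinned it, so the allowed count had to be bumped to 2
whenever such a "topper" existed; with that pin gone, comparing against 1
always suffices. A hedged sketch of the correspondence (toy types; "overmount"
here stands in for the field this series uses to mark a child mounted exactly
on the parent's root):

/* Illustration only, not kernel code. */
#include <assert.h>
#include <stdbool.h>

struct toy_mount {
        int nr_children;        /* number of submounts */
        bool overmount;         /* the single submount covers it completely */
        int count;              /* simplified refcount */
};

/* Old scheme: ignore the topper's pin by allowing one extra reference. */
static bool busy_old(const struct toy_mount *m)
{
        int allowed = 1;

        if (m->nr_children == 1 && m->overmount)
                allowed += 1;
        else if (m->nr_children)
                return false;   /* skipped -- the quirk the NB above refers to */
        return m->count > allowed;
}

/* New scheme: the topper no longer pins its parent, so 1 always suffices. */
static bool busy_new(const struct toy_mount *m)
{
        if (m->nr_children && !(m->nr_children == 1 && m->overmount))
                return false;   /* same skip as above */
        return m->count > 1;
}

int main(void)
{
        /* A child with a single covering submount and no extra references. */
        struct toy_mount old_rules = { .nr_children = 1, .overmount = true, .count = 2 };
        struct toy_mount new_rules = { .nr_children = 1, .overmount = true, .count = 1 };

        assert(!busy_old(&old_rules));  /* the topper's pin is tolerated */
        assert(!busy_new(&new_rules));  /* no pin left to tolerate */
        return 0;
}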
