diff options
| author | Al Viro <viro@zeniv.linux.org.uk> | 2025-04-27 22:53:13 -0400 |
|---|---|---|
| committer | Al Viro <viro@zeniv.linux.org.uk> | 2025-06-29 18:13:42 -0400 |
| commit | 493a4bebf5157a5da64e36f8d468ff80a859b563 (patch) | |
| tree | 7942eccfb02dde6218423f961502b55c2856796e /fs/namespace.c | |
| parent | d72c773237c0472e214cda92016ad21625b05bba (diff) | |
| download | tip-493a4bebf5157a5da64e36f8d468ff80a859b563.tar.gz | |
don't have mounts pin their parents
Simplify the rules for mount refcounts. Current rules include:
* being a namespace root => +1
* being someone's child => +1
* being someone's child => +1 to parent's refcount, unless you've
already been through umount_tree().
The last part is not needed at all. It makes for more places where we need
to decrement refcounts and it creates an asymmetry between the situations
for something that has never been a part of a namespace and something that
left one, both for no good reason.
If mount's refcount has additions from its children, we know that
* it's either someone's child itself (and will remain so
until umount_tree(), at which point contributions from children
will disappear), or
* it is the root of a namespace (and will remain such until
it either becomes someone's child in another namespace or goes through
umount_tree()), or
* it is the root of some tree copy, and is currently pinned
by the caller of copy_tree() (and remains such until it either gets
into namespace, or goes to umount_tree()).
In all cases we already have contribution(s) to refcount that will last
as long as the contribution from children remains. In other words, the
lifetime is not affected by refcount contributions from children.
It might be useful for "is it busy" checks, but those are actually
no harder to express without it.
NB: propagate_mnt_busy() part is an equivalent transformation, ugly as it
is; the current logic is actually wrong and may give false negatives,
but fixing that is for a separate patch (probably earlier in the queue).
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namespace.c')
| -rw-r--r-- | fs/namespace.c | 31 |
1 files changed, 9 insertions, 22 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 6df0436bfcb974..4bdf6a6e75cab0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1075,7 +1075,6 @@ void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, struct mount *child_mnt) { - mnt_add_count(mnt, 1); /* essentially, that's mntget */ child_mnt->mnt_mountpoint = mp->m_dentry; child_mnt->mnt_parent = mnt; child_mnt->mnt_mp = mp; @@ -1118,7 +1117,6 @@ static void attach_mnt(struct mount *mnt, struct mount *parent, void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) { struct mountpoint *old_mp = mnt->mnt_mp; - struct mount *old_parent = mnt->mnt_parent; list_del_init(&mnt->mnt_child); hlist_del_init(&mnt->mnt_mp_list); @@ -1127,7 +1125,6 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m attach_mnt(mnt, parent, mp); maybe_free_mountpoint(old_mp, &ex_mountpoints); - mnt_add_count(old_parent, -1); } static inline struct mount *node_to_mount(struct rb_node *node) @@ -1652,23 +1649,19 @@ const struct seq_operations mounts_op = { int may_umount_tree(struct vfsmount *m) { struct mount *mnt = real_mount(m); - int actual_refs = 0; - int minimum_refs = 0; - struct mount *p; - BUG_ON(!m); + bool busy = false; /* write lock needed for mnt_get_count */ lock_mount_hash(); - for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += mnt_get_count(p); - minimum_refs += 2; + for (struct mount *p = mnt; p; p = next_mnt(p, mnt)) { + if (mnt_get_count(p) > (p == mnt ? 2 : 1)) { + busy = true; + break; + } } unlock_mount_hash(); - if (actual_refs > minimum_refs) - return 0; - - return 1; + return !busy; } EXPORT_SYMBOL(may_umount_tree); @@ -1869,7 +1862,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) disconnect = disconnect_mount(p, how); if (mnt_has_parent(p)) { - mnt_add_count(p->mnt_parent, -1); if (!disconnect) { /* Don't forget about p */ list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); @@ -1946,7 +1938,7 @@ static int do_umount(struct mount *mnt, int flags) * all race cases, but it's a slowpath. */ lock_mount_hash(); - if (mnt_get_count(mnt) != 2) { + if (!list_empty(&mnt->mnt_mounts) || mnt_get_count(mnt) != 2) { unlock_mount_hash(); return -EBUSY; } @@ -3683,9 +3675,7 @@ static int do_move_mount(struct path *old_path, out: unlock_mount(&mp); if (!err) { - if (!is_anon_ns(ns)) { - mntput_no_expire(parent); - } else { + if (is_anon_ns(ns)) { /* Make sure we notice when we leak mounts. */ VFS_WARN_ON_ONCE(!mnt_ns_empty(ns)); free_mnt_ns(ns); @@ -4753,7 +4743,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, root_parent, root_mnt->mnt_mp); umount_mnt(root_mnt); - mnt_add_count(root_parent, -1); /* mount old root on put_old */ attach_mnt(root_mnt, old_mnt, old_mp.mp); touch_mnt_namespace(current->nsproxy->mnt_ns); @@ -4766,8 +4755,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = 0; out4: unlock_mount(&old_mp); - if (!error) - mntput_no_expire(ex_parent); out3: path_put(&root); out2: |
