aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst6
-rw-r--r--Documentation/filesystems/locking.rst8
-rw-r--r--Documentation/filesystems/mount_api.rst2
-rw-r--r--Documentation/filesystems/porting.rst7
-rw-r--r--Documentation/filesystems/vfs.rst58
-rw-r--r--arch/arm/configs/neponset_defconfig2
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext4/mballoc.c3
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/file.c3
-rw-r--r--fs/fs_context.c208
-rw-r--r--fs/fsopen.c10
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/init.c14
-rw-r--r--fs/internal.h1
-rw-r--r--fs/locks.c14
-rw-r--r--fs/namei.c80
-rw-r--r--fs/namespace.c102
-rw-r--r--fs/nfsd/nfs4proc.c11
-rw-r--r--fs/open.c39
-rw-r--r--fs/splice.c2
-rw-r--r--include/linux/filelock.h18
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/fs/super_types.h1
-rw-r--r--include/linux/init_syscalls.h1
-rw-r--r--include/linux/initrd.h2
-rw-r--r--include/linux/ns/ns_common_types.h4
-rw-r--r--include/uapi/linux/mount.h10
-rw-r--r--include/uapi/linux/sysctl.h1
-rw-r--r--init/do_mounts.c11
-rw-r--r--init/do_mounts.h18
-rw-r--r--init/do_mounts_initrd.c107
-rw-r--r--init/do_mounts_rd.c24
-rw-r--r--kernel/pid.c131
-rw-r--r--rust/helpers/fs.c2
-rw-r--r--rust/helpers/pid_namespace.c8
-rw-r--r--rust/helpers/poll.c5
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h15
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c261
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c101
46 files changed, 685 insertions, 650 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a8d0afde7f85a5..f67591615a6adf 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3437,8 +3437,6 @@ Kernel parameters
If there are multiple matching configurations changing
the same attribute, the last one is used.
- load_ramdisk= [RAM] [Deprecated]
-
lockd.nlm_grace_period=P [NFS] Assign grace period.
Format: <integer>
@@ -4444,8 +4442,10 @@ Kernel parameters
Note that this argument takes precedence over
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
- noinitrd [RAM] Tells the kernel not to load any configured
- initial RAM disk.
+ noinitrd [Deprecated,RAM] Tells the kernel not to load any configured
+ initial RAM disk. Currently this parameter applies to
+ initrd only, not to initramfs; in EFI mode, however, it
+ applies to both.
nointremap [X86-64,Intel-IOMMU,EARLY] Do not enable interrupt
remapping.
@@ -5402,8 +5402,6 @@ Kernel parameters
Param: <number> - step/bucket size as a power of 2 for
statistical time based profiling.
- prompt_ramdisk= [RAM] [Deprecated]
-
prot_virt= [S390] enable hosting protected virtual machines
isolated from the hypervisor (if hardware supports
that). If enabled, the default kernel base address
@@ -5460,7 +5458,7 @@ Kernel parameters
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
See Documentation/admin-guide/blockdev/ramdisk.rst.
- ramdisk_start= [RAM] RAM disk image start address
+ ramdisk_start= [Deprecated,RAM] RAM disk image start address
random.trust_cpu=off
[KNL,EARLY] Disable trusting the use of the CPU's
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 239da22c4e28f1..bb577fac76a01a 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1235,12 +1235,6 @@ that support this feature.
== ===========================================================================
-real-root-dev
-=============
-
-See Documentation/admin-guide/initrd.rst.
-
-
reboot-cmd (SPARC only)
=======================
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 04c7691e50e01f..35e97618868b37 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -177,7 +177,6 @@ prototypes::
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
- int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
@@ -201,7 +200,6 @@ sync_fs: read
freeze_fs: write
unfreeze_fs: write
statfs: maybe(read) (see below)
-remount_fs: write
umount_begin: no
show_options: no (namespace_sem)
quota_read: no (see below)
@@ -226,8 +224,6 @@ file_system_type
prototypes::
- struct dentry *(*mount) (struct file_system_type *, int,
- const char *, void *);
void (*kill_sb) (struct super_block *);
locking rules:
@@ -235,13 +231,9 @@ locking rules:
======= =========
ops may block
======= =========
-mount yes
kill_sb yes
======= =========
-->mount() returns ERR_PTR or the root dentry; its superblock should be locked
-on return.
-
->kill_sb() takes a write-locked superblock, does all shutdown work on it,
unlocks and drops the reference.
diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index c99ab1f7fea453..a064234fed5bb9 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -299,8 +299,6 @@ manage the filesystem context. They are as follows:
On success it should return 0. In the case of an error, it should return
a negative error code.
- .. Note:: reconfigure is intended as a replacement for remount_fs.
-
Filesystem context Security
===========================
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 3397937ed838e5..631eee9bdc3384 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -448,11 +448,8 @@ a file off.
**mandatory**
-->get_sb() is gone. Switch to use of ->mount(). Typically it's just
-a matter of switching from calling ``get_sb_``... to ``mount_``... and changing
-the function type. If you were doing it manually, just switch from setting
-->mnt_root to some pointer to returning that pointer. On errors return
-ERR_PTR(...).
+->get_sb() and ->mount() are gone. Switch to using the new mount API. See
+Documentation/filesystems/mount_api.rst for more details.
---
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 670ba66b60e496..90c357b263fed7 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -94,11 +94,9 @@ functions:
The passed struct file_system_type describes your filesystem. When a
request is made to mount a filesystem onto a directory in your
-namespace, the VFS will call the appropriate mount() method for the
-specific filesystem. New vfsmount referring to the tree returned by
-->mount() will be attached to the mountpoint, so that when pathname
-resolution reaches the mountpoint it will jump into the root of that
-vfsmount.
+namespace, the VFS will call the appropriate get_tree() method for the
+specific filesystem. See Documentation/filesystems/mount_api.rst
+for more details.
You can see all filesystems that are registered to the kernel in the
file /proc/filesystems.
@@ -117,8 +115,6 @@ members are defined:
int fs_flags;
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
- struct dentry *(*mount) (struct file_system_type *, int,
- const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;
@@ -151,10 +147,6 @@ members are defined:
'struct fs_parameter_spec'.
More info in Documentation/filesystems/mount_api.rst.
-``mount``
- the method to call when a new instance of this filesystem should
- be mounted
-
``kill_sb``
the method to call when an instance of this filesystem should be
shut down
@@ -173,45 +165,6 @@ members are defined:
s_lock_key, s_umount_key, s_vfs_rename_key, s_writers_key,
i_lock_key, i_mutex_key, invalidate_lock_key, i_mutex_dir_key: lockdep-specific
-The mount() method has the following arguments:
-
-``struct file_system_type *fs_type``
- describes the filesystem, partly initialized by the specific
- filesystem code
-
-``int flags``
- mount flags
-
-``const char *dev_name``
- the device name we are mounting.
-
-``void *data``
- arbitrary mount options, usually comes as an ASCII string (see
- "Mount Options" section)
-
-The mount() method must return the root dentry of the tree requested by
-caller. An active reference to its superblock must be grabbed and the
-superblock must be locked. On failure it should return ERR_PTR(error).
-
-The arguments match those of mount(2) and their interpretation depends
-on filesystem type. E.g. for block filesystems, dev_name is interpreted
-as block device name, that device is opened and if it contains a
-suitable filesystem image the method creates and initializes struct
-super_block accordingly, returning its root dentry to caller.
-
-->mount() may choose to return a subtree of existing filesystem - it
-doesn't have to create a new one. The main result from the caller's
-point of view is a reference to dentry at the root of (sub)tree to be
-attached; creation of new superblock is a common side effect.
-
-The most interesting member of the superblock structure that the mount()
-method fills in is the "s_op" field. This is a pointer to a "struct
-super_operations" which describes the next level of the filesystem
-implementation.
-
-For more information on mounting (and the new mount API), see
-Documentation/filesystems/mount_api.rst.
-
The Superblock Object
=====================
@@ -244,7 +197,6 @@ filesystem. The following members are defined:
enum freeze_wholder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
- int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
@@ -351,10 +303,6 @@ or bottom half).
``statfs``
called when the VFS needs to get filesystem statistics.
-``remount_fs``
- called when the filesystem is remounted. This is called with
- the kernel lock held
-
``umount_begin``
called when the VFS is unmounting a filesystem.
diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig
index 2227f86100ad25..4d720001c12efe 100644
--- a/arch/arm/configs/neponset_defconfig
+++ b/arch/arm/configs/neponset_defconfig
@@ -9,7 +9,7 @@ CONFIG_ASSABET_NEPONSET=y
CONFIG_ZBOOT_ROM_TEXT=0x80000
CONFIG_ZBOOT_ROM_BSS=0xc1000000
CONFIG_ZBOOT_ROM=y
-CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) load_ramdisk=1 prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M"
+CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) mem=32M noinitrd initrd=0xc0800000,3M"
CONFIG_FPE_NWFPE=y
CONFIG_PM=y
CONFIG_MODULES=y
diff --git a/fs/buffer.c b/fs/buffer.c
index 838c0c5710229e..fd53b806ab7eb2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2354,7 +2354,7 @@ bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
if (!head)
return false;
blocksize = head->b_size;
- to = min_t(unsigned, folio_size(folio) - from, count);
+ to = min(folio_size(folio) - from, count);
to = from + to;
if (from < blocksize && to > folio_size(folio) - blocksize)
return false;
@@ -2948,6 +2948,10 @@ bool try_to_free_buffers(struct folio *folio)
if (folio_test_writeback(folio))
return false;
+ /* Misconfigured folio check */
+ if (WARN_ON_ONCE(!folio_buffers(folio)))
+ return true;
+
if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(folio, &buffers_to_free);
goto out;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c2ddb998f3c943..84a5a0699373cd 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -343,7 +343,7 @@ void __unregister_chrdev(unsigned int major, unsigned int baseminor,
kfree(cd);
}
-static DEFINE_SPINLOCK(cdev_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(cdev_lock);
static struct kobject *cdev_get(struct cdev *p)
{
diff --git a/fs/exec.c b/fs/exec.c
index 9d5ebc9d15b0d9..d0606e53376fb6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -555,7 +555,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
return -E2BIG;
while (len > 0) {
- unsigned int bytes_to_copy = min_t(unsigned int, len,
+ unsigned int bytes_to_copy = min(len,
min_not_zero(offset_in_page(pos), PAGE_SIZE));
struct page *page;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 56d50fd3310b47..e817a758801dab 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4276,8 +4276,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
* get the corresponding group metadata to work with.
* For this we have goto again loop.
*/
- thisgrp_len = min_t(unsigned int, (unsigned int)len,
- EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
+ thisgrp_len = min(len, EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
clen = EXT4_NUM_B2C(sbi, thisgrp_len);
if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 050f26168d9726..76842f0957b524 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1479,7 +1479,7 @@ static void ext4_update_super(struct super_block *sb,
/* Update the global fs size fields */
sbi->s_groups_count += flex_gd->count;
- sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
+ sbi->s_blockfile_groups = min(sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
/* Update the reserved block counts only once the new group is
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 87205660c5d026..79762c3e0dff33 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4832,7 +4832,7 @@ static int ext4_check_geometry(struct super_block *sb,
return -EINVAL;
}
sbi->s_groups_count = blocks_count;
- sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
+ sbi->s_blockfile_groups = min(sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
le32_to_cpu(es->s_inodes_count)) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 92b091783966af..8375e7fbc1a551 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1353,7 +1353,7 @@ found:
/* Fill the long name slots. */
for (i = 0; i < long_bhs; i++) {
- int copy = min_t(int, sb->s_blocksize - offset, size);
+ int copy = umin(sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
mark_buffer_dirty_inode(bhs[i], dir);
offset = 0;
@@ -1364,7 +1364,7 @@ found:
err = fat_sync_bhs(bhs, long_bhs);
if (!err && i < nr_bhs) {
/* Fill the short name slot. */
- int copy = min_t(int, sb->s_blocksize - offset, size);
+ int copy = umin(sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
mark_buffer_dirty_inode(bhs[i], dir);
if (IS_DIRSYNC(dir))
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 4fc49a614fb8fd..f48435e586c783 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -140,8 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
if (copy_from_user(&range, user_range, sizeof(range)))
return -EFAULT;
- range.minlen = max_t(unsigned int, range.minlen,
- bdev_discard_granularity(sb->s_bdev));
+ range.minlen = max(range.minlen, bdev_discard_granularity(sb->s_bdev));
err = fat_trim_fs(inode, &range);
if (err < 0)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 93b7ebf8d92795..81ed94f46cac75 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -24,20 +24,6 @@
#include "mount.h"
#include "internal.h"
-enum legacy_fs_param {
- LEGACY_FS_UNSET_PARAMS,
- LEGACY_FS_MONOLITHIC_PARAMS,
- LEGACY_FS_INDIVIDUAL_PARAMS,
-};
-
-struct legacy_fs_context {
- char *legacy_data; /* Data page for legacy filesystems */
- size_t data_size;
- enum legacy_fs_param param_type;
-};
-
-static int legacy_init_fs_context(struct fs_context *fc);
-
static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
@@ -275,7 +261,6 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
unsigned int sb_flags_mask,
enum fs_context_purpose purpose)
{
- int (*init_fs_context)(struct fs_context *);
struct fs_context *fc;
int ret = -ENOMEM;
@@ -307,12 +292,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
break;
}
- /* TODO: Make all filesystems support this unconditionally */
- init_fs_context = fc->fs_type->init_fs_context;
- if (!init_fs_context)
- init_fs_context = legacy_init_fs_context;
-
- ret = init_fs_context(fc);
+ ret = fc->fs_type->init_fs_context(fc);
if (ret < 0)
goto err_fc;
fc->need_free = true;
@@ -376,8 +356,6 @@ void fc_drop_locked(struct fs_context *fc)
deactivate_locked_super(sb);
}
-static void legacy_fs_context_free(struct fs_context *fc);
-
/**
* vfs_dup_fs_context - Duplicate a filesystem context.
* @src_fc: The context to copy.
@@ -531,184 +509,6 @@ void put_fs_context(struct fs_context *fc)
}
EXPORT_SYMBOL(put_fs_context);
-/*
- * Free the config for a filesystem that doesn't support fs_context.
- */
-static void legacy_fs_context_free(struct fs_context *fc)
-{
- struct legacy_fs_context *ctx = fc->fs_private;
-
- if (ctx) {
- if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS)
- kfree(ctx->legacy_data);
- kfree(ctx);
- }
-}
-
-/*
- * Duplicate a legacy config.
- */
-static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
-{
- struct legacy_fs_context *ctx;
- struct legacy_fs_context *src_ctx = src_fc->fs_private;
-
- ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) {
- ctx->legacy_data = kmemdup(src_ctx->legacy_data,
- src_ctx->data_size, GFP_KERNEL);
- if (!ctx->legacy_data) {
- kfree(ctx);
- return -ENOMEM;
- }
- }
-
- fc->fs_private = ctx;
- return 0;
-}
-
-/*
- * Add a parameter to a legacy config. We build up a comma-separated list of
- * options.
- */
-static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
- struct legacy_fs_context *ctx = fc->fs_private;
- unsigned int size = ctx->data_size;
- size_t len = 0;
- int ret;
-
- ret = vfs_parse_fs_param_source(fc, param);
- if (ret != -ENOPARAM)
- return ret;
-
- if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
- return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
-
- switch (param->type) {
- case fs_value_is_string:
- len = 1 + param->size;
- fallthrough;
- case fs_value_is_flag:
- len += strlen(param->key);
- break;
- default:
- return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
- param->key);
- }
-
- if (size + len + 2 > PAGE_SIZE)
- return invalf(fc, "VFS: Legacy: Cumulative options too large");
- if (strchr(param->key, ',') ||
- (param->type == fs_value_is_string &&
- memchr(param->string, ',', param->size)))
- return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
- param->key);
- if (!ctx->legacy_data) {
- ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!ctx->legacy_data)
- return -ENOMEM;
- }
-
- if (size)
- ctx->legacy_data[size++] = ',';
- len = strlen(param->key);
- memcpy(ctx->legacy_data + size, param->key, len);
- size += len;
- if (param->type == fs_value_is_string) {
- ctx->legacy_data[size++] = '=';
- memcpy(ctx->legacy_data + size, param->string, param->size);
- size += param->size;
- }
- ctx->legacy_data[size] = '\0';
- ctx->data_size = size;
- ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
- return 0;
-}
-
-/*
- * Add monolithic mount data.
- */
-static int legacy_parse_monolithic(struct fs_context *fc, void *data)
-{
- struct legacy_fs_context *ctx = fc->fs_private;
-
- if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
- pr_warn("VFS: Can't mix monolithic and individual options\n");
- return -EINVAL;
- }
-
- ctx->legacy_data = data;
- ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
- if (!ctx->legacy_data)
- return 0;
-
- if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
- return 0;
- return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
-}
-
-/*
- * Get a mountable root with the legacy mount command.
- */
-static int legacy_get_tree(struct fs_context *fc)
-{
- struct legacy_fs_context *ctx = fc->fs_private;
- struct super_block *sb;
- struct dentry *root;
-
- root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
- fc->source, ctx->legacy_data);
- if (IS_ERR(root))
- return PTR_ERR(root);
-
- sb = root->d_sb;
- BUG_ON(!sb);
-
- fc->root = root;
- return 0;
-}
-
-/*
- * Handle remount.
- */
-static int legacy_reconfigure(struct fs_context *fc)
-{
- struct legacy_fs_context *ctx = fc->fs_private;
- struct super_block *sb = fc->root->d_sb;
-
- if (!sb->s_op->remount_fs)
- return 0;
-
- return sb->s_op->remount_fs(sb, &fc->sb_flags,
- ctx ? ctx->legacy_data : NULL);
-}
-
-const struct fs_context_operations legacy_fs_context_ops = {
- .free = legacy_fs_context_free,
- .dup = legacy_fs_context_dup,
- .parse_param = legacy_parse_param,
- .parse_monolithic = legacy_parse_monolithic,
- .get_tree = legacy_get_tree,
- .reconfigure = legacy_reconfigure,
-};
-
-/*
- * Initialise a legacy context for a filesystem that doesn't support
- * fs_context.
- */
-static int legacy_init_fs_context(struct fs_context *fc)
-{
- fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
- if (!fc->fs_private)
- return -ENOMEM;
- fc->ops = &legacy_fs_context_ops;
- return 0;
-}
-
int parse_monolithic_mount_data(struct fs_context *fc, void *data)
{
int (*monolithic_mount_data)(struct fs_context *, void *);
@@ -757,10 +557,8 @@ int finish_clean_context(struct fs_context *fc)
if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
return 0;
- if (fc->fs_type->init_fs_context)
- error = fc->fs_type->init_fs_context(fc);
- else
- error = legacy_init_fs_context(fc);
+ error = fc->fs_type->init_fs_context(fc);
+
if (unlikely(error)) {
fc->phase = FS_CONTEXT_FAILED;
return error;
diff --git a/fs/fsopen.c b/fs/fsopen.c
index f645c99204eb06..622ee3926cd5df 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -404,16 +404,6 @@ SYSCALL_DEFINE5(fsconfig,
return -EINVAL;
fc = fd_file(f)->private_data;
- if (fc->ops == &legacy_fs_context_ops) {
- switch (cmd) {
- case FSCONFIG_SET_BINARY:
- case FSCONFIG_SET_PATH:
- case FSCONFIG_SET_PATH_EMPTY:
- case FSCONFIG_SET_FD:
- case FSCONFIG_CMD_CREATE_EXCL:
- return -EOPNOTSUPP;
- }
- }
if (_key) {
param.key = strndup_user(_key, 256);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6d59cbc877c6ad..a30c8b57d478bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1813,7 +1813,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
goto out_iput;
folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
- nr_bytes = min_t(unsigned, num, folio_size(folio) - folio_offset);
+ nr_bytes = min(num, folio_size(folio) - folio_offset);
nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 01bc894e9c2bae..4f71eb5a9bac85 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1323,10 +1323,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
unsigned int max_pages)
{
- return min_t(unsigned int,
- ((pos + len - 1) >> PAGE_SHIFT) -
- (pos >> PAGE_SHIFT) + 1,
- max_pages);
+ return min(((pos + len - 1) >> PAGE_SHIFT) - (pos >> PAGE_SHIFT) + 1,
+ max_pages);
}
static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
@@ -1607,7 +1605,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
struct folio *folio = page_folio(pages[i]);
unsigned int offset = start +
(folio_page_idx(folio, pages[i]) << PAGE_SHIFT);
- unsigned int len = min_t(unsigned int, ret, PAGE_SIZE - start);
+ unsigned int len = umin(ret, PAGE_SIZE - start);
ap->descs[ap->num_folios].offset = offset;
ap->descs[ap->num_folios].length = len;
diff --git a/fs/init.c b/fs/init.c
index e0f5429c0a49d0..9b56ebca8cc6d2 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -27,20 +27,6 @@ int __init init_mount(const char *dev_name, const char *dir_name,
return ret;
}
-int __init init_umount(const char *name, int flags)
-{
- int lookup_flags = LOOKUP_MOUNTPOINT;
- struct path path;
- int ret;
-
- if (!(flags & UMOUNT_NOFOLLOW))
- lookup_flags |= LOOKUP_FOLLOW;
- ret = kern_path(name, lookup_flags, &path);
- if (ret)
- return ret;
- return path_umount(&path, flags);
-}
-
int __init init_chdir(const char *filename)
{
struct path path;
diff --git a/fs/internal.h b/fs/internal.h
index ab638d41ab81db..e333b105337a80 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -44,7 +44,6 @@ extern void __init chrdev_init(void);
/*
* fs_context.c
*/
-extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);
diff --git a/fs/locks.c b/fs/locks.c
index e75c8084d937be..a684b367e18b7c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -178,7 +178,6 @@ locks_get_lock_context(struct inode *inode, int type)
{
struct file_lock_context *ctx;
- /* paired with cmpxchg() below */
ctx = locks_inode_context(inode);
if (likely(ctx) || type == F_UNLCK)
goto out;
@@ -196,7 +195,18 @@ locks_get_lock_context(struct inode *inode, int type)
* Assign the pointer if it's not already assigned. If it is, then
* free the context we just allocated.
*/
- if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_opflags & IOP_FLCTX)) {
+ VFS_BUG_ON_INODE(inode->i_flctx, inode);
+ WRITE_ONCE(inode->i_flctx, ctx);
+ /*
+ * Paired with locks_inode_context().
+ */
+ smp_store_release(&inode->i_opflags, inode->i_opflags | IOP_FLCTX);
+ spin_unlock(&inode->i_lock);
+ } else {
+ VFS_BUG_ON_INODE(!inode->i_flctx, inode);
+ spin_unlock(&inode->i_lock);
kmem_cache_free(flctx_cache, ctx);
ctx = locks_inode_context(inode);
}
diff --git a/fs/namei.c b/fs/namei.c
index bf0f66f0e9b92c..aefb21bc0944e3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4279,19 +4279,16 @@ static int may_o_create(struct mnt_idmap *idmap,
*
* Returns an error code otherwise.
*/
-static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
+static struct dentry *atomic_open(const struct path *path, struct dentry *dentry,
struct file *file,
int open_flag, umode_t mode)
{
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
- struct inode *dir = nd->path.dentry->d_inode;
+ struct inode *dir = path->dentry->d_inode;
int error;
- if (nd->flags & LOOKUP_DIRECTORY)
- open_flag |= O_DIRECTORY;
-
file->__f_path.dentry = DENTRY_NOT_SET;
- file->__f_path.mnt = nd->path.mnt;
+ file->__f_path.mnt = path->mnt;
error = dir->i_op->atomic_open(dir, dentry, file,
open_to_namei_flags(open_flag), mode);
d_lookup_done(dentry);
@@ -4403,7 +4400,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (create_error)
open_flag &= ~O_CREAT;
if (dir_inode->i_op->atomic_open) {
- dentry = atomic_open(nd, dentry, file, open_flag, mode);
+ if (nd->flags & LOOKUP_DIRECTORY)
+ open_flag |= O_DIRECTORY;
+ dentry = atomic_open(&nd->path, dentry, file, open_flag, mode);
if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
dentry = ERR_PTR(create_error);
return dentry;
@@ -4937,6 +4936,73 @@ inline struct dentry *start_creating_user_path(
}
EXPORT_SYMBOL(start_creating_user_path);
+/**
+ * dentry_create - Create and open a file
+ * @path: path to create
+ * @flags: O_ flags
+ * @mode: mode bits for new file
+ * @cred: credentials to use
+ *
+ * Caller must hold the parent directory's lock, and have prepared
+ * a negative dentry, placed in @path->dentry, for the new file.
+ *
+ * Caller sets @path->mnt to the vfsmount of the filesystem where
+ * the new file is to be created. The parent directory and the
+ * negative dentry must reside on the same filesystem instance.
+ *
+ * On success, returns a "struct file *". Otherwise an ERR_PTR
+ * is returned.
+ */
+struct file *dentry_create(struct path *path, int flags, umode_t mode,
+ const struct cred *cred)
+{
+ struct file *file __free(fput) = NULL;
+ struct dentry *dentry = path->dentry;
+ struct dentry *dir = dentry->d_parent;
+ struct inode *dir_inode = d_inode(dir);
+ struct mnt_idmap *idmap;
+ int error, create_error;
+
+ file = alloc_empty_file(flags, cred);
+ if (IS_ERR(file))
+ return file;
+
+ idmap = mnt_idmap(path->mnt);
+
+ if (dir_inode->i_op->atomic_open) {
+ path->dentry = dir;
+ mode = vfs_prepare_mode(idmap, dir_inode, mode, S_IALLUGO, S_IFREG);
+
+ create_error = may_o_create(idmap, path, dentry, mode);
+ if (create_error)
+ flags &= ~O_CREAT;
+
+ dentry = atomic_open(path, dentry, file, flags, mode);
+ error = PTR_ERR_OR_ZERO(dentry);
+
+ if (unlikely(create_error) && error == -ENOENT)
+ error = create_error;
+
+ if (!error) {
+ if (file->f_mode & FMODE_CREATED)
+ fsnotify_create(dir->d_inode, dentry);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
+ }
+
+ path->dentry = dentry;
+
+ } else {
+ error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL);
+ if (!error)
+ error = vfs_open(path, file);
+ }
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ return no_free_ptr(file);
+}
+EXPORT_SYMBOL(dentry_create);
/**
* vfs_mknod - create device node or file
diff --git a/fs/namespace.c b/fs/namespace.c
index c58674a20cad54..ec3b16fedd9f29 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5547,31 +5547,49 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
/* locks: namespace_shared */
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
- struct mnt_namespace *ns)
+ struct file *mnt_file, struct mnt_namespace *ns)
{
- struct mount *m;
int err;
- /* Has the namespace already been emptied? */
- if (mnt_ns_id && mnt_ns_empty(ns))
- return -ENOENT;
+ if (mnt_file) {
+ WARN_ON_ONCE(ns != NULL);
- s->mnt = lookup_mnt_in_ns(mnt_id, ns);
- if (!s->mnt)
- return -ENOENT;
+ s->mnt = mnt_file->f_path.mnt;
+ ns = real_mount(s->mnt)->mnt_ns;
+ if (!ns)
+ /*
+ * We can't set mount point and mnt_ns_id since we don't have a
+ * ns for the mount. This can happen if the mount is unmounted
+ * with MNT_DETACH.
+ */
+ s->mask &= ~(STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID);
+ } else {
+ /* Has the namespace already been emptied? */
+ if (mnt_ns_id && mnt_ns_empty(ns))
+ return -ENOENT;
- err = grab_requested_root(ns, &s->root);
- if (err)
- return err;
+ s->mnt = lookup_mnt_in_ns(mnt_id, ns);
+ if (!s->mnt)
+ return -ENOENT;
+ }
- /*
- * Don't trigger audit denials. We just want to determine what
- * mounts to show users.
- */
- m = real_mount(s->mnt);
- if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
- !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
- return -EPERM;
+ if (ns) {
+ err = grab_requested_root(ns, &s->root);
+ if (err)
+ return err;
+
+ if (!mnt_file) {
+ struct mount *m;
+ /*
+ * Don't trigger audit denials. We just want to determine what
+ * mounts to show users.
+ */
+ m = real_mount(s->mnt);
+ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+ }
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
@@ -5693,7 +5711,7 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
}
static int copy_mnt_id_req(const struct mnt_id_req __user *req,
- struct mnt_id_req *kreq)
+ struct mnt_id_req *kreq, unsigned int flags)
{
int ret;
size_t usize;
@@ -5711,11 +5729,17 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
if (ret)
return ret;
- if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
- return -EINVAL;
- /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
- if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
- return -EINVAL;
+
+ if (flags & STATMOUNT_BY_FD) {
+ if (kreq->mnt_id || kreq->mnt_ns_id)
+ return -EINVAL;
+ } else {
+ if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
+ return -EINVAL;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
+ }
return 0;
}
@@ -5762,25 +5786,33 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
{
struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct kstatmount *ks __free(kfree) = NULL;
+ struct file *mnt_file __free(fput) = NULL;
struct mnt_id_req kreq;
/* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX;
int ret;
- if (flags)
+ if (flags & ~STATMOUNT_BY_FD)
return -EINVAL;
- ret = copy_mnt_id_req(req, &kreq);
+ ret = copy_mnt_id_req(req, &kreq, flags);
if (ret)
return ret;
- ns = grab_requested_mnt_ns(&kreq);
- if (IS_ERR(ns))
- return PTR_ERR(ns);
+ if (flags & STATMOUNT_BY_FD) {
+ mnt_file = fget_raw(kreq.mnt_fd);
+ if (!mnt_file)
+ return -EBADF;
+ /* do_statmount sets ns in case of STATMOUNT_BY_FD */
+ } else {
+ ns = grab_requested_mnt_ns(&kreq);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
- if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
- !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
- return -ENOENT;
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ }
ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
if (!ks)
@@ -5792,7 +5824,7 @@ retry:
return ret;
scoped_guard(namespace_shared)
- ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
+ ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, mnt_file, ns);
if (!ret)
ret = copy_statmount_to_user(ks);
@@ -5932,7 +5964,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT;
- ret = copy_mnt_id_req(req, &kreq);
+ ret = copy_mnt_id_req(req, &kreq, 0);
if (ret)
return ret;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b748009175837c..6aa22b3b2f4394 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -194,7 +194,7 @@ static inline bool nfsd4_create_is_exclusive(int createmode)
}
static __be32
-nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
+nfsd4_vfs_create(struct svc_fh *fhp, struct dentry **child,
struct nfsd4_open *open)
{
struct file *filp;
@@ -202,6 +202,9 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
int oflags;
oflags = O_CREAT | O_LARGEFILE;
+ if (nfsd4_create_is_exclusive(open->op_createmode))
+ oflags |= O_EXCL;
+
switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
case NFS4_SHARE_ACCESS_WRITE:
oflags |= O_WRONLY;
@@ -214,9 +217,11 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
}
path.mnt = fhp->fh_export->ex_path.mnt;
- path.dentry = child;
+ path.dentry = *child;
filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
current_cred());
+ *child = path.dentry;
+
if (IS_ERR(filp))
return nfserrno(PTR_ERR(filp));
@@ -350,7 +355,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = fh_fill_pre_attrs(fhp);
if (status != nfs_ok)
goto out;
- status = nfsd4_vfs_create(fhp, child, open);
+ status = nfsd4_vfs_create(fhp, &child, open);
if (status != nfs_ok)
goto out;
open->op_created = true;
diff --git a/fs/open.c b/fs/open.c
index f328622061c56c..74c4c1462b3e47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1142,45 +1142,6 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
}
/**
- * dentry_create - Create and open a file
- * @path: path to create
- * @flags: O_ flags
- * @mode: mode bits for new file
- * @cred: credentials to use
- *
- * Caller must hold the parent directory's lock, and have prepared
- * a negative dentry, placed in @path->dentry, for the new file.
- *
- * Caller sets @path->mnt to the vfsmount of the filesystem where
- * the new file is to be created. The parent directory and the
- * negative dentry must reside on the same filesystem instance.
- *
- * On success, returns a "struct file *". Otherwise a ERR_PTR
- * is returned.
- */
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
- const struct cred *cred)
-{
- struct file *f;
- int error;
-
- f = alloc_empty_file(flags, cred);
- if (IS_ERR(f))
- return f;
-
- error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL);
- if (!error)
- error = vfs_open(path, f);
-
- if (unlikely(error)) {
- fput(f);
- return ERR_PTR(error);
- }
- return f;
-}
-EXPORT_SYMBOL(dentry_create);
-
-/**
* kernel_file_open - open a file for kernel internal use
* @path: path of the file to open
* @flags: open flags
diff --git a/fs/splice.c b/fs/splice.c
index d338fe56b50b31..5fb07c01936fdf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1467,7 +1467,7 @@ static ssize_t iter_to_pipe(struct iov_iter *from,
n = DIV_ROUND_UP(left + start, PAGE_SIZE);
for (i = 0; i < n; i++) {
- int size = min_t(int, left, PAGE_SIZE - start);
+ int size = umin(left, PAGE_SIZE - start);
buf.page = pages[i];
buf.offset = start;
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 2f5e5588ee0733..d2c9740e26a8eb 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -242,7 +242,14 @@ bool locks_owner_has_blockers(struct file_lock_context *flctx,
static inline struct file_lock_context *
locks_inode_context(const struct inode *inode)
{
- return smp_load_acquire(&inode->i_flctx);
+ /*
+ * Paired with smp_store_release in locks_get_lock_context().
+ *
+ * Ensures ->i_flctx will be visible if we spotted the flag.
+ */
+ if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX)))
+ return NULL;
+ return READ_ONCE(inode->i_flctx);
}
#else /* !CONFIG_FILE_LOCKING */
@@ -469,7 +476,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
* could end up racing with tasks trying to set a new lease on this
* file.
*/
- flctx = READ_ONCE(inode->i_flctx);
+ flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
@@ -488,7 +495,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags)
* could end up racing with tasks trying to set a new lease on this
* file.
*/
- flctx = READ_ONCE(inode->i_flctx);
+ flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
@@ -533,8 +540,11 @@ static inline int break_deleg_wait(struct delegated_inode *di)
static inline int break_layout(struct inode *inode, bool wait)
{
+ struct file_lock_context *flctx;
+
smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) {
+ flctx = locks_inode_context(inode);
+ if (flctx && !list_empty_careful(&flctx->flc_lease)) {
unsigned int flags = LEASE_BREAK_LAYOUT;
if (!wait)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5c9cf28c4dcf9..eb51e2a9b78c5e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_MGTIME 0x0020
#define IOP_CACHED_LINK 0x0040
#define IOP_FASTPERM_MAY_EXEC 0x0080
+#define IOP_FLCTX 0x0100
/*
* Inode state bits. Protected by inode->i_lock
@@ -2274,8 +2275,6 @@ struct file_system_type {
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
- struct dentry *(*mount) (struct file_system_type *, int,
- const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;
@@ -2457,7 +2456,7 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
struct file *dentry_open_nonotify(const struct path *path, int flags,
const struct cred *cred);
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+struct file *dentry_create(struct path *path, int flags, umode_t mode,
const struct cred *cred);
const struct path *backing_file_user_path(const struct file *f);
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
index 6bd3009e09b3b8..4bb9981af6acc6 100644
--- a/include/linux/fs/super_types.h
+++ b/include/linux/fs/super_types.h
@@ -96,7 +96,6 @@ struct super_operations {
const void *owner);
int (*unfreeze_fs)(struct super_block *sb);
int (*statfs)(struct dentry *dentry, struct kstatfs *kstatfs);
- int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin)(struct super_block *sb);
int (*show_options)(struct seq_file *seq, struct dentry *dentry);
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 92045d18cbfc99..0bdbc458a881a2 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -2,7 +2,6 @@
int __init init_mount(const char *dev_name, const char *dir_name,
const char *type_page, unsigned long flags, void *data_page);
-int __init init_umount(const char *name, int flags);
int __init init_chdir(const char *filename);
int __init init_chroot(const char *filename);
int __init init_chown(const char *filename, uid_t user, gid_t group, int flags);
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index f1a1f4c92ded39..7e5d26c8136f19 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -3,8 +3,6 @@
#ifndef __LINUX_INITRD_H
#define __LINUX_INITRD_H
-#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
-
/* starting block # of image */
extern int rd_image_start;
diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
index b332b019b29cb8..0014fbc1c62692 100644
--- a/include/linux/ns/ns_common_types.h
+++ b/include/linux/ns/ns_common_types.h
@@ -108,11 +108,13 @@ extern const struct proc_ns_operations utsns_operations;
* @ns_tree: namespace tree nodes and active reference count
*/
struct ns_common {
+ struct {
+ refcount_t __ns_ref; /* do not use directly */
+ } ____cacheline_aligned_in_smp;
u32 ns_type;
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
- refcount_t __ns_ref; /* do not use directly */
union {
struct ns_tree;
struct rcu_head ns_rcu;
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 5d3f8c9e3a6256..18c62440526888 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -197,7 +197,10 @@ struct statmount {
*/
struct mnt_id_req {
__u32 size;
- __u32 mnt_ns_fd;
+ union {
+ __u32 mnt_ns_fd;
+ __u32 mnt_fd;
+ };
__u64 mnt_id;
__u64 param;
__u64 mnt_ns_id;
@@ -232,4 +235,9 @@ struct mnt_id_req {
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+/*
+ * @flag bits for statmount(2)
+ */
+#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */
+
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 63d1464cb71c86..1c7fe0f4dca482 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -92,7 +92,6 @@ enum
KERN_DOMAINNAME=8, /* string: domainname */
KERN_PANIC=15, /* int: panic timeout */
- KERN_REALROOTDEV=16, /* real root device to mount after initrd */
KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index defbbf1d55f768..9c8a547075a7bc 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -34,13 +34,6 @@ static int root_wait;
dev_t ROOT_DEV;
-static int __init load_ramdisk(char *str)
-{
- pr_warn("ignoring the deprecated load_ramdisk= option\n");
- return 1;
-}
-__setup("load_ramdisk=", load_ramdisk);
-
static int __init readonly(char *str)
{
if (*str)
@@ -484,13 +477,11 @@ void __init prepare_namespace(void)
if (saved_root_name[0])
ROOT_DEV = parse_root_device(saved_root_name);
- if (initrd_load(saved_root_name))
- goto out;
+ initrd_load();
if (root_wait)
wait_for_root(saved_root_name);
mount_root(saved_root_name);
-out:
devtmpfs_mount();
init_mount(".", "/", NULL, MS_MOVE, NULL);
init_chroot(".");
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 6069ea3eb80d70..a386ee5314c952 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -23,25 +23,15 @@ static inline __init int create_dev(char *name, dev_t dev)
}
#ifdef CONFIG_BLK_DEV_RAM
-
-int __init rd_load_disk(int n);
-int __init rd_load_image(char *from);
-
+int __init rd_load_image(void);
#else
-
-static inline int rd_load_disk(int n) { return 0; }
-static inline int rd_load_image(char *from) { return 0; }
-
+static inline int rd_load_image(void) { return 0; }
#endif
#ifdef CONFIG_BLK_DEV_INITRD
-bool __init initrd_load(char *root_device_name);
+void __init initrd_load(void);
#else
-static inline bool initrd_load(char *root_device_name)
-{
- return false;
- }
-
+static inline void initrd_load(void) { }
#endif
/* Ensure that async file closing finished to prevent spurious errors. */
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index f6867bad0d782c..892e69ab41c446 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -2,45 +2,20 @@
#include <linux/unistd.h>
#include <linux/kernel.h>
#include <linux/fs.h>
-#include <linux/minix_fs.h>
-#include <linux/romfs_fs.h>
#include <linux/initrd.h>
-#include <linux/sched.h>
-#include <linux/freezer.h>
-#include <linux/kmod.h>
-#include <uapi/linux/mount.h>
#include "do_mounts.h"
unsigned long initrd_start, initrd_end;
int initrd_below_start_ok;
-static unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
static int __initdata mount_initrd = 1;
phys_addr_t phys_initrd_start __initdata;
unsigned long phys_initrd_size __initdata;
-#ifdef CONFIG_SYSCTL
-static const struct ctl_table kern_do_mounts_initrd_table[] = {
- {
- .procname = "real-root-dev",
- .data = &real_root_dev,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-};
-
-static __init int kernel_do_mounts_initrd_sysctls_init(void)
-{
- register_sysctl_init("kernel", kern_do_mounts_initrd_table);
- return 0;
-}
-late_initcall(kernel_do_mounts_initrd_sysctls_init);
-#endif /* CONFIG_SYSCTL */
-
static int __init no_initrd(char *str)
{
+ pr_warn("noinitrd option is deprecated and will be removed soon\n");
mount_initrd = 0;
return 1;
}
@@ -70,85 +45,19 @@ static int __init early_initrd(char *p)
}
early_param("initrd", early_initrd);
-static int __init init_linuxrc(struct subprocess_info *info, struct cred *new)
-{
- ksys_unshare(CLONE_FS | CLONE_FILES);
- console_on_rootfs();
- /* move initrd over / and chdir/chroot in initrd root */
- init_chdir("/root");
- init_mount(".", "/", NULL, MS_MOVE, NULL);
- init_chroot(".");
- ksys_setsid();
- return 0;
-}
-
-static void __init handle_initrd(char *root_device_name)
-{
- struct subprocess_info *info;
- static char *argv[] = { "linuxrc", NULL, };
- extern char *envp_init[];
- int error;
-
- pr_warn("using deprecated initrd support, will be removed soon.\n");
-
- real_root_dev = new_encode_dev(ROOT_DEV);
- create_dev("/dev/root.old", Root_RAM0);
- /* mount initrd on rootfs' /root */
- mount_root_generic("/dev/root.old", root_device_name,
- root_mountflags & ~MS_RDONLY);
- init_mkdir("/old", 0700);
- init_chdir("/old");
-
- info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
- GFP_KERNEL, init_linuxrc, NULL, NULL);
- if (!info)
- return;
- call_usermodehelper_exec(info, UMH_WAIT_PROC|UMH_FREEZABLE);
-
- /* move initrd to rootfs' /old */
- init_mount("..", ".", NULL, MS_MOVE, NULL);
- /* switch root and cwd back to / of rootfs */
- init_chroot("..");
-
- if (new_decode_dev(real_root_dev) == Root_RAM0) {
- init_chdir("/old");
- return;
- }
-
- init_chdir("/");
- ROOT_DEV = new_decode_dev(real_root_dev);
- mount_root(root_device_name);
-
- printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
- error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
- if (!error)
- printk("okay\n");
- else {
- if (error == -ENOENT)
- printk("/initrd does not exist. Ignored.\n");
- else
- printk("failed\n");
- printk(KERN_NOTICE "Unmounting old root\n");
- init_umount("/old", MNT_DETACH);
- }
-}
-
-bool __init initrd_load(char *root_device_name)
+void __init initrd_load(void)
{
if (mount_initrd) {
create_dev("/dev/ram", Root_RAM0);
/*
- * Load the initrd data into /dev/ram0. Execute it as initrd
- * unless /dev/ram0 is supposed to be our actual root device,
- * in that case the ram disk is just set up here, and gets
- * mounted in the normal path.
+ * Load the initrd data into /dev/ram0.
*/
- if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
- init_unlink("/initrd.image");
- handle_initrd(root_device_name);
- return true;
+ if (rd_load_image()) {
+ pr_warn("using deprecated initrd support, will be removed in January 2027; "
+ "use initramfs instead or (as a last resort) /sys/firmware/initrd; "
+ "see section \"Workaround\" in "
+ "https://lore.kernel.org/lkml/20251010094047.3111495-1-safinaskar@gmail.com\n");
}
}
init_unlink("/initrd.image");
- return false;
}
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index eddbe5cb0413a7..48bfab2fc62f91 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -18,17 +18,11 @@
static struct file *in_file, *out_file;
static loff_t in_pos, out_pos;
-static int __init prompt_ramdisk(char *str)
-{
- pr_warn("ignoring the deprecated prompt_ramdisk= option\n");
- return 1;
-}
-__setup("prompt_ramdisk=", prompt_ramdisk);
-
int __initdata rd_image_start; /* starting block # of image */
static int __init ramdisk_start_setup(char *str)
{
+ pr_warn("ramdisk_start= option is deprecated and will be removed soon\n");
return kstrtoint(str, 0, &rd_image_start) == 0;
}
__setup("ramdisk_start=", ramdisk_start_setup);
@@ -183,7 +177,7 @@ static unsigned long nr_blocks(struct file *file)
return i_size_read(inode) >> 10;
}
-int __init rd_load_image(char *from)
+int __init rd_load_image(void)
{
int res = 0;
unsigned long rd_blocks, devblocks, nr_disks;
@@ -197,7 +191,7 @@ int __init rd_load_image(char *from)
if (IS_ERR(out_file))
goto out;
- in_file = filp_open(from, O_RDONLY, 0);
+ in_file = filp_open("/initrd.image", O_RDONLY, 0);
if (IS_ERR(in_file))
goto noclose_input;
@@ -226,10 +220,7 @@ int __init rd_load_image(char *from)
/*
* OK, time to copy in the data
*/
- if (strcmp(from, "/initrd.image") == 0)
- devblocks = nblocks;
- else
- devblocks = nr_blocks(in_file);
+ devblocks = nblocks;
if (devblocks == 0) {
printk(KERN_ERR "RAMDISK: could not determine device size\n");
@@ -273,13 +264,6 @@ out:
return res;
}
-int __init rd_load_disk(int n)
-{
- create_dev("/dev/root", ROOT_DEV);
- create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
- return rd_load_image("/dev/root");
-}
-
static int exit_code;
static int decompress_error;
diff --git a/kernel/pid.c b/kernel/pid.c
index a31771bc89c1a3..f45ae56db7daa8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -159,58 +159,86 @@ void free_pids(struct pid **pids)
free_pid(pids[tmp]);
}
-struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
- size_t set_tid_size)
+struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid,
+ size_t arg_set_tid_size)
{
+ int set_tid[MAX_PID_NS_LEVEL + 1] = {};
+ int pid_max[MAX_PID_NS_LEVEL + 1] = {};
struct pid *pid;
enum pid_type type;
int i, nr;
struct pid_namespace *tmp;
struct upid *upid;
int retval = -ENOMEM;
+ bool retried_preload;
/*
- * set_tid_size contains the size of the set_tid array. Starting at
+ * arg_set_tid_size contains the size of the arg_set_tid array. Starting at
* the most nested currently active PID namespace it tells alloc_pid()
* which PID to set for a process in that most nested PID namespace
- * up to set_tid_size PID namespaces. It does not have to set the PID
- * for a process in all nested PID namespaces but set_tid_size must
+ * up to arg_set_tid_size PID namespaces. It does not have to set the PID
+ * for a process in all nested PID namespaces but arg_set_tid_size must
* never be greater than the current ns->level + 1.
*/
- if (set_tid_size > ns->level + 1)
+ if (arg_set_tid_size > ns->level + 1)
return ERR_PTR(-EINVAL);
+ /*
+ * Prep before we take locks:
+ *
+ * 1. allocate and fill in pid struct
+ */
pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
if (!pid)
return ERR_PTR(retval);
- tmp = ns;
+ get_pid_ns(ns);
pid->level = ns->level;
+ refcount_set(&pid->count, 1);
+ spin_lock_init(&pid->lock);
+ for (type = 0; type < PIDTYPE_MAX; ++type)
+ INIT_HLIST_HEAD(&pid->tasks[type]);
+ init_waitqueue_head(&pid->wait_pidfd);
+ INIT_HLIST_HEAD(&pid->inodes);
- for (i = ns->level; i >= 0; i--) {
- int tid = 0;
- int pid_max = READ_ONCE(tmp->pid_max);
+ /*
+ * 2. perm check checkpoint_restore_ns_capable()
+ *
+ * This stores found pid_max to make sure the used value is the same should
+ * later code need it.
+ */
+ for (tmp = ns, i = ns->level; i >= 0; i--) {
+ pid_max[ns->level - i] = READ_ONCE(tmp->pid_max);
- if (set_tid_size) {
- tid = set_tid[ns->level - i];
+ if (arg_set_tid_size) {
+ int tid = set_tid[ns->level - i] = arg_set_tid[ns->level - i];
retval = -EINVAL;
- if (tid < 1 || tid >= pid_max)
- goto out_free;
+ if (tid < 1 || tid >= pid_max[ns->level - i])
+ goto out_abort;
/*
* Also fail if a PID != 1 is requested and
* no PID 1 exists.
*/
if (tid != 1 && !tmp->child_reaper)
- goto out_free;
+ goto out_abort;
retval = -EPERM;
if (!checkpoint_restore_ns_capable(tmp->user_ns))
- goto out_free;
- set_tid_size--;
+ goto out_abort;
+ arg_set_tid_size--;
}
- idr_preload(GFP_KERNEL);
- spin_lock(&pidmap_lock);
+ tmp = tmp->parent;
+ }
+
+ /*
+ * Prep is done, id allocation goes here:
+ */
+ retried_preload = false;
+ idr_preload(GFP_KERNEL);
+ spin_lock(&pidmap_lock);
+ for (tmp = ns, i = ns->level; i >= 0;) {
+ int tid = set_tid[ns->level - i];
if (tid) {
nr = idr_alloc(&tmp->idr, NULL, tid,
@@ -220,6 +248,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
* alreay in use. Return EEXIST in that case.
*/
if (nr == -ENOSPC)
+
nr = -EEXIST;
} else {
int pid_min = 1;
@@ -235,19 +264,42 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
* a partially initialized PID (see below).
*/
nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
- pid_max, GFP_ATOMIC);
+ pid_max[ns->level - i], GFP_ATOMIC);
+ if (nr == -ENOSPC)
+ nr = -EAGAIN;
}
- spin_unlock(&pidmap_lock);
- idr_preload_end();
- if (nr < 0) {
- retval = (nr == -ENOSPC) ? -EAGAIN : nr;
+ if (unlikely(nr < 0)) {
+ /*
+ * Preload more memory if idr_alloc{,cyclic} failed with -ENOMEM.
+ *
+ * The IDR API only allows us to preload memory for one call, while we may end
+ * up doing several under pidmap_lock with GFP_ATOMIC. The situation may be
+ * salvageable with GFP_KERNEL. But make sure to not loop indefinitely if preload
+ * did not help (the routine unfortunately returns void, so we have no idea
+ * if it got anywhere).
+ *
+ * The lock can be safely dropped and picked up as historically pid allocation
+ * for different namespaces was *not* atomic -- we try to hold on to it the
+ * entire time only for performance reasons.
+ */
+ if (nr == -ENOMEM && !retried_preload) {
+ spin_unlock(&pidmap_lock);
+ idr_preload_end();
+ retried_preload = true;
+ idr_preload(GFP_KERNEL);
+ spin_lock(&pidmap_lock);
+ continue;
+ }
+ retval = nr;
goto out_free;
}
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
tmp = tmp->parent;
+ i--;
+ retried_preload = false;
}
/*
@@ -257,25 +309,15 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
* is what we have exposed to userspace for a long time and it is
* documented behavior for pid namespaces. So we can't easily
* change it even if there were an error code better suited.
+ *
+ * This can't be done earlier because we need to preserve other
+ * error conditions.
*/
retval = -ENOMEM;
-
- get_pid_ns(ns);
- refcount_set(&pid->count, 1);
- spin_lock_init(&pid->lock);
- for (type = 0; type < PIDTYPE_MAX; ++type)
- INIT_HLIST_HEAD(&pid->tasks[type]);
-
- init_waitqueue_head(&pid->wait_pidfd);
- INIT_HLIST_HEAD(&pid->inodes);
-
- upid = pid->numbers + ns->level;
- idr_preload(GFP_KERNEL);
- spin_lock(&pidmap_lock);
- if (!(ns->pid_allocated & PIDNS_ADDING))
- goto out_unlock;
+ if (unlikely(!(ns->pid_allocated & PIDNS_ADDING)))
+ goto out_free;
pidfs_add_pid(pid);
- for ( ; upid >= pid->numbers; --upid) {
+ for (upid = pid->numbers + ns->level; upid >= pid->numbers; --upid) {
/* Make the PID visible to find_pid_ns. */
idr_replace(&upid->ns->idr, pid, upid->nr);
upid->ns->pid_allocated++;
@@ -286,13 +328,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
return pid;
-out_unlock:
- spin_unlock(&pidmap_lock);
- idr_preload_end();
- put_pid_ns(ns);
-
out_free:
- spin_lock(&pidmap_lock);
while (++i <= ns->level) {
upid = pid->numbers + i;
idr_remove(&upid->ns->idr, upid->nr);
@@ -303,7 +339,10 @@ out_free:
idr_set_cursor(&ns->idr, 0);
spin_unlock(&pidmap_lock);
+ idr_preload_end();
+out_abort:
+ put_pid_ns(ns);
kmem_cache_free(ns->pid_cachep, pid);
return ERR_PTR(retval);
}
diff --git a/rust/helpers/fs.c b/rust/helpers/fs.c
index a75c9676337246..789d60fb8908b9 100644
--- a/rust/helpers/fs.c
+++ b/rust/helpers/fs.c
@@ -6,7 +6,7 @@
#include <linux/fs.h>
-struct file *rust_helper_get_file(struct file *f)
+__rust_helper struct file *rust_helper_get_file(struct file *f)
{
return get_file(f);
}
diff --git a/rust/helpers/pid_namespace.c b/rust/helpers/pid_namespace.c
index f41482bdec9a7c..f46ab779b5279f 100644
--- a/rust/helpers/pid_namespace.c
+++ b/rust/helpers/pid_namespace.c
@@ -3,18 +3,20 @@
#include <linux/pid_namespace.h>
#include <linux/cleanup.h>
-struct pid_namespace *rust_helper_get_pid_ns(struct pid_namespace *ns)
+__rust_helper struct pid_namespace *
+rust_helper_get_pid_ns(struct pid_namespace *ns)
{
return get_pid_ns(ns);
}
-void rust_helper_put_pid_ns(struct pid_namespace *ns)
+__rust_helper void rust_helper_put_pid_ns(struct pid_namespace *ns)
{
put_pid_ns(ns);
}
/* Get a reference on a task's pid namespace. */
-struct pid_namespace *rust_helper_task_get_pid_ns(struct task_struct *task)
+__rust_helper struct pid_namespace *
+rust_helper_task_get_pid_ns(struct task_struct *task)
{
struct pid_namespace *pid_ns;
diff --git a/rust/helpers/poll.c b/rust/helpers/poll.c
index 7e5b1751c2d526..78b3839b50f065 100644
--- a/rust/helpers/poll.c
+++ b/rust/helpers/poll.c
@@ -3,8 +3,9 @@
#include <linux/export.h>
#include <linux/poll.h>
-void rust_helper_poll_wait(struct file *filp, wait_queue_head_t *wait_address,
- poll_table *p)
+__rust_helper void rust_helper_poll_wait(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p)
{
poll_wait(filp, wait_address, p);
}
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index 99e5ad082fb1ee..e1cba4bfd8d915 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -43,19 +43,24 @@
#endif
#endif
-static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
- struct statmount *buf, size_t bufsize,
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint32_t fd,
+ uint64_t mask, struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
.param = mask,
};
- if (mnt_ns_id) {
+ if (flags & STATMOUNT_BY_FD) {
req.size = MNT_ID_REQ_SIZE_VER1;
- req.mnt_ns_id = mnt_ns_id;
+ req.mnt_fd = fd;
+ } else {
+ req.mnt_id = mnt_id;
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 6e53430423d271..a04bcaace12616 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -33,15 +33,24 @@ static const char *const known_fs[] = {
"sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
"vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
-static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
+static struct statmount *statmount_alloc(uint64_t mnt_id, int fd, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
- struct statmount *buf = NULL, *tmp = alloca(bufsize);
+ struct statmount *buf = NULL, *tmp = NULL;
int tofree = 0;
int ret;
+ if (flags & STATMOUNT_BY_FD && fd < 0)
+ return NULL;
+
+ tmp = alloca(bufsize);
+
for (;;) {
- ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
+ if (flags & STATMOUNT_BY_FD)
+ ret = statmount(0, 0, (uint32_t) fd, mask, tmp, bufsize, flags);
+ else
+ ret = statmount(mnt_id, 0, 0, mask, tmp, bufsize, flags);
+
if (ret != -1)
break;
if (tofree)
@@ -237,7 +246,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
- ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@@ -263,7 +272,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
- ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@@ -323,7 +332,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
- ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@@ -375,7 +384,7 @@ static void test_statmount_mnt_point(void)
{
struct statmount *sm;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_POINT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount point: %s\n",
strerror(errno));
@@ -405,7 +414,7 @@ static void test_statmount_mnt_root(void)
assert(last_dir);
last_dir++;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount root: %s\n",
strerror(errno));
@@ -438,7 +447,7 @@ static void test_statmount_fs_type(void)
const char *fs_type;
const char *const *s;
- sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0);
+ sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
if (!sm) {
ksft_test_result_fail("statmount fs type: %s\n",
strerror(errno));
@@ -467,7 +476,7 @@ static void test_statmount_mnt_opts(void)
char *line = NULL;
size_t len = 0;
- sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
0);
if (!sm) {
ksft_test_result_fail("statmount mnt opts: %s\n",
@@ -557,7 +566,7 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
uint32_t start, i;
int ret;
- sm = statmount_alloc(root_id, mask, 0);
+ sm = statmount_alloc(root_id, 0, mask, 0);
if (!sm) {
ksft_test_result_fail("statmount %s: %s\n", name,
strerror(errno));
@@ -586,14 +595,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
- ret = statmount(root_id, 0, mask, sm, exactsize, 0);
+ ret = statmount(root_id, 0, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
- ret = statmount(root_id, 0, mask, sm, shortsize, 0);
+ ret = statmount(root_id, 0, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@@ -658,6 +667,226 @@ static void test_listmount_tree(void)
ksft_test_result_pass("listmount tree\n");
}
+/*
+ * Exercise statmount() with STATMOUNT_BY_FD on a bind mount, first while
+ * chrooted into a sibling directory and then again after chroot(".").
+ *
+ * Layout created under a fresh tmpfs mounted on <tmpdir>:
+ *   <tmpdir>/test    - bind-mounted onto itself; the O_PATH fd refers to it
+ *   <tmpdir>/chroot  - empty directory used as the temporary root
+ *
+ * Phase 1 (after chroot(<tmpdir>/chroot)): the returned mask must contain
+ * STATMOUNT_MNT_ROOT with mnt_root == "/test" and must NOT contain
+ * STATMOUNT_MNT_POINT (presumably because the mount is not reachable from
+ * the new root - confirm against the kernel implementation).
+ *
+ * Phase 2 (after chroot(".")): both bits must be set, mnt_point must equal
+ * <tmpdir>/test and mnt_root must equal "/test".
+ * NOTE(review): chroot(".") only undoes the first chroot if the current
+ * working directory is still the original root - confirm against the
+ * namespace setup done before this test runs.
+ *
+ * Setup failures are only logged with ksft_perror(); no test result is
+ * recorded for them. Cleanup is best effort and ignores return values.
+ */
+static void test_statmount_by_fd(void)
+{
+	struct statmount *sm = NULL;
+	char tmpdir[] = "/statmount.fd.XXXXXX";
+	const char root[] = "/test";
+	char subdir[PATH_MAX], tmproot[PATH_MAX];
+	int fd;
+
+	if (!mkdtemp(tmpdir)) {
+		ksft_perror("mkdtemp");
+		return;
+	}
+
+	if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
+		ksft_perror("mount");
+		rmdir(tmpdir);
+		return;
+	}
+
+	/* root already starts with '/', so no separator is added here. */
+	snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);
+	snprintf(tmproot, PATH_MAX, "%s/%s", tmpdir, "chroot");
+
+	if (mkdir(subdir, 0755)) {
+		ksft_perror("mkdir");
+		goto err_tmpdir;
+	}
+
+	/* Bind the directory onto itself so it becomes its own mount. */
+	if (mount(subdir, subdir, NULL, MS_BIND, NULL)) {
+		ksft_perror("mount");
+		goto err_subdir;
+	}
+
+	if (mkdir(tmproot, 0755)) {
+		ksft_perror("mkdir");
+		goto err_subdir;
+	}
+
+	/* Open before chroot() so the fd refers to a path outside the new root. */
+	fd = open(subdir, O_PATH);
+	if (fd < 0) {
+		ksft_perror("open");
+		goto err_tmproot;
+	}
+
+	if (chroot(tmproot)) {
+		ksft_perror("chroot");
+		goto err_fd;
+	}
+
+	/* Phase 1: query by fd while chrooted. */
+	sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT,
+			     STATMOUNT_BY_FD);
+	if (!sm) {
+		ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
+		goto err_chroot;
+	}
+
+	if (sm->size < sizeof(*sm)) {
+		ksft_test_result_fail("unexpected size: %u < %u\n",
+				      sm->size, (uint32_t) sizeof(*sm));
+		goto err_chroot;
+	}
+
+	if (sm->mask & STATMOUNT_MNT_POINT) {
+		ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in statmount\n");
+		goto err_chroot;
+	}
+
+	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
+		goto err_chroot;
+	}
+
+	if (strcmp(root, sm->str + sm->mnt_root) != 0) {
+		ksft_test_result_fail("statmount returned incorrect mnt_root, "
+				      "statmount mnt_root: %s != %s\n",
+				      sm->str + sm->mnt_root, root);
+		goto err_chroot;
+	}
+
+	if (chroot(".")) {
+		ksft_perror("chroot");
+		goto out;
+	}
+
+	/* Phase 2: repeat the query after the chroot(".") call. */
+	free(sm);
+	sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT,
+			     STATMOUNT_BY_FD);
+	if (!sm) {
+		ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
+		goto err_fd;
+	}
+
+	if (sm->size < sizeof(*sm)) {
+		ksft_test_result_fail("unexpected size: %u < %u\n",
+				      sm->size, (uint32_t) sizeof(*sm));
+		goto out;
+	}
+
+	if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+		ksft_test_result_fail("STATMOUNT_MNT_POINT not set in statmount\n");
+		goto out;
+	}
+
+	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
+		goto out;
+	}
+
+	if (strcmp(subdir, sm->str + sm->mnt_point) != 0) {
+		ksft_test_result_fail("statmount returned incorrect mnt_point, "
+				      "statmount mnt_point: %s != %s\n",
+				      sm->str + sm->mnt_point, subdir);
+		goto out;
+	}
+
+	if (strcmp(root, sm->str + sm->mnt_root) != 0) {
+		ksft_test_result_fail("statmount returned incorrect mnt_root, "
+				      "statmount mnt_root: %s != %s\n",
+				      sm->str + sm->mnt_root, root);
+		goto out;
+	}
+
+	ksft_test_result_pass("statmount by fd\n");
+	goto out;
+err_chroot:
+	/*
+	 * Still chrooted on this path; report a failure to switch back, since
+	 * the path-based cleanup below is resolved against the current root.
+	 */
+	if (chroot("."))
+		ksft_perror("chroot");
+out:
+	free(sm);
+err_fd:
+	close(fd);
+err_tmproot:
+	rmdir(tmproot);
+err_subdir:
+	umount2(subdir, MNT_DETACH);
+	rmdir(subdir);
+err_tmpdir:
+	umount2(tmpdir, MNT_DETACH);
+	rmdir(tmpdir);
+}
+
+/*
+ * Exercise statmount() with STATMOUNT_BY_FD on a mount whose parent has been
+ * lazily detached.
+ *
+ * A tmpfs is mounted on <tmpdir>, <tmpdir>/test.unmounted is bind-mounted
+ * onto itself and opened with O_PATH, then <tmpdir> is detached with
+ * umount2(MNT_DETACH). The query through the still-open fd must then report
+ * STATMOUNT_MNT_ROOT with mnt_root == "/test.unmounted" and must NOT report
+ * STATMOUNT_MNT_POINT.
+ *
+ * Setup failures are only logged with ksft_perror(); no test result is
+ * recorded for them. Cleanup is best effort and ignores return values (after
+ * a successful detach the remaining umount2()/rmdir() calls on the bind
+ * mount path are expected to be harmless no-ops - TODO confirm).
+ */
+static void test_statmount_by_fd_unmounted(void)
+{
+	const char root[] = "/test.unmounted";
+	char tmpdir[] = "/statmount.fd.XXXXXX";
+	char subdir[PATH_MAX];
+	int fd;
+	struct statmount *sm = NULL;
+
+	if (!mkdtemp(tmpdir)) {
+		ksft_perror("mkdtemp");
+		return;
+	}
+
+	if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
+		ksft_perror("mount");
+		rmdir(tmpdir);
+		return;
+	}
+
+	/* root already starts with '/', so no separator is added here. */
+	snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);
+
+	if (mkdir(subdir, 0755)) {
+		ksft_perror("mkdir");
+		goto err_tmpdir;
+	}
+
+	/* Bind the directory onto itself so it becomes its own mount. */
+	if (mount(subdir, subdir, NULL, MS_BIND, NULL)) {
+		ksft_perror("mount");
+		goto err_subdir;
+	}
+
+	/* Keep a reference to the mount that survives the detach below. */
+	fd = open(subdir, O_PATH);
+	if (fd < 0) {
+		ksft_perror("open");
+		goto err_subdir;
+	}
+
+	if (umount2(tmpdir, MNT_DETACH)) {
+		ksft_perror("umount2");
+		goto err_fd;
+	}
+
+	sm = statmount_alloc(0, fd, STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT,
+			     STATMOUNT_BY_FD);
+	if (!sm) {
+		ksft_test_result_fail("statmount by fd unmounted: %s\n",
+				      strerror(errno));
+		goto err_sm;
+	}
+
+	if (sm->size < sizeof(*sm)) {
+		ksft_test_result_fail("unexpected size: %u < %u\n",
+				      sm->size, (uint32_t) sizeof(*sm));
+		goto err_sm;
+	}
+
+	if (sm->mask & STATMOUNT_MNT_POINT) {
+		ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in mask\n");
+		goto err_sm;
+	}
+
+	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in mask\n");
+		goto err_sm;
+	}
+
+	if (strcmp(sm->str + sm->mnt_root, root) != 0) {
+		ksft_test_result_fail("statmount returned incorrect mnt_root, "
+				      "statmount mnt_root: %s != %s\n",
+				      sm->str + sm->mnt_root, root);
+		goto err_sm;
+	}
+
+	ksft_test_result_pass("statmount by fd on unmounted mount\n");
+err_sm:
+	/* Also reached on success; free(NULL) is a no-op when the alloc failed. */
+	free(sm);
+err_fd:
+	close(fd);
+err_subdir:
+	umount2(subdir, MNT_DETACH);
+	rmdir(subdir);
+err_tmpdir:
+	umount2(tmpdir, MNT_DETACH);
+	rmdir(tmpdir);
+}
+
#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t))
int main(void)
@@ -669,14 +898,14 @@ int main(void)
ksft_print_header();
- ret = statmount(0, 0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
- ksft_set_plan(15);
+ ksft_set_plan(17);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@@ -693,6 +922,8 @@ int main(void)
test_statmount_string(all_mask, str_off(fs_type), "fs type & all");
test_listmount_tree();
+ test_statmount_by_fd_unmounted();
+ test_statmount_by_fd();
if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index d56d4103182fd9..063d9de4643163 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -102,7 +102,7 @@ static int _test_statmount_mnt_ns_id(void)
if (!root_id)
return NSID_ERROR;
- ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
return NSID_ERROR;
@@ -128,6 +128,98 @@ static int _test_statmount_mnt_ns_id(void)
return NSID_PASS;
}
+/*
+ * Check STATMOUNT_MNT_NS_ID when the mount is looked up with STATMOUNT_BY_FD.
+ *
+ * A temporary directory is bind-mounted onto itself and opened with O_PATH.
+ * While mounted, the reported mnt_ns_id must equal the id obtained from
+ * /proc/self/ns/mnt. After the mount is lazily detached, a second query
+ * through the same fd must succeed but its mask must no longer be exactly
+ * STATMOUNT_MNT_NS_ID.
+ *
+ * Returns NSID_PASS on success, NSID_FAIL when a check fails, NSID_SKIP when
+ * the first query does not return STATMOUNT_MNT_NS_ID, and NSID_ERROR when
+ * setup or a syscall fails (or whatever get_mnt_ns_id() returned if it did
+ * not pass).
+ */
+static int _test_statmount_mnt_ns_id_by_fd(void)
+{
+	struct statmount sm;
+	uint64_t mnt_ns_id;
+	/* status stays NSID_ERROR until a check decides otherwise. */
+	int ret, fd, mounted = 0, status = NSID_ERROR;
+	char mnt[] = "/statmount.fd.XXXXXX";
+
+	ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+	if (ret != NSID_PASS)
+		return ret;
+
+	if (!mkdtemp(mnt)) {
+		ksft_print_msg("statmount by fd mnt ns id mkdtemp: %s\n", strerror(errno));
+		return NSID_ERROR;
+	}
+
+	if (mount(mnt, mnt, NULL, MS_BIND, NULL)) {
+		ksft_print_msg("statmount by fd mnt ns id mount: %s\n", strerror(errno));
+		goto err;
+	}
+	mounted = 1;
+
+	fd = open(mnt, O_PATH);
+	if (fd < 0) {
+		ksft_print_msg("statmount by fd mnt ns id open: %s\n", strerror(errno));
+		/* The bind mount exists already: undo it so rmdir() can succeed. */
+		goto err_umount;
+	}
+
+	ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
+	if (ret == -1) {
+		ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (sm.size != sizeof(sm)) {
+		ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+			       (uint32_t)sizeof(sm));
+		status = NSID_FAIL;
+		goto out;
+	}
+	if (sm.mask != STATMOUNT_MNT_NS_ID) {
+		ksft_print_msg("statmount mnt ns id unavailable\n");
+		status = NSID_SKIP;
+		goto out;
+	}
+
+	if (sm.mnt_ns_id != mnt_ns_id) {
+		ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+			       (unsigned long long)sm.mnt_ns_id,
+			       (unsigned long long)mnt_ns_id);
+		status = NSID_FAIL;
+		goto out;
+	}
+
+	if (umount2(mnt, MNT_DETACH)) {
+		ksft_print_msg("statmount by fd mnt ns id umount2: %s\n", strerror(errno));
+		goto out;
+	}
+	/* Only now is the mount known to be gone; cleanup must not retry. */
+	mounted = 0;
+
+	ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
+	if (ret == -1) {
+		ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (sm.size != sizeof(sm)) {
+		ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+			       (uint32_t)sizeof(sm));
+		status = NSID_FAIL;
+		goto out;
+	}
+
+	if (sm.mask == STATMOUNT_MNT_NS_ID) {
+		ksft_print_msg("unexpected STATMOUNT_MNT_NS_ID in mask\n");
+		status = NSID_FAIL;
+		goto out;
+	}
+
+	status = NSID_PASS;
+out:
+	close(fd);
+err_umount:
+	if (mounted)
+		umount2(mnt, MNT_DETACH);
+err:
+	rmdir(mnt);
+	return status;
+}
+
+
static void test_statmount_mnt_ns_id(void)
{
pid_t pid;
@@ -148,6 +240,9 @@ static void test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
exit(ret);
ret = _test_statmount_mnt_ns_id();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id_by_fd();
exit(ret);
}
@@ -179,7 +274,7 @@ static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
for (int i = 0; i < nr_mounts; i++) {
struct statmount sm;
- ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ ret = statmount(list[i], mnt_ns_id, 0, STATMOUNT_MNT_NS_ID, &sm,
sizeof(sm), 0);
if (ret < 0) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
@@ -275,7 +370,7 @@ int main(void)
int ret;
ksft_print_header();
- ret = statmount(0, 0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");