diff options
46 files changed, 685 insertions, 650 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a8d0afde7f85a5..f67591615a6adf 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3437,8 +3437,6 @@ Kernel parameters If there are multiple matching configurations changing the same attribute, the last one is used. - load_ramdisk= [RAM] [Deprecated] - lockd.nlm_grace_period=P [NFS] Assign grace period. Format: <integer> @@ -4444,8 +4442,10 @@ Kernel parameters Note that this argument takes precedence over the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option. - noinitrd [RAM] Tells the kernel not to load any configured - initial RAM disk. + noinitrd [Deprecated,RAM] Tells the kernel not to load any configured + initial RAM disk. Currently this parameter applies to + initrd only, not to initramfs. But it applies to both + in EFI mode. nointremap [X86-64,Intel-IOMMU,EARLY] Do not enable interrupt remapping. @@ -5402,8 +5402,6 @@ Kernel parameters Param: <number> - step/bucket size as a power of 2 for statistical time based profiling. - prompt_ramdisk= [RAM] [Deprecated] - prot_virt= [S390] enable hosting protected virtual machines isolated from the hypervisor (if hardware supports that). If enabled, the default kernel base address @@ -5460,7 +5458,7 @@ Kernel parameters ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/admin-guide/blockdev/ramdisk.rst. - ramdisk_start= [RAM] RAM disk image start address + ramdisk_start= [Deprecated,RAM] RAM disk image start address random.trust_cpu=off [KNL,EARLY] Disable trusting the use of the CPU's diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 239da22c4e28f1..bb577fac76a01a 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1235,12 +1235,6 @@ that support this feature. == =========================================================================== -real-root-dev -============= - -See Documentation/admin-guide/initrd.rst. - - reboot-cmd (SPARC only) ======================= diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 04c7691e50e01f..35e97618868b37 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -177,7 +177,6 @@ prototypes:: int (*freeze_fs) (struct super_block *); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); - int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); @@ -201,7 +200,6 @@ sync_fs: read freeze_fs: write unfreeze_fs: write statfs: maybe(read) (see below) -remount_fs: write umount_begin: no show_options: no (namespace_sem) quota_read: no (see below) @@ -226,8 +224,6 @@ file_system_type prototypes:: - struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); void (*kill_sb) (struct super_block *); locking rules: @@ -235,13 +231,9 @@ locking rules: ======= ========= ops may block ======= ========= -mount yes kill_sb yes ======= ========= -->mount() returns ERR_PTR or the root dentry; its superblock should be locked -on return. - ->kill_sb() takes a write-locked superblock, does all shutdown work on it, unlocks and drops the reference. diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst index c99ab1f7fea453..a064234fed5bb9 100644 --- a/Documentation/filesystems/mount_api.rst +++ b/Documentation/filesystems/mount_api.rst @@ -299,8 +299,6 @@ manage the filesystem context. They are as follows: On success it should return 0. In the case of an error, it should return a negative error code. - .. Note:: reconfigure is intended as a replacement for remount_fs. - Filesystem context Security =========================== diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3397937ed838e5..631eee9bdc3384 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -448,11 +448,8 @@ a file off. **mandatory** -->get_sb() is gone. Switch to use of ->mount(). Typically it's just -a matter of switching from calling ``get_sb_``... to ``mount_``... and changing -the function type. If you were doing it manually, just switch from setting -->mnt_root to some pointer to returning that pointer. On errors return -ERR_PTR(...). +->get_sb() and ->mount() are gone. Switch to using the new mount API. See +Documentation/filesystems/mount_api.rst for more details. --- diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 670ba66b60e496..90c357b263fed7 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -94,11 +94,9 @@ functions: The passed struct file_system_type describes your filesystem. When a request is made to mount a filesystem onto a directory in your -namespace, the VFS will call the appropriate mount() method for the -specific filesystem. New vfsmount referring to the tree returned by -->mount() will be attached to the mountpoint, so that when pathname -resolution reaches the mountpoint it will jump into the root of that -vfsmount. +namespace, the VFS will call the appropriate get_tree() method for the +specific filesystem. See Documentation/filesystems/mount_api.rst +for more details. You can see all filesystems that are registered to the kernel in the file /proc/filesystems. @@ -117,8 +115,6 @@ members are defined: int fs_flags; int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; - struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -151,10 +147,6 @@ members are defined: 'struct fs_parameter_spec'. More info in Documentation/filesystems/mount_api.rst. -``mount`` - the method to call when a new instance of this filesystem should - be mounted - ``kill_sb`` the method to call when an instance of this filesystem should be shut down @@ -173,45 +165,6 @@ members are defined: s_lock_key, s_umount_key, s_vfs_rename_key, s_writers_key, i_lock_key, i_mutex_key, invalidate_lock_key, i_mutex_dir_key: lockdep-specific -The mount() method has the following arguments: - -``struct file_system_type *fs_type`` - describes the filesystem, partly initialized by the specific - filesystem code - -``int flags`` - mount flags - -``const char *dev_name`` - the device name we are mounting. - -``void *data`` - arbitrary mount options, usually comes as an ASCII string (see - "Mount Options" section) - -The mount() method must return the root dentry of the tree requested by -caller. An active reference to its superblock must be grabbed and the -superblock must be locked. On failure it should return ERR_PTR(error). - -The arguments match those of mount(2) and their interpretation depends -on filesystem type. E.g. for block filesystems, dev_name is interpreted -as block device name, that device is opened and if it contains a -suitable filesystem image the method creates and initializes struct -super_block accordingly, returning its root dentry to caller. - -->mount() may choose to return a subtree of existing filesystem - it -doesn't have to create a new one. The main result from the caller's -point of view is a reference to dentry at the root of (sub)tree to be -attached; creation of new superblock is a common side effect. - -The most interesting member of the superblock structure that the mount() -method fills in is the "s_op" field. This is a pointer to a "struct -super_operations" which describes the next level of the filesystem -implementation. - -For more information on mounting (and the new mount API), see -Documentation/filesystems/mount_api.rst. - The Superblock Object ===================== @@ -244,7 +197,6 @@ filesystem. The following members are defined: enum freeze_wholder who); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); - int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); @@ -351,10 +303,6 @@ or bottom half). ``statfs`` called when the VFS needs to get filesystem statistics. -``remount_fs`` - called when the filesystem is remounted. This is called with - the kernel lock held - ``umount_begin`` called when the VFS is unmounting a filesystem. diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig index 2227f86100ad25..4d720001c12efe 100644 --- a/arch/arm/configs/neponset_defconfig +++ b/arch/arm/configs/neponset_defconfig @@ -9,7 +9,7 @@ CONFIG_ASSABET_NEPONSET=y CONFIG_ZBOOT_ROM_TEXT=0x80000 CONFIG_ZBOOT_ROM_BSS=0xc1000000 CONFIG_ZBOOT_ROM=y -CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) load_ramdisk=1 prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M" +CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) mem=32M noinitrd initrd=0xc0800000,3M" CONFIG_FPE_NWFPE=y CONFIG_PM=y CONFIG_MODULES=y diff --git a/fs/buffer.c b/fs/buffer.c index 838c0c5710229e..fd53b806ab7eb2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2354,7 +2354,7 @@ bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count) if (!head) return false; blocksize = head->b_size; - to = min_t(unsigned, folio_size(folio) - from, count); + to = min(folio_size(folio) - from, count); to = from + to; if (from < blocksize && to > folio_size(folio) - blocksize) return false; @@ -2948,6 +2948,10 @@ bool try_to_free_buffers(struct folio *folio) if (folio_test_writeback(folio)) return false; + /* Misconfigured folio check */ + if (WARN_ON_ONCE(!folio_buffers(folio))) + return true; + if (mapping == NULL) { /* can this still happen? */ ret = drop_buffers(folio, &buffers_to_free); goto out; diff --git a/fs/char_dev.c b/fs/char_dev.c index c2ddb998f3c943..84a5a0699373cd 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -343,7 +343,7 @@ void __unregister_chrdev(unsigned int major, unsigned int baseminor, kfree(cd); } -static DEFINE_SPINLOCK(cdev_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(cdev_lock); static struct kobject *cdev_get(struct cdev *p) { diff --git a/fs/exec.c b/fs/exec.c index 9d5ebc9d15b0d9..d0606e53376fb6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -555,7 +555,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) return -E2BIG; while (len > 0) { - unsigned int bytes_to_copy = min_t(unsigned int, len, + unsigned int bytes_to_copy = min(len, min_not_zero(offset_in_page(pos), PAGE_SIZE)); struct page *page; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 56d50fd3310b47..e817a758801dab 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4276,8 +4276,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, * get the corresponding group metadata to work with. * For this we have goto again loop. */ - thisgrp_len = min_t(unsigned int, (unsigned int)len, - EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + thisgrp_len = min(len, EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); clen = EXT4_NUM_B2C(sbi, thisgrp_len); if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 050f26168d9726..76842f0957b524 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1479,7 +1479,7 @@ static void ext4_update_super(struct super_block *sb, /* Update the global fs size fields */ sbi->s_groups_count += flex_gd->count; - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + sbi->s_blockfile_groups = min(sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); /* Update the reserved block counts only once the new group is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 87205660c5d026..79762c3e0dff33 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4832,7 +4832,7 @@ static int ext4_check_geometry(struct super_block *sb, return -EINVAL; } sbi->s_groups_count = blocks_count; - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + sbi->s_blockfile_groups = min(sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != le32_to_cpu(es->s_inodes_count)) { diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 92b091783966af..8375e7fbc1a551 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1353,7 +1353,7 @@ found: /* Fill the long name slots. */ for (i = 0; i < long_bhs; i++) { - int copy = min_t(int, sb->s_blocksize - offset, size); + int copy = umin(sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); mark_buffer_dirty_inode(bhs[i], dir); offset = 0; @@ -1364,7 +1364,7 @@ found: err = fat_sync_bhs(bhs, long_bhs); if (!err && i < nr_bhs) { /* Fill the short name slot. */ - int copy = min_t(int, sb->s_blocksize - offset, size); + int copy = umin(sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); mark_buffer_dirty_inode(bhs[i], dir); if (IS_DIRSYNC(dir)) diff --git a/fs/fat/file.c b/fs/fat/file.c index 4fc49a614fb8fd..f48435e586c783 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -140,8 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg) if (copy_from_user(&range, user_range, sizeof(range))) return -EFAULT; - range.minlen = max_t(unsigned int, range.minlen, - bdev_discard_granularity(sb->s_bdev)); + range.minlen = max(range.minlen, bdev_discard_granularity(sb->s_bdev)); err = fat_trim_fs(inode, &range); if (err < 0) diff --git a/fs/fs_context.c b/fs/fs_context.c index 93b7ebf8d92795..81ed94f46cac75 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -24,20 +24,6 @@ #include "mount.h" #include "internal.h" -enum legacy_fs_param { - LEGACY_FS_UNSET_PARAMS, - LEGACY_FS_MONOLITHIC_PARAMS, - LEGACY_FS_INDIVIDUAL_PARAMS, -}; - -struct legacy_fs_context { - char *legacy_data; /* Data page for legacy filesystems */ - size_t data_size; - enum legacy_fs_param param_type; -}; - -static int legacy_init_fs_context(struct fs_context *fc); - static const struct constant_table common_set_sb_flag[] = { { "dirsync", SB_DIRSYNC }, { "lazytime", SB_LAZYTIME }, @@ -275,7 +261,6 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, unsigned int sb_flags_mask, enum fs_context_purpose purpose) { - int (*init_fs_context)(struct fs_context *); struct fs_context *fc; int ret = -ENOMEM; @@ -307,12 +292,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, break; } - /* TODO: Make all filesystems support this unconditionally */ - init_fs_context = fc->fs_type->init_fs_context; - if (!init_fs_context) - init_fs_context = legacy_init_fs_context; - - ret = init_fs_context(fc); + ret = fc->fs_type->init_fs_context(fc); if (ret < 0) goto err_fc; fc->need_free = true; @@ -376,8 +356,6 @@ void fc_drop_locked(struct fs_context *fc) deactivate_locked_super(sb); } -static void legacy_fs_context_free(struct fs_context *fc); - /** * vfs_dup_fs_context - Duplicate a filesystem context. * @src_fc: The context to copy. @@ -531,184 +509,6 @@ void put_fs_context(struct fs_context *fc) } EXPORT_SYMBOL(put_fs_context); -/* - * Free the config for a filesystem that doesn't support fs_context. - */ -static void legacy_fs_context_free(struct fs_context *fc) -{ - struct legacy_fs_context *ctx = fc->fs_private; - - if (ctx) { - if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) - kfree(ctx->legacy_data); - kfree(ctx); - } -} - -/* - * Duplicate a legacy config. - */ -static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) -{ - struct legacy_fs_context *ctx; - struct legacy_fs_context *src_ctx = src_fc->fs_private; - - ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) { - ctx->legacy_data = kmemdup(src_ctx->legacy_data, - src_ctx->data_size, GFP_KERNEL); - if (!ctx->legacy_data) { - kfree(ctx); - return -ENOMEM; - } - } - - fc->fs_private = ctx; - return 0; -} - -/* - * Add a parameter to a legacy config. We build up a comma-separated list of - * options. - */ -static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) -{ - struct legacy_fs_context *ctx = fc->fs_private; - unsigned int size = ctx->data_size; - size_t len = 0; - int ret; - - ret = vfs_parse_fs_param_source(fc, param); - if (ret != -ENOPARAM) - return ret; - - if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) - return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); - - switch (param->type) { - case fs_value_is_string: - len = 1 + param->size; - fallthrough; - case fs_value_is_flag: - len += strlen(param->key); - break; - default: - return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported", - param->key); - } - - if (size + len + 2 > PAGE_SIZE) - return invalf(fc, "VFS: Legacy: Cumulative options too large"); - if (strchr(param->key, ',') || - (param->type == fs_value_is_string && - memchr(param->string, ',', param->size))) - return invalf(fc, "VFS: Legacy: Option '%s' contained comma", - param->key); - if (!ctx->legacy_data) { - ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!ctx->legacy_data) - return -ENOMEM; - } - - if (size) - ctx->legacy_data[size++] = ','; - len = strlen(param->key); - memcpy(ctx->legacy_data + size, param->key, len); - size += len; - if (param->type == fs_value_is_string) { - ctx->legacy_data[size++] = '='; - memcpy(ctx->legacy_data + size, param->string, param->size); - size += param->size; - } - ctx->legacy_data[size] = '\0'; - ctx->data_size = size; - ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS; - return 0; -} - -/* - * Add monolithic mount data. - */ -static int legacy_parse_monolithic(struct fs_context *fc, void *data) -{ - struct legacy_fs_context *ctx = fc->fs_private; - - if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) { - pr_warn("VFS: Can't mix monolithic and individual options\n"); - return -EINVAL; - } - - ctx->legacy_data = data; - ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS; - if (!ctx->legacy_data) - return 0; - - if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) - return 0; - return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security); -} - -/* - * Get a mountable root with the legacy mount command. - */ -static int legacy_get_tree(struct fs_context *fc) -{ - struct legacy_fs_context *ctx = fc->fs_private; - struct super_block *sb; - struct dentry *root; - - root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, - fc->source, ctx->legacy_data); - if (IS_ERR(root)) - return PTR_ERR(root); - - sb = root->d_sb; - BUG_ON(!sb); - - fc->root = root; - return 0; -} - -/* - * Handle remount. - */ -static int legacy_reconfigure(struct fs_context *fc) -{ - struct legacy_fs_context *ctx = fc->fs_private; - struct super_block *sb = fc->root->d_sb; - - if (!sb->s_op->remount_fs) - return 0; - - return sb->s_op->remount_fs(sb, &fc->sb_flags, - ctx ? ctx->legacy_data : NULL); -} - -const struct fs_context_operations legacy_fs_context_ops = { - .free = legacy_fs_context_free, - .dup = legacy_fs_context_dup, - .parse_param = legacy_parse_param, - .parse_monolithic = legacy_parse_monolithic, - .get_tree = legacy_get_tree, - .reconfigure = legacy_reconfigure, -}; - -/* - * Initialise a legacy context for a filesystem that doesn't support - * fs_context. - */ -static int legacy_init_fs_context(struct fs_context *fc) -{ - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); - if (!fc->fs_private) - return -ENOMEM; - fc->ops = &legacy_fs_context_ops; - return 0; -} - int parse_monolithic_mount_data(struct fs_context *fc, void *data) { int (*monolithic_mount_data)(struct fs_context *, void *); @@ -757,10 +557,8 @@ int finish_clean_context(struct fs_context *fc) if (fc->phase != FS_CONTEXT_AWAITING_RECONF) return 0; - if (fc->fs_type->init_fs_context) - error = fc->fs_type->init_fs_context(fc); - else - error = legacy_init_fs_context(fc); + error = fc->fs_type->init_fs_context(fc); + if (unlikely(error)) { fc->phase = FS_CONTEXT_FAILED; return error; diff --git a/fs/fsopen.c b/fs/fsopen.c index f645c99204eb06..622ee3926cd5df 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -404,16 +404,6 @@ SYSCALL_DEFINE5(fsconfig, return -EINVAL; fc = fd_file(f)->private_data; - if (fc->ops == &legacy_fs_context_ops) { - switch (cmd) { - case FSCONFIG_SET_BINARY: - case FSCONFIG_SET_PATH: - case FSCONFIG_SET_PATH_EMPTY: - case FSCONFIG_SET_FD: - case FSCONFIG_CMD_CREATE_EXCL: - return -EOPNOTSUPP; - } - } if (_key) { param.key = strndup_user(_key, 256); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6d59cbc877c6ad..a30c8b57d478bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1813,7 +1813,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, goto out_iput; folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset; - nr_bytes = min_t(unsigned, num, folio_size(folio) - folio_offset); + nr_bytes = min(num, folio_size(folio) - folio_offset); nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 01bc894e9c2bae..4f71eb5a9bac85 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1323,10 +1323,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, static inline unsigned int fuse_wr_pages(loff_t pos, size_t len, unsigned int max_pages) { - return min_t(unsigned int, - ((pos + len - 1) >> PAGE_SHIFT) - - (pos >> PAGE_SHIFT) + 1, - max_pages); + return min(((pos + len - 1) >> PAGE_SHIFT) - (pos >> PAGE_SHIFT) + 1, + max_pages); } static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii) @@ -1607,7 +1605,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, struct folio *folio = page_folio(pages[i]); unsigned int offset = start + (folio_page_idx(folio, pages[i]) << PAGE_SHIFT); - unsigned int len = min_t(unsigned int, ret, PAGE_SIZE - start); + unsigned int len = umin(ret, PAGE_SIZE - start); ap->descs[ap->num_folios].offset = offset; ap->descs[ap->num_folios].length = len; diff --git a/fs/init.c b/fs/init.c index e0f5429c0a49d0..9b56ebca8cc6d2 100644 --- a/fs/init.c +++ b/fs/init.c @@ -27,20 +27,6 @@ int __init init_mount(const char *dev_name, const char *dir_name, return ret; } -int __init init_umount(const char *name, int flags) -{ - int lookup_flags = LOOKUP_MOUNTPOINT; - struct path path; - int ret; - - if (!(flags & UMOUNT_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - ret = kern_path(name, lookup_flags, &path); - if (ret) - return ret; - return path_umount(&path, flags); -} - int __init init_chdir(const char *filename) { struct path path; diff --git a/fs/internal.h b/fs/internal.h index ab638d41ab81db..e333b105337a80 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -44,7 +44,6 @@ extern void __init chrdev_init(void); /* * fs_context.c */ -extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); diff --git a/fs/locks.c b/fs/locks.c index e75c8084d937be..a684b367e18b7c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -178,7 +178,6 @@ locks_get_lock_context(struct inode *inode, int type) { struct file_lock_context *ctx; - /* paired with cmpxchg() below */ ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -196,7 +195,18 @@ locks_get_lock_context(struct inode *inode, int type) * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated. */ - if (cmpxchg(&inode->i_flctx, NULL, ctx)) { + spin_lock(&inode->i_lock); + if (!(inode->i_opflags & IOP_FLCTX)) { + VFS_BUG_ON_INODE(inode->i_flctx, inode); + WRITE_ONCE(inode->i_flctx, ctx); + /* + * Paired with locks_inode_context(). + */ + smp_store_release(&inode->i_opflags, inode->i_opflags | IOP_FLCTX); + spin_unlock(&inode->i_lock); + } else { + VFS_BUG_ON_INODE(!inode->i_flctx, inode); + spin_unlock(&inode->i_lock); kmem_cache_free(flctx_cache, ctx); ctx = locks_inode_context(inode); } diff --git a/fs/namei.c b/fs/namei.c index bf0f66f0e9b92c..aefb21bc0944e3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4279,19 +4279,16 @@ static int may_o_create(struct mnt_idmap *idmap, * * Returns an error code otherwise. */ -static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, +static struct dentry *atomic_open(const struct path *path, struct dentry *dentry, struct file *file, int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; - struct inode *dir = nd->path.dentry->d_inode; + struct inode *dir = path->dentry->d_inode; int error; - if (nd->flags & LOOKUP_DIRECTORY) - open_flag |= O_DIRECTORY; - file->__f_path.dentry = DENTRY_NOT_SET; - file->__f_path.mnt = nd->path.mnt; + file->__f_path.mnt = path->mnt; error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); @@ -4403,7 +4400,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (create_error) open_flag &= ~O_CREAT; if (dir_inode->i_op->atomic_open) { - dentry = atomic_open(nd, dentry, file, open_flag, mode); + if (nd->flags & LOOKUP_DIRECTORY) + open_flag |= O_DIRECTORY; + dentry = atomic_open(&nd->path, dentry, file, open_flag, mode); if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT)) dentry = ERR_PTR(create_error); return dentry; @@ -4937,6 +4936,73 @@ inline struct dentry *start_creating_user_path( } EXPORT_SYMBOL(start_creating_user_path); +/** + * dentry_create - Create and open a file + * @path: path to create + * @flags: O_ flags + * @mode: mode bits for new file + * @cred: credentials to use + * + * Caller must hold the parent directory's lock, and have prepared + * a negative dentry, placed in @path->dentry, for the new file. + * + * Caller sets @path->mnt to the vfsmount of the filesystem where + * the new file is to be created. The parent directory and the + * negative dentry must reside on the same filesystem instance. + * + * On success, returns a "struct file *". Otherwise a ERR_PTR + * is returned. + */ +struct file *dentry_create(struct path *path, int flags, umode_t mode, + const struct cred *cred) +{ + struct file *file __free(fput) = NULL; + struct dentry *dentry = path->dentry; + struct dentry *dir = dentry->d_parent; + struct inode *dir_inode = d_inode(dir); + struct mnt_idmap *idmap; + int error, create_error; + + file = alloc_empty_file(flags, cred); + if (IS_ERR(file)) + return file; + + idmap = mnt_idmap(path->mnt); + + if (dir_inode->i_op->atomic_open) { + path->dentry = dir; + mode = vfs_prepare_mode(idmap, dir_inode, mode, S_IALLUGO, S_IFREG); + + create_error = may_o_create(idmap, path, dentry, mode); + if (create_error) + flags &= ~O_CREAT; + + dentry = atomic_open(path, dentry, file, flags, mode); + error = PTR_ERR_OR_ZERO(dentry); + + if (unlikely(create_error) && error == -ENOENT) + error = create_error; + + if (!error) { + if (file->f_mode & FMODE_CREATED) + fsnotify_create(dir->d_inode, dentry); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); + } + + path->dentry = dentry; + + } else { + error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL); + if (!error) + error = vfs_open(path, file); + } + if (unlikely(error)) + return ERR_PTR(error); + + return no_free_ptr(file); +} +EXPORT_SYMBOL(dentry_create); /** * vfs_mknod - create device node or file diff --git a/fs/namespace.c b/fs/namespace.c index c58674a20cad54..ec3b16fedd9f29 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5547,31 +5547,49 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root) /* locks: namespace_shared */ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, - struct mnt_namespace *ns) + struct file *mnt_file, struct mnt_namespace *ns) { - struct mount *m; int err; - /* Has the namespace already been emptied? */ - if (mnt_ns_id && mnt_ns_empty(ns)) - return -ENOENT; + if (mnt_file) { + WARN_ON_ONCE(ns != NULL); - s->mnt = lookup_mnt_in_ns(mnt_id, ns); - if (!s->mnt) - return -ENOENT; + s->mnt = mnt_file->f_path.mnt; + ns = real_mount(s->mnt)->mnt_ns; + if (!ns) + /* + * We can't set mount point and mnt_ns_id since we don't have a + * ns for the mount. This can happen if the mount is unmounted + * with MNT_DETACH. + */ + s->mask &= ~(STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID); + } else { + /* Has the namespace already been emptied? */ + if (mnt_ns_id && mnt_ns_empty(ns)) + return -ENOENT; - err = grab_requested_root(ns, &s->root); - if (err) - return err; + s->mnt = lookup_mnt_in_ns(mnt_id, ns); + if (!s->mnt) + return -ENOENT; + } - /* - * Don't trigger audit denials. We just want to determine what - * mounts to show users. - */ - m = real_mount(s->mnt); - if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && - !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; + if (ns) { + err = grab_requested_root(ns, &s->root); + if (err) + return err; + + if (!mnt_file) { + struct mount *m; + /* + * Don't trigger audit denials. We just want to determine what + * mounts to show users. + */ + m = real_mount(s->mnt); + if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + } + } err = security_sb_statfs(s->mnt->mnt_root); if (err) @@ -5693,7 +5711,7 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, } static int copy_mnt_id_req(const struct mnt_id_req __user *req, - struct mnt_id_req *kreq) + struct mnt_id_req *kreq, unsigned int flags) { int ret; size_t usize; @@ -5711,11 +5729,17 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize); if (ret) return ret; - if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id) - return -EINVAL; - /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ - if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) - return -EINVAL; + + if (flags & STATMOUNT_BY_FD) { + if (kreq->mnt_id || kreq->mnt_ns_id) + return -EINVAL; + } else { + if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id) + return -EINVAL; + /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ + if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) + return -EINVAL; + } return 0; } @@ -5762,25 +5786,33 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, { struct mnt_namespace *ns __free(mnt_ns_release) = NULL; struct kstatmount *ks __free(kfree) = NULL; + struct file *mnt_file __free(fput) = NULL; struct mnt_id_req kreq; /* We currently support retrieval of 3 strings. */ size_t seq_size = 3 * PATH_MAX; int ret; - if (flags) + if (flags & ~STATMOUNT_BY_FD) return -EINVAL; - ret = copy_mnt_id_req(req, &kreq); + ret = copy_mnt_id_req(req, &kreq, flags); if (ret) return ret; - ns = grab_requested_mnt_ns(&kreq); - if (IS_ERR(ns)) - return PTR_ERR(ns); + if (flags & STATMOUNT_BY_FD) { + mnt_file = fget_raw(kreq.mnt_fd); + if (!mnt_file) + return -EBADF; + /* do_statmount sets ns in case of STATMOUNT_BY_FD */ + } else { + ns = grab_requested_mnt_ns(&kreq); + if (IS_ERR(ns)) + return PTR_ERR(ns); - if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && - !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) - return -ENOENT; + if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + } ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT); if (!ks) @@ -5792,7 +5824,7 @@ retry: return ret; scoped_guard(namespace_shared) - ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); + ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, mnt_file, ns); if (!ret) ret = copy_statmount_to_user(ks); @@ -5932,7 +5964,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids))) return -EFAULT; - ret = copy_mnt_id_req(req, &kreq); + ret = copy_mnt_id_req(req, &kreq, 0); if (ret) return ret; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b748009175837c..6aa22b3b2f4394 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -194,7 +194,7 @@ static inline bool nfsd4_create_is_exclusive(int createmode) } static __be32 -nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, +nfsd4_vfs_create(struct svc_fh *fhp, struct dentry **child, struct nfsd4_open *open) { struct file *filp; @@ -202,6 +202,9 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, int oflags; oflags = O_CREAT | O_LARGEFILE; + if (nfsd4_create_is_exclusive(open->op_createmode)) + oflags |= O_EXCL; + switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) { case NFS4_SHARE_ACCESS_WRITE: oflags |= O_WRONLY; @@ -214,9 +217,11 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, } path.mnt = fhp->fh_export->ex_path.mnt; - path.dentry = child; + path.dentry = *child; filp = dentry_create(&path, oflags, open->op_iattr.ia_mode, current_cred()); + *child = path.dentry; + if (IS_ERR(filp)) return nfserrno(PTR_ERR(filp)); @@ -350,7 +355,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = fh_fill_pre_attrs(fhp); if (status != nfs_ok) goto out; - status = nfsd4_vfs_create(fhp, child, open); + status = nfsd4_vfs_create(fhp, &child, open); if (status != nfs_ok) goto out; open->op_created = true; diff --git a/fs/open.c b/fs/open.c index f328622061c56c..74c4c1462b3e47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1142,45 +1142,6 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, } /** - * dentry_create - Create and open a file - * @path: path to create - * @flags: O_ flags - * @mode: mode bits for new file - * @cred: credentials to use - * - * Caller must hold the parent directory's lock, and have prepared - * a negative dentry, placed in @path->dentry, for the new file. - * - * Caller sets @path->mnt to the vfsmount of the filesystem where - * the new file is to be created. The parent directory and the - * negative dentry must reside on the same filesystem instance. - * - * On success, returns a "struct file *". Otherwise a ERR_PTR - * is returned. - */ -struct file *dentry_create(const struct path *path, int flags, umode_t mode, - const struct cred *cred) -{ - struct file *f; - int error; - - f = alloc_empty_file(flags, cred); - if (IS_ERR(f)) - return f; - - error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL); - if (!error) - error = vfs_open(path, f); - - if (unlikely(error)) { - fput(f); - return ERR_PTR(error); - } - return f; -} -EXPORT_SYMBOL(dentry_create); - -/** * kernel_file_open - open a file for kernel internal use * @path: path of the file to open * @flags: open flags diff --git a/fs/splice.c b/fs/splice.c index d338fe56b50b31..5fb07c01936fdf 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1467,7 +1467,7 @@ static ssize_t iter_to_pipe(struct iov_iter *from, n = DIV_ROUND_UP(left + start, PAGE_SIZE); for (i = 0; i < n; i++) { - int size = min_t(int, left, PAGE_SIZE - start); + int size = umin(left, PAGE_SIZE - start); buf.page = pages[i]; buf.offset = start; diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 2f5e5588ee0733..d2c9740e26a8eb 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -242,7 +242,14 @@ bool locks_owner_has_blockers(struct file_lock_context *flctx, static inline struct file_lock_context * locks_inode_context(const struct inode *inode) { - return smp_load_acquire(&inode->i_flctx); + /* + * Paired with smp_store_release in locks_get_lock_context(). + * + * Ensures ->i_flctx will be visible if we spotted the flag. + */ + if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX))) + return NULL; + return READ_ONCE(inode->i_flctx); } #else /* !CONFIG_FILE_LOCKING */ @@ -469,7 +476,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) * could end up racing with tasks trying to set a new lease on this * file. */ - flctx = READ_ONCE(inode->i_flctx); + flctx = locks_inode_context(inode); if (!flctx) return 0; smp_mb(); @@ -488,7 +495,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) * could end up racing with tasks trying to set a new lease on this * file. */ - flctx = READ_ONCE(inode->i_flctx); + flctx = locks_inode_context(inode); if (!flctx) return 0; smp_mb(); @@ -533,8 +540,11 @@ static inline int break_deleg_wait(struct delegated_inode *di) static inline int break_layout(struct inode *inode, bool wait) { + struct file_lock_context *flctx; + smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) { + flctx = locks_inode_context(inode); + if (flctx && !list_empty_careful(&flctx->flc_lease)) { unsigned int flags = LEASE_BREAK_LAYOUT; if (!wait) diff --git a/include/linux/fs.h b/include/linux/fs.h index f5c9cf28c4dcf9..eb51e2a9b78c5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 #define IOP_FASTPERM_MAY_EXEC 0x0080 +#define IOP_FLCTX 0x0100 /* * Inode state bits. Protected by inode->i_lock @@ -2274,8 +2275,6 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; - struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -2457,7 +2456,7 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); -struct file *dentry_create(const struct path *path, int flags, umode_t mode, +struct file *dentry_create(struct path *path, int flags, umode_t mode, const struct cred *cred); const struct path *backing_file_user_path(const struct file *f); diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index 6bd3009e09b3b8..4bb9981af6acc6 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -96,7 +96,6 @@ struct super_operations { const void *owner); int (*unfreeze_fs)(struct super_block *sb); int (*statfs)(struct dentry *dentry, struct kstatfs *kstatfs); - int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin)(struct super_block *sb); int (*show_options)(struct seq_file *seq, struct dentry *dentry); diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 92045d18cbfc99..0bdbc458a881a2 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -2,7 +2,6 @@ int __init init_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page); -int __init init_umount(const char *name, int flags); int __init init_chdir(const char *filename); int __init init_chroot(const char *filename); int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); diff --git a/include/linux/initrd.h b/include/linux/initrd.h index f1a1f4c92ded39..7e5d26c8136f19 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -3,8 +3,6 @@ #ifndef __LINUX_INITRD_H #define __LINUX_INITRD_H -#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ - /* starting block # of image */ extern int rd_image_start; diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h index b332b019b29cb8..0014fbc1c62692 100644 --- a/include/linux/ns/ns_common_types.h +++ b/include/linux/ns/ns_common_types.h @@ -108,11 +108,13 @@ extern const struct proc_ns_operations utsns_operations; * @ns_tree: namespace tree nodes and active reference count */ struct ns_common { + struct { + refcount_t __ns_ref; /* do not use directly */ + } ____cacheline_aligned_in_smp; u32 ns_type; struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; - refcount_t __ns_ref; /* do not use directly */ union { struct ns_tree; struct rcu_head ns_rcu; diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 5d3f8c9e3a6256..18c62440526888 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -197,7 +197,10 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 mnt_ns_fd; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; @@ -232,4 +235,9 @@ struct mnt_id_req { #define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 63d1464cb71c86..1c7fe0f4dca482 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -92,7 +92,6 @@ enum KERN_DOMAINNAME=8, /* string: domainname */ KERN_PANIC=15, /* int: panic timeout */ - KERN_REALROOTDEV=16, /* real root device to mount after initrd */ KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ diff --git a/init/do_mounts.c b/init/do_mounts.c index defbbf1d55f768..9c8a547075a7bc 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -34,13 +34,6 @@ static int root_wait; dev_t ROOT_DEV; -static int __init load_ramdisk(char *str) -{ - pr_warn("ignoring the deprecated load_ramdisk= option\n"); - return 1; -} -__setup("load_ramdisk=", load_ramdisk); - static int __init readonly(char *str) { if (*str) @@ -484,13 +477,11 @@ void __init prepare_namespace(void) if (saved_root_name[0]) ROOT_DEV = parse_root_device(saved_root_name); - if (initrd_load(saved_root_name)) - goto out; + initrd_load(); if (root_wait) wait_for_root(saved_root_name); mount_root(saved_root_name); -out: devtmpfs_mount(); init_mount(".", "/", NULL, MS_MOVE, NULL); init_chroot("."); diff --git a/init/do_mounts.h b/init/do_mounts.h index 6069ea3eb80d70..a386ee5314c952 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -23,25 +23,15 @@ static inline __init int create_dev(char *name, dev_t dev) } #ifdef CONFIG_BLK_DEV_RAM - -int __init rd_load_disk(int n); -int __init rd_load_image(char *from); - +int __init rd_load_image(void); #else - -static inline int rd_load_disk(int n) { return 0; } -static inline int rd_load_image(char *from) { return 0; } - +static inline int rd_load_image(void) { return 0; } #endif #ifdef CONFIG_BLK_DEV_INITRD -bool __init initrd_load(char *root_device_name); +void __init initrd_load(void); #else -static inline bool initrd_load(char *root_device_name) -{ - return false; - } - +static inline void initrd_load(void) { } #endif /* Ensure that async file closing finished to prevent spurious errors. */ diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index f6867bad0d782c..892e69ab41c446 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -2,45 +2,20 @@ #include <linux/unistd.h> #include <linux/kernel.h> #include <linux/fs.h> -#include <linux/minix_fs.h> -#include <linux/romfs_fs.h> #include <linux/initrd.h> -#include <linux/sched.h> -#include <linux/freezer.h> -#include <linux/kmod.h> -#include <uapi/linux/mount.h> #include "do_mounts.h" unsigned long initrd_start, initrd_end; int initrd_below_start_ok; -static unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ static int __initdata mount_initrd = 1; phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; -#ifdef CONFIG_SYSCTL -static const struct ctl_table kern_do_mounts_initrd_table[] = { - { - .procname = "real-root-dev", - .data = &real_root_dev, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -}; - -static __init int kernel_do_mounts_initrd_sysctls_init(void) -{ - register_sysctl_init("kernel", kern_do_mounts_initrd_table); - return 0; -} -late_initcall(kernel_do_mounts_initrd_sysctls_init); -#endif /* CONFIG_SYSCTL */ - static int __init no_initrd(char *str) { + pr_warn("noinitrd option is deprecated and will be removed soon\n"); mount_initrd = 0; return 1; } @@ -70,85 +45,19 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) -{ - ksys_unshare(CLONE_FS | CLONE_FILES); - console_on_rootfs(); - /* move initrd over / and chdir/chroot in initrd root */ - init_chdir("/root"); - init_mount(".", "/", NULL, MS_MOVE, NULL); - init_chroot("."); - ksys_setsid(); - return 0; -} - -static void __init handle_initrd(char *root_device_name) -{ - struct subprocess_info *info; - static char *argv[] = { "linuxrc", NULL, }; - extern char *envp_init[]; - int error; - - pr_warn("using deprecated initrd support, will be removed soon.\n"); - - real_root_dev = new_encode_dev(ROOT_DEV); - create_dev("/dev/root.old", Root_RAM0); - /* mount initrd on rootfs' /root */ - mount_root_generic("/dev/root.old", root_device_name, - root_mountflags & ~MS_RDONLY); - init_mkdir("/old", 0700); - init_chdir("/old"); - - info = call_usermodehelper_setup("/linuxrc", argv, envp_init, - GFP_KERNEL, init_linuxrc, NULL, NULL); - if (!info) - return; - call_usermodehelper_exec(info, UMH_WAIT_PROC|UMH_FREEZABLE); - - /* move initrd to rootfs' /old */ - init_mount("..", ".", NULL, MS_MOVE, NULL); - /* switch root and cwd back to / of rootfs */ - init_chroot(".."); - - if (new_decode_dev(real_root_dev) == Root_RAM0) { - init_chdir("/old"); - return; - } - - init_chdir("/"); - ROOT_DEV = new_decode_dev(real_root_dev); - mount_root(root_device_name); - - printk(KERN_NOTICE "Trying to move old root to /initrd ... "); - error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); - if (!error) - printk("okay\n"); - else { - if (error == -ENOENT) - printk("/initrd does not exist. Ignored.\n"); - else - printk("failed\n"); - printk(KERN_NOTICE "Unmounting old root\n"); - init_umount("/old", MNT_DETACH); - } -} - -bool __init initrd_load(char *root_device_name) +void __init initrd_load(void) { if (mount_initrd) { create_dev("/dev/ram", Root_RAM0); /* - * Load the initrd data into /dev/ram0. Execute it as initrd - * unless /dev/ram0 is supposed to be our actual root device, - * in that case the ram disk is just set up here, and gets - * mounted in the normal path. + * Load the initrd data into /dev/ram0. */ - if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { - init_unlink("/initrd.image"); - handle_initrd(root_device_name); - return true; + if (rd_load_image()) { + pr_warn("using deprecated initrd support, will be removed in January 2027; " + "use initramfs instead or (as a last resort) /sys/firmware/initrd; " + "see section \"Workaround\" in " + "https://lore.kernel.org/lkml/20251010094047.3111495-1-safinaskar@gmail.com\n"); } } init_unlink("/initrd.image"); - return false; } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index eddbe5cb0413a7..48bfab2fc62f91 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -18,17 +18,11 @@ static struct file *in_file, *out_file; static loff_t in_pos, out_pos; -static int __init prompt_ramdisk(char *str) -{ - pr_warn("ignoring the deprecated prompt_ramdisk= option\n"); - return 1; -} -__setup("prompt_ramdisk=", prompt_ramdisk); - int __initdata rd_image_start; /* starting block # of image */ static int __init ramdisk_start_setup(char *str) { + pr_warn("ramdisk_start= option is deprecated and will be removed soon\n"); return kstrtoint(str, 0, &rd_image_start) == 0; } __setup("ramdisk_start=", ramdisk_start_setup); @@ -183,7 +177,7 @@ static unsigned long nr_blocks(struct file *file) return i_size_read(inode) >> 10; } -int __init rd_load_image(char *from) +int __init rd_load_image(void) { int res = 0; unsigned long rd_blocks, devblocks, nr_disks; @@ -197,7 +191,7 @@ int __init rd_load_image(char *from) if (IS_ERR(out_file)) goto out; - in_file = filp_open(from, O_RDONLY, 0); + in_file = filp_open("/initrd.image", O_RDONLY, 0); if (IS_ERR(in_file)) goto noclose_input; @@ -226,10 +220,7 @@ int __init rd_load_image(char *from) /* * OK, time to copy in the data */ - if (strcmp(from, "/initrd.image") == 0) - devblocks = nblocks; - else - devblocks = nr_blocks(in_file); + devblocks = nblocks; if (devblocks == 0) { printk(KERN_ERR "RAMDISK: could not determine device size\n"); @@ -273,13 +264,6 @@ out: return res; } -int __init rd_load_disk(int n) -{ - create_dev("/dev/root", ROOT_DEV); - create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n)); - return rd_load_image("/dev/root"); -} - static int exit_code; static int decompress_error; diff --git a/kernel/pid.c b/kernel/pid.c index a31771bc89c1a3..f45ae56db7daa8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -159,58 +159,86 @@ void free_pids(struct pid **pids) free_pid(pids[tmp]); } -struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, - size_t set_tid_size) +struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid, + size_t arg_set_tid_size) { + int set_tid[MAX_PID_NS_LEVEL + 1] = {}; + int pid_max[MAX_PID_NS_LEVEL + 1] = {}; struct pid *pid; enum pid_type type; int i, nr; struct pid_namespace *tmp; struct upid *upid; int retval = -ENOMEM; + bool retried_preload; /* - * set_tid_size contains the size of the set_tid array. Starting at + * arg_set_tid_size contains the size of the arg_set_tid array. Starting at * the most nested currently active PID namespace it tells alloc_pid() * which PID to set for a process in that most nested PID namespace - * up to set_tid_size PID namespaces. It does not have to set the PID - * for a process in all nested PID namespaces but set_tid_size must + * up to arg_set_tid_size PID namespaces. It does not have to set the PID + * for a process in all nested PID namespaces but arg_set_tid_size must * never be greater than the current ns->level + 1. */ - if (set_tid_size > ns->level + 1) + if (arg_set_tid_size > ns->level + 1) return ERR_PTR(-EINVAL); + /* + * Prep before we take locks: + * + * 1. allocate and fill in pid struct + */ pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) return ERR_PTR(retval); - tmp = ns; + get_pid_ns(ns); pid->level = ns->level; + refcount_set(&pid->count, 1); + spin_lock_init(&pid->lock); + for (type = 0; type < PIDTYPE_MAX; ++type) + INIT_HLIST_HEAD(&pid->tasks[type]); + init_waitqueue_head(&pid->wait_pidfd); + INIT_HLIST_HEAD(&pid->inodes); - for (i = ns->level; i >= 0; i--) { - int tid = 0; - int pid_max = READ_ONCE(tmp->pid_max); + /* + * 2. perm check checkpoint_restore_ns_capable() + * + * This stores found pid_max to make sure the used value is the same should + * later code need it. + */ + for (tmp = ns, i = ns->level; i >= 0; i--) { + pid_max[ns->level - i] = READ_ONCE(tmp->pid_max); - if (set_tid_size) { - tid = set_tid[ns->level - i]; + if (arg_set_tid_size) { + int tid = set_tid[ns->level - i] = arg_set_tid[ns->level - i]; retval = -EINVAL; - if (tid < 1 || tid >= pid_max) - goto out_free; + if (tid < 1 || tid >= pid_max[ns->level - i]) + goto out_abort; /* * Also fail if a PID != 1 is requested and * no PID 1 exists. */ if (tid != 1 && !tmp->child_reaper) - goto out_free; + goto out_abort; retval = -EPERM; if (!checkpoint_restore_ns_capable(tmp->user_ns)) - goto out_free; - set_tid_size--; + goto out_abort; + arg_set_tid_size--; } - idr_preload(GFP_KERNEL); - spin_lock(&pidmap_lock); + tmp = tmp->parent; + } + + /* + * Prep is done, id allocation goes here: + */ + retried_preload = false; + idr_preload(GFP_KERNEL); + spin_lock(&pidmap_lock); + for (tmp = ns, i = ns->level; i >= 0;) { + int tid = set_tid[ns->level - i]; if (tid) { nr = idr_alloc(&tmp->idr, NULL, tid, @@ -220,6 +248,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, * alreay in use. Return EEXIST in that case. */ if (nr == -ENOSPC) + nr = -EEXIST; } else { int pid_min = 1; @@ -235,19 +264,42 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, - pid_max, GFP_ATOMIC); + pid_max[ns->level - i], GFP_ATOMIC); + if (nr == -ENOSPC) + nr = -EAGAIN; } - spin_unlock(&pidmap_lock); - idr_preload_end(); - if (nr < 0) { - retval = (nr == -ENOSPC) ? -EAGAIN : nr; + if (unlikely(nr < 0)) { + /* + * Preload more memory if idr_alloc{,cyclic} failed with -ENOMEM. + * + * The IDR API only allows us to preload memory for one call, while we may end + * up doing several under pidmap_lock with GFP_ATOMIC. The situation may be + * salvageable with GFP_KERNEL. But make sure to not loop indefinitely if preload + * did not help (the routine unfortunately returns void, so we have no idea + * if it got anywhere). + * + * The lock can be safely dropped and picked up as historically pid allocation + * for different namespaces was *not* atomic -- we try to hold on to it the + * entire time only for performance reasons. + */ + if (nr == -ENOMEM && !retried_preload) { + spin_unlock(&pidmap_lock); + idr_preload_end(); + retried_preload = true; + idr_preload(GFP_KERNEL); + spin_lock(&pidmap_lock); + continue; + } + retval = nr; goto out_free; } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; tmp = tmp->parent; + i--; + retried_preload = false; } /* @@ -257,25 +309,15 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, * is what we have exposed to userspace for a long time and it is * documented behavior for pid namespaces. So we can't easily * change it even if there were an error code better suited. + * + * This can't be done earlier because we need to preserve other + * error conditions. */ retval = -ENOMEM; - - get_pid_ns(ns); - refcount_set(&pid->count, 1); - spin_lock_init(&pid->lock); - for (type = 0; type < PIDTYPE_MAX; ++type) - INIT_HLIST_HEAD(&pid->tasks[type]); - - init_waitqueue_head(&pid->wait_pidfd); - INIT_HLIST_HEAD(&pid->inodes); - - upid = pid->numbers + ns->level; - idr_preload(GFP_KERNEL); - spin_lock(&pidmap_lock); - if (!(ns->pid_allocated & PIDNS_ADDING)) - goto out_unlock; + if (unlikely(!(ns->pid_allocated & PIDNS_ADDING))) + goto out_free; pidfs_add_pid(pid); - for ( ; upid >= pid->numbers; --upid) { + for (upid = pid->numbers + ns->level; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; @@ -286,13 +328,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, return pid; -out_unlock: - spin_unlock(&pidmap_lock); - idr_preload_end(); - put_pid_ns(ns); - out_free: - spin_lock(&pidmap_lock); while (++i <= ns->level) { upid = pid->numbers + i; idr_remove(&upid->ns->idr, upid->nr); @@ -303,7 +339,10 @@ out_free: idr_set_cursor(&ns->idr, 0); spin_unlock(&pidmap_lock); + idr_preload_end(); +out_abort: + put_pid_ns(ns); kmem_cache_free(ns->pid_cachep, pid); return ERR_PTR(retval); } diff --git a/rust/helpers/fs.c b/rust/helpers/fs.c index a75c9676337246..789d60fb8908b9 100644 --- a/rust/helpers/fs.c +++ b/rust/helpers/fs.c @@ -6,7 +6,7 @@ #include <linux/fs.h> -struct file *rust_helper_get_file(struct file *f) +__rust_helper struct file *rust_helper_get_file(struct file *f) { return get_file(f); } diff --git a/rust/helpers/pid_namespace.c b/rust/helpers/pid_namespace.c index f41482bdec9a7c..f46ab779b5279f 100644 --- a/rust/helpers/pid_namespace.c +++ b/rust/helpers/pid_namespace.c @@ -3,18 +3,20 @@ #include <linux/pid_namespace.h> #include <linux/cleanup.h> -struct pid_namespace *rust_helper_get_pid_ns(struct pid_namespace *ns) +__rust_helper struct pid_namespace * +rust_helper_get_pid_ns(struct pid_namespace *ns) { return get_pid_ns(ns); } -void rust_helper_put_pid_ns(struct pid_namespace *ns) +__rust_helper void rust_helper_put_pid_ns(struct pid_namespace *ns) { put_pid_ns(ns); } /* Get a reference on a task's pid namespace. */ -struct pid_namespace *rust_helper_task_get_pid_ns(struct task_struct *task) +__rust_helper struct pid_namespace * +rust_helper_task_get_pid_ns(struct task_struct *task) { struct pid_namespace *pid_ns; diff --git a/rust/helpers/poll.c b/rust/helpers/poll.c index 7e5b1751c2d526..78b3839b50f065 100644 --- a/rust/helpers/poll.c +++ b/rust/helpers/poll.c @@ -3,8 +3,9 @@ #include <linux/export.h> #include <linux/poll.h> -void rust_helper_poll_wait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p) +__rust_helper void rust_helper_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, + poll_table *p) { poll_wait(filp, wait_address, p); } diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index 99e5ad082fb1ee..e1cba4bfd8d915 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -43,19 +43,24 @@ #endif #endif -static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, - struct statmount *buf, size_t bufsize, +static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint32_t fd, + uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) { struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, .param = mask, }; - if (mnt_ns_id) { + if (flags & STATMOUNT_BY_FD) { req.size = MNT_ID_REQ_SIZE_VER1; - req.mnt_ns_id = mnt_ns_id; + req.mnt_fd = fd; + } else { + req.mnt_id = mnt_id; + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } } return syscall(__NR_statmount, &req, buf, bufsize, flags); diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 6e53430423d271..a04bcaace12616 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -33,15 +33,24 @@ static const char *const known_fs[] = { "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; -static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) +static struct statmount *statmount_alloc(uint64_t mnt_id, int fd, uint64_t mask, unsigned int flags) { size_t bufsize = 1 << 15; - struct statmount *buf = NULL, *tmp = alloca(bufsize); + struct statmount *buf = NULL, *tmp = NULL; int tofree = 0; int ret; + if (flags & STATMOUNT_BY_FD && fd < 0) + return NULL; + + tmp = alloca(bufsize); + for (;;) { - ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags); + if (flags & STATMOUNT_BY_FD) + ret = statmount(0, 0, (uint32_t) fd, mask, tmp, bufsize, flags); + else + ret = statmount(mnt_id, 0, 0, mask, tmp, bufsize, flags); + if (ret != -1) break; if (tofree) @@ -237,7 +246,7 @@ static void test_statmount_zero_mask(void) struct statmount sm; int ret; - ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, 0, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount zero mask: %s\n", strerror(errno)); @@ -263,7 +272,7 @@ static void test_statmount_mnt_basic(void) int ret; uint64_t mask = STATMOUNT_MNT_BASIC; - ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount mnt basic: %s\n", strerror(errno)); @@ -323,7 +332,7 @@ static void test_statmount_sb_basic(void) struct statx sx; struct statfs sf; - ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount sb basic: %s\n", strerror(errno)); @@ -375,7 +384,7 @@ static void test_statmount_mnt_point(void) { struct statmount *sm; - sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_POINT, 0); if (!sm) { ksft_test_result_fail("statmount mount point: %s\n", strerror(errno)); @@ -405,7 +414,7 @@ static void test_statmount_mnt_root(void) assert(last_dir); last_dir++; - sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT, 0); if (!sm) { ksft_test_result_fail("statmount mount root: %s\n", strerror(errno)); @@ -438,7 +447,7 @@ static void test_statmount_fs_type(void) const char *fs_type; const char *const *s; - sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); if (!sm) { ksft_test_result_fail("statmount fs type: %s\n", strerror(errno)); @@ -467,7 +476,7 @@ static void test_statmount_mnt_opts(void) char *line = NULL; size_t len = 0; - sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, 0); if (!sm) { ksft_test_result_fail("statmount mnt opts: %s\n", @@ -557,7 +566,7 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) uint32_t start, i; int ret; - sm = statmount_alloc(root_id, mask, 0); + sm = statmount_alloc(root_id, 0, mask, 0); if (!sm) { ksft_test_result_fail("statmount %s: %s\n", name, strerror(errno)); @@ -586,14 +595,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) exactsize = sm->size; shortsize = sizeof(*sm) + i; - ret = statmount(root_id, 0, mask, sm, exactsize, 0); + ret = statmount(root_id, 0, 0, mask, sm, exactsize, 0); if (ret == -1) { ksft_test_result_fail("statmount exact size: %s\n", strerror(errno)); goto out; } errno = 0; - ret = statmount(root_id, 0, mask, sm, shortsize, 0); + ret = statmount(root_id, 0, 0, mask, sm, shortsize, 0); if (ret != -1 || errno != EOVERFLOW) { ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", strerror(errno)); @@ -658,6 +667,226 @@ static void test_listmount_tree(void) ksft_test_result_pass("listmount tree\n"); } +static void test_statmount_by_fd(void) +{ + struct statmount *sm = NULL; + char tmpdir[] = "/statmount.fd.XXXXXX"; + const char root[] = "/test"; + char subdir[PATH_MAX], tmproot[PATH_MAX]; + int fd; + + if (!mkdtemp(tmpdir)) { + ksft_perror("mkdtemp"); + return; + } + + if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) { + ksft_perror("mount"); + rmdir(tmpdir); + return; + } + + snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root); + snprintf(tmproot, PATH_MAX, "%s/%s", tmpdir, "chroot"); + + if (mkdir(subdir, 0755)) { + ksft_perror("mkdir"); + goto err_tmpdir; + } + + if (mount(subdir, subdir, NULL, MS_BIND, 0)) { + ksft_perror("mount"); + goto err_subdir; + } + + if (mkdir(tmproot, 0755)) { + ksft_perror("mkdir"); + goto err_subdir; + } + + fd = open(subdir, O_PATH); + if (fd < 0) { + ksft_perror("open"); + goto err_tmproot; + } + + if (chroot(tmproot)) { + ksft_perror("chroot"); + goto err_fd; + } + + sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD); + if (!sm) { + ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno)); + goto err_chroot; + } + + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto err_chroot; + } + + if (sm->mask & STATMOUNT_MNT_POINT) { + ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in statmount\n"); + goto err_chroot; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n"); + goto err_chroot; + } + + if (strcmp(root, sm->str + sm->mnt_root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", + sm->str + sm->mnt_root, root); + goto err_chroot; + } + + if (chroot(".")) { + ksft_perror("chroot"); + goto out; + } + + free(sm); + sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD); + if (!sm) { + ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno)); + goto err_fd; + } + + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto out; + } + + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("STATMOUNT_MNT_POINT not set in statmount\n"); + goto out; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n"); + goto out; + } + + if (strcmp(subdir, sm->str + sm->mnt_point) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_point," + "statmount mnt_point: %s != %s\n", sm->str + sm->mnt_point, subdir); + goto out; + } + + if (strcmp(root, sm->str + sm->mnt_root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", sm->str + sm->mnt_root, root); + goto out; + } + + ksft_test_result_pass("statmount by fd\n"); + goto out; +err_chroot: + chroot("."); +out: + free(sm); +err_fd: + close(fd); +err_tmproot: + rmdir(tmproot); +err_subdir: + umount2(subdir, MNT_DETACH); + rmdir(subdir); +err_tmpdir: + umount2(tmpdir, MNT_DETACH); + rmdir(tmpdir); +} + +static void test_statmount_by_fd_unmounted(void) +{ + const char root[] = "/test.unmounted"; + char tmpdir[] = "/statmount.fd.XXXXXX"; + char subdir[PATH_MAX]; + int fd; + struct statmount *sm = NULL; + + if (!mkdtemp(tmpdir)) { + ksft_perror("mkdtemp"); + return; + } + + if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) { + ksft_perror("mount"); + rmdir(tmpdir); + return; + } + + snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root); + + if (mkdir(subdir, 0755)) { + ksft_perror("mkdir"); + goto err_tmpdir; + } + + if (mount(subdir, subdir, 0, MS_BIND, NULL)) { + ksft_perror("mount"); + goto err_subdir; + } + + fd = open(subdir, O_PATH); + if (fd < 0) { + ksft_perror("open"); + goto err_subdir; + } + + if (umount2(tmpdir, MNT_DETACH)) { + ksft_perror("umount2"); + goto err_fd; + } + + sm = statmount_alloc(0, fd, STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT, STATMOUNT_BY_FD); + if (!sm) { + ksft_test_result_fail("statmount by fd unmounted: %s\n", + strerror(errno)); + goto err_sm; + } + + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto err_sm; + } + + if (sm->mask & STATMOUNT_MNT_POINT) { + ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in mask\n"); + goto err_sm; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in mask\n"); + goto err_sm; + } + + if (strcmp(sm->str + sm->mnt_root, root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", + sm->str + sm->mnt_root, root); + goto err_sm; + } + + ksft_test_result_pass("statmount by fd on unmounted mount\n"); +err_sm: + free(sm); +err_fd: + close(fd); +err_subdir: + umount2(subdir, MNT_DETACH); + rmdir(subdir); +err_tmpdir: + umount2(tmpdir, MNT_DETACH); + rmdir(tmpdir); +} + #define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t)) int main(void) @@ -669,14 +898,14 @@ int main(void) ksft_print_header(); - ret = statmount(0, 0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); setup_namespace(); - ksft_set_plan(15); + ksft_set_plan(17); test_listmount_empty_root(); test_statmount_zero_mask(); test_statmount_mnt_basic(); @@ -693,6 +922,8 @@ int main(void) test_statmount_string(all_mask, str_off(fs_type), "fs type & all"); test_listmount_tree(); + test_statmount_by_fd_unmounted(); + test_statmount_by_fd(); if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index d56d4103182fd9..063d9de4643163 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -102,7 +102,7 @@ static int _test_statmount_mnt_ns_id(void) if (!root_id) return NSID_ERROR; - ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret == -1) { ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); return NSID_ERROR; @@ -128,6 +128,98 @@ static int _test_statmount_mnt_ns_id(void) return NSID_PASS; } +static int _test_statmount_mnt_ns_id_by_fd(void) +{ + struct statmount sm; + uint64_t mnt_ns_id; + int ret, fd, mounted = 1, status = NSID_ERROR; + char mnt[] = "/statmount.fd.XXXXXX"; + + ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); + if (ret != NSID_PASS) + return ret; + + if (!mkdtemp(mnt)) { + ksft_print_msg("statmount by fd mnt ns id mkdtemp: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (mount(mnt, mnt, NULL, MS_BIND, 0)) { + ksft_print_msg("statmount by fd mnt ns id mount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto err; + } + + fd = open(mnt, O_PATH); + if (fd < 0) { + ksft_print_msg("statmount by fd mnt ns id open: %s\n", strerror(errno)); + goto err; + } + + ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto out; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + status = NSID_FAIL; + goto out; + } + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + status = NSID_SKIP; + goto out; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + status = NSID_FAIL; + goto out; + } + + mounted = 0; + if (umount2(mnt, MNT_DETACH)) { + ksft_print_msg("statmount by fd mnt ns id umount2: %s\n", strerror(errno)); + goto out; + } + + ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto out; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + status = NSID_FAIL; + goto out; + } + + if (sm.mask == STATMOUNT_MNT_NS_ID) { + ksft_print_msg("unexpected STATMOUNT_MNT_NS_ID in mask\n"); + status = NSID_FAIL; + goto out; + } + + status = NSID_PASS; +out: + close(fd); + if (mounted) + umount2(mnt, MNT_DETACH); +err: + rmdir(mnt); + return status; +} + + static void test_statmount_mnt_ns_id(void) { pid_t pid; @@ -148,6 +240,9 @@ static void test_statmount_mnt_ns_id(void) if (ret != NSID_PASS) exit(ret); ret = _test_statmount_mnt_ns_id(); + if (ret != NSID_PASS) + exit(ret); + ret = _test_statmount_mnt_ns_id_by_fd(); exit(ret); } @@ -179,7 +274,7 @@ static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts) for (int i = 0; i < nr_mounts; i++) { struct statmount sm; - ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm, + ret = statmount(list[i], mnt_ns_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret < 0) { ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); @@ -275,7 +370,7 @@ int main(void) int ret; ksft_print_header(); - ret = statmount(0, 0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); |
