diff options
author | Christian Brauner <brauner@kernel.org> | 2024-03-12 10:39:44 +0100 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2024-03-13 13:03:09 +0100 |
commit | 339f8d58c9d38a98863db5853e73d6e3f1bfe06c (patch) | |
tree | 8a68ba5b8d3bc0023b161d8fc175b4a3e7592068 | |
parent | e9c5263ce16d96311c118111ac779f004be8b473 (diff) | |
download | vfs-6.9.pidfs-1.tar.gz |
pidfs: remove config optionvfs-6.9.pidfs-1
Enable pidfs unconditionally. There's no real reason not do to it.
For 32bit systems we add a simple inode allocation mechanism that still
guarantees that userspace can compare processes by inode number which
they already do as I found out in [1]. If they also need the uniqueness
property that we get by default on 64bit systems they should simply
parse the contents of /proc/<pid>/fd/<nr>. On 64bit we don't have to
deal with any of this and things are nice and simple.
Link: https://github.com/systemd/systemd/pull/31713 [1]
Link: https://lore.kernel.org/r/20240312-dingo-sehnlich-b3ecc35c6de7@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r-- | fs/Kconfig | 7 | ||||
-rw-r--r-- | fs/pidfs.c | 100 | ||||
-rw-r--r-- | include/linux/pid.h | 6 | ||||
-rw-r--r-- | include/linux/pidfs.h | 1 | ||||
-rw-r--r-- | kernel/pid.c | 6 |
5 files changed, 51 insertions, 69 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index f3dbd84a0e40a0..89fdbefd1075f8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -174,13 +174,6 @@ source "fs/proc/Kconfig" source "fs/kernfs/Kconfig" source "fs/sysfs/Kconfig" -config FS_PID - bool "Pseudo filesystem for process file descriptors" - depends on 64BIT - default y - help - Pidfs implements advanced features for process file descriptors. - config TMPFS bool "Tmpfs virtual memory file system support (former shm fs)" depends on SHMEM diff --git a/fs/pidfs.c b/fs/pidfs.c index 8fd71a00be9c0c..5affe74b3330bc 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -16,17 +16,6 @@ #include "internal.h" -static int pidfd_release(struct inode *inode, struct file *file) -{ -#ifndef CONFIG_FS_PID - struct pid *pid = file->private_data; - - file->private_data = NULL; - put_pid(pid); -#endif - return 0; -} - #ifdef CONFIG_PROC_FS /** * pidfd_show_fdinfo - print information about a pidfd @@ -120,7 +109,6 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) } static const struct file_operations pidfs_file_operations = { - .release = pidfd_release, .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, @@ -131,16 +119,48 @@ struct pid *pidfd_pid(const struct file *file) { if (file->f_op != &pidfs_file_operations) return ERR_PTR(-EBADF); -#ifdef CONFIG_FS_PID return file_inode(file)->i_private; -#else - return file->private_data; -#endif } -#ifdef CONFIG_FS_PID static struct vfsmount *pidfs_mnt __ro_after_init; +#if BITS_PER_LONG == 32 +/* + * Provide a fallback mechanism for 32-bit systems so processes remain + * reliably comparable by inode number even on those systems. + */ +static DEFINE_IDA(pidfd_inum_ida); + +/* + * Inode numbering for pidfs start at RESERVED_PIDS + 1. This avoids + * collisions with the root inode which is 1 for pseudo filesystems. + */ +static int pidfs_inum(struct pid *pid, unsigned long *ino) +{ + int ret; + + ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1, + UINT_MAX, GFP_ATOMIC); + if (ret < 0) + return -ENOSPC; + + *ino = ret; + return 0; +} + +static void pidfs_free_inum(unsigned long ino) +{ + ida_free(&pidfd_inum_ida, ino); +} +#else +static inline int pidfs_inum(struct pid *pid, unsigned long *ino) +{ + *ino = pid->ino; + return 0; +} +#define pidfs_free_inum(ino) ((void)(ino)) +#endif + /* * The vfs falls back to simple_setattr() if i_op->setattr() isn't * implemented. Let's reject it completely until we have a clean @@ -173,6 +193,7 @@ static void pidfs_evict_inode(struct inode *inode) clear_inode(inode); put_pid(pid); + pidfs_free_inum(inode->i_ino); } static const struct super_operations pidfs_sops = { @@ -183,8 +204,10 @@ static const struct super_operations pidfs_sops = { static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) { - return dynamic_dname(buffer, buflen, "pidfd:[%lu]", - d_inode(dentry)->i_ino); + struct inode *inode = d_inode(dentry); + struct pid *pid = inode->i_private; + + return dynamic_dname(buffer, buflen, "pidfd:[%llu]", pid->ino); } static const struct dentry_operations pidfs_dentry_operations = { @@ -239,13 +262,13 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) struct file *pidfd_file; struct path path; int ret; + unsigned long ino; - /* - * Inode numbering for pidfs start at RESERVED_PIDS + 1. - * This avoids collisions with the root inode which is 1 - * for pseudo filesystems. - */ - ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt, + ret = pidfs_inum(pid, &ino); + if (ret < 0) + return ERR_PTR(ret); + + ret = path_from_stashed(&pid->stashed, ino, pidfs_mnt, get_pid(pid), &path); if (ret < 0) return ERR_PTR(ret); @@ -261,30 +284,3 @@ void __init pidfs_init(void) if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); } - -bool is_pidfs_sb(const struct super_block *sb) -{ - return sb == pidfs_mnt->mnt_sb; -} - -#else /* !CONFIG_FS_PID */ - -struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) -{ - struct file *pidfd_file; - - pidfd_file = anon_inode_getfile("[pidfd]", &pidfs_file_operations, pid, - flags | O_RDWR); - if (IS_ERR(pidfd_file)) - return pidfd_file; - - get_pid(pid); - return pidfd_file; -} - -void __init pidfs_init(void) { } -bool is_pidfs_sb(const struct super_block *sb) -{ - return false; -} -#endif diff --git a/include/linux/pid.h b/include/linux/pid.h index c79a0efd02586b..a3aad9b4074cb0 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -45,6 +45,8 @@ * find_pid_ns() using the int nr and struct pid_namespace *ns. */ +#define RESERVED_PIDS 300 + struct upid { int nr; struct pid_namespace *ns; @@ -55,10 +57,8 @@ struct pid refcount_t count; unsigned int level; spinlock_t lock; -#ifdef CONFIG_FS_PID struct dentry *stashed; - unsigned long ino; -#endif + u64 ino; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 40dd325a32a634..75bdf9807802a5 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,6 +4,5 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); -bool is_pidfs_sb(const struct super_block *sb); #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/pid.c b/kernel/pid.c index 99a0c5eb24b8d6..da76ed1873f707 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -62,17 +62,13 @@ struct pid init_struct_pid = { int pid_max = PID_MAX_DEFAULT; -#define RESERVED_PIDS 300 - int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#ifdef CONFIG_FS_PID /* * Pseudo filesystems start inode numbering after one. We use Reserved * PIDs as a natural offset. */ static u64 pidfs_ino = RESERVED_PIDS; -#endif /* * PID-map pages start out as NULL, they get allocated upon @@ -280,10 +276,8 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; -#ifdef CONFIG_FS_PID pid->stashed = NULL; pid->ino = ++pidfs_ino; -#endif for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); |