summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-03-12 10:39:44 +0100
committerChristian Brauner <brauner@kernel.org>2024-03-13 13:03:09 +0100
commit339f8d58c9d38a98863db5853e73d6e3f1bfe06c (patch)
tree8a68ba5b8d3bc0023b161d8fc175b4a3e7592068
parente9c5263ce16d96311c118111ac779f004be8b473 (diff)
downloadvfs-6.9.pidfs-1.tar.gz
pidfs: remove config optionvfs-6.9.pidfs-1
Enable pidfs unconditionally. There's no real reason not to do it. For 32bit systems we add a simple inode allocation mechanism that still guarantees that userspace can compare processes by inode number which they already do as I found out in [1]. If they also need the uniqueness property that we get by default on 64bit systems they should simply parse the contents of /proc/<pid>/fd/<nr>. On 64bit we don't have to deal with any of this and things are nice and simple. Link: https://github.com/systemd/systemd/pull/31713 [1] Link: https://lore.kernel.org/r/20240312-dingo-sehnlich-b3ecc35c6de7@brauner Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/pidfs.c100
-rw-r--r--include/linux/pid.h6
-rw-r--r--include/linux/pidfs.h1
-rw-r--r--kernel/pid.c6
5 files changed, 51 insertions, 69 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f3dbd84a0e40a0..89fdbefd1075f8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -174,13 +174,6 @@ source "fs/proc/Kconfig"
source "fs/kernfs/Kconfig"
source "fs/sysfs/Kconfig"
-config FS_PID
- bool "Pseudo filesystem for process file descriptors"
- depends on 64BIT
- default y
- help
- Pidfs implements advanced features for process file descriptors.
-
config TMPFS
bool "Tmpfs virtual memory file system support (former shm fs)"
depends on SHMEM
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 8fd71a00be9c0c..5affe74b3330bc 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -16,17 +16,6 @@
#include "internal.h"
-static int pidfd_release(struct inode *inode, struct file *file)
-{
-#ifndef CONFIG_FS_PID
- struct pid *pid = file->private_data;
-
- file->private_data = NULL;
- put_pid(pid);
-#endif
- return 0;
-}
-
#ifdef CONFIG_PROC_FS
/**
* pidfd_show_fdinfo - print information about a pidfd
@@ -120,7 +109,6 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
}
static const struct file_operations pidfs_file_operations = {
- .release = pidfd_release,
.poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
@@ -131,16 +119,48 @@ struct pid *pidfd_pid(const struct file *file)
{
if (file->f_op != &pidfs_file_operations)
return ERR_PTR(-EBADF);
-#ifdef CONFIG_FS_PID
return file_inode(file)->i_private;
-#else
- return file->private_data;
-#endif
}
-#ifdef CONFIG_FS_PID
static struct vfsmount *pidfs_mnt __ro_after_init;
+#if BITS_PER_LONG == 32
+/*
+ * Provide a fallback mechanism for 32-bit systems so processes remain
+ * reliably comparable by inode number even on those systems.
+ */
+static DEFINE_IDA(pidfd_inum_ida);
+
+/*
+ * Inode numbering for pidfs starts at RESERVED_PIDS + 1. This avoids
+ * collisions with the root inode which is 1 for pseudo filesystems.
+ */
+static int pidfs_inum(struct pid *pid, unsigned long *ino)
+{
+ int ret;
+
+ ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1,
+ UINT_MAX, GFP_ATOMIC);
+ if (ret < 0)
+ return -ENOSPC;
+
+ *ino = ret;
+ return 0;
+}
+
+static void pidfs_free_inum(unsigned long ino)
+{
+ ida_free(&pidfd_inum_ida, ino);
+}
+#else
+static inline int pidfs_inum(struct pid *pid, unsigned long *ino)
+{
+ *ino = pid->ino;
+ return 0;
+}
+#define pidfs_free_inum(ino) ((void)(ino))
+#endif
+
/*
* The vfs falls back to simple_setattr() if i_op->setattr() isn't
* implemented. Let's reject it completely until we have a clean
@@ -173,6 +193,7 @@ static void pidfs_evict_inode(struct inode *inode)
clear_inode(inode);
put_pid(pid);
+ pidfs_free_inum(inode->i_ino);
}
static const struct super_operations pidfs_sops = {
@@ -183,8 +204,10 @@ static const struct super_operations pidfs_sops = {
static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- return dynamic_dname(buffer, buflen, "pidfd:[%lu]",
- d_inode(dentry)->i_ino);
+ struct inode *inode = d_inode(dentry);
+ struct pid *pid = inode->i_private;
+
+ return dynamic_dname(buffer, buflen, "pidfd:[%llu]", pid->ino);
}
static const struct dentry_operations pidfs_dentry_operations = {
@@ -239,13 +262,13 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
struct file *pidfd_file;
struct path path;
int ret;
+ unsigned long ino;
- /*
- * Inode numbering for pidfs start at RESERVED_PIDS + 1.
- * This avoids collisions with the root inode which is 1
- * for pseudo filesystems.
- */
- ret = path_from_stashed(&pid->stashed, pid->ino, pidfs_mnt,
+ ret = pidfs_inum(pid, &ino);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ ret = path_from_stashed(&pid->stashed, ino, pidfs_mnt,
get_pid(pid), &path);
if (ret < 0)
return ERR_PTR(ret);
@@ -261,30 +284,3 @@ void __init pidfs_init(void)
if (IS_ERR(pidfs_mnt))
panic("Failed to mount pidfs pseudo filesystem");
}
-
-bool is_pidfs_sb(const struct super_block *sb)
-{
- return sb == pidfs_mnt->mnt_sb;
-}
-
-#else /* !CONFIG_FS_PID */
-
-struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
-{
- struct file *pidfd_file;
-
- pidfd_file = anon_inode_getfile("[pidfd]", &pidfs_file_operations, pid,
- flags | O_RDWR);
- if (IS_ERR(pidfd_file))
- return pidfd_file;
-
- get_pid(pid);
- return pidfd_file;
-}
-
-void __init pidfs_init(void) { }
-bool is_pidfs_sb(const struct super_block *sb)
-{
- return false;
-}
-#endif
diff --git a/include/linux/pid.h b/include/linux/pid.h
index c79a0efd02586b..a3aad9b4074cb0 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -45,6 +45,8 @@
* find_pid_ns() using the int nr and struct pid_namespace *ns.
*/
+#define RESERVED_PIDS 300
+
struct upid {
int nr;
struct pid_namespace *ns;
@@ -55,10 +57,8 @@ struct pid
refcount_t count;
unsigned int level;
spinlock_t lock;
-#ifdef CONFIG_FS_PID
struct dentry *stashed;
- unsigned long ino;
-#endif
+ u64 ino;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct hlist_head inodes;
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 40dd325a32a634..75bdf9807802a5 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -4,6 +4,5 @@
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
-bool is_pidfs_sb(const struct super_block *sb);
#endif /* _LINUX_PID_FS_H */
diff --git a/kernel/pid.c b/kernel/pid.c
index 99a0c5eb24b8d6..da76ed1873f707 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -62,17 +62,13 @@ struct pid init_struct_pid = {
int pid_max = PID_MAX_DEFAULT;
-#define RESERVED_PIDS 300
-
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
-#ifdef CONFIG_FS_PID
/*
* Pseudo filesystems start inode numbering after one. We use Reserved
* PIDs as a natural offset.
*/
static u64 pidfs_ino = RESERVED_PIDS;
-#endif
/*
* PID-map pages start out as NULL, they get allocated upon
@@ -280,10 +276,8 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
spin_lock_irq(&pidmap_lock);
if (!(ns->pid_allocated & PIDNS_ADDING))
goto out_unlock;
-#ifdef CONFIG_FS_PID
pid->stashed = NULL;
pid->ino = ++pidfs_ino;
-#endif
for ( ; upid >= pid->numbers; --upid) {
/* Make the PID visible to find_pid_ns. */
idr_replace(&upid->ns->idr, pid, upid->nr);