diff options
author | Andy Lutomirski <luto@amacapital.net> | 2014-11-28 13:29:41 -0800 |
---|---|---|
committer | Andy Lutomirski <luto@amacapital.net> | 2014-12-01 15:29:17 -0800 |
commit | 48ad64d84ee8ae6e3bfaa49155d7c5f6283f2420 (patch) | |
tree | 732e33f5b933f5b3c3beba4c6aa95598628b0bf4 | |
parent | 8a84e01e147f44111988f9d8ccd2eaa30215a0f2 (diff) | |
download | devel-pidns/highpid.tar.gz |
proc, pidns: Add highpidpidns/highpid
Pid reuse is common, which means that it's difficult or impossible
to read information about a pid from /proc without races.
This introduces a second number associated with each (task, pidns)
pair called highpid. Highpid is a 64-bit number, and, barring
extremely unlikely circumstances or outright error, a (highpid, pid)
will never be reused.
With just this change, a program can open /proc/PID/status, read the
"Highpid" field, and confirm that it has the expected value. If the
pid has been reused, then highpid will be different.
The initial implementation is straightforward: highpid is simply a
64-bit counter. If a high-end system can fork every 3 ns (which
would be amazing, given that just allocating a pid requires at
atomic operation), it would take well over 1000 years for highpid to
wrap.
For CRIU's benefit, the next highpid can be set by a privileged
user.
NB: The sysctl stuff only works on 64-bit systems. If the approach
looks good, I'll fix that somehow.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
-rw-r--r-- | fs/proc/array.c | 5 | ||||
-rw-r--r-- | include/linux/pid.h | 2 | ||||
-rw-r--r-- | include/linux/pid_namespace.h | 7 | ||||
-rw-r--r-- | kernel/pid.c | 42 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 23 |
5 files changed, 74 insertions, 5 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index cd3653e4f35c6..f1e0e69d19f9e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, int g; struct fdtable *fdt = NULL; const struct cred *cred; + const struct upid *upid; pid_t ppid, tpid; rcu_read_lock(); @@ -170,12 +171,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } + upid = pid_upid_ns(pid, ns); cred = get_task_cred(p); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" "Ngid:\t%d\n" "Pid:\t%d\n" + "Highpid:\t%llu\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" @@ -183,7 +186,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, get_task_state(p), task_tgid_nr_ns(p, ns), task_numa_group_id(p), - pid_nr_ns(pid, ns), + upid ? upid->nr : 0, upid ? upid->highnr : 0, ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), diff --git a/include/linux/pid.h b/include/linux/pid.h index 23705a53abbaa..ece70b64d04cf 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -51,6 +51,7 @@ struct upid { /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; + u64 highnr; struct hlist_node pid_chain; }; @@ -170,6 +171,7 @@ static inline pid_t pid_nr(struct pid *pid) } pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); +const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns); pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 1997ffc295a7e..fe414ec750117 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -20,12 +20,19 @@ struct pidmap { struct bsd_acct_struct; +/* We don't want a highpid to ever be a valid pid. */ +#define MIN_HIGHPID ((u64)(1ULL << 32)) + +/* We don't want a highpid to ever look like an error code. */ +#define MAX_HIGHPID ((u64)(-1ULL - 4096)) + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; struct rcu_head rcu; int last_pid; unsigned int nr_hashed; + atomic64_t next_highpid; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; diff --git a/kernel/pid.c b/kernel/pid.c index 9b9a26698144e..9cdfbb64162bf 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -76,6 +76,7 @@ struct pid_namespace init_pid_ns = { }, .last_pid = 0, .nr_hashed = PIDNS_HASH_ADDING, + .next_highpid = ATOMIC64_INIT(MIN_HIGHPID), .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, @@ -291,6 +292,29 @@ void free_pid(struct pid *pid) call_rcu(&pid->rcu, delayed_put_pid); } +static u64 alloc_highpid(struct pid_namespace *ns) +{ + u64 prev, old, new; + u64 nr = atomic64_inc_return(&ns->next_highpid) - 1; + + if (likely(nr >= MIN_HIGHPID && nr <= MAX_HIGHPID)) + return nr; + + /* + * Atomically increase next_highpid to something between + * MIN_HIGHPID + 1 and MAX_HIGHPID + 1 and return new - 1. + */ + prev = nr + 1; + do { + old = prev; + new = old + 1; + if (new < MIN_HIGHPID + 1 || new > MAX_HIGHPID + 1) + new = MIN_HIGHPID + 1; + prev = atomic64_cmpxchg(&ns->next_highpid, old, new); + } while (prev != old); + return new - 1; +} + struct pid *alloc_pid(struct pid_namespace *ns) { struct pid *pid; @@ -312,6 +336,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; + pid->numbers[i].highnr = alloc_highpid(tmp); tmp = tmp->parent; } @@ -492,17 +517,26 @@ struct pid *find_get_pid(pid_t nr) } EXPORT_SYMBOL_GPL(find_get_pid); -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) +const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns) { struct upid *upid; - pid_t nr = 0; if (pid && ns->level <= pid->level) { upid = &pid->numbers[ns->level]; if (upid->ns == ns) - nr = upid->nr; + return upid; } - return nr; + return NULL; +} +EXPORT_SYMBOL_GPL(pid_upid_ns); + +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) +{ + const struct upid *upid = pid_upid_ns(pid, ns); + + if (!upid) + return 0; + return upid->nr; } EXPORT_SYMBOL_GPL(pid_nr_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index db95d8eb761b4..cbbaa14944dd1 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -114,6 +114,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); ns->nr_hashed = PIDNS_HASH_ADDING; + atomic64_set(&ns->next_highpid, MIN_HIGHPID); INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); @@ -268,6 +269,22 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); } +static int pid_ns_next_highpid_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct pid_namespace *pid_ns = task_active_pid_ns(current); + struct ctl_table tmp = *table; + + if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + /* This needs to be fixed. */ + BUILD_BUG_ON(sizeof(u64) != sizeof(unsigned long)); + + tmp.data = &pid_ns->next_highpid; + return proc_dointvec(&tmp, write, buffer, lenp, ppos); +} + extern int pid_max; static int zero = 0; static struct ctl_table pid_ns_ctl_table[] = { @@ -279,6 +296,12 @@ static struct ctl_table pid_ns_ctl_table[] = { .extra1 = &zero, .extra2 = &pid_max, }, + { + .procname = "ns_next_highpid", + .maxlen = sizeof(u64), + .mode = 0666, /* permissions are checked in the handler */ + .proc_handler = pid_ns_next_highpid_handler, + }, { } }; static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; |