aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@amacapital.net>2014-11-28 13:29:41 -0800
committerAndy Lutomirski <luto@amacapital.net>2014-12-01 15:29:17 -0800
commit48ad64d84ee8ae6e3bfaa49155d7c5f6283f2420 (patch)
tree732e33f5b933f5b3c3beba4c6aa95598628b0bf4
parent8a84e01e147f44111988f9d8ccd2eaa30215a0f2 (diff)
downloaddevel-pidns/highpid.tar.gz
proc, pidns: Add highpidpidns/highpid
Pid reuse is common, which means that it's difficult or impossible to read information about a pid from /proc without races. This introduces a second number associated with each (task, pidns) pair called highpid. Highpid is a 64-bit number, and, barring extremely unlikely circumstances or outright error, a (highpid, pid) will never be reused. With just this change, a program can open /proc/PID/status, read the "Highpid" field, and confirm that it has the expected value. If the pid has been reused, then highpid will be different. The initial implementation is straightforward: highpid is simply a 64-bit counter. If a high-end system can fork every 3 ns (which would be amazing, given that just allocating a pid requires an atomic operation), it would take well over 1000 years for highpid to wrap. For CRIU's benefit, the next highpid can be set by a privileged user. NB: The sysctl stuff only works on 64-bit systems. If the approach looks good, I'll fix that somehow. Signed-off-by: Andy Lutomirski <luto@amacapital.net>
-rw-r--r--fs/proc/array.c5
-rw-r--r--include/linux/pid.h2
-rw-r--r--include/linux/pid_namespace.h7
-rw-r--r--kernel/pid.c42
-rw-r--r--kernel/pid_namespace.c23
5 files changed, 74 insertions, 5 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cd3653e4f35c6..f1e0e69d19f9e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
int g;
struct fdtable *fdt = NULL;
const struct cred *cred;
+ const struct upid *upid;
pid_t ppid, tpid;
rcu_read_lock();
@@ -170,12 +171,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
if (tracer)
tpid = task_pid_nr_ns(tracer, ns);
}
+ upid = pid_upid_ns(pid, ns);
cred = get_task_cred(p);
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
"Ngid:\t%d\n"
"Pid:\t%d\n"
+ "Highpid:\t%llu\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
@@ -183,7 +186,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
get_task_state(p),
task_tgid_nr_ns(p, ns),
task_numa_group_id(p),
- pid_nr_ns(pid, ns),
+ upid ? upid->nr : 0, upid ? upid->highnr : 0,
ppid, tpid,
from_kuid_munged(user_ns, cred->uid),
from_kuid_munged(user_ns, cred->euid),
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 23705a53abbaa..ece70b64d04cf 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -51,6 +51,7 @@ struct upid {
/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
+ u64 highnr;
struct hlist_node pid_chain;
};
@@ -170,6 +171,7 @@ static inline pid_t pid_nr(struct pid *pid)
}
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
+const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns);
pid_t pid_vnr(struct pid *pid);
#define do_each_pid_task(pid, type, task) \
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1997ffc295a7e..fe414ec750117 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -20,12 +20,19 @@ struct pidmap {
struct bsd_acct_struct;
+/* We don't want a highpid to ever be a valid pid. */
+#define MIN_HIGHPID ((u64)(1ULL << 32))
+
+/* We don't want a highpid to ever look like an error code. */
+#define MAX_HIGHPID ((u64)(-1ULL - 4096))
+
struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
struct rcu_head rcu;
int last_pid;
unsigned int nr_hashed;
+ atomic64_t next_highpid;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
diff --git a/kernel/pid.c b/kernel/pid.c
index 9b9a26698144e..9cdfbb64162bf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -76,6 +76,7 @@ struct pid_namespace init_pid_ns = {
},
.last_pid = 0,
.nr_hashed = PIDNS_HASH_ADDING,
+ .next_highpid = ATOMIC64_INIT(MIN_HIGHPID),
.level = 0,
.child_reaper = &init_task,
.user_ns = &init_user_ns,
@@ -291,6 +292,29 @@ void free_pid(struct pid *pid)
call_rcu(&pid->rcu, delayed_put_pid);
}
+/*
+ * Hand out the next highpid for @ns.
+ *
+ * Takes the current value of ns->next_highpid and advances the counter by
+ * one.  If the value taken lies within [MIN_HIGHPID, MAX_HIGHPID] it is
+ * returned directly.  Otherwise the counter is out of range and a cmpxchg
+ * loop moves it back into range before a value is handed out.
+ *
+ * Returns a value in [MIN_HIGHPID, MAX_HIGHPID].
+ */
+static u64 alloc_highpid(struct pid_namespace *ns)
+{
+ u64 prev, old, new;
+ /* inc_return yields the post-increment value; nr is what the counter held. */
+ u64 nr = atomic64_inc_return(&ns->next_highpid) - 1;
+
+ /* Fast path: the value we claimed is a valid highpid. */
+ if (likely(nr >= MIN_HIGHPID && nr <= MAX_HIGHPID))
+ return nr;
+
+ /*
+ * Atomically increase next_highpid to something between
+ * MIN_HIGHPID + 1 and MAX_HIGHPID + 1 and return new - 1.
+ */
+ /* nr + 1 is the counter value our own increment left behind. */
+ prev = nr + 1;
+ do {
+ old = prev;
+ new = old + 1;
+ /* Counter still out of range: restart it so that MIN_HIGHPID is handed out. */
+ if (new < MIN_HIGHPID + 1 || new > MAX_HIGHPID + 1)
+ new = MIN_HIGHPID + 1;
+ /* cmpxchg returns the value it found; retry from there if we raced. */
+ prev = atomic64_cmpxchg(&ns->next_highpid, old, new);
+ } while (prev != old);
+ return new - 1;
+}
+
struct pid *alloc_pid(struct pid_namespace *ns)
{
struct pid *pid;
@@ -312,6 +336,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
+ pid->numbers[i].highnr = alloc_highpid(tmp);
tmp = tmp->parent;
}
@@ -492,17 +517,26 @@ struct pid *find_get_pid(pid_t nr)
}
EXPORT_SYMBOL_GPL(find_get_pid);
-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+/*
+ * Look up the upid entry that describes @pid as seen from @ns.
+ *
+ * Returns &pid->numbers[ns->level] when @pid is non-NULL, @ns is not deeper
+ * than @pid's own level, and that entry actually belongs to @ns.
+ * Returns NULL otherwise.
+ */
+const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns)
{
struct upid *upid;
- pid_t nr = 0;
if (pid && ns->level <= pid->level) {
upid = &pid->numbers[ns->level];
if (upid->ns == ns)
- nr = upid->nr;
+ return upid;
}
- return nr;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pid_upid_ns);
+
+/*
+ * Return the numeric id of @pid as seen from @ns, or 0 when pid_upid_ns()
+ * finds no entry for @pid in @ns.
+ */
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+{
+ const struct upid *upid = pid_upid_ns(pid, ns);
+
+ if (!upid)
+ return 0;
+ return upid->nr;
}
EXPORT_SYMBOL_GPL(pid_nr_ns);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index db95d8eb761b4..cbbaa14944dd1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -114,6 +114,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->nr_hashed = PIDNS_HASH_ADDING;
+ atomic64_set(&ns->next_highpid, MIN_HIGHPID);
INIT_WORK(&ns->proc_work, proc_cleanup_work);
set_bit(0, ns->pidmap[0].page);
@@ -268,6 +269,22 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
}
+/*
+ * sysctl handler for the "ns_next_highpid" entry.
+ *
+ * Reads or writes the next_highpid counter of the caller's active pid
+ * namespace.  The handler itself only gates writes: they require
+ * CAP_SYS_ADMIN in the user namespace that owns the pid namespace.
+ *
+ * Returns -EPERM for an unprivileged write, otherwise whatever
+ * proc_doulongvec_minmax() returns.
+ */
+static int pid_ns_next_highpid_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
+	/* Work on a copy so the shared table entry is never pointed at our ns. */
+	struct ctl_table tmp = *table;
+
+	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * This needs to be fixed: the counter is accessed as a single
+	 * unsigned long, which is only wide enough on 64-bit systems.
+	 */
+	BUILD_BUG_ON(sizeof(u64) != sizeof(unsigned long));
+
+	tmp.data = &pid_ns->next_highpid;
+	/*
+	 * The counter is one 64-bit value, so it must go through the
+	 * unsigned long handler.  The int-vector handler would treat the
+	 * sizeof(u64) bytes as two separate ints.
+	 */
+	return proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
+}
+
extern int pid_max;
static int zero = 0;
static struct ctl_table pid_ns_ctl_table[] = {
@@ -279,6 +296,12 @@ static struct ctl_table pid_ns_ctl_table[] = {
.extra1 = &zero,
.extra2 = &pid_max,
},
+ {
+ .procname = "ns_next_highpid",
+ .maxlen = sizeof(u64),
+ .mode = 0666, /* permissions are checked in the handler */
+ .proc_handler = pid_ns_next_highpid_handler,
+ },
{ }
};
static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };