diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2011-06-15 10:21:48 -0700 |
---|---|---|
committer | Eric W. Biederman <ebiederm@aristanetworks.com> | 2011-08-08 13:50:23 -0500 |
commit | 019eefb14e4393373df8a47aaf977d1f55f68a21 (patch) | |
tree | 504a22d10d007de9dcdb4258d3c82d8b9a8a8b5a | |
parent | 5273884bca0c9a60e23d49ef981a235423b7b568 (diff) | |
download | linux-namespace-control-devel-019eefb14e4393373df8a47aaf977d1f55f68a21.tar.gz |
proc: Usable inode numbers for the namespace file descriptors.
Assign a unique proc inode to each namespace, and use that
inode number to ensure we only allocate at most one proc
inode for every namespace in proc.
A single proc inode per namespace allows userspace to test
to see if two processes are in the same namespace.
This has been a long requested feature and only blocked because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.
We still don't have per superblock inode numbers for proc, which
appears necessary for application unaware checkpoint/restart and
migrations (if the application is using namespace file descriptors)
but that is now allowd by the design if it becomes important.
I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r-- | fs/proc/namespaces.c | 39 | ||||
-rw-r--r-- | include/linux/ipc_namespace.h | 2 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 6 | ||||
-rw-r--r-- | include/linux/utsname.h | 1 | ||||
-rw-r--r-- | include/net/net_namespace.h | 2 | ||||
-rw-r--r-- | init/version.c | 2 | ||||
-rw-r--r-- | ipc/msgutil.c | 2 | ||||
-rw-r--r-- | ipc/namespace.c | 16 | ||||
-rw-r--r-- | kernel/utsname.c | 17 | ||||
-rw-r--r-- | net/core/net_namespace.c | 24 |
10 files changed, 97 insertions, 14 deletions
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 53d4cc890aa493..041b64f33178b0 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -59,7 +59,7 @@ static const struct dentry_operations ns_dentry_operations = static struct dentry *proc_ns_get_dentry(struct super_block *sb, struct task_struct *task, const struct proc_ns_operations *ns_ops) { - struct dentry *dentry; + struct dentry *dentry, *result; struct inode *inode; struct proc_inode *ei; struct qstr qname = { .name = "", }; @@ -75,7 +75,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, return ERR_PTR(-ENOMEM); } - inode = new_inode(sb); + inode = iget_locked(sb, ns_ops->inum(ns)); if (!inode) { dput(dentry); ns_ops->put(ns); @@ -83,16 +83,24 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, } ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &ns_inode_operations; - inode->i_mode = S_IFREG|S_IRUSR; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG|S_IRUSR; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } d_set_d_op(dentry, &ns_dentry_operations); - d_instantiate(dentry, inode); + result = d_instantiate_unique(dentry, inode); + if (result) { + dput(dentry); + dentry = result; + } return dentry; } @@ -135,6 +143,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct proc_inode *ei = PROC_I(inode); const struct proc_ns_operations *ns_ops = ei->ns_ops; struct task_struct *task; + void *ns; char name[50]; int len = -EACCES; @@ -145,14 +154,20 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - snprintf(name, sizeof(name), "%s", ns_ops->name); + len = -ENOENT; + ns = ns_ops->get(task); + if (!ns) + goto out_put_task; + + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); len = strlen(name); if (len > buflen) len = buflen; - if (copy_to_user(buffer, ns_ops->name, len)) + if (copy_to_user(buffer, name, len)) len = -EFAULT; + ns_ops->put(ns); out_put_task: put_task_struct(task); out: diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a6d1655f9607de..22a4dc4c194f4a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -60,6 +60,8 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + + unsigned int proc_inum; }; extern struct ipc_namespace init_ipc_ns; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3067b442597942..5ebb8d7fcc0970 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,9 +28,12 @@ struct mm_struct; */ enum { - PROC_ROOT_INO = 1, + PROC_ROOT_INO = 1, + PROC_IPC_INIT_INO = 0xEFFFFFFFU, + PROC_UTS_INIT_INO = 0xEFFFFFFEU, }; + /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem @@ -257,6 +260,7 @@ struct proc_ns_operations { void *(*get)(struct task_struct *task); void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); + unsigned int (*inum)(void *ns); }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 4e5b0213fdc16a..03db764f690a77 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -44,6 +44,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + unsigned int proc_inum; }; extern struct uts_namespace init_uts_ns; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index aef430d779bdd0..850799ff8d0a0d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -52,6 +52,8 @@ struct net { struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ + unsigned int proc_inum; + struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; diff --git a/init/version.c b/init/version.c index 86fe0ccb997abd..58170f18912d88 100644 --- a/init/version.c +++ b/init/version.c @@ -12,6 +12,7 @@ #include <linux/utsname.h> #include <generated/utsrelease.h> #include <linux/version.h> +#include <linux/proc_fs.h> #ifndef CONFIG_KALLSYMS #define version(a) Version_ ## a @@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = { .domainname = UTS_DOMAINNAME, }, .user_ns = &init_user_ns, + .proc_inum = PROC_UTS_INIT_INO, }; EXPORT_SYMBOL_GPL(init_uts_ns); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 8b5ce5d3f3ef3e..f7da4856b5bbdf 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/ipc.h> #include <linux/ipc_namespace.h> +#include <linux/proc_fs.h> #include <asm/uaccess.h> #include "util.h" @@ -33,6 +34,7 @@ struct ipc_namespace init_ipc_ns = { .mq_msgsize_max = DFLT_MSGSIZEMAX, #endif .user_ns = &init_user_ns, + .proc_inum = PROC_IPC_INIT_INO, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); diff --git a/ipc/namespace.c b/ipc/namespace.c index ce0a647869b1e8..cd7f7330d683a1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, if (ns == NULL) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + atomic_set(&ns->count, 1); err = mq_init_ns(ns); if (err) { + proc_free_inum(ns->proc_inum); kfree(ns); return ERR_PTR(err); } @@ -113,6 +120,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ ipcns_notify(IPCNS_REMOVED); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -170,10 +178,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int ipcns_inum(void *vp) +{ + struct ipc_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations ipcns_operations = { .name = "ipc", .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, + .inum = ipcns_inum, }; diff --git a/kernel/utsname.c b/kernel/utsname.c index bff131b9510a42..3ab6a0823b04e2 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, struct uts_namespace *old_ns) { struct uts_namespace *ns; + int err; ns = create_uts_ns(); if (!ns) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); @@ -78,6 +85,7 @@ void free_uts_ns(struct kref *kref) ns = container_of(kref, struct uts_namespace, kref); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -110,11 +118,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int utsns_inum(void *vp) +{ + struct uts_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations utsns_operations = { .name = "uts", .type = CLONE_NEWUTS, .get = utsns_get, .put = utsns_put, .install = utsns_install, + .inum = utsns_inum, }; - diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ea489db1bc2361..4791afc1a46891 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -366,6 +366,21 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +static __net_init int net_ns_net_init(struct net *net) +{ + return proc_alloc_inum(&net->proc_inum); +} + +static __net_exit void net_ns_net_exit(struct net *net) +{ + proc_free_inum(net->proc_inum); +} + +static struct pernet_operations __net_initdata net_ns_ops = { + .init = net_ns_net_init, + .exit = net_ns_net_exit, +}; + static int __init net_ns_init(void) { struct net_generic *ng; @@ -397,6 +412,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); + register_pernet_subsys(&net_ns_ops); + return 0; } @@ -624,11 +641,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int netns_inum(void *ns) +{ + struct net *net = ns; + return net->proc_inum; +} + const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, + .inum = netns_inum, }; #endif |