vfs: Add user namespace control of mounts.

The detailed changes below make it possible to isolate superblocks and mount namespaces in a user namespace, and to not need global capability permissions to manipulate the user namespace. - Add a user_ns owner to the mount namespace. - Add a user_ns owner to struct super_block. - Add a FS_SAFE flag for struct file_system_type to allow marking filesystems as safe to be mounted by anyone. - Reduce the capabilities needed for mount namespace manipulation to just CAP_SYS_ADMIN in the user namespace that the mount namespace is in. - For new mounts require CAP_SYS_ADMIN in the initial user namespace unless the filesystem was marked safe. - Require remounts to have CAP_SYS_ADMIN in the user namespace of the super block. - For permission checks where the userns is needed use the userns in the superblock instead of the init_user_ns. Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
author: Eric W. Biederman <ebiederm@aristanetworks.com> 2011-08-11 11:57:37 -0500
committer: Eric W. Biederman <ebiederm@aristanetworks.com> 2011-08-11 13:50:23 -0500
commit: 467d31874f976c2db4b1a7a07d5b8bc878c00c73 (patch)
tree: 941781f8d9f22a3379c37e6953bf129b1a75870f
parent: c3de8efebe6fc9d49030a943dbc87b668002ca2a (diff)
download: linux-user-ns-devel-467d31874f976c2db4b1a7a07d5b8bc878c00c73.tar.gz
4 files changed, 44 insertions, 9 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 22bfe8273c680..92c7c45b032c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -31,6 +31,7 @@
 #include <linux/idr.h>
 #include <linux/fs_struct.h>
 #include <linux/fsnotify.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
@@ -1380,7 +1381,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(path.mnt, flags);
@@ -1406,7 +1407,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
 
 static int mount_is_safe(struct path *path)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -1716,7 +1717,7 @@ static int do_change_type(struct path *path, int flag)
 	int type;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (path->dentry != path->mnt->mnt_root)
@@ -1823,7 +1824,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	int err;
 	struct super_block *sb = path->mnt->mnt_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (!check_mnt(path->mnt))
@@ -1832,6 +1833,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
 
+	if (!ns_capable(path->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
 	err = security_sb_remount(sb, data);
 	if (err)
 		return err;
@@ -1871,7 +1875,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	struct path old_path, parent_path;
 	struct vfsmount *p;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -2012,20 +2016,36 @@ unlock:
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
  */
-static int do_new_mount(struct path *path, char *type, int flags,
+static int do_new_mount(struct path *path, char *fstype, int flags,
 			int mnt_flags, char *name, void *data)
 {
+	struct file_system_type *type;
+	struct user_namespace *user_ns;
 	struct vfsmount *mnt;
 	int err;
 
+	if (!fstype)
+		return -EINVAL;
+
+	type = get_fs_type(fstype);
 	if (!type)
 		return -EINVAL;
 
+	user_ns = &init_user_ns;
+	if (type->fs_flags & FS_SAFE)
+		user_ns = current->nsproxy->mnt_ns->user_ns;
+
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
+		put_filesystem(type);
 		return -EPERM;
+	}
 
-	mnt = do_kern_mount(type, flags, name, data);
+	mnt = vfs_kern_mount(type, flags, name, data);
+	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+	    !mnt->mnt_sb->s_subtype)
+		mnt = fs_set_subtype(mnt, fstype);
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -2367,6 +2387,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
+	new_ns->user_ns = get_user_ns(current_user_ns());
 	return new_ns;
 }
 
@@ -2560,7 +2581,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	struct path new, old, parent_path, root_parent, root;
 	int error;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	error = user_path_dir(new_root, &new);
@@ -2715,6 +2736,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	br_write_unlock(vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
+	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
 EXPORT_SYMBOL(put_mnt_ns);
diff --git a/fs/super.c b/fs/super.c
index 3f56a269a4f4e..0575b1f273376 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -32,6 +32,7 @@
 #include <linux/backing-dev.h>
 #include <linux/rculist_bl.h>
 #include <linux/cleancache.h>
+#include <linux/user_namespace.h>
 #include "internal.h"
 
 
@@ -183,6 +184,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		s->s_shrink.seeks = DEFAULT_SEEKS;
 		s->s_shrink.shrink = prune_super;
 		s->s_shrink.batch = 1024;
+		s->s_user_ns = get_user_ns(current_user_ns());
 	}
 out:
 	return s;
@@ -202,6 +204,7 @@ static inline void destroy_super(struct super_block *s)
 	security_sb_free(s);
 	kfree(s->s_subtype);
 	kfree(s->s_options);
+	put_user_ns(s->s_user_ns);
 	kfree(s);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 178cdb4f1d4af..8eea4d2eda1b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,6 +178,7 @@ struct inodes_stat_t {
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
 #define FS_HAS_SUBTYPE 4
+#define FS_SAFE 8 /* Safe to mount by unprivileged users */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
 					 * during rename() internally.
@@ -1387,6 +1388,7 @@ struct super_block {
 	unsigned long		s_blocksize;
 	loff_t			s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
+	struct user_namespace 	*s_user_ns;
 	const struct super_operations	*s_op;
 	const struct dquot_operations	*dq_op;
 	const struct quotactl_ops	*s_qcop;
@@ -1488,7 +1490,14 @@ enum {
  * belong to init_user_ns
  */
 extern struct user_namespace init_user_ns;
+#ifdef CONFIG_USER_NS
+static inline struct user_namespace *inode_userns(const struct inode *inode)
+{
+	return inode->i_sb->s_user_ns;
+}
+#else
 #define inode_userns(inode) (&init_user_ns)
+#endif
 extern bool inode_owner_or_capable(const struct inode *inode);
 
 /* not quite ready to be deprecated, but... */
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 29304855652d4..75a01c2d54750 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -12,6 +12,7 @@ struct mnt_namespace {
 	struct list_head	list;
 	wait_queue_head_t poll;
 	int event;
+	struct user_namespace *user_ns;
 };
 
 struct proc_mounts {
author	Eric W. Biederman <ebiederm@aristanetworks.com>	2011-08-11 11:57:37 -0500
committer	Eric W. Biederman <ebiederm@aristanetworks.com>	2011-08-11 13:50:23 -0500
commit	467d31874f976c2db4b1a7a07d5b8bc878c00c73 (patch)
tree	941781f8d9f22a3379c37e6953bf129b1a75870f
parent	c3de8efebe6fc9d49030a943dbc87b668002ca2a (diff)
download	linux-user-ns-devel-467d31874f976c2db4b1a7a07d5b8bc878c00c73.tar.gz