diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 15:44:47 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 15:44:47 -0800 |
| commit | 6a2b60b17b3e48a418695a94bd2420f6ab32e519 (patch) | |
| tree | 54b7792fa68b8890f710fa6398b6ba8626a039a8 | |
| parent | 9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (diff) | |
| parent | 98f842e675f96ffac96e6c50315790912b2812be (diff) | |
| download | linux-6a2b60b17b3e48a418695a94bd2420f6ab32e519.tar.gz linux-6a2b60b17b3e48a418695a94bd2420f6ab32e519.tar.bz2 linux-6a2b60b17b3e48a418695a94bd2420f6ab32e519.zip | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace changes from Eric Biederman:
"While small this set of changes is very significant with respect to
containers in general and user namespaces in particular. The user
space interface is now complete.
This set of changes adds support for unprivileged users to create user
namespaces and as a user namespace root to create other namespaces.
The tyranny of supporting suid root preventing unprivileged users from
using cool new kernel features is broken.
This set of changes completes the work on setns, adding support for
the pid, user, mount namespaces.
This set of changes includes a bunch of basic pid namespace
cleanups/simplifications. Of particular significance is the rework of
the pid namespace cleanup so it no longer requires sending out
tendrils into all kinds of unexpected cleanup paths for operation. At
least one case of broken error handling is fixed by this cleanup.
The files under /proc/<pid>/ns/ have been converted from regular files
to magic symlinks which prevents incorrect caching by the VFS,
ensuring the files always refer to the namespace the process is
currently using and ensuring that the ptrace_mayaccess permission
checks are always applied.
The files under /proc/<pid>/ns/ have been given stable inode numbers
so it is now possible to see if different processes share the same
namespaces.
Through the David Miller's net tree are changes to relax many of the
permission checks in the networking stack to allowing the user
namespace root to usefully use the networking stack. Similar changes
for the mount namespace and the pid namespace are coming through my
tree.
Two small changes to add user namespace support were commited here adn
in David Miller's -net tree so that I could complete the work on the
/proc/<pid>/ns/ files in this tree.
Work remains to make it safe to build user namespaces and 9p, afs,
ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs so the
Kconfig guard remains in place preventing that user namespaces from
being built when any of those filesystems are enabled.
Future design work remains to allow root users outside of the initial
user namespace to mount more than just /proc and /sys."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits)
proc: Usable inode numbers for the namespace file descriptors.
proc: Fix the namespace inode permission checks.
proc: Generalize proc inode allocation
userns: Allow unprivilged mounts of proc and sysfs
userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file
procfs: Print task uids and gids in the userns that opened the proc file
userns: Implement unshare of the user namespace
userns: Implent proc namespace operations
userns: Kill task_user_ns
userns: Make create_new_namespaces take a user_ns parameter
userns: Allow unprivileged use of setns.
userns: Allow unprivileged users to create new namespaces
userns: Allow setting a userns mapping to your current uid.
userns: Allow chown and setgid preservation
userns: Allow unprivileged users to create user namespaces.
userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped
userns: fix return value on mntns_install() failure
vfs: Allow unprivileged manipulation of the mount namespace.
vfs: Only support slave subtrees across different user namespaces
vfs: Add a user namespace reference from struct mnt_namespace
...
59 files changed, 996 insertions, 451 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 965d381abd75..25db92a8e1cf 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - current->nsproxy->pid_ns->last_pid); + task_active_pid_ns(current)->last_pid); return 0; } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 49e3b49e552f..4bd82ac0210f 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -123,7 +123,7 @@ void mconsole_log(struct mc_request *req) void mconsole_proc(struct mc_request *req) { - struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; + struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt; char *buf; int len; struct file *file; diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 4a36e9ab8cf7..2d12e8a1f82e 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -35,6 +35,7 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/slab.h> +#include <linux/pid_namespace.h> #include "binder.h" #include "binder_trace.h" @@ -2320,7 +2321,7 @@ retry: if (t->from) { struct task_struct *sender = t->from->proc->tsk; tr.sender_pid = task_tgid_nr_ns(sender, - current->nsproxy->pid_ns); + task_active_pid_ns(current)); } else { tr.sender_pid = 0; } diff --git a/fs/attr.c b/fs/attr.c index cce7df53b694..1449adb14ef6 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && (!uid_eq(current_fsuid(), inode->i_uid) || - !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) + !uid_eq(attr->ia_uid, inode->i_uid)) && + !inode_capable(inode, CAP_CHOWN)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && (!uid_eq(current_fsuid(), inode->i_uid) || (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && - !capable(CAP_CHOWN)) + !inode_capable(inode, CAP_CHOWN)) return -EPERM; /* Make sure a caller can chmod. */ @@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) return -EPERM; /* Also check the setgid bit! */ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : - inode->i_gid) && !capable(CAP_FSETID)) + inode->i_gid) && + !inode_capable(inode, CAP_FSETID)) attr->ia_mode &= ~S_ISGID; } @@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + if (!in_group_p(inode->i_gid) && + !inode_capable(inode, CAP_FSETID)) mode &= ~S_ISGID; inode->i_mode = mode; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 908e18455413..b785e7707959 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -74,8 +74,8 @@ struct autofs_info { unsigned long last_used; atomic_t count; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ @@ -89,8 +89,8 @@ struct autofs_wait_queue { struct qstr name; u32 dev; u64 ino; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; pid_t pid; pid_t tgid; /* This is for status reporting upon return */ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index a16214109d31..9f68a37bb2b2 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp, err = 0; autofs4_expire_wait(path.dentry); spin_lock(&sbi->fs_lock); - param->requester.uid = ino->uid; - param->requester.gid = ino->gid; + param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); + param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); spin_unlock(&sbi->fs_lock); } path_put(&path); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 8a4fed8ead30..b104726e2d0a 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) void autofs4_clean_ino(struct autofs_info *ino) { - ino->uid = 0; - ino->gid = 0; + ino->uid = GLOBAL_ROOT_UID; + ino->gid = GLOBAL_ROOT_GID; ino->last_used = jiffies; } @@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) return 0; seq_printf(m, ",fd=%d", sbi->pipefd); - if (root_inode->i_uid != 0) - seq_printf(m, ",uid=%u", root_inode->i_uid); - if (root_inode->i_gid != 0) - seq_printf(m, ",gid=%u", root_inode->i_gid); + if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, root_inode->i_uid)); + if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, root_inode->i_gid)); seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); @@ -126,7 +128,7 @@ static const match_table_t tokens = { {Opt_err, NULL} }; -static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, +static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) { char *p; @@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, case Opt_uid: if (match_int(args, &option)) return 1; - *uid = option; + *uid = make_kuid(current_user_ns(), option); + if (!uid_valid(*uid)) + return 1; break; case Opt_gid: if (match_int(args, &option)) return 1; - *gid = option; + *gid = make_kgid(current_user_ns(), option); + if (!gid_valid(*gid)) + return 1; break; case Opt_pgrp: if (match_int(args, &option)) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index dce436e595c1..03bc1d347d8e 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, case autofs_ptype_expire_direct: { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; + struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns; pktsz = sizeof(*packet); @@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, packet->name[wq->name.len] = '\0'; packet->dev = wq->dev; packet->ino = wq->ino; - packet->uid = wq->uid; - packet->gid = wq->gid; + packet->uid = from_kuid_munged(user_ns, wq->uid); + packet->gid = from_kgid_munged(user_ns, wq->gid); packet->pid = wq->pid; packet->tgid = wq->tgid; break; diff --git a/fs/exec.c b/fs/exec.c index 721a29929511..b71b08ce7120 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs) { + !current->no_new_privs && + kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && + kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { /* Set-uid? */ if (mode & S_ISUID) { - if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid)) - return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = inode->i_uid; - } /* Set-gid? */ @@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm) * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) - return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = inode->i_gid; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8c23fa7a91e6..c16335315e5d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req) static void fuse_req_init_context(struct fuse_req *req) { - req->in.h.uid = current_fsuid(); - req->in.h.gid = current_fsgid(); + req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid()); + req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid()); req->in.h.pid = current->pid; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 324bc0850534..b7c09f9eb40c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); stat->nlink = attr->nlink; - stat->uid = attr->uid; - stat->gid = attr->gid; + stat->uid = make_kuid(&init_user_ns, attr->uid); + stat->gid = make_kgid(&init_user_ns, attr->gid); stat->rdev = inode->i_rdev; stat->atime.tv_sec = attr->atime; stat->atime.tv_nsec = attr->atimensec; @@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) rcu_read_lock(); ret = 0; cred = __task_cred(task); - if (cred->euid == fc->user_id && - cred->suid == fc->user_id && - cred->uid == fc->user_id && - cred->egid == fc->group_id && - cred->sgid == fc->group_id && - cred->gid == fc->group_id) + if (uid_eq(cred->euid, fc->user_id) && + uid_eq(cred->suid, fc->user_id) && + uid_eq(cred->uid, fc->user_id) && + gid_eq(cred->egid, fc->group_id) && + gid_eq(cred->sgid, fc->group_id) && + gid_eq(cred->gid, fc->group_id)) ret = 1; rcu_read_unlock(); @@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) if (ivalid & ATTR_MODE) arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; if (ivalid & ATTR_UID) - arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; + arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid); if (ivalid & ATTR_GID) - arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; + arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid); if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; if (ivalid & ATTR_ATIME) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e24dd74e3068..e105a53fc72d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -333,10 +333,10 @@ struct fuse_conn { atomic_t count; /** The user id for this mount */ - uid_t user_id; + kuid_t user_id; /** The group id for this mount */ - gid_t group_id; + kgid_t group_id; /** The fuse mount flags for this mount */ unsigned flags; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0eda124cffb..73ca6b72beaf 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh, struct fuse_mount_data { int fd; unsigned rootmode; - unsigned user_id; - unsigned group_id; + kuid_t user_id; + kgid_t group_id; unsigned fd_present:1; unsigned rootmode_present:1; unsigned user_id_present:1; @@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); - inode->i_uid = attr->uid; - inode->i_gid = attr->gid; + inode->i_uid = make_kuid(&init_user_ns, attr->uid); + inode->i_gid = make_kgid(&init_user_ns, attr->gid); inode->i_blocks = attr->blocks; inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; @@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) case OPT_USER_ID: if (match_int(&args[0], &value)) return 0; - d->user_id = value; + d->user_id = make_kuid(current_user_ns(), value); + if (!uid_valid(d->user_id)) + return 0; d->user_id_present = 1; break; case OPT_GROUP_ID: if (match_int(&args[0], &value)) return 0; - d->group_id = value; + d->group_id = make_kgid(current_user_ns(), value); + if (!gid_valid(d->group_id)) + return 0; d->group_id_present = 1; break; @@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); - seq_printf(m, ",user_id=%u", fc->user_id); - seq_printf(m, ",group_id=%u", fc->group_id); + seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id)); + seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id)); if (fc->flags & FUSE_DEFAULT_PERMISSIONS) seq_puts(m, ",default_permissions"); if (fc->flags & FUSE_ALLOW_OTHER) @@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!file) goto err; - if (file->f_op != &fuse_dev_operations) + if ((file->f_op != &fuse_dev_operations) || + (file->f_cred->user_ns != &init_user_ns)) goto err_fput; fc = kmalloc(sizeof(*fc), GFP_KERNEL); diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 78f21f8dc2ec..43b315f2002b 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) struct vfsmount *proc_mnt; int err = -ENOENT; - proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); + proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt); if (IS_ERR(proc_mnt)) goto out; diff --git a/fs/mount.h b/fs/mount.h index 4f291f9de641..cd5007980400 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,8 +4,11 @@ struct mnt_namespace { atomic_t count; + unsigned int proc_inum; struct mount * root; struct list_head list; + struct user_namespace *user_ns; + u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; int event; }; diff --git a/fs/namespace.c b/fs/namespace.c index 24960626bb6b..c1bbe86f4920 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/capability.h> #include <linux/mnt_namespace.h> +#include <linux/user_namespace.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/idr.h> @@ -20,6 +21,7 @@ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> +#include <linux/proc_fs.h> #include "pnode.h" #include "internal.h" @@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (!mnt) return ERR_PTR(-ENOMEM); - if (flag & (CL_SLAVE | CL_PRIVATE)) + if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else mnt->mnt_group_id = old->mnt_group_id; @@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, list_add_tail(&mnt->mnt_instance, &sb->s_mounts); br_write_unlock(&vfsmount_lock); - if (flag & CL_SLAVE) { + if ((flag & CL_SLAVE) || + ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); mnt->mnt_master = old; CLEAR_MNT_SHARED(mnt); @@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) goto dput_and_out; retval = do_umount(mnt, flags); @@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static int mount_is_safe(struct path *path) { - if (capable(CAP_SYS_ADMIN)) + if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return 0; return -EPERM; #ifdef notyet @@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path) #endif } +static bool mnt_ns_loop(struct path *path) +{ + /* Could bind mounting the mount namespace inode cause a + * mount namespace loop? + */ + struct inode *inode = path->dentry->d_inode; + struct proc_inode *ei; + struct mnt_namespace *mnt_ns; + + if (!proc_ns_inode(inode)) + return false; + + ei = PROC_I(inode); + if (ei->ns_ops != &mntns_operations) + return false; + + mnt_ns = ei->ns; + return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; +} + struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, int flag) |
