diff options
35 files changed, 1007 insertions, 71 deletions
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README index 8c3306e01d52..91f54ffa0077 100644 --- a/Documentation/sysctl/README +++ b/Documentation/sysctl/README @@ -69,6 +69,7 @@ proc/ <empty> sunrpc/ SUN Remote Procedure Call (NFS) vm/ memory management tuning buffer and cache management +user/ Per user per user namespace limits These are the subdirs I have on my system. There might be more or other subdirs in another setup. If you see another dir, I'd diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 302b5ed616a6..35e17f748ca7 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -265,6 +265,13 @@ aio-nr can grow to. ============================================================== +mount-max: + +This denotes the maximum number of mounts that may exist +in a mount namespace. + +============================================================== + 2. /proc/sys/fs/binfmt_misc ---------------------------------------------------------- diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt new file mode 100644 index 000000000000..1291c498f78f --- /dev/null +++ b/Documentation/sysctl/user.txt @@ -0,0 +1,66 @@ +Documentation for /proc/sys/user/* kernel version 4.9.0 + (c) 2016 Eric Biederman <ebiederm@xmission.com> + +============================================================== + +This file contains the documetation for the sysctl files in +/proc/sys/user. + +The files in this directory can be used to override the default +limits on the number of namespaces and other objects that have +per user per user namespace limits. + +The primary purpose of these limits is to stop programs that +malfunction and attempt to create a ridiculous number of objects, +before the malfunction becomes a system wide problem. It is the +intention that the defaults of these limits are set high enough that +no program in normal operation should run into these limits. + +The creation of per user per user namespace objects are charged to +the user in the user namespace who created the object and +verified to be below the per user limit in that user namespace. + +The creation of objects is also charged to all of the users +who created user namespaces the creation of the object happens +in (user namespaces can be nested) and verified to be below the per user +limits in the user namespaces of those users. + +This recursive counting of created objects ensures that creating a +user namespace does not allow a user to escape their current limits. + +Currently, these files are in /proc/sys/user: + +- max_cgroup_namespaces + + The maximum number of cgroup namespaces that any user in the current + user namespace may create. + +- max_ipc_namespaces + + The maximum number of ipc namespaces that any user in the current + user namespace may create. + +- max_mnt_namespaces + + The maximum number of mount namespaces that any user in the current + user namespace may create. + +- max_net_namespaces + + The maximum number of network namespaces that any user in the + current user namespace may create. + +- max_pid_namespaces + + The maximum number of pid namespaces that any user in the current + user namespace may create. + +- max_user_namespaces + + The maximum number of user namespaces that any user in the current + user namespace may create. + +- max_uts_namespaces + + The maximum number of user namespaces that any user in the current + user namespace may create. diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 431fd7ee3488..e44271dfceb6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, memcpy(&wq->name, &qstr, sizeof(struct qstr)); wq->dev = autofs4_get_dev(sbi); wq->ino = autofs4_get_ino(sbi); - wq->uid = current_uid(); - wq->gid = current_gid(); + wq->uid = current_real_cred()->uid; + wq->gid = current_real_cred()->gid; wq->pid = pid; wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ diff --git a/fs/mount.h b/fs/mount.h index 14db05d424f7..d2e25d7b64b3 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -10,9 +10,12 @@ struct mnt_namespace { struct mount * root; struct list_head list; struct user_namespace *user_ns; + struct ucounts *ucounts; u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; u64 event; + unsigned int mounts; /* # of mounts in the namespace */ + unsigned int pending_mounts; }; struct mnt_pcp { diff --git a/fs/namespace.c b/fs/namespace.c index 7bb2cda3bfef..db1b5a38864e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,9 @@ #include "pnode.h" #include "internal.h" +/* Maximum number of mounts in a mount namespace */ +unsigned int sysctl_mount_max __read_mostly = 100000; + static unsigned int m_hash_mask __read_mostly; static unsigned int m_hash_shift __read_mostly; static unsigned int mp_hash_mask __read_mostly; @@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); + n->mounts += n->pending_mounts; + n->pending_mounts = 0; + attach_shadowed(mnt, parent, shadows); touch_mnt_namespace(n); } @@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) propagate_umount(&tmp_list); while (!list_empty(&tmp_list)) { + struct mnt_namespace *ns; bool disconnect; p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); - __touch_mnt_namespace(p->mnt_ns); + ns = p->mnt_ns; + if (ns) { + ns->mounts--; + __touch_mnt_namespace(ns); + } p->mnt_ns = NULL; if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; @@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse) return 0; } +int count_mounts(struct mnt_namespace *ns, struct mount *mnt) +{ + unsigned int max = READ_ONCE(sysctl_mount_max); + unsigned int mounts = 0, old, pending, sum; + struct mount *p; + + for (p = mnt; p; p = next_mnt(p, mnt)) + mounts++; + + old = ns->mounts; + pending = ns->pending_mounts; + sum = old + pending; + if ((old > sum) || + (pending > sum) || + (max < sum) || + (mounts > (max - sum))) + return -ENOSPC; + + ns->pending_mounts = pending + mounts; + return 0; +} + /* * @source_mnt : mount tree to be attached * @nd : place the mount tree @source_mnt is attached @@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct path *parent_path) { HLIST_HEAD(tree_list); + struct mnt_namespace *ns = dest_mnt->mnt_ns; struct mount *child, *p; struct hlist_node *n; int err; + /* Is there space to add these mounts to the mount namespace? */ + if (!parent_path) { + err = count_mounts(ns, source_mnt); + if (err) + goto out; + } + if (IS_MNT_SHARED(dest_mnt)) { err = invent_group_ids(source_mnt, true); if (err) @@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt, out_cleanup_ids: while (!hlist_empty(&tree_list)) { child = hlist_entry(tree_list.first, struct mount, mnt_hash); + child->mnt_parent->mnt_ns->pending_mounts = 0; umount_tree(child, UMOUNT_SYNC); } unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); out: + ns->pending_mounts = 0; return err; } @@ -2719,9 +2762,20 @@ dput_out: return retval; } +static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); +} + +static void dec_mnt_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES); +} + static void free_mnt_ns(struct mnt_namespace *ns) { ns_free_inum(&ns->ns); + dec_mnt_namespaces(ns->ucounts); put_user_ns(ns->user_ns); kfree(ns); } @@ -2738,14 +2792,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) { struct mnt_namespace *new_ns; + struct ucounts *ucounts; int ret; + ucounts = inc_mnt_namespaces(user_ns); + if (!ucounts) + return ERR_PTR(-ENOSPC); + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); - if (!new_ns) + if (!new_ns) { + dec_mnt_namespaces(ucounts); return ERR_PTR(-ENOMEM); + } ret = ns_alloc_inum(&new_ns->ns); if (ret) { kfree(new_ns); + dec_mnt_namespaces(ucounts); return ERR_PTR(ret); } new_ns->ns.ops = &mntns_operations; @@ -2756,6 +2818,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) init_waitqueue_head(&new_ns->poll); new_ns->event = 0; new_ns->user_ns = get_user_ns(user_ns); + new_ns->ucounts = ucounts; + new_ns->mounts = 0; + new_ns->pending_mounts = 0; return new_ns; } @@ -2805,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, q = new; while (p) { q->mnt_ns = new_ns; + new_ns->mounts++; if (new_fs) { if (&p->mnt == new_fs->root.mnt) { new_fs->root.mnt = mntget(&q->mnt); @@ -2843,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; new_ns->root = mnt; + new_ns->mounts++; list_add(&mnt->mnt_list, &new_ns->list); } else { mntput(m); @@ -3348,10 +3415,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) return 0; } +static struct user_namespace *mntns_owner(struct ns_common *ns) +{ + return to_mnt_ns(ns)->user_ns; +} + const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, + .owner = mntns_owner, }; diff --git a/fs/nsfs.c b/fs/nsfs.c index 8f20d6016e20..30bb10034120 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -5,11 +5,16 @@ #include <linux/magic.h> #include <linux/ktime.h> #include <linux/seq_file.h> +#include <linux/user_namespace.h> +#include <linux/nsfs.h> static struct vfsmount *nsfs_mnt; +static long ns_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg); static const struct file_operations ns_file_operations = { .llseek = no_llseek, + .unlocked_ioctl = ns_ioctl, }; static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) @@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode) ns->ops->put(ns); } -void *ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops) +static void *__ns_get_path(struct path *path, struct ns_common *ns) { - struct vfsmount *mnt = mntget(nsfs_mnt); + struct vfsmount *mnt = nsfs_mnt; struct qstr qname = { .name = "", }; struct dentry *dentry; struct inode *inode; - struct ns_common *ns; unsigned long d; -again: - ns = ns_ops->get(task); - if (!ns) { - mntput(mnt); - return ERR_PTR(-ENOENT); - } rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) @@ -68,17 +65,16 @@ again: if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); - ns_ops->put(ns); + ns->ops->put(ns); got_it: - path->mnt = mnt; + path->mnt = mntget(mnt); path->dentry = dentry; return NULL; slow: rcu_read_unlock(); inode = new_inode_pseudo(mnt->mnt_sb); if (!inode) { - ns_ops->put(ns); - mntput(mnt); + ns->ops->put(ns); return ERR_PTR(-ENOMEM); } inode->i_ino = ns->inum; @@ -91,21 +87,96 @@ slow: dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); if (!dentry) { iput(inode); - mntput(mnt); return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); - dentry->d_fsdata = (void *)ns_ops; + dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { d_delete(dentry); /* make sure ->d_prune() does nothing */ dput(dentry); cpu_relax(); - goto again; + return ERR_PTR(-EAGAIN); } goto got_it; } +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct ns_common *ns; + void *ret; + +again: + ns = ns_ops->get(task); + if (!ns) + return ERR_PTR(-ENOENT); + + ret = __ns_get_path(path, ns); + if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN) + goto again; + return ret; +} + +static int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)) +{ + struct path path = {}; + struct file *f; + void *err; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; + + while (1) { + struct ns_common *relative; + + relative = get_ns(ns); + if (IS_ERR(relative)) { + put_unused_fd(fd); + return PTR_ERR(relative); + } + + err = __ns_get_path(&path, relative); + if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN) + continue; + break; + } + if (IS_ERR(err)) { + put_unused_fd(fd); + return PTR_ERR(err); + } + + f = dentry_open(&path, O_RDONLY, current_cred()); + path_put(&path); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + } else + fd_install(fd, f); + + return fd; +} + +static long ns_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct ns_common *ns = get_proc_ns(file_inode(filp)); + + switch (ioctl) { + case NS_GET_USERNS: + return open_related_ns(ns, ns_get_owner); + case NS_GET_PARENT: + if (!ns->ops->get_parent) + return -EINVAL; + return open_related_ns(ns, ns->ops->get_parent); + default: + return -ENOTTY; + } +} + int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops) { diff --git a/fs/pnode.c b/fs/pnode.c index 99899705b105..234a9ac49958 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -259,7 +259,7 @@ static int propagate_one(struct mount *m) read_sequnlock_excl(&mount_lock); } hlist_add_head(&child->mnt_hash, list); - return 0; + return count_mounts(m->mnt_ns, child); } /* diff --git a/fs/pnode.h b/fs/pnode.h index 0fcdbe7ca648..550f5a8b4fcf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); +int count_mounts(struct mnt_namespace *ns, struct mount *mnt); #endif /* _LINUX_PNODE_H */ diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 2ed3d71d4767..71025b9e2a4e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry, struct nsproxy *namespaces); + struct ctl_table **pentry); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); @@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head) } static struct ctl_table_set * -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +lookup_header_set(struct ctl_table_root *root) { struct ctl_table_set *set = &root->default_set; if (root->lookup) - set = root->lookup(root, namespaces); + set = root->lookup(root); return set; } @@ -496,7 +496,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; if (S_ISLNK(p->mode)) { - ret = sysctl_follow_link(&h, &p, current->nsproxy); + ret = sysctl_follow_link(&h, &p); err = ERR_PTR(ret); if (ret) goto out; @@ -664,7 +664,7 @@ static bool proc_sys_link_fill_cache(struct file *file, if (S_ISLNK(table->mode)) { /* It is not an error if we can not follow the link ignore it */ - int err = sysctl_follow_link(&head, &table, current->nsproxy); + int err = sysctl_follow_link(&head, &table); if (err) goto out; } @@ -981,7 +981,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) } static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry, struct nsproxy *namespaces) + struct ctl_table **pentry) { struct ctl_table_header *head; struct ctl_table_root *root; @@ -993,7 +993,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, ret = 0; spin_lock(&sysctl_lock); root = (*pentry)->data; - set = lookup_header_set(root, namespaces); + set = lookup_header_set(root); dir = xlate_dir(set, (*phead)->parent); if (IS_ERR(dir)) ret = PTR_ERR(dir); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a4414a11eea7..440a72164a11 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -644,6 +644,7 @@ struct cgroup_namespace { atomic_t count; struct ns_common ns; struct user_namespace *user_ns; + struct ucounts *ucounts; struct css_set *root_cset; }; diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index d10e54f03c09..848e5796400e 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -58,6 +58,7 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + struct ucounts *ucounts; struct ns_common ns; }; diff --git a/include/linux/mount.h b/include/linux/mount.h index 54a594d49733..1172cce949a4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(const char *name); +extern unsigned int sysctl_mount_max; + #endif /* _LINUX_MOUNT_H */ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 918b117a7cd3..34cce96741bc 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -40,6 +40,7 @@ struct pid_namespace { struct fs_pin *bacct; #endif struct user_namespace *user_ns; + struct ucounts *ucounts; struct work_struct proc_work; kgid_t pid_gid; int hide_pid; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index de0e7719d4c5..12cb8bd81d2d 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -18,6 +18,8 @@ struct proc_ns_operations { struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); + struct user_namespace *(*owner)(struct ns_common *ns); + struct ns_common *(*get_parent)(struct ns_common *ns); }; extern const struct proc_ns_operations netns_operations; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ecc3e07c6e63..adf4e51cf597 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -158,8 +158,7 @@ struct ctl_table_set { struct ctl_table_root { struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, - struct nsproxy *namespaces); + struct ctl_table_set *(*lookup)(struct ctl_table_root *root); void (*set_ownership)(struct ctl_table_header *head, struct ctl_table *table, kuid_t *uid, kgid_t *gid); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9217169c64cb..eb209d4523f5 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -22,6 +22,19 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED +struct ucounts; + +enum ucount_type { + UCOUNT_USER_NAMESPACES, + UCOUNT_PID_NAMESPACES, + UCOUNT_UTS_NAMESPACES, + UCOUNT_IPC_NAMESPACES, + UCOUNT_NET_NAMESPACES, + UCOUNT_MNT_NAMESPACES, + UCOUNT_CGROUP_NAMESPACES, + UCOUNT_COUNTS, +}; + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -39,10 +52,30 @@ struct user_namespace { struct key *persistent_keyring_register; struct rw_semaphore persistent_keyring_register_sem; #endif + struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#endif + struct ucounts *ucounts; + int ucount_max[UCOUNT_COUNTS]; +}; + +struct ucounts { + struct hlist_node node; + struct user_namespace *ns; + kuid_t uid; + atomic_t count; + atomic_t ucount[UCOUNT_COUNTS]; }; extern struct user_namespace init_user_ns; +bool setup_userns_sysctls(struct user_namespace *ns); +void retire_userns_sysctls(struct user_namespace *ns); +struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); +void dec_ucount(struct ucounts *ucounts, enum ucount_type type); + #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -54,12 +87,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); -extern void free_user_ns(struct user_namespace *ns); +extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && atomic_dec_and_test(&ns->count)) - free_user_ns(ns); + __put_user_ns(ns); } struct seq_operations; @@ -73,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool current_in_userns(const struct user_namespace *target_ns); + +struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -106,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } + +static inline struct ns_common *ns_get_owner(struct ns_common *ns) +{ + return ERR_PTR(-EPERM); +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 5093f58ae192..60f0bb83b313 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -24,6 +24,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + struct ucounts *ucounts; struct ns_common ns; }; extern struct uts_namespace init_uts_ns; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0933c7455a30..fc4f757107df 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,6 +60,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + struct ucounts *ucounts; spinlock_t nsid_lock; struct idr netns_ids; diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..3af617230d1b --- /dev/null +++ b/include/uapi/linux/nsfs.h @@ -0,0 +1,13 @@ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) + +#endif /* __LINUX_NSFS_H */ diff --git a/ipc/namespace.c b/ipc/namespace.c index d87e6baa1323..0abdea496493 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -16,39 +16,61 @@ #include "util.h" +static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); +} + +static void dec_ipc_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES); +} + static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, struct ipc_namespace *old_ns) { struct ipc_namespace *ns; + struct ucounts *ucounts; int err; + err = -ENOSPC; + ucounts = inc_ipc_namespaces(user_ns); + if (!ucounts) + goto fail; + + err = -ENOMEM; ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); if (ns == NULL) - return ERR_PTR(-ENOMEM); + goto fail_dec; err = ns_alloc_inum(&ns->ns); - if (err) { - kfree(ns); - return ERR_PTR(err); - } + if (err) + goto fail_free; ns->ns.ops = &ipcns_operations; atomic_set(&ns->count, 1); ns->user_ns = get_user_ns(user_ns); + ns->ucounts = ucounts; err = mq_init_ns(ns); |
