summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/README1
-rw-r--r--Documentation/sysctl/fs.txt7
-rw-r--r--Documentation/sysctl/user.txt66
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c77
-rw-r--r--fs/nsfs.c105
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/proc_sysctl.c14
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/ipc_namespace.h1
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/pid_namespace.h1
-rw-r--r--include/linux/proc_ns.h2
-rw-r--r--include/linux/sysctl.h3
-rw-r--r--include/linux/user_namespace.h44
-rw-r--r--include/linux/utsname.h1
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/uapi/linux/nsfs.h13
-rw-r--r--ipc/namespace.c51
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cgroup.c24
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/pid_namespace.c50
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/ucount.c235
-rw-r--r--kernel/user_namespace.c99
-rw-r--r--kernel/utsname.c40
-rw-r--r--net/core/net_namespace.c28
-rw-r--r--net/sysctl_net.c4
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/nsfs/Makefile12
-rw-r--r--tools/testing/selftests/nsfs/owner.c91
-rw-r--r--tools/testing/selftests/nsfs/pidns.c78
35 files changed, 1007 insertions, 71 deletions
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README
index 8c3306e01d52..91f54ffa0077 100644
--- a/Documentation/sysctl/README
+++ b/Documentation/sysctl/README
@@ -69,6 +69,7 @@ proc/ <empty>
sunrpc/ SUN Remote Procedure Call (NFS)
vm/ memory management tuning
buffer and cache management
+user/ Per user per user namespace limits
These are the subdirs I have on my system. There might be more
or other subdirs in another setup. If you see another dir, I'd
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 302b5ed616a6..35e17f748ca7 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -265,6 +265,13 @@ aio-nr can grow to.
==============================================================
+mount-max:
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.
+
+==============================================================
+
2. /proc/sys/fs/binfmt_misc
----------------------------------------------------------
diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt
new file mode 100644
index 000000000000..1291c498f78f
--- /dev/null
+++ b/Documentation/sysctl/user.txt
@@ -0,0 +1,66 @@
+Documentation for /proc/sys/user/* kernel version 4.9.0
+ (c) 2016 Eric Biederman <ebiederm@xmission.com>
+
+==============================================================
+
+This file contains the documetation for the sysctl files in
+/proc/sys/user.
+
+The files in this directory can be used to override the default
+limits on the number of namespaces and other objects that have
+per user per user namespace limits.
+
+The primary purpose of these limits is to stop programs that
+malfunction and attempt to create a ridiculous number of objects,
+before the malfunction becomes a system wide problem. It is the
+intention that the defaults of these limits are set high enough that
+no program in normal operation should run into these limits.
+
+The creation of per user per user namespace objects are charged to
+the user in the user namespace who created the object and
+verified to be below the per user limit in that user namespace.
+
+The creation of objects is also charged to all of the users
+who created user namespaces the creation of the object happens
+in (user namespaces can be nested) and verified to be below the per user
+limits in the user namespaces of those users.
+
+This recursive counting of created objects ensures that creating a
+user namespace does not allow a user to escape their current limits.
+
+Currently, these files are in /proc/sys/user:
+
+- max_cgroup_namespaces
+
+ The maximum number of cgroup namespaces that any user in the current
+ user namespace may create.
+
+- max_ipc_namespaces
+
+ The maximum number of ipc namespaces that any user in the current
+ user namespace may create.
+
+- max_mnt_namespaces
+
+ The maximum number of mount namespaces that any user in the current
+ user namespace may create.
+
+- max_net_namespaces
+
+ The maximum number of network namespaces that any user in the
+ current user namespace may create.
+
+- max_pid_namespaces
+
+ The maximum number of pid namespaces that any user in the current
+ user namespace may create.
+
+- max_user_namespaces
+
+ The maximum number of user namespaces that any user in the current
+ user namespace may create.
+
+- max_uts_namespaces
+
+ The maximum number of user namespaces that any user in the current
+ user namespace may create.
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 431fd7ee3488..e44271dfceb6 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_uid();
- wq->gid = current_gid();
+ wq->uid = current_real_cred()->uid;
+ wq->gid = current_real_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..d2e25d7b64b3 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,9 +10,12 @@ struct mnt_namespace {
struct mount * root;
struct list_head list;
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
+ unsigned int mounts; /* # of mounts in the namespace */
+ unsigned int pending_mounts;
};
struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..db1b5a38864e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
#include "pnode.h"
#include "internal.h"
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
+ n->mounts += n->pending_mounts;
+ n->pending_mounts = 0;
+
attach_shadowed(mnt, parent, shadows);
touch_mnt_namespace(n);
}
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
propagate_umount(&tmp_list);
while (!list_empty(&tmp_list)) {
+ struct mnt_namespace *ns;
bool disconnect;
p = list_first_entry(&tmp_list, struct mount, mnt_list);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
- __touch_mnt_namespace(p->mnt_ns);
+ ns = p->mnt_ns;
+ if (ns) {
+ ns->mounts--;
+ __touch_mnt_namespace(ns);
+ }
p->mnt_ns = NULL;
if (how & UMOUNT_SYNC)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
return 0;
}
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+ unsigned int max = READ_ONCE(sysctl_mount_max);
+ unsigned int mounts = 0, old, pending, sum;
+ struct mount *p;
+
+ for (p = mnt; p; p = next_mnt(p, mnt))
+ mounts++;
+
+ old = ns->mounts;
+ pending = ns->pending_mounts;
+ sum = old + pending;
+ if ((old > sum) ||
+ (pending > sum) ||
+ (max < sum) ||
+ (mounts > (max - sum)))
+ return -ENOSPC;
+
+ ns->pending_mounts = pending + mounts;
+ return 0;
+}
+
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct path *parent_path)
{
HLIST_HEAD(tree_list);
+ struct mnt_namespace *ns = dest_mnt->mnt_ns;
struct mount *child, *p;
struct hlist_node *n;
int err;
+ /* Is there space to add these mounts to the mount namespace? */
+ if (!parent_path) {
+ err = count_mounts(ns, source_mnt);
+ if (err)
+ goto out;
+ }
+
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
out_cleanup_ids:
while (!hlist_empty(&tree_list)) {
child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+ child->mnt_parent->mnt_ns->pending_mounts = 0;
umount_tree(child, UMOUNT_SYNC);
}
unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
out:
+ ns->pending_mounts = 0;
return err;
}
@@ -2719,9 +2762,20 @@ dput_out:
return retval;
}
+static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
+}
+
static void free_mnt_ns(struct mnt_namespace *ns)
{
ns_free_inum(&ns->ns);
+ dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -2738,14 +2792,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
struct mnt_namespace *new_ns;
+ struct ucounts *ucounts;
int ret;
+ ucounts = inc_mnt_namespaces(user_ns);
+ if (!ucounts)
+ return ERR_PTR(-ENOSPC);
+
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
- if (!new_ns)
+ if (!new_ns) {
+ dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
+ }
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
+ dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
new_ns->ns.ops = &mntns_operations;
@@ -2756,6 +2818,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
new_ns->user_ns = get_user_ns(user_ns);
+ new_ns->ucounts = ucounts;
+ new_ns->mounts = 0;
+ new_ns->pending_mounts = 0;
return new_ns;
}
@@ -2805,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = new;
while (p) {
q->mnt_ns = new_ns;
+ new_ns->mounts++;
if (new_fs) {
if (&p->mnt == new_fs->root.mnt) {
new_fs->root.mnt = mntget(&q->mnt);
@@ -2843,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
+ new_ns->mounts++;
list_add(&mnt->mnt_list, &new_ns->list);
} else {
mntput(m);
@@ -3348,10 +3415,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+ return to_mnt_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
+ .owner = mntns_owner,
};
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d6016e20..30bb10034120 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
+#include <linux/user_namespace.h>
+#include <linux/nsfs.h>
static struct vfsmount *nsfs_mnt;
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
+ .unlocked_ioctl = ns_ioctl,
};
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}
-void *ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops)
+static void *__ns_get_path(struct path *path, struct ns_common *ns)
{
- struct vfsmount *mnt = mntget(nsfs_mnt);
+ struct vfsmount *mnt = nsfs_mnt;
struct qstr qname = { .name = "", };
struct dentry *dentry;
struct inode *inode;
- struct ns_common *ns;
unsigned long d;
-again:
- ns = ns_ops->get(task);
- if (!ns) {
- mntput(mnt);
- return ERR_PTR(-ENOENT);
- }
rcu_read_lock();
d = atomic_long_read(&ns->stashed);
if (!d)
@@ -68,17 +65,16 @@ again:
if (!lockref_get_not_dead(&dentry->d_lockref))
goto slow;
rcu_read_unlock();
- ns_ops->put(ns);
+ ns->ops->put(ns);
got_it:
- path->mnt = mnt;
+ path->mnt = mntget(mnt);
path->dentry = dentry;
return NULL;
slow:
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
- ns_ops->put(ns);
- mntput(mnt);
+ ns->ops->put(ns);
return ERR_PTR(-ENOMEM);
}
inode->i_ino = ns->inum;
@@ -91,21 +87,96 @@ slow:
dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
if (!dentry) {
iput(inode);
- mntput(mnt);
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
- dentry->d_fsdata = (void *)ns_ops;
+ dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
- goto again;
+ return ERR_PTR(-EAGAIN);
}
goto got_it;
}
+void *ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct ns_common *ns;
+ void *ret;
+
+again:
+ ns = ns_ops->get(task);
+ if (!ns)
+ return ERR_PTR(-ENOENT);
+
+ ret = __ns_get_path(path, ns);
+ if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
+ goto again;
+ return ret;
+}
+
+static int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct path path = {};
+ struct file *f;
+ void *err;
+ int fd;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ while (1) {
+ struct ns_common *relative;
+
+ relative = get_ns(ns);
+ if (IS_ERR(relative)) {
+ put_unused_fd(fd);
+ return PTR_ERR(relative);
+ }
+
+ err = __ns_get_path(&path, relative);
+ if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
+ continue;
+ break;
+ }
+ if (IS_ERR(err)) {
+ put_unused_fd(fd);
+ return PTR_ERR(err);
+ }
+
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ path_put(&path);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(f);
+ } else
+ fd_install(fd, f);
+
+ return fd;
+}
+
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct ns_common *ns = get_proc_ns(file_inode(filp));
+
+ switch (ioctl) {
+ case NS_GET_USERNS:
+ return open_related_ns(ns, ns_get_owner);
+ case NS_GET_PARENT:
+ if (!ns->ops->get_parent)
+ return -EINVAL;
+ return open_related_ns(ns, ns->ops->get_parent);
+ default:
+ return -ENOTTY;
+ }
+}
+
int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..234a9ac49958 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m)
read_sequnlock_excl(&mount_lock);
}
hlist_add_head(&child->mnt_hash, list);
- return 0;
+ return count_mounts(m->mnt_ns, child);
}
/*
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..550f5a8b4fcf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2ed3d71d4767..71025b9e2a4e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
static void drop_sysctl_table(struct ctl_table_header *header);
static int sysctl_follow_link(struct ctl_table_header **phead,
- struct ctl_table **pentry, struct nsproxy *namespaces);
+ struct ctl_table **pentry);
static int insert_links(struct ctl_table_header *head);
static void put_links(struct ctl_table_header *header);
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
}
static struct ctl_table_set *
-lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+lookup_header_set(struct ctl_table_root *root)
{
struct ctl_table_set *set = &root->default_set;
if (root->lookup)
- set = root->lookup(root, namespaces);
+ set = root->lookup(root);
return set;
}
@@ -496,7 +496,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
if (S_ISLNK(p->mode)) {
- ret = sysctl_follow_link(&h, &p, current->nsproxy);
+ ret = sysctl_follow_link(&h, &p);
err = ERR_PTR(ret);
if (ret)
goto out;
@@ -664,7 +664,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
if (S_ISLNK(table->mode)) {
/* It is not an error if we can not follow the link ignore it */
- int err = sysctl_follow_link(&head, &table, current->nsproxy);
+ int err = sysctl_follow_link(&head, &table);
if (err)
goto out;
}
@@ -981,7 +981,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
}
static int sysctl_follow_link(struct ctl_table_header **phead,
- struct ctl_table **pentry, struct nsproxy *namespaces)
+ struct ctl_table **pentry)
{
struct ctl_table_header *head;
struct ctl_table_root *root;
@@ -993,7 +993,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
ret = 0;
spin_lock(&sysctl_lock);
root = (*pentry)->data;
- set = lookup_header_set(root, namespaces);
+ set = lookup_header_set(root);
dir = xlate_dir(set, (*phead)->parent);
if (IS_ERR(dir))
ret = PTR_ERR(dir);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a4414a11eea7..440a72164a11 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -644,6 +644,7 @@ struct cgroup_namespace {
atomic_t count;
struct ns_common ns;
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
struct css_set *root_cset;
};
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index d10e54f03c09..848e5796400e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -58,6 +58,7 @@ struct ipc_namespace {
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
struct ns_common ns;
};
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 54a594d49733..1172cce949a4 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);
extern dev_t name_to_dev_t(const char *name);
+extern unsigned int sysctl_mount_max;
+
#endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 918b117a7cd3..34cce96741bc 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -40,6 +40,7 @@ struct pid_namespace {
struct fs_pin *bacct;
#endif
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
struct work_struct proc_work;
kgid_t pid_gid;
int hide_pid;
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index de0e7719d4c5..12cb8bd81d2d 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -18,6 +18,8 @@ struct proc_ns_operations {
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+ struct user_namespace *(*owner)(struct ns_common *ns);
+ struct ns_common *(*get_parent)(struct ns_common *ns);
};
extern const struct proc_ns_operations netns_operations;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ecc3e07c6e63..adf4e51cf597 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -158,8 +158,7 @@ struct ctl_table_set {
struct ctl_table_root {
struct ctl_table_set default_set;
- struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
- struct nsproxy *namespaces);
+ struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
void (*set_ownership)(struct ctl_table_header *head,
struct ctl_table *table,
kuid_t *uid, kgid_t *gid);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..eb209d4523f5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,19 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
+struct ucounts;
+
+enum ucount_type {
+ UCOUNT_USER_NAMESPACES,
+ UCOUNT_PID_NAMESPACES,
+ UCOUNT_UTS_NAMESPACES,
+ UCOUNT_IPC_NAMESPACES,
+ UCOUNT_NET_NAMESPACES,
+ UCOUNT_MNT_NAMESPACES,
+ UCOUNT_CGROUP_NAMESPACES,
+ UCOUNT_COUNTS,
+};
+
struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
@@ -39,10 +52,30 @@ struct user_namespace {
struct key *persistent_keyring_register;
struct rw_semaphore persistent_keyring_register_sem;
#endif
+ struct work_struct work;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_set set;
+ struct ctl_table_header *sysctls;
+#endif
+ struct ucounts *ucounts;
+ int ucount_max[UCOUNT_COUNTS];
+};
+
+struct ucounts {
+ struct hlist_node node;
+ struct user_namespace *ns;
+ kuid_t uid;
+ atomic_t count;
+ atomic_t ucount[UCOUNT_COUNTS];
};
extern struct user_namespace init_user_ns;
+bool setup_userns_sysctls(struct user_namespace *ns);
+void retire_userns_sysctls(struct user_namespace *ns);
+struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
+
#ifdef CONFIG_USER_NS
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -54,12 +87,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
extern int create_user_ns(struct cred *new);
extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
-extern void free_user_ns(struct user_namespace *ns);
+extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
if (ns && atomic_dec_and_test(&ns->count))
- free_user_ns(ns);
+ __put_user_ns(ns);
}
struct seq_operations;
@@ -73,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
extern bool current_in_userns(const struct user_namespace *target_ns);
+
+struct ns_common *ns_get_owner(struct ns_common *ns);
#else
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -106,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
{
return true;
}
+
+static inline struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+ return ERR_PTR(-EPERM);
+}
#endif
#endif /* _LINUX_USER_H */
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 5093f58ae192..60f0bb83b313 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -24,6 +24,7 @@ struct uts_namespace {
struct kref kref;
struct new_utsname name;
struct user_namespace *user_ns;
+ struct ucounts *ucounts;
struct ns_common ns;
};
extern struct uts_namespace init_uts_ns;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0933c7455a30..fc4f757107df 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,6 +60,7 @@ struct net {
struct list_head exit_list; /* Use only net_mutex */
struct user_namespace *user_ns; /* Owning user namespace */
+ struct ucounts *ucounts;
spinlock_t nsid_lock;
struct idr netns_ids;
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..3af617230d1b
--- /dev/null
+++ b/include/uapi/linux/nsfs.h
@@ -0,0 +1,13 @@
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+
+#endif /* __LINUX_NSFS_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index d87e6baa1323..0abdea496493 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,39 +16,61 @@
#include "util.h"
+static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
+}
+
+static void dec_ipc_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES);
+}
+
static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
struct ipc_namespace *old_ns)
{
struct ipc_namespace *ns;
+ struct ucounts *ucounts;
int err;
+ err = -ENOSPC;
+ ucounts = inc_ipc_namespaces(user_ns);
+ if (!ucounts)
+ goto fail;
+
+ err = -ENOMEM;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL)
- return ERR_PTR(-ENOMEM);
+ goto fail_dec;
err = ns_alloc_inum(&ns->ns);
- if (err) {
- kfree(ns);
- return ERR_PTR(err);
- }
+ if (err)
+ goto fail_free;
ns->ns.ops = &ipcns_operations;
atomic_set(&ns->count, 1);
ns->user_ns = get_user_ns(user_ns);
+ ns->ucounts = ucounts;
err = mq_init_ns(ns);