summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-01-06 11:02:29 +0100
committerIngo Molnar <mingo@kernel.org>2016-01-06 11:02:29 +0100
commit567bee2803cb46caeb6011de5b738fde33dc3896 (patch)
tree05bab01377bffa356bfbe06c4b6193b23b7c24ca /kernel
parentaa0b7ae06387d40a988ce16a189082dee6e570bc (diff)
parent093e5840ae76f1082633503964d035f40ed0216d (diff)
downloadlinux-567bee2803cb46caeb6011de5b738fde33dc3896.tar.gz
linux-567bee2803cb46caeb6011de5b738fde33dc3896.tar.bz2
linux-567bee2803cb46caeb6011de5b738fde33dc3896.zip
Merge branch 'sched/urgent' into sched/core, to pick up fixes before merging new patches
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c10
-rw-r--r--kernel/bpf/hashtab.c34
-rw-r--r--kernel/bpf/inode.c6
-rw-r--r--kernel/bpf/syscall.c40
-rw-r--r--kernel/bpf/verifier.c3
-rw-r--r--kernel/cgroup.c99
-rw-r--r--kernel/cgroup_freezer.c23
-rw-r--r--kernel/cgroup_pids.c77
-rw-r--r--kernel/cpuset.c33
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c90
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/irq_work.c2
-rw-r--r--kernel/jump_label.c2
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/osq_lock.c8
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/sched/clock.c2
-rw-r--r--kernel/sched/core.c12
-rw-r--r--kernel/sched/fair.c4
-rw-r--r--kernel/sched/wait.c20
-rw-r--r--kernel/stop_machine.c4
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace_event_perf.c2
-rw-r--r--kernel/trace/trace_events.c16
-rw-r--r--kernel/trace/trace_printk.c1
29 files changed, 323 insertions, 206 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e06c6b..b0799bced518 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
attr->value_size == 0)
return ERR_PTR(-EINVAL);
+ if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return ERR_PTR(-E2BIG);
+
elem_size = round_up(attr->value_size, 8);
/* check round_up into zero and u32 overflow */
if (elem_size == 0 ||
- attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+ attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
return ERR_PTR(-ENOMEM);
array_size = sizeof(*array) + attr->max_entries * elem_size;
@@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
/* all elements already exist */
return -EEXIST;
- memcpy(array->value + array->elem_size * index, value, array->elem_size);
+ memcpy(array->value + array->elem_size * index, value, map->value_size);
return 0;
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 19909b22b4f8..34777b3746fa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
goto free_htab;
- err = -ENOMEM;
+ if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
+ MAX_BPF_STACK - sizeof(struct htab_elem))
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements via bpf syscall. This check also makes
+ * sure that the elem_size doesn't overflow and it's
+ * kmalloc-able later in htab_map_update_elem()
+ */
+ goto free_htab;
+
+ htab->elem_size = sizeof(struct htab_elem) +
+ round_up(htab->map.key_size, 8) +
+ htab->map.value_size;
+
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
goto free_htab;
+ if ((u64) htab->n_buckets * sizeof(struct hlist_head) +
+ (u64) htab->elem_size * htab->map.max_entries >=
+ U32_MAX - PAGE_SIZE)
+ /* make sure page count doesn't overflow */
+ goto free_htab;
+
+ htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
+ htab->elem_size * htab->map.max_entries,
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ err = -ENOMEM;
htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
GFP_USER | __GFP_NOWARN);
@@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->lock);
htab->count = 0;
- htab->elem_size = sizeof(struct htab_elem) +
- round_up(htab->map.key_size, 8) +
- htab->map.value_size;
-
- htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
- htab->elem_size * htab->map.max_entries,
- PAGE_SIZE) >> PAGE_SHIFT;
return &htab->map;
free_htab:
@@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
WARN_ON_ONCE(!rcu_read_lock_held());
/* allocate new element outside of lock */
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+ l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if (!l_new)
return -ENOMEM;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726e31c9..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
break;
case BPF_TYPE_MAP:
- atomic_inc(&((struct bpf_map *)raw)->refcnt);
+ bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
bpf_prog_put(raw);
break;
case BPF_TYPE_MAP:
- bpf_map_put(raw);
+ bpf_map_put_with_uref(raw);
break;
default:
WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
void *raw;
*type = BPF_TYPE_MAP;
- raw = bpf_map_get(ufd);
+ raw = bpf_map_get_with_uref(ufd);
if (IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
raw = bpf_prog_get(ufd);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..3b39550d8485 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work)
map->ops->map_free(map);
}
+static void bpf_map_put_uref(struct bpf_map *map)
+{
+ if (atomic_dec_and_test(&map->usercnt)) {
+ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+ bpf_fd_array_map_clear(map);
+ }
+}
+
/* decrement map refcnt and schedule it for freeing via workqueue
* (unrelying map implementation ops->map_free() might sleep)
*/
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map)
}
}
-static int bpf_map_release(struct inode *inode, struct file *filp)
+void bpf_map_put_with_uref(struct bpf_map *map)
{
- struct bpf_map *map = filp->private_data;
-
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
- /* prog_array stores refcnt-ed bpf_prog pointers
- * release them all when user space closes prog_array_fd
- */
- bpf_fd_array_map_clear(map);
-
+ bpf_map_put_uref(map);
bpf_map_put(map);
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+ bpf_map_put_with_uref(filp->private_data);
return 0;
}
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr)
return PTR_ERR(map);
atomic_set(&map->refcnt, 1);
+ atomic_set(&map->usercnt, 1);
err = bpf_map_charge_memlock(map);
if (err)
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-struct bpf_map *bpf_map_get(u32 ufd)
+void bpf_map_inc(struct bpf_map *map, bool uref)
+{
+ atomic_inc(&map->refcnt);
+ if (uref)
+ atomic_inc(&map->usercnt);
+}
+
+struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_map *map;
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
if (IS_ERR(map))
return map;
- atomic_inc(&map->refcnt);
+ bpf_map_inc(map, true);
fdput(f);
return map;
@@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
- value = kmalloc(map->value_size, GFP_USER);
+ value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
- value = kmalloc(map->value_size, GFP_USER);
+ value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6073056badf..a7945d10b378 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_bpf_prog_info()
*/
- atomic_inc(&map->refcnt);
-
+ bpf_map_inc(map, false);
fdput(f);
next_insn:
insn++;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1603c153890..470f6536b9e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -98,6 +98,12 @@ static DEFINE_SPINLOCK(css_set_lock);
static DEFINE_SPINLOCK(cgroup_idr_lock);
/*
+ * Protects cgroup_file->kn for !self csses. It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
+/*
* Protects cgroup_subsys->release_agent_path. Modifying it also requires
* cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
*/
@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
if (!atomic_dec_and_test(&cset->refcount))
return;
- /* This css_set is dead. unlink it and release cgroup refcounts */
- for_each_subsys(ss, ssid)
+ /* This css_set is dead. unlink it and release cgroup and css refs */
+ for_each_subsys(ss, ssid) {
list_del(&cset->e_cset_node[ssid]);
+ css_put(cset->subsys[ssid]);
+ }
hash_del(&cset->hlist);
css_set_count--;
@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
- for_each_subsys(ss, ssid)
+ for_each_subsys(ss, ssid) {
+ struct cgroup_subsys_state *css = cset->subsys[ssid];
+
list_add_tail(&cset->e_cset_node[ssid],
- &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+ &css->cgroup->e_csets[ssid]);
+ css_get(css);
+ }
spin_unlock_bh(&css_set_lock);
@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
char name[CGROUP_FILE_NAME_MAX];
lockdep_assert_held(&cgroup_mutex);
+
+ if (cft->file_offset) {
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+ struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+ spin_lock_irq(&cgroup_file_kn_lock);
+ cfile->kn = NULL;
+ spin_unlock_irq(&cgroup_file_kn_lock);
+ }
+
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}
@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->self.sibling);
INIT_LIST_HEAD(&cgrp->self.children);
- INIT_LIST_HEAD(&cgrp->self.files);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
@@ -2216,6 +2237,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;
+ /* the subsys currently being processed */
+ int ssid;
+
/*
* Fields for cgroup_taskset_*() iteration.
*
@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
*
* @tset iteration is initialized and the first task is returned.
*/
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
- return cgroup_taskset_next(tset);
+ return cgroup_taskset_next(tset, dst_cssp);
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
+
+ /*
+ * This function may be called both before and
+ * after cgroup_taskset_migrate(). The two cases
+ * can be distinguished by looking at whether @cset
+ * has its ->mg_dst_cset set.
+ */
+ if (cset->mg_dst_cset)
+ *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+ else
+ *dst_cssp = cset->subsys[tset->ssid];
+
return task;
}
@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->can_attach) {
- ret = css->ss->can_attach(css, tset);
+ tset->ssid = i;
+ ret = css->ss->can_attach(tset);
if (ret) {
failed_css = css;
goto out_cancel_attach;
@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
*/
tset->csets = &tset->dst_csets;
- for_each_e_css(css, i, dst_cgrp)
- if (css->ss->attach)
- css->ss->attach(css, tset);
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css->ss->attach) {
+ tset->ssid = i;
+ css->ss->attach(tset);
+ }
+ }
ret = 0;
goto out_release_tset;
@@ -2390,8 +2434,10 @@ out_cancel_attach:
for_each_e_css(css, i, dst_cgrp) {
if (css == failed_css)
break;
- if (css->ss->cancel_attach)
- css->ss->cancel_attach(css, tset);
+ if (css->ss->cancel_attach) {
+ tset->ssid = i;
+ css->ss->cancel_attach(tset);
+ }
}
out_release_tset:
spin_lock_bh(&css_set_lock);
@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;
- kernfs_get(kn);
+ spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
- list_add(&cfile->node, &css->files);
+ spin_unlock_irq(&cgroup_file_kn_lock);
}
return 0;
@@ -3553,6 +3599,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
}
/**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+ if (cfile->kn)
+ kernfs_notify(cfile->kn);
+ spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
+
+/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
- struct cgroup_file *cfile;
percpu_ref_exit(&css->refcnt);
- list_for_each_entry(cfile, &css->files, node)
- kernfs_put(cfile->kn);
-
if (ss) {
/* css free path */
int id = css->id;
@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->ss = ss;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
- INIT_LIST_HEAD(&css->files);
css->serial_nr = css_serial_nr_next++;
if (cgroup_parent(cgrp)) {
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f1b30ad5dc6d..2d3df82c54f2 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
-static void freezer_attach(struct cgroup_subsys_state *new_css,
- struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_taskset *tset)
{
- struct freezer *freezer = css_freezer(new_css);
struct task_struct *task;
- bool clear_frozen = false;
+ struct cgroup_subsys_state *new_css;
mutex_lock(&freezer_mutex);
@@ -174,22 +172,21 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, new_css, tset) {
+ struct freezer *freezer = css_freezer(new_css);
+
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
- freezer->state &= ~CGROUP_FROZEN;
- clear_frozen = true;
+ /* clear FROZEN and propagate upwards */
+ while (freezer && (freezer->state & CGROUP_FROZEN)) {
+ freezer->state &= ~CGROUP_FROZEN;
+ freezer = parent_freezer(freezer);
+ }
}
}
- /* propagate FROZEN clearing upwards */
- while (clear_frozen && (freezer = parent_freezer(freezer))) {
- freezer->state &= ~CGROUP_FROZEN;
- clear_frozen = freezer->state & CGROUP_FREEZING;
- }
-
mutex_unlock(&freezer_mutex);
}
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4e991c..b50d5a167fda 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
- for (p = pids; p; p = parent_pids(p))
+ for (p = pids; parent_pids(p); p = parent_pids(p))
pids_cancel(p, num);
}
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
- for (p = pids; p; p = parent_pids(p))
+ for (p = pids; parent_pids(p); p = parent_pids(p))
atomic64_add(num, &p->counter);
}
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p, *q;
- for (p = pids; p; p = parent_pids(p)) {
+ for (p = pids; parent_pids(p); p = parent_pids(p)) {
int64_t new = atomic64_add_return(num, &p->counter);
/*
@@ -162,13 +162,13 @@ revert:
return -EAGAIN;
}
-static int pids_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int pids_can_attach(struct cgroup_taskset *tset)
{
- struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
+ struct cgroup_subsys_state *dst_css;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, dst_css, tset) {
+ struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
return 0;
}
-static void pids_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void pids_cancel_attach(struct cgroup_taskset *tset)
{
- struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
+ struct cgroup_subsys_state *dst_css;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, dst_css, tset) {
+ struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
}
}
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() held by the copy_process().
+ */
static int pids_can_fork(struct task_struct *task, void **priv_p)
{
struct cgroup_subsys_state *css;
struct pids_cgroup *pids;
- int err;
- /*
- * Use the "current" task_css for the pids subsystem as the tentative
- * css. It is possible we will charge the wrong hierarchy, in which
- * case we will forcefully revert/reapply the charge on the right
- * hierarchy after it is committed to the task proper.
- */
- css = task_get_css(current, pids_cgrp_id);
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
-
- err = pids_try_charge(pids, 1);
- if (err)
- goto err_css_put;
-
- *priv_p = css;
- return 0;
-
-err_css_put:
- css_put(css);
- return err;
+ return pids_try_charge(pids, 1);
}
static void pids_cancel_fork(struct task_struct *task, void *priv)
{
- struct cgroup_subsys_state *css = priv;
- struct pids_cgroup *pids = css_pids(css);
-
- pids_uncharge(pids, 1);
- css_put(css);
-}
-
-static void pids_fork(struct task_struct *task, void *priv)
-{
struct cgroup_subsys_state *css;
- struct cgroup_subsys_state *old_css = priv;
struct pids_cgroup *pids;
- struct pids_cgroup *old_pids = css_pids(old_css);
- css = task_get_css(task, pids_cgrp_id);
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
-
- /*
- * If the association has changed, we have to revert and reapply the
- * charge/uncharge on the wrong hierarchy to the current one. Since
- * the association can only change due to an organisation event, its
- * okay for us to ignore the limit in this case.
- */
- if (pids != old_pids) {
- pids_uncharge(old_pids, 1);
- pids_charge(pids, 1);
- }
-
- css_put(css);
- css_put(old_css);
+ pids_uncharge(pids, 1);
}
static void pids_free(struct task_struct *task)
@@ -335,6 +298,7 @@ static struct cftype pids_files[] = {
{
.name = "current",
.read_s64 = pids_current_read,
+ .flags = CFTYPE_NOT_ON_ROOT,
},
{ } /* terminate */
};
@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
.cancel_attach = pids_cancel_attach,
.can_fork = pids_can_fork,
.cancel_fork = pids_cancel_fork,
- .fork = pids_fork,
.free = pids_free,
.legacy_cftypes = pids_files,
.dfl_cftypes = pids_files,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10ae73611d80..02a8ea5c9963 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_taskset *tset)
{
- struct cpuset *cs = css_cs(css);
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
struct task_struct *task;
int ret;
/* used later by cpuset_attach() */
- cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+ cs = css_cs(css);
mutex_lock(&cpuset_mutex);
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
ret = task_can_attach(task, cs->cpus_allowed);
if (ret)
goto out_unlock;
@@ -1467,9 +1468,14 @@ out_unlock:
return ret;
}
-static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
+
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
mutex_lock(&cpuset_mutex);
css_cs(css)->attach_in_progress--;
mutex_unlock(&cpuset_mutex);
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
*/
static cpumask_var_t cpus_attach;
-static void cpuset_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
- struct cpuset *cs = css_cs(css);
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
struct cpuset *oldcs = cpuset_attach_old_cs;
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
mutex_lock(&cpuset_mutex);
/* prepare for attach */
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
* sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- cgroup_taskset_for_each_leader(leader, tset) {
+ cgroup_taskset_for_each_leader(leader, css, tset) {
struct mm_struct *mm = get_task_mm(leader);
if (mm) {
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487254d5..9c418002b8c1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd20648..ef2d6ea10736 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
@@ -435,7 +435,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
if (!is_cgroup_event(event))
return;
- cgrp = perf_cgroup_from_task(current);
+ cgrp = perf_cgroup_from_task(current, event->ctx);
/*
* Do not update time when cgroup is not active
*/
@@ -458,7 +458,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
if (!task || !ctx->nr_cgroups)
return;
- cgrp = perf_cgroup_from_task(task);
+ cgrp = perf_cgroup_from_task(task, ctx);
info = this_cpu_ptr(cgrp->info);
info->timestamp = ctx->timestamp;
}
@@ -489,7 +489,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
* we reschedule only in the presence of cgroup
* constrained events.
*/
- rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -522,8 +521,10 @@ static void perf_cgroup_swi