diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 17:51:34 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 17:51:34 -0800 |
| commit | f075e0f6993f41c72dbb1d3e7a2d7740f14e89e2 (patch) | |
| tree | a25b464a67fffc6f43940e0e85e2735a48bb1ad7 | |
| parent | 5cb7398caf69e3943df78435a19a8a77fe8b9463 (diff) | |
| parent | dd4b0a4676907481256d16d5de0851b315a6f22c (diff) | |
| download | linux-f075e0f6993f41c72dbb1d3e7a2d7740f14e89e2.tar.gz linux-f075e0f6993f41c72dbb1d3e7a2d7740f14e89e2.tar.bz2 linux-f075e0f6993f41c72dbb1d3e7a2d7740f14e89e2.zip | |
Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"The bulk of changes are cleanups and preparations for the upcoming
kernfs conversion.
- cgroup_event mechanism which is and will be used only by memcg is
moved to memcg.
- pidlist handling is updated so that it can be served by seq_file.
Also, the list is not sorted if sane_behavior. cgroup
documentation explicitly states that the file is not sorted but it
has been for quite some time.
- All cgroup file handling now happens on top of seq_file. This is
to prepare for kernfs conversion. In addition, all operations are
restructured so that they map 1-1 to kernfs operations.
- Other cleanups and low-pri fixes"
* 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (40 commits)
cgroup: trivial style updates
cgroup: remove stray references to css_id
doc: cgroups: Fix typo in doc/cgroups
cgroup: fix fail path in cgroup_load_subsys()
cgroup: fix missing unlock on error in cgroup_load_subsys()
cgroup: remove for_each_root_subsys()
cgroup: implement for_each_css()
cgroup: factor out cgroup_subsys_state creation into create_css()
cgroup: combine css handling loops in cgroup_create()
cgroup: reorder operations in cgroup_create()
cgroup: make for_each_subsys() useable under cgroup_root_mutex
cgroup: css iterations and css_from_dir() are safe under cgroup_mutex
cgroup: unify pidlist and other file handling
cgroup: replace cftype->read_seq_string() with cftype->seq_show()
cgroup: attach cgroup_open_file to all cgroup files
cgroup: generalize cgroup_pidlist_open_file
cgroup: unify read path so that seq_file is always used
cgroup: unify cgroup_write_X64() and cgroup_write_string()
cgroup: remove cftype->read(), ->read_map() and ->write()
hugetlb_cgroup: convert away from cftype->read()
...
| -rw-r--r-- | Documentation/cgroups/cgroups.txt | 20 | ||||
| -rw-r--r-- | Documentation/cgroups/memory.txt | 4 | ||||
| -rw-r--r-- | Documentation/cgroups/resource_counter.txt | 4 | ||||
| -rw-r--r-- | block/blk-throttle.c | 35 | ||||
| -rw-r--r-- | block/cfq-iosched.c | 131 | ||||
| -rw-r--r-- | drivers/md/bcache/request.c | 1 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 112 | ||||
| -rw-r--r-- | include/linux/vmpressure.h | 8 | ||||
| -rw-r--r-- | init/Kconfig | 3 | ||||
| -rw-r--r-- | kernel/cgroup.c | 1202 | ||||
| -rw-r--r-- | kernel/cgroup_freezer.c | 7 | ||||
| -rw-r--r-- | kernel/cpuset.c | 71 | ||||
| -rw-r--r-- | kernel/sched/core.c | 13 | ||||
| -rw-r--r-- | kernel/sched/cpuacct.c | 18 | ||||
| -rw-r--r-- | mm/hugetlb_cgroup.c | 22 | ||||
| -rw-r--r-- | mm/memcontrol.c | 426 | ||||
| -rw-r--r-- | mm/page_cgroup.c | 2 | ||||
| -rw-r--r-- | mm/vmpressure.c | 26 | ||||
| -rw-r--r-- | net/core/netprio_cgroup.c | 8 | ||||
| -rw-r--r-- | security/device_cgroup.c | 7 |
20 files changed, 1022 insertions, 1098 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 638bf17ff869..821de56d1580 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -24,7 +24,6 @@ CONTENTS: 2.1 Basic Usage 2.2 Attaching processes 2.3 Mounting hierarchies by name - 2.4 Notification API 3. Kernel API 3.1 Overview 3.2 Synchronization @@ -472,25 +471,6 @@ you give a subsystem a name. The name of the subsystem appears as part of the hierarchy description in /proc/mounts and /proc/<pid>/cgroups. -2.4 Notification API --------------------- - -There is mechanism which allows to get notifications about changing -status of a cgroup. - -To register a new notification handler you need to: - - create a file descriptor for event notification using eventfd(2); - - open a control file to be monitored (e.g. memory.usage_in_bytes); - - write "<event_fd> <control_fd> <args>" to cgroup.event_control. - Interpretation of args is defined by control file implementation; - -eventfd will be woken up by control file implementation or when the -cgroup is removed. - -To unregister a notification handler just close eventfd. - -NOTE: Support of notifications should be implemented for the control -file. See documentation for the subsystem. 3. Kernel API ============= diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index e2bc132608fd..2622115276aa 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -577,7 +577,7 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable" per-node page counts including "hierarchical_<counter>" which sums up all hierarchical children's values in addition to the memcg's own value. -The ouput format of memory.numa_stat is: +The output format of memory.numa_stat is: total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... @@ -670,7 +670,7 @@ page tables. 8.1 Interface -This feature is disabled by default. It can be enabledi (and disabled again) by +This feature is disabled by default. It can be enabled (and disabled again) by writing to memory.move_charge_at_immigrate of the destination cgroup. If you want to enable it: diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index c4d99ed0b418..52e1da16a309 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt @@ -97,8 +97,8 @@ to work with it. (struct res_counter *rc, struct res_counter *top, unsinged long val) - Almost same as res_cunter_uncharge() but propagation of uncharge - stops when rc == top. This is useful when kill a res_coutner in + Almost same as res_counter_uncharge() but propagation of uncharge + stops when rc == top. This is useful when kill a res_counter in child cgroup. 2.1 Other accounting routines diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 06534049afba..a760857e6b62 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, return __blkg_prfill_rwstat(sf, pd, &rwstat); } -static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_cpu_rwstat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, - cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat, + &blkcg_policy_throtl, seq_cft(sf)->private, true); return 0; } @@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, return __blkg_prfill_u64(sf, pd, v); } -static int tg_print_conf_u64(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_conf_u64(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64, - &blkcg_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64, + &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } -static int tg_print_conf_uint(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int tg_print_conf_uint(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint, - &blkcg_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint, + &blkcg_policy_throtl, seq_cft(sf)->private, false); return 0; } @@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = { { .name = "throttle.read_bps_device", .private = offsetof(struct throtl_grp, bps[READ]), - .read_seq_string = tg_print_conf_u64, + .seq_show = tg_print_conf_u64, .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.write_bps_device", .private = offsetof(struct throtl_grp, bps[WRITE]), - .read_seq_string = tg_print_conf_u64, + .seq_show = tg_print_conf_u64, .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.read_iops_device", .private = offsetof(struct throtl_grp, iops[READ]), - .read_seq_string = tg_print_conf_uint, + .seq_show = tg_print_conf_uint, .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.write_iops_device", .private = offsetof(struct throtl_grp, iops[WRITE]), - .read_seq_string = tg_print_conf_uint, + .seq_show = tg_print_conf_uint, .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.io_service_bytes", .private = offsetof(struct tg_stats_cpu, service_bytes), - .read_seq_string = tg_print_cpu_rwstat, + .seq_show = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", .private = offsetof(struct tg_stats_cpu, serviced), - .read_seq_string = tg_print_cpu_rwstat, + .seq_show = tg_print_cpu_rwstat, }, { } /* terminate */ }; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4d5cec1ad80d..744833b630c6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf, return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); } -static int cfqg_print_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_weight_device(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_weight_device, &blkcg_policy_cfq, + 0, false); return 0; } @@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); } -static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v) { - blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, + 0, false); return 0; } -static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft, - struct seq_file *sf) +static int cfq_print_weight(struct seq_file *sf, void *v) { - seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight); + seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); return 0; } -static int cfq_print_leaf_weight(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfq_print_leaf_weight(struct seq_file *sf, void *v) { - seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight); + seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); return 0; } @@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css, return __cfq_set_weight(css, cft, val, true); } -static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_stat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq, - cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, + &blkcg_policy_cfq, seq_cft(sf)->private, false); return 0; } -static int cfqg_print_rwstat(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_rwstat(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq, - cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, + &blkcg_policy_cfq, seq_cft(sf)->private, true); return 0; } @@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, return __blkg_prfill_rwstat(sf, pd, &sum); } -static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_stat_recursive(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, - &blkcg_policy_cfq, cft->private, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_stat_recursive, &blkcg_policy_cfq, + seq_cft(sf)->private, false); return 0; } -static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, - &blkcg_policy_cfq, cft->private, true); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq, + seq_cft(sf)->private, true); return 0; } @@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, } /* print avg_queue_size */ -static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *sf) +static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v) { - struct blkcg *blkcg = css_to_blkcg(css); - - blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, - &blkcg_policy_cfq, 0, false); + blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), + cfqg_prfill_avg_queue_size, &blkcg_policy_cfq, + 0, false); return 0; } #endif /* CONFIG_DEBUG_BLK_CGROUP */ @@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = { { .name = "weight_device", .flags = CFTYPE_ONLY_ON_ROOT, - .read_seq_string = cfqg_print_leaf_weight_device, + .seq_show = cfqg_print_leaf_weight_device, .write_string = cfqg_set_leaf_weight_device, .max_write_len = 256, }, { .name = "weight", .flags = CFTYPE_ONLY_ON_ROOT, - .read_seq_string = cfq_print_leaf_weight, + .seq_show = cfq_print_leaf_weight, .write_u64 = cfq_set_leaf_weight, }, @@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = { { .name = "weight_device", .flags = CFTYPE_NOT_ON_ROOT, - .read_seq_string = cfqg_print_weight_device, + .seq_show = cfqg_print_weight_device, .write_string = cfqg_set_weight_device, .max_write_len = 256, }, { .name = "weight", .flags = CFTYPE_NOT_ON_ROOT, - .read_seq_string = cfq_print_weight, + .seq_show = cfq_print_weight, .write_u64 = cfq_set_weight, }, { .name = "leaf_weight_device", - .read_seq_string = cfqg_print_leaf_weight_device, + .seq_show = cfqg_print_leaf_weight_device, .write_string = cfqg_set_leaf_weight_device, .max_write_len = 256, }, { .name = "leaf_weight", - .read_seq_string = cfq_print_leaf_weight, + .seq_show = cfq_print_leaf_weight, .write_u64 = cfq_set_leaf_weight, }, @@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = { { .name = "time", .private = offsetof(struct cfq_group, stats.time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "sectors", .private = offsetof(struct cfq_group, stats.sectors), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "io_service_bytes", .private = offsetof(struct cfq_group, stats.service_bytes), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_serviced", .private = offsetof(struct cfq_group, stats.serviced), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_service_time", .private = offsetof(struct cfq_group, stats.service_time), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_wait_time", .private = offsetof(struct cfq_group, stats.wait_time), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_merged", .private = offsetof(struct cfq_group, stats.merged), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, { .name = "io_queued", .private = offsetof(struct cfq_group, stats.queued), - .read_seq_string = cfqg_print_rwstat, + .seq_show = cfqg_print_rwstat, }, /* the same statictics which cover the cfqg and its descendants */ { .name = "time_recursive", .private = offsetof(struct cfq_group, stats.time), - .read_seq_string = cfqg_print_stat_recursive, + .seq_show = cfqg_print_stat_recursive, }, { .name = "sectors_recursive", .private = offsetof(struct cfq_group, stats.sectors), - .read_seq_string = cfqg_print_stat_recursive, + .seq_show = cfqg_print_stat_recursive, }, { .name = "io_service_bytes_recursive", .private = offsetof(struct cfq_group, stats.service_bytes), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_serviced_recursive", .private = offsetof(struct cfq_group, stats.serviced), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_service_time_recursive", .private = offsetof(struct cfq_group, stats.service_time), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_wait_time_recursive", .private = offsetof(struct cfq_group, stats.wait_time), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_merged_recursive", .private = offsetof(struct cfq_group, stats.merged), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, { .name = "io_queued_recursive", .private = offsetof(struct cfq_group, stats.queued), - .read_seq_string = cfqg_print_rwstat_recursive, + .seq_show = cfqg_print_rwstat_recursive, }, #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", - .read_seq_string = cfqg_print_avg_queue_size, + .seq_show = cfqg_print_avg_queue_size, }, { .name = "group_wait_time", .private = offsetof(struct cfq_group, stats.group_wait_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "idle_time", .private = offsetof(struct cfq_group, stats.idle_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "empty_time", .private = offsetof(struct cfq_group, stats.empty_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "dequeue", .private = offsetof(struct cfq_group, stats.dequeue), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, { .name = "unaccounted_time", .private = offsetof(struct cfq_group, stats.unaccounted_time), - .read_seq_string = cfqg_print_stat, + .seq_show = cfqg_print_stat, }, #endif /* CONFIG_DEBUG_BLK_CGROUP */ { } /* terminate */ diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fbcc851ed5a5..61bcfc21d2a0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) static void bcachecg_destroy(struct cgroup *cgroup) { struct bch_cgroup *cg = cgroup_to_bcache(cgroup); - free_css_id(&bcache_subsys, &cg->css); kfree(cg); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 39c1d9469677..5c097596104b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -21,6 +21,7 @@ #include <linux/xattr.h> #include <linux/fs.h> #include <linux/percpu-refcount.h> +#include <linux/seq_file.h> #ifdef CONFIG_CGROUPS @@ -28,8 +29,6 @@ struct cgroupfs_root; struct cgroup_subsys; struct inode; struct cgroup; -struct css_id; -struct eventfd_ctx; extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -79,8 +78,6 @@ struct cgroup_subsys_state { struct cgroup_subsys_state *parent; unsigned long flags; - /* ID for this css, if possible */ - struct css_id __rcu *id; /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; @@ -239,10 +236,6 @@ struct cgroup { struct rcu_head rcu_head; struct work_struct destroy_work; - /* List of events which userspace want to receive */ - struct list_head event_list; - spinlock_t event_list_lock; - /* directory xattrs */ struct simple_xattrs xattrs; }; @@ -280,6 +273,9 @@ enum { * - "tasks" is removed. Everything should be at process * granularity. Use "cgroup.procs" instead. * + * - "cgroup.procs" is not sorted. pids will be unique unless they + * got recycled inbetween reads. + * * - "release_agent" and "notify_on_release" are removed. * Replacement notification mechanism will be implemented. * @@ -320,9 +316,6 @@ struct cgroupfs_root { /* Unique id for this hierarchy. */ int hierarchy_id; - /* A list running through the attached subsystems */ - struct list_head subsys_list; - /* The root cgroup for this hierarchy */ struct cgroup top_cgroup; @@ -389,16 +382,6 @@ struct css_set { }; /* - * cgroup_map_cb is an abstract callback API for reporting map-valued - * control files - */ - -struct cgroup_map_cb { - int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); - void *state; -}; - -/* * struct cftype: handler definitions for cgroup control files * * When reading/writing to a file: @@ -445,10 +428,6 @@ struct cftype { */ struct cgroup_subsys *ss; - int (*open)(struct inode *inode, struct file *file); - ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, loff_t *ppos); /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() @@ -458,24 +437,14 @@ struct cftype { * read_s64() is a signed version of read_u64() */ s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); - /* - * read_map() is used for defining a map of key/value - * pairs. It should call cb->fill(cb, key, value) for each - * entry. The key/value pairs (and their ordering) should not - * change between reboots. - */ - int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft, - struct cgroup_map_cb *cb); - /* - * read_seq_string() is used for outputting a simple sequence - * using seqfile. - */ - int (*read_seq_string)(struct cgroup_subsys_state *css, - struct cftype *cft, struct seq_file *m); - ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft, - struct file *file, - const char __user *buf, size_t nbytes, loff_t *ppos); + /* generic seq_file read interface */ + int (*seq_show)(struct seq_file *sf, void *v); + + /* optional ops, implement all or none */ + void *(*seq_start)(struct seq_file *sf, loff_t *ppos); + void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); + void (*seq_stop)(struct seq_file *sf, void *v); /* * write_u64() is a shortcut for the common case of accepting @@ -504,27 +473,6 @@ struct cftype { * kick type for multiplexing. */ int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); - - int (*release)(struct inode *inode, struct file *file); - - /* - * register_event() callback will be used to add new userspace - * waiter for changes related to the cftype. Implement it if - * you want to provide this functionality. Use eventfd_signal() - * on eventfd to send notification to userspace. - */ - int (*register_event)(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd, - const char *args); - /* - * unregister_event() callback will be called when userspace - * closes the eventfd or on cgroup removing. - * This callback must be implemented, if you want provide - * notification functionality. - */ - void (*unregister_event)(struct cgroup_subsys_state *css, - struct cftype *cft, - struct eventfd_ctx *eventfd); }; /* @@ -538,6 +486,26 @@ struct cftype_set { }; /* + * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't + * access directly. + */ +struct cfent { + struct list_head node; + struct dentry *dentry; + struct cftype *type; + struct cgroup_subsys_state *css; + + /* file xattrs */ + struct simple_xattrs xattrs; +}; + +/* seq_file->private points to the following, only ->priv is public */ +struct cgroup_open_file { + struct cfent *cfe; + void *priv; +}; + +/* * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This * function can be called as long as @cgrp is accessible. */ @@ -552,6 +520,18 @@ static inline const char *cgroup_name(const struct cgroup *cgrp) return rcu_dereference(cgrp->name)->name; } +static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) +{ + struct cgroup_open_file *of = seq->private; + return of->cfe->css; +} + +static inline struct cftype *seq_cft(struct seq_file *seq) +{ + struct cgroup_open_file *of = seq->private; + return of->cfe->type; +} + int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); @@ -631,12 +611,8 @@ struct cgroup_subsys { #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; - /* - * Link to parent, and list entry in parent's children. - * Protected by cgroup_lock() - */ + /* link to parent, protected by cgroup_lock() */ struct cgroupfs_root *root; - struct list_head sibling; /* list of cftype_sets */ struct list_head cftsets; diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3f3788d49362..3e4535876d37 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -7,6 +7,7 @@ #include <linux/gfp.h> #include <linux/types.h> #include <linux/cgroup.h> +#include <linux/eventfd.h> struct vmpressure { unsigned long scanned; @@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr); extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); -extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); -extern int vmpressure_register_event(struct cgroup_subsys_state *css, - struct cftype *cft, +extern int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args); -extern void vmpressure_unregister_event(struct cgroup_subsys_state *css, - struct cftype *cft, +extern void vmpressure_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd); #else static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, diff --git a/init/Kconfig b/init/Kconfig index 5236dc562a36..8d402e33b7fc 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -854,7 +854,6 @@ config NUMA_BALANCING menuconfig CGROUPS boolean "Control Group support" - depends on EVENTFD help This option adds support for grouping sets of processes together, for use with process control subsystems such as Cpusets, CFS, memory @@ -921,6 +920,7 @@ config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS select MM_OWNER + select EVENTFD help Provides a memory resource controller that manages both anonymous memory and page cache. (See Documentation/cgroups/memory.txt) @@ -1160,7 +1160,6 @@ config UIDGID_STRICT_TYPE_CHECKS config SCHED_AUTOGROUP bool "Automatic process group scheduling" - select EVENTFD select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bc1dcabe9217..e2f46ba37f72 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -41,7 +41,6 @@ #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/backing-dev.h> -#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/magic.h> #include <linux/spinlock.h> @@ -56,15 +55,20 @@ #include <linux/pid_namespace.h> #include <linux/idr.h> #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ -#include <linux/eventfd.h> -#include <linux/poll.h> #include <linux/flex_array.h> /* used in cgroup_attach_task */ #include <linux/kthread.h> -#include <linux/file.h> #include <linux/atomic.h> /* + * pidlists linger the following amount before being destroyed. The goal + * is avoiding frequent destruction in the middle of consecutive read calls + * Expiring in the middle is a performance problem not a correctness one. + * 1 sec should be enough. + */ +#define CGROUP_PIDLIST_DESTROY_DELAY HZ + +/* * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. * @@ -89,6 +93,19 @@ static DEFINE_MUTEX(cgroup_mutex); static DEFINE_MUTEX(cgroup_root_mutex); +#define cgroup_assert_mutex_or_rcu_locked() \ + rcu_lockdep_assert(rcu_read_lock_held() || \ + lockdep_is_held(&cgroup_mutex), \ + "cgroup_mutex or RCU read lock required"); + +#ifdef CONFIG_LOCKDEP +#define cgroup_assert_mutex_or_root_locked() \ + WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&cgroup_mutex) && \ + !lockdep_is_held(&cgroup_root_mutex))) +#else +#define cgroup_assert_mutex_or_root_locked() do { } while (0) +#endif + /* * cgroup destruction makes heavy use of work items and there can be a lot * of concurrent destructions. Use a separate workqueue so that cgroup @@ -98,6 +115,12 @@ static DEFINE_MUTEX(cgroup_root_mutex); static struct workqueue_struct *cgroup_destroy_wq; /* + * pidlist destructions need to be flushed on cgroup destruction. Use a + * separate workqueue as flush domain. + */ +static struct workqueue_struct *cgroup_pidlist_destroy_wq; + +/* * Generate an array of cgroup subsystem pointers. At boot time, this is * populated with the built in subsystems, and modular subsystems are * registered after that. The mutable section of this array is protected by @@ -119,49 +142,6 @@ static struct cgroupfs_root cgroup_dummy_root; /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; |
