From 749d811f10a410b64cf4c674c498ec04316ec373 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Jun 2010 20:19:28 +1000 Subject: padata: add parenthesis in MAX_SEQ_NR macro MAX_SEQ_NR is used in padata_alloc_pd() like this: pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; It needs parenthesis or the divide by num_cpus takes precedence over the subtraction. Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index b1c9857f8402..ff8de1b71e4e 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -28,7 +28,7 @@ #include #include -#define MAX_SEQ_NR INT_MAX - NR_CPUS +#define MAX_SEQ_NR (INT_MAX - NR_CPUS) #define MAX_OBJ_NUM 1000 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) -- cgit v1.2.3 From 732bee7af3102cad811fb047dee8d15966efe569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 11 Jun 2010 12:16:59 +0200 Subject: fix typos concerning "hierarchy" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9a50c5f6e727..1a109788592f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -105,7 +105,7 @@ struct cpuset { /* for custom sched domain */ int relax_domain_level; - /* used for walking a cpuset heirarchy */ + /* used for walking a cpuset hierarchy */ struct list_head stack_list; }; -- cgit v1.2.3 From 5c1469de7545a35a16ff2b902e217044a7d2f8a5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 13 Jun 2010 03:28:03 +0000 Subject: user_ns: Introduce user_nsmap_uid and user_ns_map_gid. Define what happens when a we view a uid from one user_namespace in another user_namepece. - If the user namespaces are the same no mapping is necessary. - For most cases of difference use overflowuid and overflowgid, the uid and gid currently used for 16bit apis when we have a 32bit uid that does fit in 16bits. Effectively the situation is the same, we want to return a uid or gid that is not assigned to any user. - For the case when we happen to be mapping the uid or gid of the creator of the target user namespace use uid 0 and gid as confusing that user with root is not a problem. Signed-off-by: Eric W. Biederman Acked-by: Serge E. Hallyn Signed-off-by: David S. Miller --- kernel/user_namespace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'kernel') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index b2d70d38dff4..25915832291a 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include /* @@ -82,3 +83,46 @@ void free_user_ns(struct kref *kref) schedule_work(&ns->destroyer); } EXPORT_SYMBOL(free_user_ns); + +uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid) +{ + struct user_namespace *tmp; + + if (likely(to == cred->user->user_ns)) + return uid; + + + /* Is cred->user the creator of the target user_ns + * or the creator of one of it's parents? + */ + for ( tmp = to; tmp != &init_user_ns; + tmp = tmp->creator->user_ns ) { + if (cred->user == tmp->creator) { + return (uid_t)0; + } + } + + /* No useful relationship so no mapping */ + return overflowuid; +} + +gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid) +{ + struct user_namespace *tmp; + + if (likely(to == cred->user->user_ns)) + return gid; + + /* Is cred->user the creator of the target user_ns + * or the creator of one of it's parents? + */ + for ( tmp = to; tmp != &init_user_ns; + tmp = tmp->creator->user_ns ) { + if (cred->user == tmp->creator) { + return (gid_t)0; + } + } + + /* No useful relationship so no mapping */ + return overflowgid; +} -- cgit v1.2.3 From f7136c5150c29846d7a1d09109449d96b2f63445 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Tue, 15 Jun 2010 11:34:34 +0530 Subject: hw_breakpoints: Allow arch-specific cleanup before breakpoint unregistration Certain architectures (such as PowerPC) have a need to clean up data structures before a breakpoint is unregistered. This introduces an arch-specific hook in release_bp_slot() along with a weak definition in the form of a stub function. Signed-off-by: K.Prasad Acked-by: Frederic Weisbecker Signed-off-by: Paul Mackerras --- kernel/hw_breakpoint.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 7a56b22e0602..71ed3ce29e12 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -241,6 +241,17 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight; } +/* + * Function to perform processor-specific cleanup during unregistration + */ +__weak void arch_unregister_hw_breakpoint(struct perf_event *bp) +{ + /* + * A weak stub function here for those archs that don't define + * it inside arch/.../kernel/hw_breakpoint.c + */ +} + /* * Contraints to check before allowing this new breakpoint counter: * @@ -339,6 +350,7 @@ void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); + arch_unregister_hw_breakpoint(bp); __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); -- cgit v1.2.3 From 698f93159a735bd29a8767c9f60d9b2d75870f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 2 Jul 2010 20:41:51 +0200 Subject: fix comment/printk typos concerning "already" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- kernel/time/tick-broadcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b3bafd5fc66d..48b2761b5668 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -188,7 +188,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * Setup the next period for devices, which do not have * periodic mode. We read dev->next_event first and add to it - * when the event alrady expired. clockevents_program_event() + * when the event already expired. clockevents_program_event() * sets dev->next_event only when the event is really * programmed to the device. */ -- cgit v1.2.3 From 4c879170296174bde05cd1c643dac16594edee77 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Jul 2010 15:30:10 +0200 Subject: padata: Check for valid padata instance on start This patch introduces the PADATA_INVALID flag which is checked on padata start. This will be used to mark a padata instance as invalid, if the padata cpumask does not intersect with the active cpumask. we change padata_start to return an error if the PADATA_INVALID is set. Also we adapt the only padata user, pcrypt to this change. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index ff8de1b71e4e..e7d723a3e31d 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -485,6 +485,11 @@ static void padata_flush_queues(struct parallel_data *pd) BUG_ON(atomic_read(&pd->refcnt) != 0); } +static void __padata_start(struct padata_instance *pinst) +{ + pinst->flags |= PADATA_INIT; +} + /* Replace the internal control stucture with a new one. */ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) @@ -619,11 +624,20 @@ EXPORT_SYMBOL(padata_remove_cpu); * * @pinst: padata instance to start */ -void padata_start(struct padata_instance *pinst) +int padata_start(struct padata_instance *pinst) { + int err = 0; + mutex_lock(&pinst->lock); - pinst->flags |= PADATA_INIT; + + if (pinst->flags & PADATA_INVALID) + err =-EINVAL; + + __padata_start(pinst); + mutex_unlock(&pinst->lock); + + return err; } EXPORT_SYMBOL(padata_start); -- cgit v1.2.3 From ee836555120140f770005b8ce6673c913d1b9a98 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Jul 2010 15:30:47 +0200 Subject: padata: Block until the instance is unused on stop This patch makes padata_stop to block until the padata instance is unused. Also we split padata_stop to a locked and a unlocked version. This is in preparation to be able to change the cpumask after a call to patata stop. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index e7d723a3e31d..9e18dfa372a9 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -490,6 +490,20 @@ static void __padata_start(struct padata_instance *pinst) pinst->flags |= PADATA_INIT; } +static void __padata_stop(struct padata_instance *pinst) +{ + if (!(pinst->flags & PADATA_INIT)) + return; + + pinst->flags &= ~PADATA_INIT; + + synchronize_rcu(); + + get_online_cpus(); + padata_flush_queues(pinst->pd); + put_online_cpus(); +} + /* Replace the internal control stucture with a new one. */ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) @@ -649,7 +663,7 @@ EXPORT_SYMBOL(padata_start); void padata_stop(struct padata_instance *pinst) { mutex_lock(&pinst->lock); - pinst->flags &= ~PADATA_INIT; + __padata_stop(pinst); mutex_unlock(&pinst->lock); } EXPORT_SYMBOL(padata_stop); @@ -770,17 +784,11 @@ EXPORT_SYMBOL(padata_alloc); */ void padata_free(struct padata_instance *pinst) { - padata_stop(pinst); - - synchronize_rcu(); - #ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); #endif - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); + padata_stop(pinst); padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst); -- cgit v1.2.3 From 33e54450683c5e970ac007489d7921ba792d093c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Jul 2010 15:31:26 +0200 Subject: padata: Handle empty padata cpumasks This patch fixes a bug when the padata cpumask does not intersect with the active cpumask. In this case we get a division by zero in padata_alloc_pd and we end up with a useless padata instance. Padata can end up with an empty cpumask for two reasons: 1. A user removed the last cpu that belongs to the padata cpumask and the active cpumask. 2. The last cpu that belongs to the padata cpumask and the active cpumask goes offline. We introduce a function padata_validate_cpumask to check if the padata cpumask does intersect with the active cpumask. If the cpumasks do not intersect we mark the instance as invalid, so it can't be used. We do not allocate the cpumask dependend recources in this case. This fixes the division by zero and keeps the padate instance in a consistent state. It's not possible to trigger this bug by now because the only padata user, pcrypt uses always the possible cpumask. Reported-by: Dan Kruchinin Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 9e18dfa372a9..57ec4eb5f2e3 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -516,12 +516,27 @@ static void padata_replace(struct padata_instance *pinst, synchronize_rcu(); - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + if (pd_old) { + padata_flush_queues(pd_old); + padata_free_pd(pd_old); + } pinst->flags &= ~PADATA_RESET; } +/* If cpumask contains no active cpu, we mark the instance as invalid. */ +static bool padata_validate_cpumask(struct padata_instance *pinst, + const struct cpumask *cpumask) +{ + if (!cpumask_intersects(cpumask, cpu_active_mask)) { + pinst->flags |= PADATA_INVALID; + return false; + } + + pinst->flags &= ~PADATA_INVALID; + return true; +} + /** * padata_set_cpumask - set the cpumask that padata should use * @@ -531,11 +546,18 @@ static void padata_replace(struct padata_instance *pinst, int padata_set_cpumask(struct padata_instance *pinst, cpumask_var_t cpumask) { - struct parallel_data *pd; + int valid; int err = 0; + struct parallel_data *pd = NULL; mutex_lock(&pinst->lock); + valid = padata_validate_cpumask(pinst, cpumask); + if (!valid) { + __padata_stop(pinst); + goto out_replace; + } + get_online_cpus(); pd = padata_alloc_pd(pinst, cpumask); @@ -544,10 +566,14 @@ int padata_set_cpumask(struct padata_instance *pinst, goto out; } +out_replace: cpumask_copy(pinst->cpumask, cpumask); padata_replace(pinst, pd); + if (valid) + __padata_start(pinst); + out: put_online_cpus(); @@ -567,6 +593,9 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) return -ENOMEM; padata_replace(pinst, pd); + + if (padata_validate_cpumask(pinst, pinst->cpumask)) + __padata_start(pinst); } return 0; @@ -597,9 +626,16 @@ EXPORT_SYMBOL(padata_add_cpu); static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) { - struct parallel_data *pd; + struct parallel_data *pd = NULL; if (cpumask_test_cpu(cpu, cpu_online_mask)) { + + if (!padata_validate_cpumask(pinst, pinst->cpumask)) { + __padata_stop(pinst); + padata_replace(pinst, pd); + goto out; + } + pd = padata_alloc_pd(pinst, pinst->cpumask); if (!pd) return -ENOMEM; @@ -607,6 +643,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) padata_replace(pinst, pd); } +out: return 0; } @@ -732,7 +769,7 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, struct workqueue_struct *wq) { struct padata_instance *pinst; - struct parallel_data *pd; + struct parallel_data *pd = NULL; pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); if (!pinst) @@ -740,12 +777,14 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, get_online_cpus(); - pd = padata_alloc_pd(pinst, cpumask); - if (!pd) + if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL)) goto err_free_inst; - if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL)) - goto err_free_pd; + if (padata_validate_cpumask(pinst, cpumask)) { + pd = padata_alloc_pd(pinst, cpumask); + if (!pd) + goto err_free_mask; + } rcu_assign_pointer(pinst->pd, pd); @@ -767,8 +806,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, return pinst; -err_free_pd: - padata_free_pd(pd); +err_free_mask: + free_cpumask_var(pinst->cpumask); err_free_inst: kfree(pinst); put_online_cpus(); -- cgit v1.2.3 From 83f619f3c8abb82cac9158cf23c656ec5c184607 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Jul 2010 15:32:02 +0200 Subject: padata: make padata_do_parallel to return zero on success To return -EINPROGRESS on success in padata_do_parallel was considered to be odd. This patch changes this to return zero on success. Also the only user of padata, pcrypt is adapted to convert a return of zero to -EINPROGRESS within the crypto layer. This also removes the pcrypt fallback if padata_do_parallel was called on a not running padata instance as we can't handle it anymore. This fallback was unused, so it's save to remove it. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 57ec4eb5f2e3..ae8defcf0622 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -111,10 +111,13 @@ int padata_do_parallel(struct padata_instance *pinst, pd = rcu_dereference(pinst->pd); - err = 0; + err = -EINVAL; if (!(pinst->flags & PADATA_INIT)) goto out; + if (!cpumask_test_cpu(cb_cpu, pd->cpumask)) + goto out; + err = -EBUSY; if ((pinst->flags & PADATA_RESET)) goto out; @@ -122,11 +125,7 @@ int padata_do_parallel(struct padata_instance *pinst, if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM) goto out; - err = -EINVAL; - if (!cpumask_test_cpu(cb_cpu, pd->cpumask)) - goto out; - - err = -EINPROGRESS; + err = 0; atomic_inc(&pd->refcnt); padata->pd = pd; padata->cb_cpu = cb_cpu; -- cgit v1.2.3 From 5f1a8c1bc724498ff32acbd59ed5263275676b9d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 7 Jul 2010 15:32:39 +0200 Subject: padata: simplify serialization mechanism We count the number of processed objects on a percpu basis, so we need to go through all the percpu reorder queues to calculate the sequence number of the next object that needs serialization. This patch changes this to count the number of processed objects global. So we can calculate the sequence number and the percpu reorder queue of the next object that needs serialization without searching through the percpu reorder queues. This avoids some accesses to memory of foreign cpus. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 71 +++++++++++++++------------------------------------------ 1 file changed, 19 insertions(+), 52 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index ae8defcf0622..450d67d394b0 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -170,79 +170,47 @@ EXPORT_SYMBOL(padata_do_parallel); */ static struct padata_priv *padata_get_next(struct parallel_data *pd) { - int cpu, num_cpus, empty, calc_seq_nr; - int seq_nr, next_nr, overrun, next_overrun; + int cpu, num_cpus; + int next_nr, next_index; struct padata_queue *queue, *next_queue; struct padata_priv *padata; struct padata_list *reorder; - empty = 0; - next_nr = -1; - next_overrun = 0; - next_queue = NULL; - num_cpus = cpumask_weight(pd->cpumask); - for_each_cpu(cpu, pd->cpumask) { - queue = per_cpu_ptr(pd->queue, cpu); - reorder = &queue->reorder; - - /* - * Calculate the seq_nr of the object that should be - * next in this reorder queue. - */ - overrun = 0; - calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) - + queue->cpu_index; - - if (unlikely(calc_seq_nr > pd->max_seq_nr)) { - calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1; - overrun = 1; - } - - if (!list_empty(&reorder->list)) { - padata = list_entry(reorder->list.next, - struct padata_priv, list); - - seq_nr = padata->seq_nr; - BUG_ON(calc_seq_nr != seq_nr); - } else { - seq_nr = calc_seq_nr; - empty++; - } - - if (next_nr < 0 || seq_nr < next_nr - || (next_overrun && !overrun)) { - next_nr = seq_nr; - next_overrun = overrun; - next_queue = queue; - } + /* + * Calculate the percpu reorder queue and the sequence + * number of the next object. + */ + next_nr = pd->processed; + next_index = next_nr % num_cpus; + cpu = padata_index_to_cpu(pd, next_index); + next_queue = per_cpu_ptr(pd->queue, cpu); + + if (unlikely(next_nr > pd->max_seq_nr)) { + next_nr = next_nr - pd->max_seq_nr - 1; + next_index = next_nr % num_cpus; + cpu = padata_index_to_cpu(pd, next_index); + next_queue = per_cpu_ptr(pd->queue, cpu); + pd->processed = 0; } padata = NULL; - if (empty == num_cpus) - goto out; - reorder = &next_queue->reorder; if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); - if (unlikely(next_overrun)) { - for_each_cpu(cpu, pd->cpumask) { - queue = per_cpu_ptr(pd->queue, cpu); - atomic_set(&queue->num_obj, 0); - } - } + BUG_ON(next_nr != padata->seq_nr); spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); spin_unlock(&reorder->lock); - atomic_inc(&next_queue->num_obj); + pd->processed++; goto out; } @@ -430,7 +398,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, INIT_WORK(&queue->pwork, padata_parallel_worker); INIT_WORK(&queue->swork, padata_serial_worker); - atomic_set(&queue->num_obj, 0); } num_cpus = cpumask_weight(pd->cpumask); -- cgit v1.2.3 From 90133673395849c9d4e66a563f2d0d91d92aa461 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 7 Jun 2010 22:23:12 +0200 Subject: PM / Hibernate: Fix typos in comments in kernel/power/swap.c There are a few typos in kernel/power/swap.c. Fix them. Signed-off-by: Cesar Eduardo Barros Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b0bb21778391..7c3ae83e41d7 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -32,7 +32,7 @@ /* * The swap map is a data structure used for keeping track of each page * written to a swap partition. It consists of many swap_map_page - * structures that contain each an array of MAP_PAGE_SIZE swap entries. + * structures that contain each an array of MAP_PAGE_ENTRIES swap entries. * These structures are stored on the swap and linked together with the * help of the .next_swap member. * @@ -148,7 +148,7 @@ sector_t alloc_swapdev_block(int swap) /** * free_all_swap_pages - free swap pages allocated for saving image data. - * It also frees the extents used to register which swap entres had been + * It also frees the extents used to register which swap entries had been * allocated. */ -- cgit v1.2.3 From c125e96f044427f38d106fab7bc5e4a5e6a18262 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jul 2010 22:43:53 +0200 Subject: PM: Make it possible to avoid races between wakeup and system sleep One of the arguments during the suspend blockers discussion was that the mainline kernel didn't contain any mechanisms making it possible to avoid races between wakeup and system suspend. Generally, there are two problems in that area. First, if a wakeup event occurs exactly when /sys/power/state is being written to, it may be delivered to user space right before the freezer kicks in, so the user space consumer of the event may not be able to process it before the system is suspended. Second, if a wakeup event occurs after user space has been frozen, it is not generally guaranteed that the ongoing transition of the system into a sleep state will be aborted. To address these issues introduce a new global sysfs attribute, /sys/power/wakeup_count, associated with a running counter of wakeup events and three helper functions, pm_stay_awake(), pm_relax(), and pm_wakeup_event(), that may be used by kernel subsystems to control the behavior of this attribute and to request the PM core to abort system transitions into a sleep state already in progress. The /sys/power/wakeup_count file may be read from or written to by user space. Reads will always succeed (unless interrupted by a signal) and return the current value of the wakeup events counter. Writes, however, will only succeed if the written number is equal to the current value of the wakeup events counter. If a write is successful, it will cause the kernel to save the current value of the wakeup events counter and to abort the subsequent system transition into a sleep state if any wakeup events are reported after the write has returned. [The assumption is that before writing to /sys/power/state user space will first read from /sys/power/wakeup_count. Next, user space consumers of wakeup events will have a chance to acknowledge or veto the upcoming system transition to a sleep state. Finally, if the transition is allowed to proceed, /sys/power/wakeup_count will be written to and if that succeeds, /sys/power/state will be written to as well. Still, if any wakeup events are reported to the PM core by kernel subsystems after that point, the transition will be aborted.] Additionally, put a wakeup events counter into struct dev_pm_info and make these per-device wakeup event counters available via sysfs, so that it's possible to check the activity of various wakeup event sources within the kernel. To illustrate how subsystems can use pm_wakeup_event(), make the low-level PCI runtime PM wakeup-handling code use it. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes Acked-by: Greg Kroah-Hartman Acked-by: markgross Reviewed-by: Alan Stern --- kernel/power/hibernate.c | 20 ++++++++++++------ kernel/power/main.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/power/suspend.c | 4 +++- 3 files changed, 72 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aa9e916da4d5..f61202916631 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -277,7 +277,7 @@ static int create_image(int platform_mode) goto Enable_irqs; } - if (hibernation_test(TEST_CORE)) + if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) goto Power_up; in_suspend = 1; @@ -288,8 +288,10 @@ static int create_image(int platform_mode) error); /* Restore control flow magically appears here */ restore_processor_state(); - if (!in_suspend) + if (!in_suspend) { + events_check_enabled = false; platform_leave(platform_mode); + } Power_up: sysdev_resume(); @@ -511,14 +513,20 @@ int hibernation_platform_enter(void) local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); + if (!pm_check_wakeup_events()) { + error = -EAGAIN; + goto Power_up; + } + hibernation_ops->enter(); /* We should never get here */ while (1); - /* - * We don't need to reenable the nonboot CPUs or resume consoles, since - * the system is going to be halted anyway. - */ + Power_up: + sysdev_resume(); + local_irq_enable(); + enable_nonboot_cpus(); + Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/main.c b/kernel/power/main.c index b58800b21fc0..62b0bc6e4983 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(state); +#ifdef CONFIG_PM_SLEEP +/* + * The 'wakeup_count' attribute, along with the functions defined in + * drivers/base/power/wakeup.c, provides a means by which wakeup events can be + * handled in a non-racy way. + * + * If a wakeup event occurs when the system is in a sleep state, it simply is + * woken up. In turn, if an event that would wake the system up from a sleep + * state occurs when it is undergoing a transition to that sleep state, the + * transition should be aborted. Moreover, if such an event occurs when the + * system is in the working state, an attempt to start a transition to the + * given sleep state should fail during certain period after the detection of + * the event. Using the 'state' attribute alone is not sufficient to satisfy + * these requirements, because a wakeup event may occur exactly when 'state' + * is being written to and may be delivered to user space right before it is + * frozen, so the event will remain only partially processed until the system is + * woken up by another event. In particular, it won't cause the transition to + * a sleep state to be aborted. + * + * This difficulty may be overcome if user space uses 'wakeup_count' before + * writing to 'state'. It first should read from 'wakeup_count' and store + * the read value. Then, after carrying out its own preparations for the system + * transition to a sleep state, it should write the stored value to + * 'wakeup_count'. If that fails, at least one wakeup event has occured since + * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it + * is allowed to write to 'state', but the transition will be aborted if there + * are any wakeup events detected after 'wakeup_count' was written to. + */ + +static ssize_t wakeup_count_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned long val; + + return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR; +} + +static ssize_t wakeup_count_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (sscanf(buf, "%lu", &val) == 1) { + if (pm_save_wakeup_count(val)) + return n; + } + return -EINVAL; +} + +power_attr(wakeup_count); +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_PM_TRACE int pm_trace_enabled; @@ -236,6 +290,7 @@ static struct attribute * g[] = { #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, + &wakeup_count_attr.attr, #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f37cb7dd4402..5f8d09f94325 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state) error = sysdev_suspend(PMSG_SUSPEND); if (!error) { - if (!suspend_test(TEST_CORE)) + if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { error = suspend_ops->enter(state); + events_check_enabled = false; + } sysdev_resume(); } -- cgit v1.2.3 From 5f279845f9d684661563894d44729a0c706375b4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 19 Jul 2010 02:00:18 +0200 Subject: pm_qos: Reimplement using plists A lot of the pm_qos extremal value handling is really duplicating what a priority ordered list does, just in a less efficient fashion. Simply redoing the implementation in terms of a plist gets rid of a lot of this junk (although there are several other strange things that could do with tidying up, like pm_qos_request_list has to carry the pm_qos_class with every node, simply because it doesn't get passed in to pm_qos_update_request even though every caller knows full well what parameter it's updating). I think this redo is a win independent of android, so we should do something like this now. Signed-off-by: James Bottomley Signed-off-by: mark gross Signed-off-by: Rafael J. Wysocki --- kernel/pm_qos_params.c | 172 ++++++++++++++++++++++++------------------------- 1 file changed, 86 insertions(+), 86 deletions(-) (limited to 'kernel') diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index f42d3f737a33..db8e51d7f392 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -30,6 +30,7 @@ /*#define DEBUG*/ #include +#include #include #include #include @@ -49,58 +50,53 @@ * held, taken with _irqsave. One lock to rule them all */ struct pm_qos_request_list { - struct list_head list; - union { - s32 value; - s32 usec; - s32 kbps; - }; + struct plist_node list; int pm_qos_class; }; -static s32 max_compare(s32 v1, s32 v2); -static s32 min_compare(s32 v1, s32 v2); +enum pm_qos_type { + PM_QOS_MAX, /* return the largest value */ + PM_QOS_MIN /* return the smallest value */ +}; struct pm_qos_object { - struct pm_qos_request_list requests; + struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; s32 default_value; - atomic_t target_value; - s32 (*comparitor)(s32, s32); + enum pm_qos_type type; }; +static DEFINE_SPINLOCK(pm_qos_lock); + static struct pm_qos_object null_pm_qos; static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); static struct pm_qos_object cpu_dma_pm_qos = { - .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), - .comparitor = min_compare + .type = PM_QOS_MIN, }; static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); static struct pm_qos_object network_lat_pm_qos = { - .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), - .comparitor = min_compare + .type = PM_QOS_MIN }; static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); static struct pm_qos_object network_throughput_pm_qos = { - .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)}, + .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", .default_value = 0, - .target_value = ATOMIC_INIT(0), - .comparitor = max_compare + .type = PM_QOS_MAX, }; @@ -111,8 +107,6 @@ static struct pm_qos_object *pm_qos_array[] = { &network_throughput_pm_qos }; -static DEFINE_SPINLOCK(pm_qos_lock); - static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos); static int pm_qos_power_open(struct inode *inode, struct file *filp); @@ -124,46 +118,55 @@ static const struct file_operations pm_qos_power_fops = { .release = pm_qos_power_release, }; -/* static helper functions */ -static s32 max_compare(s32 v1, s32 v2) +/* unlocked internal variant */ +static inline int pm_qos_get_value(struct pm_qos_object *o) { - return max(v1, v2); -} + if (plist_head_empty(&o->requests)) + return o->default_value; -static s32 min_compare(s32 v1, s32 v2) -{ - return min(v1, v2); -} + switch (o->type) { + case PM_QOS_MIN: + return plist_last(&o->requests)->prio; + case PM_QOS_MAX: + return plist_first(&o->requests)->prio; -static void update_target(int pm_qos_class) + default: + /* runtime check for not using enum */ + BUG(); + } +} + +static void update_target(struct pm_qos_object *o, struct plist_node *node, + int del, int value) { - s32 extreme_value; - struct pm_qos_request_list *node; unsigned long flags; - int call_notifier = 0; + int prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, flags); - extreme_value = pm_qos_array[pm_qos_class]->default_value; - list_for_each_entry(node, - &pm_qos_array[pm_qos_class]->requests.list, list) { - extreme_value = pm_qos_array[pm_qos_class]->comparitor( - extreme_value, node->value); - } - if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) != - extreme_value) { - call_notifier = 1; - atomic_set(&pm_qos_array[pm_qos_class]->target_value, - extreme_value); - pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class, - atomic_read(&pm_qos_array[pm_qos_class]->target_value)); + prev_value = pm_qos_get_value(o); + /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ + if (value != PM_QOS_DEFAULT_VALUE) { + /* + * to change the list, we atomically remove, reinit + * with new value and add, then see if the extremal + * changed + */ + plist_del(node, &o->requests); + plist_node_init(node, value); + plist_add(node, &o->requests); + } else if (del) { + plist_del(node, &o->requests); + } else { + plist_add(node, &o->requests); } + curr_value = pm_qos_get_value(o); spin_unlock_irqrestore(&pm_qos_lock, flags); - if (call_notifier) - blocking_notifier_call_chain( - pm_qos_array[pm_qos_class]->notifiers, - (unsigned long) extreme_value, NULL); + if (prev_value != curr_value) + blocking_notifier_call_chain(o->notifiers, + (unsigned long)curr_value, + NULL); } static int register_pm_qos_misc(struct pm_qos_object *qos) @@ -196,7 +199,14 @@ static int find_pm_qos_object_by_minor(int minor) */ int pm_qos_request(int pm_qos_class) { - return atomic_read(&pm_qos_array[pm_qos_class]->target_value); + unsigned long flags; + int value; + + spin_lock_irqsave(&pm_qos_lock, flags); + value = pm_qos_get_value(pm_qos_array[pm_qos_class]); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return value; } EXPORT_SYMBOL_GPL(pm_qos_request); @@ -214,21 +224,19 @@ EXPORT_SYMBOL_GPL(pm_qos_request); struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) { struct pm_qos_request_list *dep; - unsigned long flags; dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); if (dep) { + struct pm_qos_object *o = pm_qos_array[pm_qos_class]; + int new_value; + if (value == PM_QOS_DEFAULT_VALUE) - dep->value = pm_qos_array[pm_qos_class]->default_value; + new_value = o->default_value; else - dep->value = value; + new_value = value; + plist_node_init(&dep->list, new_value); dep->pm_qos_class = pm_qos_class; - - spin_lock_irqsave(&pm_qos_lock, flags); - list_add(&dep->list, - &pm_qos_array[pm_qos_class]->requests.list); - spin_unlock_irqrestore(&pm_qos_lock, flags); - update_target(pm_qos_class); + update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); } return dep; @@ -246,27 +254,23 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); * Attempts are made to make this code callable on hot code paths. */ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, - s32 new_value) + s32 new_value) { - unsigned long flags; - int pending_update = 0; s32 temp; + struct pm_qos_object *o; - if (pm_qos_req) { /*guard against callers passing in null */ - spin_lock_irqsave(&pm_qos_lock, flags); - if (new_value == PM_QOS_DEFAULT_VALUE) - temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value; - else - temp = new_value; - - if (temp != pm_qos_req->value) { - pending_update = 1; - pm_qos_req->value = temp; - } - spin_unlock_irqrestore(&pm_qos_lock, flags); - if (pending_update) - update_target(pm_qos_req->pm_qos_class); - } + if (!pm_qos_req) /*guard against callers passing in null */ + return; + + o = pm_qos_array[pm_qos_req->pm_qos_class]; + + if (new_value == PM_QOS_DEFAULT_VALUE) + temp = o->default_value; + else + temp = new_value; + + if (temp != pm_qos_req->list.prio) + update_target(o, &pm_qos_req->list, 0, temp); } EXPORT_SYMBOL_GPL(pm_qos_update_request); @@ -280,19 +284,15 @@ EXPORT_SYMBOL_GPL(pm_qos_update_request); */ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) { - unsigned long flags; - int qos_class; + struct pm_qos_object *o; if (pm_qos_req == NULL) return; /* silent return to keep pcm code cleaner */ - qos_class = pm_qos_req->pm_qos_class; - spin_lock_irqsave(&pm_qos_lock, flags); - list_del(&pm_qos_req->list); + o = pm_qos_array[pm_qos_req->pm_qos_class]; + update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); kfree(pm_qos_req); - spin_unlock_irqrestore(&pm_qos_lock, flags); - update_target(qos_class); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); -- cgit v1.2.3 From 82f682514a5df89ffb3890627eebf0897b7a84ec Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 5 Jul 2010 22:53:06 +0200 Subject: pm_qos: Get rid of the allocation in pm_qos_add_request() All current users of pm_qos_add_request() have the ability to supply the memory required by the pm_qos routines, so make them do this and eliminate the kmalloc() with pm_qos_add_request(). This has the double benefit of making the call never fail and allowing it to be called from atomic context. Signed-off-by: James Bottomley Signed-off-by: mark gross Signed-off-by: Rafael J. Wysocki --- kernel/pm_qos_params.c | 67 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index db8e51d7f392..996a4dec5f96 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -30,7 +30,6 @@ /*#define DEBUG*/ #include -#include #include #include #include @@ -49,11 +48,6 @@ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ -struct pm_qos_request_list { - struct plist_node list; - int pm_qos_class; -}; - enum pm_qos_type { PM_QOS_MAX, /* return the largest value */ PM_QOS_MIN /* return the smallest value */ @@ -210,6 +204,12 @@ int pm_qos_request(int pm_qos_class) } EXPORT_SYMBOL_GPL(pm_qos_request); +int pm_qos_request_active(struct pm_qos_request_list *req) +{ + return req->pm_qos_class != 0; +} +EXPORT_SYMBOL_GPL(pm_qos_request_active); + /** * pm_qos_add_request - inserts new qos request into the list * @pm_qos_class: identifies which list of qos request to us @@ -221,25 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request); * element as a handle for use in updating and removal. Call needs to save * this handle for later use. */ -struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) +void pm_qos_add_request(struct pm_qos_request_list *dep, + int pm_qos_class, s32 value) { - struct pm_qos_request_list *dep; - - dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); - if (dep) { - struct pm_qos_object *o = pm_qos_array[pm_qos_class]; - int new_value; - - if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->default_value; - else - new_value = value; - plist_node_init(&dep->list, new_value); - dep->pm_qos_class = pm_qos_class; - update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); - } + struct pm_qos_object *o = pm_qos_array[pm_qos_class]; + int new_value; - return dep; + if (pm_qos_request_active(dep)) { + WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); + return; + } + if (value == PM_QOS_DEFAULT_VALUE) + new_value = o->default_value; + else + new_value = value; + plist_node_init(&dep->list, new_value); + dep->pm_qos_class = pm_qos_class; + update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); } EXPORT_SYMBOL_GPL(pm_qos_add_request); @@ -262,6 +260,11 @@ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, if (!pm_qos_req) /*guard against callers passing in null */ return; + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); + return; + } + o = pm_qos_array[pm_qos_req->pm_qos_class]; if (new_value == PM_QOS_DEFAULT_VALUE) @@ -290,9 +293,14 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) return; /* silent return to keep pcm code cleaner */ + if (!pm_qos_request_active(pm_qos_req)) { + WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); + return; + } + o = pm_qos_array[pm_qos_req->pm_qos_class]; update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); - kfree(pm_qos_req); + memset(pm_qos_req, 0, sizeof(*pm_qos_req)); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); @@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); if (pm_qos_class >= 0) { - filp->private_data = (void *) pm_qos_add_request(pm_qos_class, - PM_QOS_DEFAULT_VALUE); + struct pm_qos_request_list *req = kzalloc(GFP_KERNEL, sizeof(*req)); + if (!req) + return -ENOMEM; + + pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); + filp->private_data = req; if (filp->private_data) return 0; @@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp) { struct pm_qos_request_list *req; - req = (struct pm_qos_request_list *)filp->private_data; + req = filp->private_data; pm_qos_remove_request(req); + kfree(req); return 0; } -- cgit v1.2.3 From f6f71f187518477cecc01cd887933b5da19585e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:18 +0200 Subject: PM / Hibernate: Fix hibernation_platform_enter() The hibernation_platform_enter() function calls dpm_suspend_noirq() instead of dpm_resume_noirq() by mistake. Fix this. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f61202916631..d97ba8615c30 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -530,7 +530,7 @@ int hibernation_platform_enter(void) Platform_finish: hibernation_ops->finish(); - dpm_suspend_noirq(PMSG_RESTORE); + dpm_resume_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; -- cgit v1.2.3 From d074ee023fa3a4681b64223c5e636102c39628c4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:35 +0200 Subject: PM / Hibernate: Fix snapshot error code path There is an inconsistency between hibernation_platform_enter() and hibernation_snapshot(), because the latter calls hibernation_ops->end() after failing hibernation_ops->begin(), while the former doesn't do that. Make hibernation_snapshot() behave in the same way as hibernation_platform_enter() in that respect. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index d97ba8615c30..d26f04e92743 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -330,7 +330,7 @@ int hibernation_snapshot(int platform_mode) error = platform_begin(platform_mode); if (error) - return error; + goto Close; /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); -- cgit v1.2.3 From ce4410116c5debfb0e049f5db4b5cd6211e05b80 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:45 +0200 Subject: PM / Suspend: Fix ordering of calls in suspend error paths The ACPI suspend code calls suspend_nvs_free() at a wrong place, which may lead to a memory leak if there's an error executing acpi_pm_prepare(), because acpi_pm_finish() will not be called in that case. However, the root cause of this problem is the apparently confusing ordering of calls in suspend error paths that needs to be fixed. In addition to that, fix a typo in a label name in suspend.c. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/suspend.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5f8d09f94325..7335952ee473 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - return error; + goto Platform_finish; } error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Platfrom_finish; + goto Platform_finish; } if (suspend_ops->prepare_late) { error = suspend_ops->prepare_late(); if (error) - goto Power_up_devices; + goto Platform_wake; } if (suspend_test(TEST_PLATFORM)) @@ -180,10 +180,9 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->wake) suspend_ops->wake(); - Power_up_devices: dpm_resume_noirq(PMSG_RESUME); - Platfrom_finish: + Platform_finish: if (suspend_ops->finish) suspend_ops->finish(); -- cgit v1.2.3 From e15bacbebb9dcc95f148f28dfc83a6d5e48b60b8 Mon Sep 17 00:00:00 2001 From: Dan Kruchinin Date: Wed, 14 Jul 2010 14:31:57 +0400 Subject: padata: Make two separate cpumasks The aim of this patch is to make two separate cpumasks for padata parallel and serial workers respectively. It allows user to make more thin and sophisticated configurations of padata framework. For example user may bind parallel and serial workers to non-intersecting CPU groups to gain better performance. Also each padata instance has notifiers chain for its cpumasks now. If either parallel or serial or both masks were changed all interested subsystems will get notification about that. It's especially useful if padata user uses algorithm for callback CPU selection according to serial cpumask. Signed-off-by: Dan Kruchinin Signed-off-by: Herbert Xu --- kernel/padata.c | 471 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 347 insertions(+), 124 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 450d67d394b0..84d0ca9dac9c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -35,9 +35,9 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; - target_cpu = cpumask_first(pd->cpumask); + target_cpu = cpumask_first(pd->cpumask.pcpu); for (cpu = 0; cpu < cpu_index; cpu++) - target_cpu = cpumask_next(target_cpu, pd->cpumask); + target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu); return target_cpu; } @@ -53,26 +53,27 @@ static int padata_cpu_hash(struct padata_priv *padata) * Hash the sequence numbers to the cpus by taking * seq_nr mod. number of cpus in use. */ - cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask); + cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu); return padata_index_to_cpu(pd, cpu_index); } -static void padata_parallel_worker(struct work_struct *work) +static void padata_parallel_worker(struct work_struct *parallel_work) { - struct padata_queue *queue; + struct padata_parallel_queue *pqueue; struct parallel_data *pd; struct padata_instance *pinst; LIST_HEAD(local_list); local_bh_disable(); - queue = container_of(work, struct padata_queue, pwork); - pd = queue->pd; + pqueue = container_of(parallel_work, + struct padata_parallel_queue, work); + pd = pqueue->pd; pinst = pd->pinst; - spin_lock(&queue->parallel.lock); - list_replace_init(&queue->parallel.list, &local_list); - spin_unlock(&queue->parallel.lock); + spin_lock(&pqueue->parallel.lock); + list_replace_init(&pqueue->parallel.list, &local_list); + spin_unlock(&pqueue->parallel.lock); while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -94,7 +95,7 @@ static void padata_parallel_worker(struct work_struct *work) * @pinst: padata instance * @padata: object to be parallelized * @cb_cpu: cpu the serialization callback function will run on, - * must be in the cpumask of padata. + * must be in the serial cpumask of padata(i.e. cpumask.cbcpu). * * The parallelization callback function will run with BHs off. * Note: Every object which is parallelized by padata_do_parallel @@ -104,7 +105,7 @@ int padata_do_parallel(struct padata_instance *pinst, struct padata_priv *padata, int cb_cpu) { int target_cpu, err; - struct padata_queue *queue; + struct padata_parallel_queue *queue; struct parallel_data *pd; rcu_read_lock_bh(); @@ -115,7 +116,7 @@ int padata_do_parallel(struct padata_instance *pinst, if (!(pinst->flags & PADATA_INIT)) goto out; - if (!cpumask_test_cpu(cb_cpu, pd->cpumask)) + if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu)) goto out; err = -EBUSY; @@ -136,13 +137,13 @@ int padata_do_parallel(struct padata_instance *pinst, padata->seq_nr = atomic_inc_return(&pd->seq_nr); target_cpu = padata_cpu_hash(padata); - queue = per_cpu_ptr(pd->queue, target_cpu); + queue = per_cpu_ptr(pd->pqueue, target_cpu); spin_lock(&queue->parallel.lock); list_add_tail(&padata->list, &queue->parallel.list); spin_unlock(&queue->parallel.lock); - queue_work_on(target_cpu, pinst->wq, &queue->pwork); + queue_work_on(target_cpu, pinst->wq, &queue->work); out: rcu_read_unlock_bh(); @@ -172,11 +173,11 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) { int cpu, num_cpus; int next_nr, next_index; - struct padata_queue *queue, *next_queue; + struct padata_parallel_queue *queue, *next_queue; struct padata_priv *padata; struct padata_list *reorder; - num_cpus = cpumask_weight(pd->cpumask); + num_cpus = cpumask_weight(pd->cpumask.pcpu); /* * Calculate the percpu reorder queue and the sequence @@ -185,13 +186,13 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) next_nr = pd->processed; next_index = next_nr % num_cpus; cpu = padata_index_to_cpu(pd, next_index); - next_queue = per_cpu_ptr(pd->queue, cpu); + next_queue = per_cpu_ptr(pd->pqueue, cpu); if (unlikely(next_nr > pd->max_seq_nr)) { next_nr = next_nr - pd->max_seq_nr - 1; next_index = next_nr % num_cpus; cpu = padata_index_to_cpu(pd, next_index); - next_queue = per_cpu_ptr(pd->queue, cpu); + next_queue = per_cpu_ptr(pd->pqueue, cpu); pd->processed = 0; } @@ -215,7 +216,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) goto out; } - queue = per_cpu_ptr(pd->queue, smp_processor_id()); + queue = per_cpu_ptr(pd->pqueue, smp_processor_id()); if (queue->cpu_index == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); goto out; @@ -229,7 +230,7 @@ out: static void padata_reorder(struct parallel_data *pd) { struct padata_priv *padata; - struct padata_queue *queue; + struct padata_serial_queue *squeue; struct padata_instance *pinst = pd->pinst; /* @@ -268,13 +269,13 @@ static void padata_reorder(struct parallel_data *pd) return; } - queue = per_cpu_ptr(pd->queue, padata->cb_cpu); + squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu); - spin_lock(&queue->serial.lock); - list_add_tail(&padata->list, &queue->serial.list); - spin_unlock(&queue->serial.lock); + spin_lock(&squeue->serial.lock); + list_add_tail(&padata->list, &squeue->serial.list); + spin_unlock(&squeue->serial.lock); - queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork); + queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work); } spin_unlock_bh(&pd->lock); @@ -300,19 +301,19 @@ static void padata_reorder_timer(unsigned long arg) padata_reorder(pd); } -static void padata_serial_worker(struct work_struct *work) +static void padata_serial_worker(struct work_struct *serial_work) { - struct padata_queue *queue; + struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); local_bh_disable(); - queue = container_of(work, struct padata_queue, swork); - pd = queue->pd; + squeue = container_of(serial_work, struct padata_serial_queue, work); + pd = squeue->pd; - spin_lock(&queue->serial.lock); - list_replace_init(&queue->serial.list, &local_list); - spin_unlock(&queue->serial.lock); + spin_lock(&squeue->serial.lock); + list_replace_init(&squeue->serial.list, &local_list); + spin_unlock(&squeue->serial.lock); while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -339,18 +340,18 @@ static void padata_serial_worker(struct work_struct *work) void padata_do_serial(struct padata_priv *padata) { int cpu; - struct padata_queue *queue; + struct padata_parallel_queue *pqueue; struct parallel_data *pd; pd = padata->pd; cpu = get_cpu(); - queue = per_cpu_ptr(pd->queue, cpu); + pqueue = per_cpu_ptr(pd->pqueue, cpu); - spin_lock(&queue->reorder.lock); + spin_lock(&pqueue->reorder.lock); atomic_inc(&pd->reorder_objects); - list_add_tail(&padata->list, &queue->reorder.list); - spin_unlock(&queue->reorder.lock); + list_add_tail(&padata->list, &pqueue->reorder.list); + spin_unlock(&pqueue->reorder.lock); put_cpu(); @@ -358,51 +359,88 @@ void padata_do_serial(struct padata_priv *padata) } EXPORT_SYMBOL(padata_do_serial); -/* Allocate and initialize the internal cpumask dependend resources. */ -static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, - const struct cpumask *cpumask) +static int padata_setup_cpumasks(struct parallel_data *pd, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) { - int cpu, cpu_index, num_cpus; - struct padata_queue *queue; - struct parallel_data *pd; + if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) + return -ENOMEM; - cpu_index = 0; + cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); + if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { + free_cpumask_var(pd->cpumask.cbcpu); + return -ENOMEM; + } - pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); - if (!pd) - goto err; + cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); + return 0; +} - pd->queue = alloc_percpu(struct padata_queue); - if (!pd->queue) - goto err_free_pd; +static void __padata_list_init(struct padata_list *pd_list) +{ + INIT_LIST_HEAD(&pd_list->list); + spin_lock_init(&pd_list->lock); +} - if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) - goto err_free_queue; +/* Initialize all percpu queues used by serial workers */ +static void padata_init_squeues(struct parallel_data *pd) +{ + int cpu; + struct padata_serial_queue *squeue; - cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + for_each_cpu(cpu, pd->cpumask.cbcpu) { + squeue = per_cpu_ptr(pd->squeue, cpu); + squeue->pd = pd; + __padata_list_init(&squeue->serial); + INIT_WORK(&squeue->work, padata_serial_worker); + } +} - for_each_cpu(cpu, pd->cpumask) { - queue = per_cpu_ptr(pd->queue, cpu); +/* Initialize all percpu queues used by parallel workers */ +static void padata_init_pqueues(struct parallel_data *pd) +{ + int cpu_index, num_cpus, cpu; + struct padata_parallel_queue *pqueue; - queue->pd = pd; + cpu_index = 0; + for_each_cpu(cpu, pd->cpumask.pcpu) { + pqueue = per_cpu_ptr(pd->pqueue, cpu); + pqueue->pd = pd; + pqueue->cpu_index = cpu_index; + + __padata_list_init(&pqueue->reorder); + __padata_list_init(&pqueue->parallel); + INIT_WORK(&pqueue->work, padata_parallel_worker); + atomic_set(&pqueue->num_obj, 0); + } - queue->cpu_index = cpu_index; - cpu_index++; + num_cpus = cpumask_weight(pd->cpumask.pcpu); + pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; +} - INIT_LIST_HEAD(&queue->reorder.list); - INIT_LIST_HEAD(&queue->parallel.list); - INIT_LIST_HEAD(&queue->serial.list); - spin_lock_init(&queue->reorder.lock); - spin_lock_init(&queue->parallel.lock); - spin_lock_init(&queue->serial.lock); +/* Allocate and initialize the internal cpumask dependend resources. */ +static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) +{ + struct parallel_data *pd; - INIT_WORK(&queue->pwork, padata_parallel_worker); - INIT_WORK(&queue->swork, padata_serial_worker); - } + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); + if (!pd) + goto err; - num_cpus = cpumask_weight(pd->cpumask); - pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + pd->pqueue = alloc_percpu(struct padata_parallel_queue); + if (!pd->pqueue) + goto err_free_pd; + + pd->squeue = alloc_percpu(struct padata_serial_queue); + if (!pd->squeue) + goto err_free_pqueue; + if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0) + goto err_free_squeue; + padata_init_pqueues(pd); + padata_init_squeues(pd); setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); @@ -412,8 +450,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, return pd; -err_free_queue: - free_percpu(pd->queue); +err_free_squeue: + free_percpu(pd->squeue); +err_free_pqueue: + free_percpu(pd->pqueue); err_free_pd: kfree(pd); err: @@ -422,8 +462,10 @@ err: static void padata_free_pd(struct parallel_data *pd) { - free_cpumask_var(pd->cpumask); - free_percpu(pd->queue); + free_cpumask_var(pd->cpumask.pcpu); + free_cpumask_var(pd->cpumask.cbcpu); + free_percpu(pd->pqueue); + free_percpu(pd->squeue); kfree(pd); } @@ -431,11 +473,12 @@ static void padata_free_pd(struct parallel_data *pd) static void padata_flush_queues(struct parallel_data *pd) { int cpu; - struct padata_queue *queue; + struct padata_parallel_queue *pqueue; + struct padata_serial_queue *squeue; - for_each_cpu(cpu, pd->cpumask) { - queue = per_cpu_ptr(pd->queue, cpu); - flush_work(&queue->pwork); + for_each_cpu(cpu, pd->cpumask.pcpu) { + pqueue = per_cpu_ptr(pd->pqueue, cpu); + flush_work(&pqueue->work); } del_timer_sync(&pd->timer); @@ -443,9 +486,9 @@ static void padata_flush_queues(struct parallel_data *pd) if (atomic_read(&pd->reorder_objects)) padata_reorder(pd); - for_each_cpu(cpu, pd->cpumask) { - queue = per_cpu_ptr(pd->queue, cpu); - flush_work(&queue->swork); + for_each_cpu(cpu, pd->cpumask.cbcpu) { + squeue = per_cpu_ptr(pd->squeue, cpu); + flush_work(&squeue->work); } BUG_ON(atomic_read(&pd->refcnt) != 0); @@ -475,21 +518,63 @@ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) { struct parallel_data *pd_old = pinst->pd; + int notification_mask = 0; pinst->flags |= PADATA_RESET; rcu_assign_pointer(pinst->pd, pd_new); synchronize_rcu(); + if (!pd_old) + goto out; - if (pd_old) { - padata_flush_queues(pd_old); - padata_free_pd(pd_old); - } + padata_flush_queues(pd_old); + if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu)) + notification_mask |= PADATA_CPU_PARALLEL; + if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) + notification_mask |= PADATA_CPU_SERIAL; + + padata_free_pd(pd_old); + if (notification_mask) + blocking_notifier_call_chain(&pinst->cpumask_change_notifier, + notification_mask, pinst); +out: pinst->flags &= ~PADATA_RESET; } +/** + * padata_register_cpumask_notifier - Registers a notifier that will be called + * if either pcpu or cbcpu or both cpumasks change. + * + * @pinst: A poineter to padata instance + * @nblock: A pointer to notifier block. + */ +int padata_register_cpumask_notifier(struct padata_instance *pinst, + struct notifier_block *nblock) +{ + return blocking_notifier_chain_register(&pinst->cpumask_change_notifier, + nblock); +} +EXPORT_SYMBOL(padata_register_cpumask_notifier); + +/** + * padata_unregister_cpumask_notifier - Unregisters cpumask notifier + * registered earlier using padata_register_cpumask_notifier + * + * @pinst: A pointer to data instance. + * @nlock: A pointer to notifier block. + */ +int padata_unregister_cpumask_notifier(struct padata_instance *pinst, + struct notifier_block *nblock) +{ + return blocking_notifier_chain_unregister( + &pinst->cpumask_change_notifier, + nblock); +} +EXPORT_SYMBOL(padata_unregister_cpumask_notifier); + + /* If cpumask contains no active cpu, we mark the instance as invalid. */ static bool padata_validate_cpumask(struct padata_instance *pinst, const struct cpumask *cpumask) @@ -504,13 +589,82 @@ static bool padata_validate_cpumask(struct padata_instance *pinst, } /** - * padata_set_cpumask - set the cpumask that padata should use + * padata_get_cpumask: Fetch serial or parallel cpumask from the + * given padata instance and copy it to @out_mask + * + * @pinst: A pointer to padata instance + * @cpumask_type: Specifies which cpumask will be copied. + * Possible values are PADATA_CPU_SERIAL *or* PADATA_CPU_PARALLEL + * corresponding to serial and parallel cpumask respectively. + * @out_mask: A