From 0ab4dc92278a0f3816e486d6350c6652a72e06c8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: usermodehelper: split setup from execution Rather than having hundreds of variations of call_usermodehelper for various pieces of usermode state which could be set up, split the info allocation and initialization from the actual process execution. This means the general pattern becomes: info = call_usermodehelper_setup(path, argv, envp); /* basic state */ call_usermodehelper_(info, stuff...); /* extra state */ call_usermodehelper_exec(info, wait); /* run process and free info */ This patch introduces wrappers for all the existing calling styles for call_usermodehelper_*, but folds their implementations into one. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Rusty Russell Cc: David Howells Cc: Björn Steinbrink Cc: Randy Dunlap --- kernel/kmod.c | 191 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 135 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index 4d32eb077179..d2dce71115d8 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -122,6 +122,7 @@ struct subprocess_info { int wait; int retval; struct file *stdin; + void (*cleanup)(char **argv, char **envp); }; /* @@ -180,6 +181,14 @@ static int ____call_usermodehelper(void *data) do_exit(0); } +void call_usermodehelper_freeinfo(struct subprocess_info *info) +{ + if (info->cleanup) + (*info->cleanup)(info->argv, info->envp); + kfree(info); +} +EXPORT_SYMBOL(call_usermodehelper_freeinfo); + /* Keventd can't block, but this (a child) can. 
*/ static int wait_for_helper(void *data) { @@ -217,7 +226,7 @@ static int wait_for_helper(void *data) } if (sub_info->wait < 0) - kfree(sub_info); + call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); return 0; @@ -252,11 +261,94 @@ static void __call_usermodehelper(struct work_struct *work) } /** - * call_usermodehelper_keys - start a usermode application - * @path: pathname for the application - * @argv: null-terminated argument list - * @envp: null-terminated environment list - * @session_keyring: session keyring for process (NULL for an empty keyring) + * call_usermodehelper_setup - prepare to call a usermode helper + * @path - path to usermode executable + * @argv - arg vector for process + * @envp - environment for process + * + * Returns either NULL on allocation failure, or a subprocess_info + * structure. This should be passed to call_usermodehelper_exec to + * exec the process and free the structure. + */ +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp) +{ + struct subprocess_info *sub_info; + sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); + if (!sub_info) + goto out; + + INIT_WORK(&sub_info->work, __call_usermodehelper); + sub_info->path = path; + sub_info->argv = argv; + sub_info->envp = envp; + + out: + return sub_info; +} +EXPORT_SYMBOL(call_usermodehelper_setup); + +/** + * call_usermodehelper_setkeys - set the session keys for usermode helper + * @info: a subprocess_info returned by call_usermodehelper_setup + * @session_keyring: the session keyring for the process + */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring) +{ + info->ring = session_keyring; +} +EXPORT_SYMBOL(call_usermodehelper_setkeys); + +/** + * call_usermodehelper_setcleanup - set a cleanup function + * @info: a subprocess_info returned by call_usermodehelper_setup + * @cleanup: a cleanup function + * + * The cleanup function is just befor ethe 
subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. + */ +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)) +{ + info->cleanup = cleanup; +} +EXPORT_SYMBOL(call_usermodehelper_setcleanup); + +/** + * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin + * @sub_info: a subprocess_info returned by call_usermodehelper_setup + * @filp: set to the write-end of a pipe + * + * This constructs a pipe, and sets the read end to be the stdin of the + * subprocess, and returns the write-end in *@filp. + */ +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp) +{ + struct file *f; + + f = create_write_pipe(); + if (IS_ERR(f)) + return PTR_ERR(f); + *filp = f; + + f = create_read_pipe(f); + if (IS_ERR(f)) { + free_write_pipe(*filp); + return PTR_ERR(f); + } + sub_info->stdin = f; + + return 0; +} +EXPORT_SYMBOL(call_usermodehelper_stdinpipe); + +/** + * call_usermodehelper_exec - start a usermode application + * @sub_info: information about the subprocessa * @wait: wait for the application to finish and return status. * when -1 don't wait at all, but you get no useful error back when * the program couldn't be exec'ed. This makes it safe to call @@ -265,33 +357,24 @@ static void __call_usermodehelper(struct work_struct *work) * Runs a user-space application. The application is started * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). - * - * Must be called from process context. Returns a negative error code - * if program was not execed successfully, or 0. 
*/ -int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) +int call_usermodehelper_exec(struct subprocess_info *sub_info, + int wait) { DECLARE_COMPLETION_ONSTACK(done); - struct subprocess_info *sub_info; int retval; - if (!khelper_wq) - return -EBUSY; - - if (path[0] == '\0') - return 0; + if (sub_info->path[0] == '\0') { + retval = 0; + goto out; + } - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); - if (!sub_info) - return -ENOMEM; + if (!khelper_wq) { + retval = -EBUSY; + goto out; + } - INIT_WORK(&sub_info->work, __call_usermodehelper); sub_info->complete = &done; - sub_info->path = path; - sub_info->argv = argv; - sub_info->envp = envp; - sub_info->ring = session_keyring; sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); @@ -299,47 +382,43 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, return 0; wait_for_completion(&done); retval = sub_info->retval; - kfree(sub_info); + + out: + call_usermodehelper_freeinfo(sub_info); return retval; } -EXPORT_SYMBOL(call_usermodehelper_keys); +EXPORT_SYMBOL(call_usermodehelper_exec); +/** + * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @filp: set to the write-end of a pipe + * + * This is a simple wrapper which executes a usermode-helper function + * with a pipe as stdin. It is implemented entirely in terms of + * lower-level call_usermodehelper_* functions. 
+ */ int call_usermodehelper_pipe(char *path, char **argv, char **envp, struct file **filp) { - DECLARE_COMPLETION(done); - struct subprocess_info sub_info = { - .work = __WORK_INITIALIZER(sub_info.work, - __call_usermodehelper), - .complete = &done, - .path = path, - .argv = argv, - .envp = envp, - .retval = 0, - }; - struct file *f; - - if (!khelper_wq) - return -EBUSY; + struct subprocess_info *sub_info; + int ret; - if (path[0] == '\0') - return 0; + sub_info = call_usermodehelper_setup(path, argv, envp); + if (sub_info == NULL) + return -ENOMEM; - f = create_write_pipe(); - if (IS_ERR(f)) - return PTR_ERR(f); - *filp = f; + ret = call_usermodehelper_stdinpipe(sub_info, filp); + if (ret < 0) + goto out; - f = create_read_pipe(f); - if (IS_ERR(f)) { - free_write_pipe(*filp); - return PTR_ERR(f); - } - sub_info.stdin = f; + return call_usermodehelper_exec(sub_info, 1); - queue_work(khelper_wq, &sub_info.work); - wait_for_completion(&done); - return sub_info.retval; + out: + call_usermodehelper_freeinfo(sub_info); + return ret; } EXPORT_SYMBOL(call_usermodehelper_pipe); -- cgit v1.2.3 From 10a0a8d4e3f6bf2d077f94344441909abe670f5a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: Add common orderly_poweroff() Various pieces of code around the kernel want to be able to trigger an orderly poweroff. This pulls them together into a single implementation. By default the poweroff command is /sbin/poweroff, but it can be set via sysctl: kernel/poweroff_cmd. This is split at whitespace, so it can include command-line arguments. This patch replaces four other instances of invoking either "poweroff" or "shutdown -h now": two sbus drivers, and acpi thermal management. sparc64 has its own "powerd"; still need to determine whether it should be replaced by orderly_poweroff(). 
Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: Andi Kleen Cc: Al Viro Cc: Arnd Bergmann Cc: David S. Miller --- kernel/sys.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 10 ++++++++++ 2 files changed, 68 insertions(+) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 4d141ae3e802..aeded9ad66ce 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2286,3 +2286,61 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, } return err ? -EFAULT : 0; } + +char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; + +static void argv_cleanup(char **argv, char **envp) +{ + argv_free(argv); +} + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + int argc; + char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); + static char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + int ret = -ENOMEM; + struct subprocess_info *info; + + if (argv == NULL) { + printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", + __func__, poweroff_cmd); + goto out; + } + + info = call_usermodehelper_setup(argv[0], argv, envp); + if (info == NULL) { + argv_free(argv); + goto out; + } + + call_usermodehelper_setcleanup(info, argv_cleanup); + + ret = call_usermodehelper_exec(info, -1); + + out: + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + + /* I guess this should try to kick off some daemon to + sync and poweroff asap. Or not even bother syncing + if we're doing an emergency shutdown? 
*/ + emergency_sync(); + kernel_power_off(); + } + + return ret; +} +EXPORT_SYMBOL_GPL(orderly_poweroff); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7063ebc6db05..44a1d699aad7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -705,6 +706,15 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, { .ctl_name = 0 } }; -- cgit v1.2.3 From 86313c488a6848b7ec2ba04e74f25f79dd32a0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: usermodehelper: Tidy up waiting Rather than using a tri-state integer for the wait flag in call_usermodehelper_exec, define a proper enum, and use that. I've preserved the integer values so that any callers I've missed should still work OK. 
Signed-off-by: Jeremy Fitzhardinge Cc: James Bottomley Cc: Randy Dunlap Cc: Christoph Hellwig Cc: Andi Kleen Cc: Paul Mackerras Cc: Johannes Berg Cc: Ralf Baechle Cc: Bjorn Helgaas Cc: Joel Becker Cc: Tony Luck Cc: Kay Sievers Cc: Srivatsa Vaddagiri Cc: Oleg Nesterov Cc: David Howells --- kernel/cpuset.c | 2 +- kernel/kmod.c | 27 ++++++++++++++++----------- kernel/sys.c | 2 +- 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b4796d850140..57e6448b171e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf) envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[i] = NULL; - call_usermodehelper(argv[0], argv, envp, 0); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); kfree(pathbuf); } diff --git a/kernel/kmod.c b/kernel/kmod.c index d2dce71115d8..78d365c524ed 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -119,7 +119,7 @@ struct subprocess_info { char **argv; char **envp; struct key *ring; - int wait; + enum umh_wait wait; int retval; struct file *stdin; void (*cleanup)(char **argv, char **envp); @@ -225,7 +225,7 @@ static int wait_for_helper(void *data) sub_info->retval = ret; } - if (sub_info->wait < 0) + if (sub_info->wait == UMH_NO_WAIT) call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); @@ -238,26 +238,31 @@ static void __call_usermodehelper(struct work_struct *work) struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); pid_t pid; - int wait = sub_info->wait; + enum umh_wait wait = sub_info->wait; /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. 
*/ - if (wait) + if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD); - if (wait < 0) - return; + switch (wait) { + case UMH_NO_WAIT: + break; - if (pid < 0) { + case UMH_WAIT_PROC: + if (pid > 0) + break; sub_info->retval = pid; + /* FALLTHROUGH */ + + case UMH_WAIT_EXEC: complete(sub_info->complete); - } else if (!wait) - complete(sub_info->complete); + } } /** @@ -359,7 +364,7 @@ EXPORT_SYMBOL(call_usermodehelper_stdinpipe); * (ie. it runs with full root capabilities). */ int call_usermodehelper_exec(struct subprocess_info *sub_info, - int wait) + enum umh_wait wait) { DECLARE_COMPLETION_ONSTACK(done); int retval; @@ -378,7 +383,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); - if (wait < 0) /* task has freed sub_info */ + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ return 0; wait_for_completion(&done); retval = sub_info->retval; diff --git a/kernel/sys.c b/kernel/sys.c index aeded9ad66ce..18987c7f6add 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2327,7 +2327,7 @@ int orderly_poweroff(bool force) call_usermodehelper_setcleanup(info, argv_cleanup); - ret = call_usermodehelper_exec(info, -1); + ret = call_usermodehelper_exec(info, UMH_NO_WAIT); out: if (ret && force) { -- cgit v1.2.3 From 471d0558045fe35f8c5f291c1ee63815eb9c2dcd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 12 Jul 2007 16:55:07 -0400 Subject: PM: Remove deprecated sysfs files This patch (as932) removes the deprecated sysfs .../power/state attribute files. 
Signed-off-by: Alan Stern Acked-by: Pavel Machek Signed-off-by: Greg Kroah-Hartman --- kernel/power/Kconfig | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 495b7d4dd330..73328476761c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -65,18 +65,6 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. -config PM_SYSFS_DEPRECATED - bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" - depends on PM && SYSFS - default n - help - The driver model started out with a sysfs file intended to provide - a userspace hook for device power management. This feature has never - worked very well, except for limited testing purposes, and so it will - be removed. It's not clear that a generic mechanism could really - handle the wide variability of device power states; any replacements - are likely to be bus or driver specific. - config SOFTWARE_SUSPEND bool "Software Suspend (Hibernation)" depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) -- cgit v1.2.3 From 83c54070ee1a2d05c89793884bea1a03f2851ed4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 19 Jul 2007 01:47:05 -0700 Subject: mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). 
[akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Bryan Wu Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: "Luck, Tony" Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: Greg Ungerer Cc: Matthew Wilcox Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Acked-by: Kyle McMartin Acked-by: Haavard Skinnemoen Acked-by: Ralf Baechle Acked-by: Andi Kleen Signed-off-by: Andrew Morton [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds --- kernel/futex.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 5c3f45d07c53..a12425051ee9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -346,15 +346,20 @@ static int futex_handle_fault(unsigned long address, vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { - switch (handle_mm_fault(mm, vma, address, 1)) { - case VM_FAULT_MINOR: - ret = 0; - current->min_flt++; - break; - case VM_FAULT_MAJOR: + int fault; + fault = handle_mm_fault(mm, vma, address, 1); + if (unlikely((fault & VM_FAULT_ERROR))) { +#if 0 + /* XXX: let's do this when we verify it is OK */ + if (ret & VM_FAULT_OOM) + ret = -ENOMEM; +#endif + } else { ret = 0; - current->maj_flt++; - break; + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; } } if (!fshared) -- cgit v1.2.3 From 328616e3b76859f1abdd08a8df1ddbb7bb81f807 Mon Sep 17 00:00:00 2001 
From: Andrew Morton Date: Thu, 19 Jul 2007 01:47:26 -0700 Subject: freezer: run show_state() when freezing times out To see which tasks are stuck where. Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Alan Stern Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index e0233d8422b9..b850173e7561 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -157,6 +157,7 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) freeze_user_space ? "user space processes" : "kernel threads", TIMEOUT / HZ, todo); + show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { if (freeze_user_space && !is_user_space(p)) -- cgit v1.2.3 From a0349828d6d6f95c445674c2953ee9db75c11f8f Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Thu, 19 Jul 2007 01:47:27 -0700 Subject: PM: Do not require dev spew to get PM_DEBUG In order to enable things like PM_TRACE, you're required to enable PM_DEBUG, which sends a large spew of messages on boot, and often times can overflow dmesg buffer. Create new PM_VERBOSE and shift that to be the option that enables drivers/base/power's messages. Signed-off-by: Ben Collins Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/Kconfig | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 73328476761c..7358609e4735 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -33,13 +33,20 @@ config PM_DEBUG bool "Power Management Debug Support" depends on PM ---help--- - This option enables verbose debugging support in the Power Management - code. This is helpful when debugging and reporting various PM bugs, - like suspend support. 
+ This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_VERBOSE + bool "Verbose Power Management debugging" + depends on PM_DEBUG + default n + ---help--- + This option enables verbose messages from the Power Management code. config DISABLE_CONSOLE_SUSPEND bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" - depends on PM && PM_DEBUG + depends on PM_DEBUG default n ---help--- This option turns off the console suspend mechanism that prevents @@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND config PM_TRACE bool "Suspend/resume event tracing" - depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL + depends on PM_DEBUG && X86_32 && EXPERIMENTAL default n ---help--- This enables some cheesy code to save the last PM event point in the -- cgit v1.2.3 From 127067a9c994dff16b280f409cc7b18a54a63719 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:28 -0700 Subject: swsusp: remove incorrect code from user.c In the face of the recent change of suspend code ordering (cf. http://marc.info/?l=linux-acpi&m=117938245931603&w=2) we should also modify the code ordering in swsusp so that hibernation_ops->prepare() is executed after device_suspend(). However, for this purpose it seems reasonable to eliminate the code duplication between kernel/power/disk.c and kernel/power/user.c first. By eliminating it we can reduce the size of user.c quite substantially and remove the maintenance difficulty with making essentially the same changes in two different places. Moreover, we should also remove the calls to "platform" functions from the restore code path, since it doesn't carry out any power transition of the system, but we generally need to disable the GPEs before the restore if the 'platform' hibernation mode has been used. To do this, we can introduce two new hibernation_ops to be used in the restore code. 
This patch: Make the hibernation code in kernel/power/user.c be functionally equivalent to the corresponding code in kernel/power/disk.c, as it should be. The calls to the platform functions removed by this patch are incorrect. They should be replaced with some other "platform" invocations that will be introduced in one of the subsequent patches. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index d65305b515b1..09468ec61124 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -181,34 +181,25 @@ static inline int snapshot_suspend(int platform_suspend) return error; } -static inline int snapshot_restore(int platform_suspend) +static inline int snapshot_restore(void) { int error; mutex_lock(&pm_mutex); pm_prepare_console(); - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } suspend_console(); error = device_suspend(PMSG_PRETHAW); if (error) - goto Resume_devices; + goto Finish; error = disable_nonboot_cpus(); if (!error) error = swsusp_resume(); enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - + Finish: device_resume(); resume_console(); - Finish: pm_restore_console(); mutex_unlock(&pm_mutex); return error; @@ -274,7 +265,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_restore(data->platform_suspend); + error = snapshot_restore(); break; case SNAPSHOT_FREE: -- cgit v1.2.3 From 7777fab989b5d006903188c966058ebcd2d6342a Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 19 Jul 2007 01:47:29 -0700 Subject: swsusp: remove code duplication between disk.c and user.c Currently, much of the code in kernel/power/disk.c is duplicated in kernel/power/user.c , mainly for historical reasons. By eliminating this code duplication we can reduce the size of user.c quite substantially and remove the maintenance difficulty resulting from it. [bunk@stusta.de: kernel/power/disk.c: make code static] Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 184 +++++++++++++++++++++++++++++---------------------- kernel/power/power.h | 5 +- kernel/power/user.c | 96 ++------------------------- 3 files changed, 115 insertions(+), 170 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f445b9cd60fb..47882bfa610e 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -45,7 +45,7 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; -struct hibernation_ops *hibernation_ops; +static struct hibernation_ops *hibernation_ops; /** * hibernation_set_ops - set the global hibernate operations @@ -74,9 +74,9 @@ void hibernation_set_ops(struct hibernation_ops *ops) * platform driver if so configured and return an error code if it fails */ -static int platform_prepare(void) +static int platform_prepare(int platform_mode) { - return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? + return (platform_mode && hibernation_ops) ? 
hibernation_ops->prepare() : 0; } @@ -85,12 +85,103 @@ static int platform_prepare(void) * using the platform driver (must be called after platform_prepare()) */ -static void platform_finish(void) +static void platform_finish(int platform_mode) { - if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) + if (platform_mode && hibernation_ops) hibernation_ops->finish(); } +/** + * hibernation_snapshot - quiesce devices and create the hibernation + * snapshot image. + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the power transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_snapshot(int platform_mode) +{ + int error; + + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + goto Finish; + + error = platform_prepare(platform_mode); + if (error) + goto Finish; + + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_devices; + + error = disable_nonboot_cpus(); + if (!error) { + if (hibernation_mode != HIBERNATION_TEST) { + in_suspend = 1; + error = swsusp_suspend(); + /* Control returns here after successful restore */ + } else { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + } + } + enable_nonboot_cpus(); + Resume_devices: + platform_finish(platform_mode); + device_resume(); + resume_console(); + Finish: + return error; +} + +/** + * hibernation_restore - quiesce devices and restore the hibernation + * snapshot image. 
If successful, control returns in hibernation_snaphot() + * + * Must be called with pm_mutex held + */ + +int hibernation_restore(void) +{ + int error; + + pm_prepare_console(); + suspend_console(); + error = device_suspend(PMSG_PRETHAW); + if (error) + goto Finish; + + error = disable_nonboot_cpus(); + if (!error) + error = swsusp_resume(); + + enable_nonboot_cpus(); + Finish: + device_resume(); + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - enter the hibernation state using the + * platform driver (if available) + */ + +int hibernation_platform_enter(void) +{ + if (hibernation_ops) { + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + return hibernation_ops->enter(); + } else { + return -ENOSYS; + } +} + /** * power_down - Shut the machine down for hibernation. * @@ -111,11 +202,7 @@ static void power_down(void) kernel_restart(NULL); break; case HIBERNATION_PLATFORM: - if (hibernation_ops) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - hibernation_ops->enter(); - break; - } + hibernation_platform_enter(); } kernel_halt(); /* @@ -171,62 +258,17 @@ int hibernate(void) mdelay(5000); goto Thaw; } - - /* Free memory before shutting down devices. 
*/ - error = swsusp_shrink_memory(); - if (error) - goto Thaw; - - error = platform_prepare(); - if (error) - goto Thaw; - - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_devices; - } - error = disable_nonboot_cpus(); - if (error) - goto Enable_cpus; - - if (hibernation_mode == HIBERNATION_TEST) { - printk("swsusp debug: Waiting for 5 seconds.\n"); - mdelay(5000); - goto Enable_cpus; - } - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - error = swsusp_suspend(); - if (error) - goto Enable_cpus; - - if (in_suspend) { - enable_nonboot_cpus(); - platform_finish(); - device_resume(); - resume_console(); + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (in_suspend && !error) { pr_debug("PM: writing image.\n"); error = swsusp_write(); + swsusp_free(); if (!error) power_down(); - else { - swsusp_free(); - goto Thaw; - } } else { pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); } - - swsusp_free(); - Enable_cpus: - enable_nonboot_cpus(); - Resume_devices: - platform_finish(); - device_resume(); - resume_console(); Thaw: mutex_unlock(&pm_mutex); unprepare_processes(); @@ -301,29 +343,11 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); error = swsusp_read(); - if (error) { - swsusp_free(); - goto Thaw; - } - - pr_debug("PM: Preparing devices for restore.\n"); - - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Free; - - error = disable_nonboot_cpus(); if (!error) - swsusp_resume(); + hibernation_restore(); - enable_nonboot_cpus(); - Free: - swsusp_free(); - device_resume(); - resume_console(); - Thaw: printk(KERN_ERR "PM: Restore failed, recovering.\n"); + swsusp_free(); unprepare_processes(); Done: free_basic_memory_bitmaps(); @@ -333,7 +357,7 @@ static int software_resume(void) Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); - 
return 0; + return error; } late_initcall(software_resume); diff --git a/kernel/power/power.h b/kernel/power/power.h index 51381487103f..70c378b3f85a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -25,7 +25,10 @@ struct swsusp_info { */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) -extern struct hibernation_ops *hibernation_ops; +/* kernel/power/disk.c */ +extern int hibernation_snapshot(int platform_mode); +extern int hibernation_restore(void); +extern int hibernation_platform_enter(void); #endif extern int pfn_is_nosave(unsigned long); diff --git a/kernel/power/user.c b/kernel/power/user.c index 09468ec61124..bfed3b924093 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -128,83 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } -static inline int platform_prepare(void) -{ - int error = 0; - - if (hibernation_ops) - error = hibernation_ops->prepare(); - - return error; -} - -static inline void platform_finish(void) -{ - if (hibernation_ops) - hibernation_ops->finish(); -} - -static inline int snapshot_suspend(int platform_suspend) -{ - int error; - - mutex_lock(&pm_mutex); - /* Free memory before shutting down devices. 
*/ - error = swsusp_shrink_memory(); - if (error) - goto Finish; - - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) - goto Resume_devices; - - error = disable_nonboot_cpus(); - if (!error) { - in_suspend = 1; - error = swsusp_suspend(); - } - enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - - device_resume(); - resume_console(); - Finish: - mutex_unlock(&pm_mutex); - return error; -} - -static inline int snapshot_restore(void) -{ - int error; - - mutex_lock(&pm_mutex); - pm_prepare_console(); - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Finish; - - error = disable_nonboot_cpus(); - if (!error) - error = swsusp_resume(); - - enable_nonboot_cpus(); - Finish: - device_resume(); - resume_console(); - pm_restore_console(); - mutex_unlock(&pm_mutex); - return error; -} - static int snapshot_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -251,7 +174,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_suspend(data->platform_suspend); + error = hibernation_snapshot(data->platform_suspend); if (!error) error = put_user(in_suspend, (unsigned int __user *)arg); if (!error) @@ -265,7 +188,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_restore(); + error = hibernation_restore(); break; case SNAPSHOT_FREE: @@ -377,19 +300,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, switch (arg) { case PMOPS_PREPARE: - if (hibernation_ops) { - data->platform_suspend = 1; - error = 0; - } else { - error = -ENOSYS; - } + data->platform_suspend = 1; + error = 0; break; case PMOPS_ENTER: - if (data->platform_suspend) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - error = hibernation_ops->enter(); - } + if 
(data->platform_suspend) + error = hibernation_platform_enter(); + break; case PMOPS_FINISH: -- cgit v1.2.3 From a634cc10164d1c229fbeca33923e6a0ed939e894 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:30 -0700 Subject: swsusp: introduce restore platform operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least on some machines it is necessary to prepare the ACPI firmware for the restoration of the system memory state from the hibernation image if the "platform" mode of hibernation has been used. Namely, in that case we need to disable the GPEs before replacing the "boot" kernel with the "frozen" kernel (cf. http://bugzilla.kernel.org/show_bug.cgi?id=7887). After the restore they will be re-enabled by hibernation_ops->finish(), but if the restore fails, they have to be re-enabled by the restore code explicitly. For this purpose we can introduce two additional hibernation operations, called pre_restore() and restore_cleanup() and call them from the restore code path. Still, they should be called if the "platform" mode of hibernation has been used, so we need to pass the information about the hibernation mode from the "frozen" kernel to the "boot" kernel in the image header. Apparently, we can't drop the disabling of GPEs before the restore because of Bug #7887. We also can't do it unconditionally, because the GPEs wouldn't have been enabled after a successful restore if the suspend had been done in the 'shutdown' or 'reboot' mode.
In principle we could (and probably should) unconditionally disable the GPEs before each snapshot creation *and* before the restore, but then we'd have to unconditionally enable them after the snapshot creation as well as after the restore (or restore failure). Still, for this purpose we'd need to modify acpi_enter_sleep_state_prep() and acpi_leave_sleep_state() and we'd have to introduce some mechanism synchronizing the disabling/enabling of the GPEs with the device drivers' .suspend()/.resume() routines and with disable_/enable_nonboot_cpus(). However, this would have affected the suspend (i.e. s2ram) code as well as the hibernation, which I'd like to avoid in this patch series. Signed-off-by: Rafael J. Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 56 ++++++++++++++++++++++++++++++++++++++++++---------- kernel/power/power.h | 13 +++++++++--- kernel/power/swap.c | 20 ++++++++++++++----- kernel/power/user.c | 2 +- 4 files changed, 72 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 47882bfa610e..fa3b43b7206d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -54,7 +54,8 @@ static struct hibernation_ops *hibernation_ops; void hibernation_set_ops(struct hibernation_ops *ops) { - if (ops && !(ops->prepare && ops->enter && ops->finish)) { + if (ops && !(ops->prepare && ops->enter && ops->finish + && ops->pre_restore && ops->restore_cleanup)) { WARN_ON(1); return; } @@ -91,6 +92,31 @@ static void platform_finish(int platform_mode) hibernation_ops->finish(); } +/** + * platform_pre_restore - prepare the platform for the restoration from a + * hibernation image. If the restore fails after this function has been + * called, platform_restore_cleanup() must be called. + */ + +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? 
+ hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - switch the platform to the normal mode of + * operation after a failing restore. If platform_pre_restore() has been + * called before the failing restore, this function must be called too, + * regardless of the result of platform_pre_restore(). + */ + +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + /** * hibernation_snapshot - quiesce devices and create the hibernation * snapshot image. @@ -141,11 +167,13 @@ int hibernation_snapshot(int platform_mode) /** * hibernation_restore - quiesce devices and restore the hibernation * snapshot image. If successful, control returns in hibernation_snaphot() + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the transition. * * Must be called with pm_mutex held */ -int hibernation_restore(void) +int hibernation_restore(int platform_mode) { int error; @@ -155,11 +183,14 @@ int hibernation_restore(void) if (error) goto Finish; - error = disable_nonboot_cpus(); - if (!error) - error = swsusp_resume(); - - enable_nonboot_cpus(); + error = platform_pre_restore(platform_mode); + if (!error) { + error = disable_nonboot_cpus(); + if (!error) + error = swsusp_resume(); + enable_nonboot_cpus(); + } + platform_restore_cleanup(platform_mode); Finish: device_resume(); resume_console(); @@ -260,8 +291,12 @@ int hibernate(void) } error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (in_suspend && !error) { + unsigned int flags = 0; + + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; pr_debug("PM: writing image.\n"); - error = swsusp_write(); + error = swsusp_write(flags); swsusp_free(); if (!error) power_down(); @@ -295,6 +330,7 @@ int hibernate(void) static int software_resume(void) { int error; + unsigned int flags; mutex_lock(&pm_mutex); if (!swsusp_resume_device) 
{ @@ -342,9 +378,9 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); - error = swsusp_read(); + error = swsusp_read(&flags); if (!error) - hibernation_restore(); + hibernation_restore(flags & SF_PLATFORM_MODE); printk(KERN_ERR "PM: Restore failed, recovering.\n"); swsusp_free(); diff --git a/kernel/power/power.h b/kernel/power/power.h index 70c378b3f85a..eab3603b7caf 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -27,7 +27,7 @@ struct swsusp_info { /* kernel/power/disk.c */ extern int hibernation_snapshot(int platform_mode); -extern int hibernation_restore(void); +extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); #endif @@ -155,13 +155,20 @@ extern sector_t alloc_swapdev_block(int swap); extern void free_all_swap_pages(int swap); extern int swsusp_swap_in_use(void); +/* + * Flags that can be passed from the hibernatig hernel to the "boot" kernel in + * the image header. + */ +#define SF_PLATFORM_MODE 1 + +/* kernel/power/disk.c */ extern int swsusp_check(void); extern int swsusp_shrink_memory(void); extern void swsusp_free(void); extern int swsusp_suspend(void); extern int swsusp_resume(void); -extern int swsusp_read(void); -extern int swsusp_write(void); +extern int swsusp_read(unsigned int *flags_p); +extern int swsusp_write(unsigned int flags); extern void swsusp_close(void); extern int suspend_enter(suspend_state_t state); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8b1a1b837145..917aba100575 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,8 +33,9 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; sector_t image; + unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; char sig[10]; } __attribute__((packed)); @@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct 
bio **bio_chain) * Saving part */ -static int mark_swapfiles(sector_t start) +static int mark_swapfiles(sector_t start, unsigned int flags) { int error; @@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start) memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig,SWSUSP_SIG, 10); swsusp_header->image = start; + swsusp_header->flags = flags; error = bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { @@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages) /** * swsusp_write - Write entire image and metadata. + * @flags: flags to pass to the "boot" kernel in the image header * * It is important _NOT_ to umount filesystems at this point. We want * them synced (in case something goes wrong) but we DO not want to mark @@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages) * correctly, we'll mark system clean, anyway.) */ -int swsusp_write(void) +int swsusp_write(unsigned int flags) { struct swap_map_handle handle; struct snapshot_handle snapshot; @@ -415,7 +418,7 @@ int swsusp_write(void) if (!error) { flush_swap_writer(&handle); printk("S"); - error = mark_swapfiles(start); + error = mark_swapfiles(start, flags); printk("|\n"); } } @@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle, return error; } -int swsusp_read(void) +/** + * swsusp_read - read the hibernation image. 
+ * @flags_p: flags passed by the "frozen" kernel in the image header should + * be written into this memeory location + */ + +int swsusp_read(unsigned int *flags_p) { int error; struct swap_map_handle handle; struct snapshot_handle snapshot; struct swsusp_info *header; + *flags_p = swsusp_header->flags; if (IS_ERR(resume_bdev)) { pr_debug("swsusp: block device not initialised\n"); return PTR_ERR(resume_bdev); diff --git a/kernel/power/user.c b/kernel/power/user.c index bfed3b924093..1f24f30b951b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -188,7 +188,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = hibernation_restore(); + error = hibernation_restore(data->platform_suspend); break; case SNAPSHOT_FREE: -- cgit v1.2.3 From 10a1803d667e209914eaada9b95525252f23ec78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:31 -0700 Subject: swsusp: fix hibernation code ordering Change the code ordering so that hibernation_ops->prepare() is called after device_suspend(). This is needed so that we don't violate the ACPI specification, which states that the _PTS and _GTS system-control methods, executed from acpi_sleep_prepare(), ought to be called after devices have been put in low power states. The "Finish" label in hibernation_restore() is moved, because device_suspend() resumes devices if the suspending of them fails and the restore code ordering should reflect the hibernation code ordering. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index fa3b43b7206d..77ac605bf20a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -133,14 +133,14 @@ int hibernation_snapshot(int platform_mode) /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (error) - goto Finish; - - error = platform_prepare(platform_mode); - if (error) - goto Finish; + return error; suspend_console(); error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_console; + + error = platform_prepare(platform_mode); if (error) goto Resume_devices; @@ -159,8 +159,8 @@ int hibernation_snapshot(int platform_mode) Resume_devices: platform_finish(platform_mode); device_resume(); + Resume_console: resume_console(); - Finish: return error; } @@ -191,8 +191,8 @@ int hibernation_restore(int platform_mode) enable_nonboot_cpus(); } platform_restore_cleanup(platform_mode); - Finish: device_resume(); + Finish: resume_console(); pm_restore_console(); return error; -- cgit v1.2.3 From b1457bcc3a00a0446c7f6e2f22fd24b6d8d0a309 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:31 -0700 Subject: Hibernation: prepare to enter the low power state During hibernation we call hibernation_ops->prepare() before creating the image, but then, before saving it, we cancel the power transition by calling hibernation_ops->finish(). Thus prior to calling hibernation_ops->enter() we should let the platform firmware know that we're going to enter the low power state after all. Signed-off-by: Rafael J. 
Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Cc: Nigel Cunningham Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 77ac605bf20a..885c653509c9 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -205,12 +205,23 @@ int hibernation_restore(int platform_mode) int hibernation_platform_enter(void) { + int error; + if (hibernation_ops) { kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - return hibernation_ops->enter(); + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we + * should let the firmware know that we're going to enter the + * sleep state after all + */ + error = hibernation_ops->prepare(); + if (!error) + error = hibernation_ops->enter(); } else { - return -ENOSYS; + error = -ENOSYS; } + return error; } /** -- cgit v1.2.3 From 0c1eecfb345401629aa57c9d3b077273e56c45a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:33 -0700 Subject: Freezer: avoid freezing kernel threads prematurely Kernel threads should not have TIF_FREEZE set when user space processes are being frozen, since otherwise some of them might be frozen prematurely. To prevent this from happening we can (1) make exit_mm() unset TIF_FREEZE unconditionally just after clearing tsk->mm and (2) make try_to_freeze_tasks() check if p->mm is different from zero and PF_BORROWED_MM is unset in p->flags when user space processes are to be frozen. Namely, when user space processes are being frozen, we only should set TIF_FREEZE for tasks that have p->mm different from NULL and don't have PF_BORROWED_MM set in p->flags. For this reason task_lock() must be used to prevent try_to_freeze_tasks() from racing with use_mm()/unuse_mm(), in which p->mm and p->flags.PF_BORROWED_MM are changed under task_lock(p). 
Also, we need to prevent the following scenario from happening: * daemonize() is called by a task spawned from a user space code path * freezer checks if the task has p->mm set and the result is positive * task enters exit_mm() and clears its TIF_FREEZE * freezer sets TIF_FREEZE for the task * task calls try_to_freeze() and goes to the refrigerator, which is wrong at that point This requires us to acquire task_lock(p) before p->flags.PF_BORROWED_MM and p->mm are examined and release it after TIF_FREEZE is set for p (or it turns out that TIF_FREEZE should not be set). Signed-off-by: Rafael J. Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Cc: Nigel Cunningham Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 +++ kernel/power/process.c | 64 ++++++++++++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index e8af8d0c2483..464c2b172f07 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -594,6 +595,8 @@ static void exit_mm(struct task_struct * tsk) tsk->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); + /* We don't want this task to be frozen prematurely */ + clear_freeze_flag(tsk); task_unlock(tsk); mmput(mm); } diff --git a/kernel/power/process.c b/kernel/power/process.c index b850173e7561..e1bcdedd1464 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -40,7 +40,7 @@ static inline void frozen_process(void) current->flags |= PF_FROZEN; wmb(); } - clear_tsk_thread_flag(current, TIF_FREEZE); + clear_freeze_flag(current); } /* Refrigerator is place where frozen processes are stored :-). 
*/ @@ -75,17 +75,16 @@ void refrigerator(void) current->state = save; } -static inline void freeze_process(struct task_struct *p) +static void freeze_task(struct task_struct *p) { unsigned long flags; if (!freezing(p)) { rmb(); if (!frozen(p)) { + set_freeze_flag(p); if (p->state == TASK_STOPPED) force_sig_specific(SIGSTOP, p); - - freeze(p); spin_lock_irqsave(&p->sighand->siglock, flags); signal_wake_up(p, p->state == TASK_STOPPED); spin_unlock_irqrestore(&p->sighand->siglock, flags); @@ -99,18 +98,13 @@ static void cancel_freezing(struct task_struct *p) if (freezing(p)) { pr_debug(" clean up: %s\n", p->comm); - do_not_freeze(p); + clear_freeze_flag(p); spin_lock_irqsave(&p->sighand->siglock, flags); recalc_sigpending_and_wake(p); spin_unlock_irqrestore(&p->sighand->siglock, flags); } } -static inline int is_user_space(struct task_struct *p) -{ - return p->mm && !(p->flags & PF_BORROWED_MM); -} - static unsigned int try_to_freeze_tasks(int freeze_user_space) { struct task_struct *g, *p; @@ -122,20 +116,34 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) - continue; - - if (frozen(p)) + if (frozen(p) || !freezeable(p)) continue; - if (p->state == TASK_TRACED && frozen(p->parent)) { - cancel_freezing(p); - continue; + if (freeze_user_space) { + if (p->state == TASK_TRACED && + frozen(p->parent)) { + cancel_freezing(p); + continue; + } + /* + * Kernel threads should not have TIF_FREEZE set + * at this point, so we must ensure that either + * p->mm is not NULL *and* PF_BORROWED_MM is + * unset, or TIF_FRREZE is left unset. + * The task_lock() is necessary to prevent races + * with exit_mm() or use_mm()/unuse_mm() from + * occuring. 
+ */ + task_lock(p); + if (!p->mm || (p->flags & PF_BORROWED_MM)) { + task_unlock(p); + continue; + } + freeze_task(p); + task_unlock(p); + } else { + freeze_task(p); } - if (freeze_user_space && !is_user_space(p)) - continue; - - freeze_process(p); if (!freezer_should_skip(p)) todo++; } while_each_thread(g, p); @@ -152,22 +160,16 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) * but it cleans up leftover PF_FREEZE requests. */ printk("\n"); - printk(KERN_ERR "Stopping %s timed out after %d seconds " + printk(KERN_ERR "Freezing of %s timed out after %d seconds " "(%d tasks refusing to freeze):\n", - freeze_user_space ? "user space processes" : - "kernel threads", + freeze_user_space ? "user space " : "tasks ", TIMEOUT / HZ, todo); show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { - if (freeze_user_space && !is_user_space(p)) - continue; - task_lock(p); - if (freezeable(p) && !frozen(p) && - !freezer_should_skip(p)) + if (freezing(p) && !freezer_should_skip(p)) printk(KERN_ERR " %s\n", p->comm); - cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); @@ -211,7 +213,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (is_user_space(p) == !thaw_user_space) + if (!p->mm == thaw_user_space) continue; thaw_process(p); -- cgit v1.2.3 From f4a3a7d60c9c9a961e4c970f6eb41dd1c9d3ec21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:33 -0700 Subject: Freezer: use __set_current_state in refrigerator Use __set_current_state() as appropriate in refrigerator() instead of accessing current->state directly. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Cc: Gautham R Shenoy Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index e1bcdedd1464..9b5301c73b83 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -72,7 +72,7 @@ void refrigerator(void) schedule(); } pr_debug("%s left refrigerator\n", current->comm); - current->state = save; + __set_current_state(save); } static void freeze_task(struct task_struct *p) -- cgit v1.2.3 From e7cd8a722745a01bcfac4d4a52d53391d177da20 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:34 -0700 Subject: Freezer: return int from freeze_processes Make try_to_freeze_tasks() and freeze_processes() return -EBUSY on failure instead of the number of unfrozen tasks (none of the callers actually uses this number). Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Gautham R Shenoy Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index 9b5301c73b83..00cdbe5f518f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -105,7 +105,7 @@ static void cancel_freezing(struct task_struct *p) } } -static unsigned int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(int freeze_user_space) { struct task_struct *g, *p; unsigned long end_time; @@ -176,28 +176,25 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) read_unlock(&tasklist_lock); } - return todo; + return todo ? -EBUSY : 0; } /** * freeze_processes - tell processes to enter the refrigerator - * - * Returns 0 on success, or the number of processes that didn't freeze, - * although they were told to. 
*/ int freeze_processes(void) { - unsigned int nr_unfrozen; + int error; printk("Stopping tasks ... "); - nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_USER_SPACE); + if (error) + return error; sys_sync(); - nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + if (error) + return error; printk("done.\n"); BUG_ON(in_atomic()); -- cgit v1.2.3 From c2cf7d87d804c66e063829d5ca739053e901dc15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:35 -0700 Subject: Freezer: remove redundant check in try_to_freeze_tasks We don't need to check if todo is positive before calling time_after() in try_to_freeze_tasks(), because if todo is zero at this point, the loop will be broken anyway due to the while () condition being false. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Gautham R Shenoy Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index 00cdbe5f518f..3434940a3df1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -149,7 +149,7 @@ static int try_to_freeze_tasks(int freeze_user_space) } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ - if (todo && time_after(jiffies, end_time)) + if (time_after(jiffies, end_time)) break; } while (todo); -- cgit v1.2.3 From b10d911749d37dccfa5873d2088aea3f074b9e45 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 19 Jul 2007 01:47:36 -0700 Subject: PM: introduce hibernation and suspend notifiers Make it possible to register hibernation and suspend notifiers, so that subsystems can perform hibernation-related or suspend-related operations that should not be carried out by device drivers' .suspend() and .resume() routines. [akpm@linux-foundation.org: build fixes] [akpm@linux-foundation.org: cleanups] Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 16 ++++++++++++---- kernel/power/main.c | 9 +++++++++ kernel/power/power.h | 10 ++++++++++ kernel/power/user.c | 11 ++++++++--- 4 files changed, 39 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 885c653509c9..324ac0188ce1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -281,9 +281,16 @@ int hibernate(void) { int error; + mutex_lock(&pm_mutex); /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); @@ -294,7 +301,6 @@ int hibernate(void) if (error) goto Finish; - mutex_lock(&pm_mutex); if (hibernation_mode == HIBERNATION_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); @@ -316,12 +322,14 @@ int hibernate(void) swsusp_free(); } Thaw: - mutex_unlock(&pm_mutex); unprepare_processes(); Finish: free_basic_memory_bitmaps(); Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); atomic_inc(&snapshot_device_available); + Unlock: + mutex_unlock(&pm_mutex); return error; } diff --git a/kernel/power/main.c b/kernel/power/main.c index 
fc45ed22620f..4d26ad394fb3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -23,6 +23,8 @@ #include "power.h" +BLOCKING_NOTIFIER_HEAD(pm_chain_head); + /*This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) @@ -78,6 +80,10 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + pm_prepare_conso