diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 10:30:07 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 10:30:07 -0700 |
| commit | fbf4432ff71b7a25bef993a5312906946d27f446 (patch) | |
| tree | cf3e0024af4b8f9376eff75743f1fa1526e40900 /kernel | |
| parent | c054be10ffdbd5507a1fd738067d76acfb4808fd (diff) | |
| parent | 0cfb6aee70bddbef6ec796b255f588ce0e126766 (diff) | |
| download | linux-fbf4432ff71b7a25bef993a5312906946d27f446.tar.gz linux-fbf4432ff71b7a25bef993a5312906946d27f446.tar.bz2 linux-fbf4432ff71b7a25bef993a5312906946d27f446.zip | |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- most of the rest of MM
- a small number of misc things
- lib/ updates
- checkpatch
- autofs updates
- ipc/ updates
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (126 commits)
ipc: optimize semget/shmget/msgget for lots of keys
ipc/sem: play nicer with large nsops allocations
ipc/sem: drop sem_checkid helper
ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t
ipc: convert sem_undo_list.refcnt from atomic_t to refcount_t
ipc: convert ipc_namespace.count from atomic_t to refcount_t
kcov: support compat processes
sh: defconfig: cleanup from old Kconfig options
mn10300: defconfig: cleanup from old Kconfig options
m32r: defconfig: cleanup from old Kconfig options
drivers/pps: use surrounding "if PPS" to remove numerous dependency checks
drivers/pps: aesthetic tweaks to PPS-related content
cpumask: make cpumask_next() out-of-line
kmod: move #ifdef CONFIG_MODULES wrapper to Makefile
kmod: split off umh headers into its own file
MAINTAINERS: clarify kmod is just a kernel module loader
kmod: split out umh code into its own file
test_kmod: flip INT checks to be consistent
test_kmod: remove paranoid UINT_MAX check on uint range processing
vfat: deduplicate hex2bin()
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 3 | ||||
| -rw-r--r-- | kernel/fork.c | 6 | ||||
| -rw-r--r-- | kernel/kcov.c | 1 | ||||
| -rw-r--r-- | kernel/kmod.c | 563 | ||||
| -rw-r--r-- | kernel/locking/rtmutex-debug.c | 2 | ||||
| -rw-r--r-- | kernel/locking/rtmutex.c | 35 | ||||
| -rw-r--r-- | kernel/locking/rtmutex_common.h | 12 | ||||
| -rw-r--r-- | kernel/memremap.c | 60 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 2 | ||||
| -rw-r--r-- | kernel/sched/deadline.c | 50 | ||||
| -rw-r--r-- | kernel/sched/debug.c | 2 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 39 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 9 | ||||
| -rw-r--r-- | kernel/sched/topology.c | 2 | ||||
| -rw-r--r-- | kernel/smp.c | 2 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_functions_graph.c | 2 | ||||
| -rw-r--r-- | kernel/umh.c | 568 |
19 files changed, 696 insertions, 666 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 9c323a6daa46..ed470aac53da 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,12 +5,13 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ - signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ + signal.o sys.o umh.o workqueue.o pid.o task_work.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o +obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/fork.c b/kernel/fork.c index 24a4c0be80d5..6f1b0af00bda 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -37,6 +37,7 @@ #include <linux/binfmts.h> #include <linux/mman.h> #include <linux/mmu_notifier.h> +#include <linux/hmm.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/vmacache.h> @@ -824,6 +825,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_owner(mm, p); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); + hmm_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; @@ -903,6 +905,7 @@ void __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + hmm_mm_destroy(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); @@ -1459,8 +1462,7 @@ static void rt_mutex_init_task(struct task_struct *p) { raw_spin_lock_init(&p->pi_lock); #ifdef CONFIG_RT_MUTEXES - p->pi_waiters = RB_ROOT; - p->pi_waiters_leftmost = NULL; + p->pi_waiters = RB_ROOT_CACHED; p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif diff --git a/kernel/kcov.c b/kernel/kcov.c index cd771993f96f..3f693a0f6f3e 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -270,6 +270,7 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) static const struct file_operations kcov_fops = { .open = kcov_open, .unlocked_ioctl = kcov_ioctl, + .compat_ioctl = kcov_ioctl, .mmap = kcov_mmap, .release = kcov_close, }; diff --git a/kernel/kmod.c b/kernel/kmod.c index 2f37acde640b..bc6addd9152b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -1,23 +1,6 @@ /* - kmod, the new module loader (replaces kerneld) - Kirk Petersen - - Reorganized not to be a daemon by Adam Richter, with guidance - from Greg Zornetzer. - - Modified to avoid chroot and file sharing problems. - Mikael Pettersson - - Limit the concurrent number of kmod modprobes to catch loops from - "modprobe needs a service that is in a module". - Keith Owens <kaos@ocs.com.au> December 1999 - - Unblock all signals when we exec a usermode process. - Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000 - - call_usermodehelper wait flag, and remove exec_usermodehelper. - Rusty Russell <rusty@rustcorp.com.au> Jan 2003 -*/ + * kmod - the kernel module loader + */ #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/task.h> @@ -45,15 +28,6 @@ #include <trace/events/module.h> -#define CAP_BSET (void *)1 -#define CAP_PI (void *)2 - -static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; -static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; -static DEFINE_SPINLOCK(umh_sysctl_lock); -static DECLARE_RWSEM(umhelper_sem); - -#ifdef CONFIG_MODULES /* * Assuming: * @@ -202,536 +176,3 @@ int __request_module(bool wait, const char *fmt, ...) return ret; } EXPORT_SYMBOL(__request_module); - -#endif /* CONFIG_MODULES */ - -static void call_usermodehelper_freeinfo(struct subprocess_info *info) -{ - if (info->cleanup) - (*info->cleanup)(info); - kfree(info); -} - -static void umh_complete(struct subprocess_info *sub_info) -{ - struct completion *comp = xchg(&sub_info->complete, NULL); - /* - * See call_usermodehelper_exec(). If xchg() returns NULL - * we own sub_info, the UMH_KILLABLE caller has gone away - * or the caller used UMH_NO_WAIT. - */ - if (comp) - complete(comp); - else - call_usermodehelper_freeinfo(sub_info); -} - -/* - * This is the task which runs the usermode application - */ -static int call_usermodehelper_exec_async(void *data) -{ - struct subprocess_info *sub_info = data; - struct cred *new; - int retval; - - spin_lock_irq(¤t->sighand->siglock); - flush_signal_handlers(current, 1); - spin_unlock_irq(¤t->sighand->siglock); - - /* - * Our parent (unbound workqueue) runs with elevated scheduling - * priority. Avoid propagating that into the userspace child. - */ - set_user_nice(current, 0); - - retval = -ENOMEM; - new = prepare_kernel_cred(current); - if (!new) - goto out; - - spin_lock(&umh_sysctl_lock); - new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); - new->cap_inheritable = cap_intersect(usermodehelper_inheritable, - new->cap_inheritable); - spin_unlock(&umh_sysctl_lock); - - if (sub_info->init) { - retval = sub_info->init(sub_info, new); - if (retval) { - abort_creds(new); - goto out; - } - } - - commit_creds(new); - - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); -out: - sub_info->retval = retval; - /* - * call_usermodehelper_exec_sync() will call umh_complete - * if UHM_WAIT_PROC. - */ - if (!(sub_info->wait & UMH_WAIT_PROC)) - umh_complete(sub_info); - if (!retval) - return 0; - do_exit(0); -} - -/* Handles UMH_WAIT_PROC. */ -static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) -{ - pid_t pid; - - /* If SIGCLD is ignored sys_wait4 won't populate the status. */ - kernel_sigaction(SIGCHLD, SIG_DFL); - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); - if (pid < 0) { - sub_info->retval = pid; - } else { - int ret = -ECHILD; - /* - * Normally it is bogus to call wait4() from in-kernel because - * wait4() wants to write the exit code to a userspace address. - * But call_usermodehelper_exec_sync() always runs as kernel - * thread (workqueue) and put_user() to a kernel address works - * OK for kernel threads, due to their having an mm_segment_t - * which spans the entire address space. - * - * Thus the __user pointer cast is valid here. - */ - sys_wait4(pid, (int __user *)&ret, 0, NULL); - - /* - * If ret is 0, either call_usermodehelper_exec_async failed and - * the real error code is already in sub_info->retval or - * sub_info->retval is 0 anyway, so don't mess with it then. - */ - if (ret) - sub_info->retval = ret; - } - - /* Restore default kernel sig handler */ - kernel_sigaction(SIGCHLD, SIG_IGN); - - umh_complete(sub_info); -} - -/* - * We need to create the usermodehelper kernel thread from a task that is affine - * to an optimized set of CPUs (or nohz housekeeping ones) such that they - * inherit a widest affinity irrespective of call_usermodehelper() callers with - * possibly reduced affinity (eg: per-cpu workqueues). We don't want - * usermodehelper targets to contend a busy CPU. - * - * Unbound workqueues provide such wide affinity and allow to block on - * UMH_WAIT_PROC requests without blocking pending request (up to some limit). - * - * Besides, workqueues provide the privilege level that caller might not have - * to perform the usermodehelper request. - * - */ -static void call_usermodehelper_exec_work(struct work_struct *work) -{ - struct subprocess_info *sub_info = - container_of(work, struct subprocess_info, work); - - if (sub_info->wait & UMH_WAIT_PROC) { - call_usermodehelper_exec_sync(sub_info); - } else { - pid_t pid; - /* - * Use CLONE_PARENT to reparent it to kthreadd; we do not - * want to pollute current->children, and we need a parent - * that always ignores SIGCHLD to ensure auto-reaping. - */ - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, - CLONE_PARENT | SIGCHLD); - if (pid < 0) { - sub_info->retval = pid; - umh_complete(sub_info); - } - } -} - -/* - * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY - * (used for preventing user land processes from being created after the user - * land has been frozen during a system-wide hibernation or suspend operation). - * Should always be manipulated under umhelper_sem acquired for write. - */ -static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; - -/* Number of helpers running */ -static atomic_t running_helpers = ATOMIC_INIT(0); - -/* - * Wait queue head used by usermodehelper_disable() to wait for all running - * helpers to finish. - */ -static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); - -/* - * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled - * to become 'false'. - */ -static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); - -/* - * Time to wait for running_helpers to become zero before the setting of - * usermodehelper_disabled in usermodehelper_disable() fails - */ -#define RUNNING_HELPERS_TIMEOUT (5 * HZ) - -int usermodehelper_read_trylock(void) -{ - DEFINE_WAIT(wait); - int ret = 0; - - down_read(&umhelper_sem); - for (;;) { - prepare_to_wait(&usermodehelper_disabled_waitq, &wait, - TASK_INTERRUPTIBLE); - if (!usermodehelper_disabled) - break; - - if (usermodehelper_disabled == UMH_DISABLED) - ret = -EAGAIN; - - up_read(&umhelper_sem); - - if (ret) - break; - - schedule(); - try_to_freeze(); - - down_read(&umhelper_sem); - } - finish_wait(&usermodehelper_disabled_waitq, &wait); - return ret; -} -EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); - -long usermodehelper_read_lock_wait(long timeout) -{ - DEFINE_WAIT(wait); - - if (timeout < 0) - return -EINVAL; - - down_read(&umhelper_sem); - for (;;) { - prepare_to_wait(&usermodehelper_disabled_waitq, &wait, - TASK_UNINTERRUPTIBLE); - if (!usermodehelper_disabled) - break; - - up_read(&umhelper_sem); - - timeout = schedule_timeout(timeout); - if (!timeout) - break; - - down_read(&umhelper_sem); - } - finish_wait(&usermodehelper_disabled_waitq, &wait); - return timeout; -} -EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); - -void usermodehelper_read_unlock(void) -{ - up_read(&umhelper_sem); -} -EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); - -/** - * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. - * @depth: New value to assign to usermodehelper_disabled. - * - * Change the value of usermodehelper_disabled (under umhelper_sem locked for - * writing) and wakeup tasks waiting for it to change. - */ -void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) -{ - down_write(&umhelper_sem); - usermodehelper_disabled = depth; - wake_up(&usermodehelper_disabled_waitq); - up_write(&umhelper_sem); -} - -/** - * __usermodehelper_disable - Prevent new helpers from being started. - * @depth: New value to assign to usermodehelper_disabled. - * - * Set usermodehelper_disabled to @depth and wait for running helpers to exit. - */ -int __usermodehelper_disable(enum umh_disable_depth depth) -{ - long retval; - - if (!depth) - return -EINVAL; - - down_write(&umhelper_sem); - usermodehelper_disabled = depth; - up_write(&umhelper_sem); - - /* - * From now on call_usermodehelper_exec() won't start any new - * helpers, so it is sufficient if running_helpers turns out to - * be zero at one point (it may be increased later, but that - * doesn't matter). - */ - retval = wait_event_timeout(running_helpers_waitq, - atomic_read(&running_helpers) == 0, - RUNNING_HELPERS_TIMEOUT); - if (retval) - return 0; - - __usermodehelper_set_disable_depth(UMH_ENABLED); - return -EAGAIN; -} - -static void helper_lock(void) -{ - atomic_inc(&running_helpers); - smp_mb__after_atomic(); -} - -static void helper_unlock(void) -{ - if (atomic_dec_and_test(&running_helpers)) - wake_up(&running_helpers_waitq); -} - -/** - * call_usermodehelper_setup - prepare to call a usermode helper - * @path: path to usermode executable - * @argv: arg vector for process - * @envp: environment for process - * @gfp_mask: gfp mask for memory allocation - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * Returns either %NULL on allocation failure, or a subprocess_info - * structure. This should be passed to call_usermodehelper_exec to - * exec the process and free the structure. - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. - */ -struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, - char **envp, gfp_t gfp_mask, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - struct subprocess_info *sub_info; - sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); - if (!sub_info) - goto out; - - INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); - -#ifdef CONFIG_STATIC_USERMODEHELPER - sub_info->path = CONFIG_STATIC_USERMODEHELPER_PATH; -#else - sub_info->path = path; -#endif - sub_info->argv = argv; - sub_info->envp = envp; - - sub_info->cleanup = cleanup; - sub_info->init = init; - sub_info->data = data; - out: - return sub_info; -} -EXPORT_SYMBOL(call_usermodehelper_setup); - -/** - * call_usermodehelper_exec - start a usermode application - * @sub_info: information about the subprocessa - * @wait: wait for the application to finish and return status. - * when UMH_NO_WAIT don't wait at all, but you get no useful error back - * when the program couldn't be exec'ed. This makes it safe to call - * from interrupt context. - * - * Runs a user-space application. The application is started - * asynchronously if wait is not set, and runs as a child of system workqueues. - * (ie. it runs with full root capabilities and optimized affinity). - */ -int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) -{ - DECLARE_COMPLETION_ONSTACK(done); - int retval = 0; - - if (!sub_info->path) { - call_usermodehelper_freeinfo(sub_info); - return -EINVAL; - } - helper_lock(); - if (usermodehelper_disabled) { - retval = -EBUSY; - goto out; - } - - /* - * If there is no binary for us to call, then just return and get out of - * here. This allows us to set STATIC_USERMODEHELPER_PATH to "" and - * disable all call_usermodehelper() calls. - */ - if (strlen(sub_info->path) == 0) - goto out; - - /* - * Set the completion pointer only if there is a waiter. - * This makes it possible to use umh_complete to free - * the data structure in case of UMH_NO_WAIT. - */ - sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; - sub_info->wait = wait; - - queue_work(system_unbound_wq, &sub_info->work); - if (wait == UMH_NO_WAIT) /* task has freed sub_info */ - goto unlock; - - if (wait & UMH_KILLABLE) { - retval = wait_for_completion_killable(&done); - if (!retval) - goto wait_done; - - /* umh_complete() will see NULL and free sub_info */ - if (xchg(&sub_info->complete, NULL)) - goto unlock; - /* fallthrough, umh_complete() was already called */ - } - - wait_for_completion(&done); -wait_done: - retval = sub_info->retval; -out: - call_usermodehelper_freeinfo(sub_info); -unlock: - helper_unlock(); - return retval; -} -EXPORT_SYMBOL(call_usermodehelper_exec); - -/** - * call_usermodehelper() - prepare and start a usermode application - * @path: path to usermode executable - * @argv: arg vector for process - * @envp: environment for process - * @wait: wait for the application to finish and return status. - * when UMH_NO_WAIT don't wait at all, but you get no useful error back - * when the program couldn't be exec'ed. This makes it safe to call - * from interrupt context. - * - * This function is the equivalent to use call_usermodehelper_setup() and - * call_usermodehelper_exec(). - */ -int call_usermodehelper(const char *path, char **argv, char **envp, int wait) -{ - struct subprocess_info *info; - gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - - info = call_usermodehelper_setup(path, argv, envp, gfp_mask, - NULL, NULL, NULL); - if (info == NULL) - return -ENOMEM; - - return call_usermodehelper_exec(info, wait); -} -EXPORT_SYMBOL(call_usermodehelper); - -static int proc_cap_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table t; - unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; - kernel_cap_t new_cap; - int err, i; - - if (write && (!capable(CAP_SETPCAP) || - !capable(CAP_SYS_MODULE))) - return -EPERM; - - /* - * convert from the global kernel_cap_t to the ulong array to print to - * userspace if this is a read. - */ - spin_lock(&umh_sysctl_lock); - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { - if (table->data == CAP_BSET) - cap_array[i] = usermodehelper_bset.cap[i]; - else if (table->data == CAP_PI) - cap_array[i] = usermodehelper_inheritable.cap[i]; - else - BUG(); - } - spin_unlock(&umh_sysctl_lock); - - t = *table; - t.data = &cap_array; - - /* - * actually read or write and array of ulongs from userspace. Remember - * these are least significant 32 bits first - */ - err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); - if (err < 0) - return err; - - /* - * convert from the sysctl array of ulongs to the kernel_cap_t - * internal representation - */ - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) - new_cap.cap[i] = cap_array[i]; - - /* - * Drop everything not in the new_cap (but don't add things) - */ - spin_lock(&umh_sysctl_lock); - if (write) { - if (table->data == CAP_BSET) - usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); - if (table->data == CAP_PI) - usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); - } - spin_unlock(&umh_sysctl_lock); - - return 0; -} - -struct ctl_table usermodehelper_table[] = { - { - .procname = "bset", - .data = CAP_BSET, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), - .mode = 0600, - .proc_handler = proc_cap_handler, - }, - { - .procname = "inheritable", - .data = CAP_PI, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), - .mode = 0600, - .proc_handler = proc_cap_handler, - }, - { } -}; diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index ac35e648b0e5..f4a74e78d467 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -58,7 +58,7 @@ static void printk_lock(struct rt_mutex *lock, int print_owner) void rt_mutex_debug_task_free(struct task_struct *task) { - DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters)); + DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 649dc9d3951a..6f3dba6e4e9e 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -271,10 +271,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &lock->waiters.rb_node; + struct rb_node **link = &lock->waiters.rb_root.rb_node; struct rb_node *parent = NULL; struct rt_mutex_waiter *entry; - int leftmost = 1; + bool leftmost = true; while (*link) { parent = *link; @@ -283,15 +283,12 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) - lock->waiters_leftmost = &waiter->tree_entry; - rb_link_node(&waiter->tree_entry, parent, link); - rb_insert_color(&waiter->tree_entry, &lock->waiters); + rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost); } static void @@ -300,20 +297,17 @@ rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) if (RB_EMPTY_NODE(&waiter->tree_entry)) return; - if (lock->waiters_leftmost == &waiter->tree_entry) - lock->waiters_leftmost = rb_next(&waiter->tree_entry); - - rb_erase(&waiter->tree_entry, &lock->waiters); + rb_erase_cached(&waiter->tree_entry, &lock->waiters); RB_CLEAR_NODE(&waiter->tree_entry); } static void rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &task->pi_waiters.rb_node; + struct rb_node **link = &task->pi_waiters.rb_root.rb_node; struct rb_node *parent = NULL; struct rt_mutex_waiter *entry; - int leftmost = 1; + bool leftmost = true; while (*link) { parent = *link; @@ -322,15 +316,12 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) - task->pi_waiters_leftmost = &waiter->pi_tree_entry; - rb_link_node(&waiter->pi_tree_entry, parent, link); - rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); + rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost); } static void @@ -339,10 +330,7 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) return; - if (task->pi_waiters_leftmost == &waiter->pi_tree_entry) - task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry); - - rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); + rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters); RB_CLEAR_NODE(&waiter->pi_tree_entry); } @@ -1657,8 +1645,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name, { lock->owner = NULL; raw_spin_lock_init(&lock->wait_lock); - lock->waiters = RB_ROOT; - lock->waiters_leftmost = NULL; + lock->waiters = RB_ROOT_CACHED; if (name && key) debug_rt_mutex_init(lock, name, key); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 8d039b928d61..7453be0485a5 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -45,7 +45,7 @@ struct rt_mutex_waiter { static inline int rt_mutex_has_waiters(struct rt_mutex *lock) { - return !RB_EMPTY_ROOT(&lock->waiters); + return !RB_EMPTY_ROOT(&lock->waiters.rb_root); } static inline struct rt_mutex_waiter * @@ -53,8 +53,8 @@ rt_mutex_top_waiter(struct rt_mutex *lock) { struct rt_mutex_waiter *w; - w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter, - tree_entry); + w = rb_entry(lock->waiters.rb_leftmost, + struct rt_mutex_waiter, tree_entry); BUG_ON(w->lock != lock); return w; @@ -62,14 +62,14 @@ rt_mutex_top_waiter(struct rt_mutex *lock) static inline int task_has_pi_waiters(struct task_struct *p) { - return !RB_EMPTY_ROOT(&p->pi_waiters); + return !RB_EMPTY_ROOT(&p->pi_waiters.rb_root); } static inline struct rt_mutex_waiter * task_top_pi_waiter(struct task_struct *p) { - return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter, - pi_tree_entry); + return rb_entry(p->pi_waiters.rb_leftmost, + struct rt_mutex_waiter, pi_tree_entry); } #else diff --git a/kernel/memremap.c b/kernel/memremap.c index 066e73c2fcc9..6bcbfbf1a8fd 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -11,13 +11,14 @@ * General Public License for more details. */ #include <linux/radix-tree.h> -#include <linux/memremap.h> #include <linux/device.h> #include <linux/types.h> #include <linux/pfn_t.h> #include <linux/io.h> #include <linux/mm.h> #include <linux/memory_hotplug.h> +#include <linux/swap.h> +#include <linux/swapops.h> #ifndef ioremap_cache /* temporary while we convert existing ioremap_cache users to memremap */ @@ -219,6 +220,34 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff) for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \ pgoff += 1UL << order, order = order_at((res), pgoff)) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp) +{ + struct page *page = device_private_entry_to_page(entry); + + /* + * The page_fault() callback must migrate page back to system memory + * so that CPU can access it. This might fail for various reasons + * (device issue, device was unsafely unplugged, ...). When such + * error conditions happen, the callback must return VM_FAULT_SIGBUS. + * + * Note that because memory cgroup charges are accounted to the device + * memory, this should never fail because of memory restrictions (but + * allocation of regular system page might still fail because we are + * out of memory). + * + * There is a more in-depth description of what that callback can and + * cannot do, in include/linux/memremap.h + */ + return page->pgmap->page_fault(vma, addr, page, flags, pmdp); +} +EXPORT_SYMBOL(device_private_entry_fault); +#endif /* CONFIG_DEVICE_PRIVATE */ + static void pgmap_radix_release(struct resource *res) { unsigned long pgoff, order; @@ -356,6 +385,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, } pgmap->ref = ref; pgmap->res = &page_map->res; + pgmap->type = MEMORY_DEVICE_HOST; + pgmap->page_fault = NULL; + pgmap->page_free = NULL; + pgmap->data = NULL; mutex_lock(&pgmap_lock); error = 0; @@ -466,3 +499,28 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) return pgmap ? pgmap->altmap : NULL; } #endif /* CONFIG_ZONE_DEVICE */ + + +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page) +{ + int count = page_ref_dec_return(page); + + /* + * If refcount is 1 then page is freed and refcount is stable as nobody + * holds a reference on the page. + */ + if (count == 1) { + /* Clear Active bit in case of parallel mark_page_accessed */ + __ClearPageActive(page); + __ClearPageWaiters(page); + + page->mapping = NULL; + mem_cgroup_uncharge(page); + + page->pgmap->page_free(page, page->pgmap->data); + } else if (!count) + __put_page(page); +} +EXPORT_SYMBOL(put_zone_device_private_or_public_page); +#en |
