diff options
57 files changed, 1932 insertions, 1424 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index fc103d7a0474..cdb20d41a44a 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -310,6 +310,12 @@ over a rather long period of time, but improvements are always welcome! code under the influence of preempt_disable(), you instead need to use synchronize_irq() or synchronize_sched(). + This same limitation also applies to synchronize_rcu_bh() + and synchronize_srcu(), as well as to the asynchronous and + expedited forms of the three primitives, namely call_rcu(), + call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(), + synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited(). + 12. Any lock acquired by an RCU callback must be acquired elsewhere with softirq disabled, e.g., via spin_lock_irqsave(), spin_lock_bh(), etc. Failing to disable irq on a given diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 523364e4e1f1..1927151b386b 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -99,7 +99,7 @@ In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is printed: INFO: rcu_preempt detected stall on CPU - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1 + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending (t=65000 jiffies) The "(64628 ticks this GP)" indicates that this CPU has taken more @@ -116,13 +116,13 @@ number between the two "/"s is the value of the nesting, which will be a small positive number if in the idle loop and a very large positive number (as shown above) otherwise. -For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the -CPU is not in the process of trying to force itself into dyntick-idle -state, the "." indicates that the CPU has not given up forcing RCU -into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1" -indicates that the CPU has not recented forced RCU into dyntick-idle -mode (it would otherwise indicate the number of microseconds remaining -in this forced state). +For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is +not in the process of trying to force itself into dyntick-idle state, the +"." indicates that the CPU has not given up forcing RCU into dyntick-idle +mode (it would be "H" otherwise), and the "timer not pending" indicates +that the CPU has not recently forced RCU into dyntick-idle mode (it +would otherwise indicate the number of microseconds remaining in this +forced state). Multiple Warnings From One Stall diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index f6f15ce39903..672d19083252 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -333,23 +333,23 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The output of "cat rcu/rcu_pending" looks as follows: rcu_sched: - 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 - 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 - 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 - 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 - 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 - 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 - 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 - 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 + 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nn=146741 + 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nn=155792 + 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nn=136629 + 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nn=137723 + 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nn=123110 + 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nn=137456 + 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nn=120834 + 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nn=144888 rcu_bh: - 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 - 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 - 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 - 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 - 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 - 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 - 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 - 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nn=145314 + 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nn=143180 + 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nn=117936 + 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nn=134863 + 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nn=110671 + 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nn=133235 + 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nn=110921 + 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nn=118542 As always, this is once again split into "rcu_sched" and "rcu_bh" portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional @@ -377,17 +377,6 @@ o "gpc" is the number of times that an old grace period had o "gps" is the number of times that a new grace period had started, but this CPU was not yet aware of it. -o "nf" is the number of times that this CPU suspected that the - current grace period had run for too long, and thus needed to - be forced. - - Please note that "forcing" consists of sending resched IPIs - to holdout CPUs. If that CPU really still is in an old RCU - read-side critical section, then we really do have to wait for it. - The assumption behing "forcing" is that the CPU is not still in - an old RCU read-side critical section, but has not yet responded - for some other reason. - o "nn" is the number of times that this CPU needed nothing. Alert readers will note that the rcu "nn" number for a given CPU very closely matches the rcu_bh "np" number for that same CPU. This diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 69ee188515e7..bf0f6de2aa00 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -873,7 +873,7 @@ d. Do you need to treat NMI handlers, hardirq handlers, and code segments with preemption disabled (whether via preempt_disable(), local_irq_save(), local_bh_disable(), or some other mechanism) as if they were explicit RCU readers? - If so, you need RCU-sched. + If so, RCU-sched is the only choice that will work for you. e. Do you need RCU grace periods to complete even in the face of softirq monopolization of one or more of the CPUs? For @@ -884,7 +884,12 @@ f. Is your workload too update-intensive for normal use of RCU, but inappropriate for other synchronization mechanisms? If so, consider SLAB_DESTROY_BY_RCU. But please be careful! -g. Otherwise, use RCU. +g. Do you need read-side critical sections that are respected + even though they are in the middle of the idle loop, during + user-mode execution, or on an offlined CPU? If so, SRCU is the + only choice that will work for you. + +h. Otherwise, use RCU. Of course, this all assumes that you have determined that RCU is in fact the right tool for your job. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c1..55ada0471f93 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2385,6 +2385,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. + Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc5..1401a7587973 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -281,4 +281,14 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_RCU_USER_QS + bool + help + Provide kernel entry/exit hooks necessary for userspace + RCU extended quiescent state. Syscalls need to be wrapped inside + rcu_user_exit()-rcu_user_enter() through the slow path using + TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs + are already protected inside rcu_irq_enter/rcu_irq_exit() but + preemption or signal handling on irq exit still need to be protected. + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index d6fde98b74b3..83638aa096d5 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tty.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/reg.h> #include <asm/uaccess.h> @@ -54,9 +55,12 @@ cpu_idle(void) /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ + rcu_idle_enter(); while (!need_resched()) cpu_relax(); - schedule(); + + rcu_idle_exit(); + schedule_preempt_disabled(); } } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 35ddc02bfa4a..a41ad90a97a6 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,6 +166,7 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); + preempt_disable(); /* Do nothing. */ cpu_idle(); } diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 66fd01728790..7f65be6f7f17 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/elfcore.h> #include <linux/mqueue.h> #include <linux/reboot.h> +#include <linux/rcupdate.h> //#define DEBUG @@ -74,6 +75,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); /* @@ -86,6 +88,7 @@ void cpu_idle (void) idle = default_idle; idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index ff95f50efea5..2eb7fa5bf9d8 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/pagemap.h> +#include <linux/rcupdate.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> @@ -69,12 +70,14 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); if (!frv_dma_inprogress && idle) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0e9c315be104..f153ed1a4c08 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/reboot.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -78,8 +79,10 @@ void (*idle)(void) = default_idle; void cpu_idle(void) { while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index dd6fc1449741..3e316ec0b835 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <linux/utsname.h> #include <linux/tracehook.h> +#include <linux/rcupdate.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -279,6 +280,7 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); if (can_do_pal_halt) { current_thread_info()->status &= ~TS_POLLING; /* @@ -309,6 +311,7 @@ cpu_idle (void) normal_xtp(); #endif } + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); if (cpu_is_offline(cpu)) diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 3a4a32b27208..384e63f3a4c4 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/hardirq.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -82,6 +83,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void) = pm_idle; @@ -90,6 +92,7 @@ void cpu_idle (void) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index c488e3cfab53..ac2892e49c7c 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/init_task.h> #include <linux/mqueue.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -75,8 +76,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 7dab0cd36466..e9cceba193b6 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -107,6 +108,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ for (;;) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); @@ -121,6 +123,7 @@ void cpu_idle(void) } idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 2c05a9292a81..8c6b6b6561f0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -48,6 +48,7 @@ #include <linux/unistd.h> #include <linux/kallsyms.h> #include <linux/uaccess.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/asm-offsets.h> @@ -69,8 +70,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); } diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 2707023c7563..637970cfd3f4 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -27,6 +27,7 @@ #include <linux/reboot.h> #include <linux/elfcore.h> #include <linux/pm.h> +#include <linux/rcupdate.h> void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -50,9 +51,10 @@ void __noreturn cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); - + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 664a60e8dfb4..c17de0db6736 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,6 +705,7 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; + rcu_switch(from, to); switch_to(from, to, from); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 50a1d1f9b6d3..20c49b8450b8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -97,6 +97,7 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_RCU_USER_QS if X86_64 config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS || UPROBES) diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 000000000000..d1ac07a23979 --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007ec..c535d847e3b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,6 +89,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +128,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +144,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323f..60c78917190c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } |
