diff options
33 files changed, 351 insertions, 139 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 5a9238a2883c..467251f7fef6 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2129,6 +2129,8 @@ Some of the relevant points of interest are as follows: <li> <a href="#Hotplug CPU">Hotplug CPU</a>. <li> <a href="#Scheduler and RCU">Scheduler and RCU</a>. <li> <a href="#Tracing and RCU">Tracing and RCU</a>. +<li> <a href="#Accesses to User Memory and RCU"> +Accesses to User Memory and RCU</a>. <li> <a href="#Energy Efficiency">Energy Efficiency</a>. <li> <a href="#Scheduling-Clock Interrupts and RCU"> Scheduling-Clock Interrupts and RCU</a>. @@ -2512,7 +2514,7 @@ disabled across the entire RCU read-side critical section. <p> It is possible to use tracing on RCU code, but tracing itself uses RCU. -For this reason, <tt>rcu_dereference_raw_notrace()</tt> +For this reason, <tt>rcu_dereference_raw_check()</tt> is provided for use by tracing, which avoids the destructive recursion that could otherwise ensue. This API is also used by virtualization in some architectures, @@ -2521,6 +2523,75 @@ cannot be used. The tracing folks both located the requirement and provided the needed fix, so this surprise requirement was relatively painless. +<h3><a name="Accesses to User Memory and RCU"> +Accesses to User Memory and RCU</a></h3> + +<p> +The kernel needs to access user-space memory, for example, to access +data referenced by system-call parameters. +The <tt>get_user()</tt> macro does this job. + +<p> +However, user-space memory might well be paged out, which means +that <tt>get_user()</tt> might well page-fault and thus block while +waiting for the resulting I/O to complete. +It would be a very bad thing for the compiler to reorder +a <tt>get_user()</tt> invocation into an RCU read-side critical +section. 
+For example, suppose that the source code looked like this: + +<blockquote> +<pre> + 1 rcu_read_lock(); + 2 p = rcu_dereference(gp); + 3 v = p->value; + 4 rcu_read_unlock(); + 5 get_user(user_v, user_p); + 6 do_something_with(v, user_v); +</pre> +</blockquote> + +<p> +The compiler must not be permitted to transform this source code into +the following: + +<blockquote> +<pre> + 1 rcu_read_lock(); + 2 p = rcu_dereference(gp); + 3 get_user(user_v, user_p); // BUG: POSSIBLE PAGE FAULT!!! + 4 v = p->value; + 5 rcu_read_unlock(); + 6 do_something_with(v, user_v); +</pre> +</blockquote> + +<p> +If the compiler did make this transformation in a +<tt>CONFIG_PREEMPT=n</tt> kernel build, and if <tt>get_user()</tt> did +page fault, the result would be a quiescent state in the middle +of an RCU read-side critical section. +This misplaced quiescent state could result in line 4 being +a use-after-free access, which could be bad for your kernel's +actuarial statistics. +Similar examples can be constructed with the call to <tt>get_user()</tt> +preceding the <tt>rcu_read_lock()</tt>. + +<p> +Unfortunately, <tt>get_user()</tt> doesn't have any particular +ordering properties, and in some architectures the underlying <tt>asm</tt> +isn't even marked <tt>volatile</tt>. +And even if it were marked <tt>volatile</tt>, the above access to +<tt>p->value</tt> is not volatile, so the compiler would not have any +reason to keep those two accesses in order. + +<p> +Therefore, the Linux-kernel definitions of <tt>rcu_read_lock()</tt> +and <tt>rcu_read_unlock()</tt> must act as compiler barriers, +at least for outermost instances of <tt>rcu_read_lock()</tt> and +<tt>rcu_read_unlock()</tt> within a nested set of RCU read-side critical +sections. 
+ <h3><a name="Energy Efficiency">Energy Efficiency</a></h3> <p> diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 13e88fc00f01..f48f4621ccbc 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -57,6 +57,12 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that CONFIG_PREEMPT_RCU case, you might see stall-warning messages. + You can use the rcutree.kthread_prio kernel boot parameter to + increase the scheduling priority of RCU's kthreads, which can + help avoid this problem. However, please note that doing this + can increase your system's context-switch rate and thus degrade + performance. + o A periodic interrupt whose handler takes longer than the time interval between successive pairs of interrupts. This can prevent RCU's kthreads and softirq handlers from running. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7ccd158b3894..f3fcd6140ee1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4047,6 +4047,10 @@ rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_cpu_stall_ftrace_dump= [KNL] + Dump ftrace buffer after reporting RCU CPU + stall warning. + rcupdate.rcu_cpu_stall_suppress= [KNL] Suppress RCU CPU stall warning messages. 
diff --git a/MAINTAINERS b/MAINTAINERS index 6426db5198f0..527317026492 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9326,7 +9326,7 @@ F: drivers/misc/lkdtm/* LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM) M: Alan Stern <stern@rowland.harvard.edu> -M: Andrea Parri <andrea.parri@amarulasolutions.com> +M: Andrea Parri <parri.andrea@gmail.com> M: Will Deacon <will@kernel.org> M: Peter Zijlstra <peterz@infradead.org> M: Boqun Feng <boqun.feng@gmail.com> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index aab8ba40ce38..4b0bab2607e4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -264,15 +264,13 @@ int __cpu_disable(void) return 0; } -static DECLARE_COMPLETION(cpu_died); - /* * called on the thread which is asking for a CPU to be shutdown - * waits until shutdown has completed, or it is timed out. */ void __cpu_die(unsigned int cpu) { - if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + if (!cpu_wait_death(cpu, 5)) { pr_err("CPU%u: cpu didn't die\n", cpu); return; } @@ -319,7 +317,7 @@ void arch_cpu_idle_dead(void) * this returns, power and/or clocks can be removed at any point * from this CPU and its cache by platform_cpu_kill(). 
*/ - complete(&cpu_died); + (void)cpu_report_death(); /* * Ensure that the cache lines associated with that completion are diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index bb7c8cc77f1a..04b2b927bb5a 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm, */ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) { - return rcu_dereference_raw_notrace(kvm->memslots[0]); + return rcu_dereference_raw_check(kvm->memslots[0]); } extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 7389db538c30..6fa42e9c4e6f 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -29,6 +29,7 @@ static bool pci_mmcfg_running_state; static bool pci_mmcfg_arch_init_failed; static DEFINE_MUTEX(pci_mmcfg_lock); +#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map) LIST_HEAD(pci_mmcfg_list); @@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new) struct pci_mmcfg_region *cfg; /* keep list sorted by segment and starting bus number */ - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) { + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) { if (cfg->segment > new->segment || (cfg->segment == new->segment && cfg->start_bus >= new->start_bus)) { @@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) { struct pci_mmcfg_region *cfg; - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) if (cfg->segment == segment && cfg->start_bus <= bus && bus <= cfg->end_bus) return cfg; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 9c0edf2fc0dd..2f9d0d20b836 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -14,6 +14,7 @@ 
#include <linux/slab.h> #include <linux/mm.h> #include <linux/highmem.h> +#include <linux/lockdep.h> #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/kmod.h> @@ -80,6 +81,7 @@ struct acpi_ioremap { static LIST_HEAD(acpi_ioremaps); static DEFINE_MUTEX(acpi_ioremap_lock); +#define acpi_ioremap_lock_held() lock_is_held(&acpi_ioremap_lock.dep_map) static void __init acpi_request_region (struct acpi_generic_address *gas, unsigned int length, char *desc) @@ -206,7 +208,7 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, &acpi_ioremaps, list) + list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held()) if (map->phys <= phys && phys + size <= map->phys + map->size) return map; @@ -249,7 +251,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, &acpi_ioremaps, list) + list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held()) if (map->virt <= virt && virt + size <= map->virt + map->size) return map; diff --git a/drivers/base/base.h b/drivers/base/base.h index b405436ee28e..0d32544b6f91 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -165,6 +165,7 @@ static inline int devtmpfs_init(void) { return 0; } /* Device links support */ extern int device_links_read_lock(void); extern void device_links_read_unlock(int idx); +extern int device_links_read_lock_held(void); extern int device_links_check_suppliers(struct device *dev); extern void device_links_driver_bound(struct device *dev); extern void device_links_driver_cleanup(struct device *dev); diff --git a/drivers/base/core.c b/drivers/base/core.c index 636058bbf48a..eede79630ceb 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -68,6 +68,11 @@ void device_links_read_unlock(int idx) { srcu_read_unlock(&device_links_srcu, idx); } + +int device_links_read_lock_held(void) +{ + return 
srcu_read_lock_held(&device_links_srcu); +} #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); @@ -91,6 +96,13 @@ void device_links_read_unlock(int not_used) { up_read(&device_links_lock); } + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int device_links_read_lock_held(void) +{ + return lockdep_is_held(&device_links_lock); +} +#endif #endif /* !CONFIG_SRCU */ /** diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b75335508d2c..50def99df970 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -287,7 +287,8 @@ static int rpm_get_suppliers(struct device *dev) { struct device_link *link; - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) { int retval; if (!(link->flags & DL_FLAG_PM_RUNTIME) || @@ -309,7 +310,8 @@ static void rpm_put_suppliers(struct device *dev) { struct device_link *link; - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) { if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND) continue; @@ -1640,7 +1642,8 @@ void pm_runtime_clean_up_links(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.consumers, s_node) { + list_for_each_entry_rcu(link, &dev->links.consumers, s_node, + device_links_read_lock_held()) { if (link->flags & DL_FLAG_STATELESS) continue; @@ -1662,7 +1665,8 @@ void pm_runtime_get_suppliers(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) if (link->flags & DL_FLAG_PM_RUNTIME) { link->supplier_preactivated = true; refcount_inc(&link->rpm_active); @@ -1683,7 +1687,8 @@ void pm_runtime_put_suppliers(struct device *dev) idx = device_links_read_lock(); - 
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) if (link->supplier_preactivated) { link->supplier_preactivated = false; if (refcount_dec_not_one(&link->rpm_active)) diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 9b83865d24f9..0027d4c8087c 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -31,9 +31,7 @@ struct rcu_sync { */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - !rcu_read_lock_bh_held() && - !rcu_read_lock_sched_held(), + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), "suspicious rcu_sync_is_idle() usage"); return !READ_ONCE(rsp->gp_state); /* GP_IDLE */ } diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e91ec9ddcd30..4158b7212936 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -41,6 +41,24 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) /* + * Check during list traversal that we are within an RCU reader + */ + +#define check_arg_count_one(dummy) + +#ifdef CONFIG_PROVE_RCU_LIST +#define __list_check_rcu(dummy, cond, extra...) \ + ({ \ + check_arg_count_one(extra); \ + RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \ + "RCU-list traversed in non-reader section!"); \ + }) +#else +#define __list_check_rcu(dummy, cond, extra...) \ + ({ check_arg_count_one(extra); }) +#endif + +/* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know @@ -343,14 +361,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. + * @cond: optional lockdep expression if called from non-RCU protection. 
* * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ +#define list_for_each_entry_rcu(pos, head, member, cond...) \ + for (__list_check_rcu(dummy, ## cond, 0), \ + pos = list_entry_rcu((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** @@ -616,13 +636,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. + * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(pos, head, member) \ - for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ +#define hlist_for_each_entry_rcu(pos, head, member, cond...) \ + for (__list_check_rcu(dummy, ## cond, 0), \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ @@ -642,10 +664,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * not do any RCU debugging or tracing. 
*/ #define hlist_for_each_entry_rcu_notrace(pos, head, member) \ - for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\ + for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ - pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\ + pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f7167478c1d..80d6056f5855 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -221,6 +221,7 @@ int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); int rcu_read_lock_sched_held(void); +int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -241,6 +242,12 @@ static inline int rcu_read_lock_sched_held(void) { return !preemptible(); } + +static inline int rcu_read_lock_any_held(void) +{ + return !preemptible(); +} + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU @@ -476,7 +483,7 @@ do { \ * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). 
*/ -#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) +#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4861cf8e274b..4aca3f4379d2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -620,7 +620,7 @@ static void print_lock(struct held_lock *hlock) return; } - printk(KERN_CONT "%p", hlock->instance); + printk(KERN_CONT "%px", hlock->instance); print_lock_name(lock); printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); } diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 5ec3ea4028e2..4aa02eee8f6c 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -8,6 +8,17 @@ menu "RCU Debugging" config PROVE_RCU def_bool PROVE_LOCKING +config PROVE_RCU_LIST + bool "RCU list lockdep debugging" + depends on PROVE_RCU && RCU_EXPERT + default n + help + Enable RCU lockdep checking for list usages. By default it is + turned off since there are several list RCU users that still + need to be converted to pass a lockdep expression. To prevent + false-positive splats, we keep it default disabled but once all + users are converted, we can remove this config option. 
+ config TORTURE_TEST tristate default n diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5290b01de534..8fd4f82c9b3d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -227,6 +227,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) #ifdef CONFIG_RCU_STALL_COMMON +extern int rcu_cpu_stall_ftrace_dump; extern int rcu_cpu_stall_suppress; extern int rcu_cpu_stall_timeout; int rcu_jiffies_till_stall_check(void); diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 71b64648464e..822a39da0533 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -76,27 +76,6 @@ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) return !*rsclp->tails[seg]; } -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) -{ - return rsclp->head; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. 
- */ -static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); - return rsclp->tails[RCU_NEXT_TAIL]; -} - void rcu_segcblist_init(struct rcu_segcblist *rsclp); void rcu_segcblist_disable(struct rcu_segcblist *rsclp); bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 7a6890b23c5f..5a879d073c1c 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -89,7 +89,7 @@ torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable static char *perf_type = "rcu"; module_param(perf_type, charp, 0444); -MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)"); +MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)"); static int nrealreaders; static int nrealwriters; @@ -375,6 +375,14 @@ rcu_perf_writer(void *arg) if (holdoff) schedule_timeout_uninterruptible(holdoff * HZ); + /* + * Wait until rcu_end_inkernel_boot() is called for normal GP tests + * so that RCU is not always expedited for normal GP tests. + * The system_state test is approximate, but works well in practice. + */ + while (!gp_exp && system_state != SYSTEM_RUNNING) + schedule_timeout_uninterruptible(1); + t = ktime_get_mono_fast_ns(); if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) { t_rcu_perf_writer_started = t; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index fce4e7e6f502..b22947324423 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -161,6 +161,7 @@ static atomic_long_t n_rcu_torture_timers; static long n_barrier_attempts; static long n_barrier_successes; /* did rcu_barrier test succeed? 
*/ static struct list_head rcu_torture_removed; +static unsigned long shutdown_jiffies; static int rcu_torture_writer_state; #define RTWS_FIXED_DELAY 0 @@ -228,6 +229,15 @@ static u64 notrace rcu_trace_clock_local(void) } #endif /* #else #ifdef CONFIG_RCU_TRACE */ +/* + * Stop aggressive CPU-hog tests a bit before the end of the test in order + * to avoid interfering with test shutdown. + */ +static bool shutdown_time_arrived(void) +{ + return shutdown_secs && time_after(jiffies, shutdown_jiffies - 30 * HZ); +} + static unsigned long boost_starttime; /* jiffies of next boost test start. */ static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ @@ -1713,12 +1723,14 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) } // Give the scheduler a chance, even on nohz_full CPUs. -static void rcu_torture_fwd_prog_cond_resched(void) +static void rcu_torture_fwd_prog_cond_resched(unsigned long iter) { if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) { - if (need_resched()) + // Real call_rcu() floods hit userspace, so emulate that. 
+ if (need_resched() || (iter & 0xfff)) schedule(); } else { + // No userspace emulation: CB invocation throttles call_rcu() cond_resched(); } } @@ -1746,7 +1758,7 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void) spin_unlock_irqrestore(&rcu_fwd_lock, flags); kfree(rfcp); freed++; - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(freed); } return freed; } @@ -1785,15 +1797,17 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) WRITE_ONCE(rcu_fwd_startat, jiffies); stopat = rcu_fwd_startat + dur; while (time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { idx = cur_ops->readlock(); udelay(10); cur_ops->readunlock(idx); if (!fwd_progress_need_resched || need_resched()) - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(1); } (*tested_tries)++; if (!time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { (*tested)++; cver = READ_ONCE(rcu_torture_current_version) - cver; @@ -1852,6 +1866,7 @@ static void rcu_torture_fwd_prog_cr(void) gps = cur_ops->get_gp_seq(); rcu_launder_gp_seq_start = gps; while (time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { rfcp = READ_ONCE(rcu_fwd_cb_head); rfcpn = NULL; @@ -1875,7 +1890,7 @@ static void rcu_torture_fwd_prog_cr(void) rfcp->rfc_gps = 0; } cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs); } stoppedat = jiffies; n_launders_cb_snap = READ_ONCE(n_launders_cb); @@ -1884,7 +1899,8 @@ static void rcu_torture_fwd_prog_cr(void) cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. 
*/ (void)rcu_torture_fwd_prog_cbfree(); - if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) { + if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop) && + !shutdown_time_arrived()) { WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED); pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n", __func__, @@ -2465,6 +2481,7 @@ rcu_torture_init(void) goto unwind; rcutor_hp = firsterr; } + shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); if (firsterr) goto unwind; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index cf0e886314f2..5dffade2d7cd 1 |
