-rw-r--r--  Documentation/RCU/Design/Requirements/Requirements.html   |   2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h                   |   2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c                             |   5
-rw-r--r--  drivers/acpi/osl.c                                         |   6
-rw-r--r--  drivers/base/base.h                                        |   1
-rw-r--r--  drivers/base/core.c                                        |  12
-rw-r--r--  drivers/base/power/runtime.c                               |  15
-rw-r--r--  include/linux/rcu_sync.h                                   |   4
-rw-r--r--  include/linux/rculist.h                                    |  36
-rw-r--r--  include/linux/rcupdate.h                                   |   9
-rw-r--r--  kernel/rcu/Kconfig.debug                                   |  11
-rw-r--r--  kernel/rcu/rcuperf.c                                       |  10
-rw-r--r--  kernel/rcu/rcutorture.c                                    |  29
-rw-r--r--  kernel/rcu/tree_plugin.h                                   |  21
-rw-r--r--  kernel/rcu/update.c                                        | 100
-rw-r--r--  kernel/torture.c                                           |   2
-rw-r--r--  kernel/trace/ftrace_internal.h                             |   8
-rw-r--r--  kernel/trace/trace.c                                       |   4
-rw-r--r--  net/ipv4/fib_frontend.c                                    |   3
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh   |   2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot |   1
21 files changed, 190 insertions(+), 93 deletions(-)
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index f04c467e55c5..467251f7fef6 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2514,7 +2514,7 @@ disabled across the entire RCU read-side critical section.
 <p>
 It is possible to use tracing on RCU code, but tracing itself
 uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+For this reason, <tt>rcu_dereference_raw_check()</tt>
 is provided for use by tracing, which avoids the destructive
 recursion that could otherwise ensue.
 This API is also used by virtualization in some architectures,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bb7c8cc77f1a..04b2b927bb5a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
  */
 static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
 {
-	return rcu_dereference_raw_notrace(kvm->memslots[0]);
+	return rcu_dereference_raw_check(kvm->memslots[0]);
 }
 
 extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
 static bool pci_mmcfg_running_state;
 static bool pci_mmcfg_arch_init_failed;
 static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
 
 LIST_HEAD(pci_mmcfg_list);
 
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
 	struct pci_mmcfg_region *cfg;
 
 	/* keep list sorted by segment and starting bus number */
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
 		if (cfg->segment > new->segment ||
 		    (cfg->segment == new->segment &&
 		     cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
 {
 	struct pci_mmcfg_region *cfg;
 
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
 		if (cfg->segment == segment &&
 		    cfg->start_bus <= bus && bus <= cfg->end_bus)
 			return cfg;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 9c0edf2fc0dd..2f9d0d20b836 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/lockdep.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/kmod.h>
@@ -80,6 +81,7 @@ struct acpi_ioremap {
 
 static LIST_HEAD(acpi_ioremaps);
 static DEFINE_MUTEX(acpi_ioremap_lock);
+#define acpi_ioremap_lock_held() lock_is_held(&acpi_ioremap_lock.dep_map)
 
 static void __init acpi_request_region (struct acpi_generic_address *gas,
 	unsigned int length, char *desc)
@@ -206,7 +208,7 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->phys <= phys &&
 		    phys + size <= map->phys + map->size)
 			return map;
@@ -249,7 +251,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->virt <= virt &&
 		    virt + size <= map->virt + map->size)
 			return map;
diff --git a/drivers/base/base.h b/drivers/base/base.h
index b405436ee28e..0d32544b6f91 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -165,6 +165,7 @@ static inline int devtmpfs_init(void) { return 0; }
 /* Device links support */
 extern int device_links_read_lock(void);
 extern void device_links_read_unlock(int idx);
+extern int device_links_read_lock_held(void);
 extern int device_links_check_suppliers(struct device *dev);
 extern void device_links_driver_bound(struct device *dev);
 extern void device_links_driver_cleanup(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 636058bbf48a..eede79630ceb 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -68,6 +68,11 @@ void device_links_read_unlock(int idx)
 {
 	srcu_read_unlock(&device_links_srcu, idx);
 }
+
+int device_links_read_lock_held(void)
+{
+	return srcu_read_lock_held(&device_links_srcu);
+}
 #else /* !CONFIG_SRCU */
 static DECLARE_RWSEM(device_links_lock);
 
@@ -91,6 +96,13 @@ void device_links_read_unlock(int not_used)
 {
 	up_read(&device_links_lock);
 }
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int device_links_read_lock_held(void)
+{
+	return lockdep_is_held(&device_links_lock);
+}
+#endif
 #endif /* !CONFIG_SRCU */
 
 /**
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b75335508d2c..50def99df970 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -287,7 +287,8 @@ static int rpm_get_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		int retval;
 
 		if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
@@ -309,7 +310,8 @@ static void rpm_put_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
 			continue;
 
@@ -1640,7 +1642,8 @@ void pm_runtime_clean_up_links(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
+	list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
+				device_links_read_lock_held()) {
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
@@ -1662,7 +1665,8 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->flags & DL_FLAG_PM_RUNTIME) {
 			link->supplier_preactivated = true;
 			refcount_inc(&link->rpm_active);
@@ -1683,7 +1687,8 @@ void pm_runtime_put_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->supplier_preactivated) {
 			link->supplier_preactivated = false;
 			if (refcount_dec_not_one(&link->rpm_active))
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 9b83865d24f9..0027d4c8087c 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -31,9 +31,7 @@ struct rcu_sync {
  */
 static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
-			 !rcu_read_lock_bh_held() &&
-			 !rcu_read_lock_sched_held(),
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
 			 "suspicious rcu_sync_is_idle() usage");
 	return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
 }
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e91ec9ddcd30..4158b7212936 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -41,6 +41,24 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
 #define list_next_rcu(list)	(*((struct list_head __rcu **)(&(list)->next)))
 
 /*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#define check_arg_count_one(dummy)
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, extra...)				\
+	({								\
+	check_arg_count_one(extra);					\
+	RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(),		\
+			 "RCU-list traversed in non-reader section!");	\
+	})
+#else
+#define __list_check_rcu(dummy, cond, extra...)				\
+	({ check_arg_count_one(extra); })
+#endif
+
+/*
  * Insert a new entry between two known consecutive entries.
  *
  * This is only for internal list manipulation where we know
@@ -343,14 +361,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the list_head within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
 * This list-traversal primitive may safely run concurrently with
 * the _rcu list-mutation primitives such as list_add_rcu()
 * as long as the traversal is guarded by rcu_read_lock().
 */
-#define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
-		&pos->member != (head); \
+#define list_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = list_entry_rcu((head)->next, typeof(*pos), member);	\
+		&pos->member != (head);					\
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 /**
@@ -616,13 +636,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the hlist_node within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
 *
 * This list-traversal primitive may safely run concurrently with
 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
 * as long as the traversal is guarded by rcu_read_lock().
 */
-#define hlist_for_each_entry_rcu(pos, head, member)			\
-	for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
@@ -642,10 +664,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
 * not do any RCU debugging or tracing.
 */
 #define hlist_for_each_entry_rcu_notrace(pos, head, member)		\
-	for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\
+	for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
-		pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\
+		pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\
 			&(pos)->member)), typeof(*(pos)), member))
 
 /**
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f7167478c1d..80d6056f5855 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -221,6 +221,7 @@ int debug_lockdep_rcu_enabled(void);
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
 int rcu_read_lock_sched_held(void);
+int rcu_read_lock_any_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -241,6 +242,12 @@ static inline int rcu_read_lock_sched_held(void)
 {
 	return !preemptible();
 }
+
+static inline int rcu_read_lock_any_held(void)
+{
+	return !preemptible();
+}
+
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
@@ -476,7 +483,7 @@ do {									\
 * The no-tracing version of rcu_dereference_raw() must not call
 * rcu_read_lock_held().
 */
-#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
+#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu)
 
 /**
 * rcu_dereference_protected() - fetch RCU pointer when updates prevented
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 5ec3ea4028e2..4aa02eee8f6c 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -8,6 +8,17 @@ menu "RCU Debugging"
 config PROVE_RCU
 	def_bool PROVE_LOCKING
 
+config PROVE_RCU_LIST
+	bool "RCU list lockdep debugging"
+	depends on PROVE_RCU && RCU_EXPERT
+	default n
+	help
+	  Enable RCU lockdep checking for list usages. By default it is
+	  turned off since there are several list RCU users that still
+	  need to be converted to pass a lockdep expression. To prevent
+	  false-positive splats, we keep it default disabled but once all
+	  users are converted, we can remove this config option.
+
 config TORTURE_TEST
 	tristate
 	default n
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 7a6890b23c5f..5a879d073c1c 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -89,7 +89,7 @@ torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
-MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)");
 
 static int nrealreaders;
 static int nrealwriters;
@@ -375,6 +375,14 @@ rcu_perf_writer(void *arg)
 	if (holdoff)
 		schedule_timeout_uninterruptible(holdoff * HZ);
 
+	/*
+	 * Wait until rcu_end_inkernel_boot() is called for normal GP tests
+	 * so that RCU is not always expedited for normal GP tests.
+	 * The system_state test is approximate, but works well in practice.
+	 */
+	while (!gp_exp && system_state != SYSTEM_RUNNING)
+		schedule_timeout_uninterruptible(1);
+
 	t = ktime_get_mono_fast_ns();
 	if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
 		t_rcu_perf_writer_started = t;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index fce4e7e6f502..b22947324423 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -161,6 +161,7 @@ static atomic_long_t n_rcu_torture_timers;
 static long n_barrier_attempts;
 static long n_barrier_successes; /* did rcu_barrier test succeed? */
 static struct list_head rcu_torture_removed;
+static unsigned long shutdown_jiffies;
 
 static int rcu_torture_writer_state;
 #define RTWS_FIXED_DELAY	0
@@ -228,6 +229,15 @@ static u64 notrace rcu_trace_clock_local(void)
 }
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
+/*
+ * Stop aggressive CPU-hog tests a bit before the end of the test in order
+ * to avoid interfering with test shutdown.
+ */
+static bool shutdown_time_arrived(void)
+{
+	return shutdown_secs && time_after(jiffies, shutdown_jiffies - 30 * HZ);
+}
+
 static unsigned long boost_starttime;	/* jiffies of next boost test start. */
 static DEFINE_MUTEX(boost_mutex);	/* protect setting boost_starttime */
 					/* and boost task create/destroy. */
@@ -1713,12 +1723,14 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
 }
 
 // Give the scheduler a chance, even on nohz_full CPUs.
-static void rcu_torture_fwd_prog_cond_resched(void)
+static void rcu_torture_fwd_prog_cond_resched(unsigned long iter)
 {
 	if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) {
-		if (need_resched())
+		// Real call_rcu() floods hit userspace, so emulate that.
+		if (need_resched() || (iter & 0xfff))
 			schedule();
 	} else {
+		// No userspace emulation: CB invocation throttles call_rcu()
 		cond_resched();
 	}
 }
@@ -1746,7 +1758,7 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
 		spin_unlock_irqrestore(&rcu_fwd_lock, flags);
 		kfree(rfcp);
 		freed++;
-		rcu_torture_fwd_prog_cond_resched();
+		rcu_torture_fwd_prog_cond_resched(freed);
 	}
 	return freed;
 }
@@ -1785,15 +1797,17 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
 	WRITE_ONCE(rcu_fwd_startat, jiffies);
 	stopat = rcu_fwd_startat + dur;
 	while (time_before(jiffies, stopat) &&
+	       !shutdown_time_arrived() &&
 	       !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
 		idx = cur_ops->readlock();
 		udelay(10);
 		cur_ops->readunlock(idx);
 		if (!fwd_progress_need_resched || need_resched())
-			rcu_torture_fwd_prog_cond_resched();
+			rcu_torture_fwd_prog_cond_resched(1);
 	}
 	(*tested_tries)++;
 	if (!time_before(jiffies, stopat) &&
+	    !shutdown_time_arrived() &&
 	    !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
 		(*tested)++;
 		cver = READ_ONCE(rcu_torture_current_version) - cver;
@@ -1852,6 +1866,7 @@ static void rcu_torture_fwd_prog_cr(void)
 	gps = cur_ops->get_gp_seq();
 	rcu_launder_gp_seq_start = gps;
 	while (time_before(jiffies, stopat) &&
+	       !shutdown_time_arrived() &&
 	       !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
 		rfcp = READ_ONCE(rcu_fwd_cb_head);
 		rfcpn = NULL;
@@ -1875,7 +1890,7 @@ static void rcu_torture_fwd_prog_cr(void)
 			rfcp->rfc_gps = 0;
 		}
 		cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
-		rcu_torture_fwd_prog_cond_resched();
+		rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
 	}
 	stoppedat = jiffies;
 	n_launders_cb_snap = READ_ONCE(n_launders_cb);
@@ -1884,7 +1899,8 @@ static void rcu_torture_fwd_prog_cr(void)
 	cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
 	(void)rcu_torture_fwd_prog_cbfree();
 
-	if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) {
+	if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop) &&
+	    !shutdown_time_arrived()) {
 		WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
 		pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
 			 __func__,
@@ -2465,6 +2481,7 @@ rcu_torture_init(void)
 			goto unwind;
 		rcutor_hp = firsterr;
 	}
+	shutdown_jiffies = jiffies + shutdown_secs * HZ;
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
 	if (firsterr)
 		goto unwind;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 307ae6ebb804..99e9d952827b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -313,15 +313,6 @@ void rcu_note_context_switch(bool preempt)
 				       ? rnp->gp_seq
 				       : rcu_seq_snap(&rnp->gp_seq));
 		rcu_preempt_ctxt_queue(rnp, rdp);
-	} else if (t->rcu_read_lock_nesting < 0 &&
-		   t->rcu_read_unlock_special.s) {
-
-		/*
-		 * Complete exit from RCU read-side critical section on
-		 * behalf of preempted instance of __rcu_read_unlock().
-		 */
-		rcu_read_unlock_special(t);
-		rcu_preempt_deferred_qs(t);
 	} else {
 		rcu_preempt_deferred_qs(t);
 	}
@@ -624,22 +615,18 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		      (rdp->grpmask & rnp->expmask) ||
 		      tick_nohz_full_cpu(rdp->cpu);
 		// Need to defer quiescent state until everything is enabled.
-		if ((exp || in_irq()) && irqs_were_disabled && use_softirq &&
-		    (in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) {
+		if (irqs_were_disabled && use_softirq &&
+		    (in_interrupt() ||
+		     (exp && !t->rcu_read_unlock_special.b.deferred_qs))) {
 			// Using softirq, safe to awaken, and we get
 			// no help from enabling irqs, unlike bh/preempt.
 			raise_softirq_irqoff(RCU_SOFTIRQ);
-		} else if (exp && irqs_were_disabled && !use_softirq &&
-			   !t->rcu_read_unlock_special.b.deferred_qs) {
-			// Safe to awaken and we get no help from enabling
-			// irqs, unlike bh/preempt.
-			invoke_rcu_core();
 		} else {
 			// Enabling BH or preempt does reschedule, so...
 			// Also if no expediting or NO_HZ_FULL, slow is OK.
 			set_tsk_need_resched(current);
 			set_preempt_need_resched();
-			if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
 			    !rdp->defer_qs_iw_pending && exp) {
 				// Get scheduler to re-evaluate and call hooks.
 				// If !IRQ_WORK, FQS scan will eventually IPI.
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 64e9cc8609e7..1861103662db 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -61,9 +61,15 @@ module_param(rcu_normal_after_boot, int, 0);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 /**
- * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
+ * rcu_read_lock_held_common() - might we be in RCU-sched read-side critical section?
+ * @ret:	Best guess answer if lockdep cannot be relied on
  *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
+ * Returns true if lockdep must be ignored, in which case *ret contains
+ * the best guess described below.  Otherwise returns false, in which
+ * case *ret tells the caller nothing and the caller should instead
+ * consult lockdep.
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, set *ret to nonzero iff in an
 * RCU-sched read-side critical section.  In absence of
 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
 * critical section unless it can prove otherwise.  Note that disabling
@@ -75,35 +81,45 @@ module_param(rcu_normal_after_boot, int, 0);
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
- * Note that if the CPU is in the idle loop from an RCU point of
- * view (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
- * did an rcu_read_lock().  The reason for this is that RCU ignores CPUs
- * that are in such a section, considering these as in extended quiescent
- * state, so such a CPU is effectively never in an RCU read-side critical
- * section regardless of what RCU primitives it invokes.  This state of
- * affairs is required --- we need to keep an RCU-free window in idle
- * where the CPU may possibly enter into low power mode.  This way we can
- * notice an extended quiescent state to other CPUs that started a grace
- * period.  Otherwise we would delay any grace period as long as we run in
- * the idle task.
+ * Note that if the CPU is in the idle loop from an RCU point of view (ie:
+ * that we are in the section between rcu_idle_enter() and rcu_idle_exit())
+ * then rcu_read_lock_held() sets *ret to false even if the CPU did an
+ * rcu_read_lock().  The reason for this is that RCU ignores CPUs that are
+ * in such a section, considering these as in extended quiescent state,
+ * so such a CPU is effectively never in an RCU read-side critical section
+ * regardless of what RCU primitives it invokes.  This state of affairs is
+ * required --- we need to keep an RCU-free window in idle where the CPU may
+ * possibly enter into low power mode.  This way we can notice an extended
+ * quiescent state to other CPUs that started a grace period.  Otherwise
+ * we would delay any grace period as long as we run in the idle task.
 *
- * Similarly, we avoid claiming an SRCU read lock held if the current
+ * Similarly, we avoid claiming an RCU read lock held if the current
 * CPU is offline.
 */
+static bool rcu_read_lock_held_common(bool *ret)
+{
+	if (!debug_lockdep_rcu_enabled()) {
+		*ret = 1;
+		return true;
+	}
+	if (!rcu_is_watching()) {
+		*ret = 0;
+		return true;
+	}
+	if (!rcu_lockdep_current_cpu_online()) {
+		*ret = 0;
+		return true;
+	}
+	return false;
+}
+
 int rcu_read_lock_sched_held(void)
 {
-	int lockdep_opinion = 0;
+	bool ret;
 
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
-	if (debug_locks)
-		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || !preemptible();
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
+	return lock_is_held(&rcu_sched_lock_map) || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
 #endif
@@ -260,12 +276,10 @@ NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
 */
 int rcu_read_lock_held(void)
 {
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
 	return lock_is_held(&rcu_lock_map);
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_held);
@@ -287,16 +301,28 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_held);
 */
 int rcu_read_lock_bh_held(void)
 {
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
 	return in_softirq() || irqs_disabled();
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
+int rcu_read_lock_any_held(void)
+{
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
+	if (lock_is_held(&rcu_lock_map) ||
+	    lock_is_held(&rcu_bh_lock_map) ||
+	    lock_is_held(&rcu_sched_lock_map))
+		return 1;
+	return !preemptible();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);
+
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /**
diff --git a/kernel/torture.c b/kernel/torture.c
index a8d9bdfba7c3..7c13f5558b71 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -263,7 +263,6 @@ static void torture_onoff_cleanup(void)
 	onoff_task = NULL;
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 }
-EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
 
 /*
 * Print online/offline testing statistics.
@@ -449,7 +448,6 @@ static void torture_shuffle_cleanup(void)
 	}
 	shuffler_task = NULL;
 }
-EXPORT_SYMBOL_GPL(torture_shuffle_cleanup);
 
 /*
 * Variables for auto-shutdown.  This allows "lights out" torture runs
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
index 0515a2096f90..0456e0a3dab1 100644
--- a/kernel/trace/ftrace_internal.h
+++ b/kernel/trace/ftrace_internal.h
@@ -6,22 +6,22 @@
 
 /*
 * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw_notrace() is that elements removed from this list
+ * can use rcu_dereference_raw_check() is that elements removed from this list
 * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
+ * mechanism. The rcu_dereference_raw_check() calls are needed to handle
 * concurrent insertions into the ftrace_global_list.
 *
 * Silly Alpha and silly pointer-speculation compiler optimizations!
 */
 #define do_for_each_ftrace_op(op, list)			\
-	op = rcu_dereference_raw_notrace(list);			\
+	op = rcu_dereference_raw_check(list);			\
 	do
 
 /*
 * Optimized for just a single item in the list (as that is the normal case).
 */
 #define while_for_each_ftrace_op(op)				\
-	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
+	while (likely(op = rcu_dereference_raw_check((op)->next)) &&	\
 		unlikely((op) != &ftrace_list_end))
 
 extern struct ftrace_ops __rcu *ftrace_ops_list;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 525a97fbbc60..642474b26ba7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2642,10 +2642,10 @@ static void ftrace_exports(struct ring_buffer_event *event)
 
 	preempt_disable_notrace();
 
-	export = rcu_dereference_raw_notrace(ftrace_exports_list);
+	export = rcu_dereference_raw_check(ftrace_exports_list);
 	while (export) {
 		trace_process_export(export, event);
-		export = rcu_dereference_raw_notrace(export->next);
+		export = rcu_dereference_raw_check(export->next);
 	}
 
 	preempt_enable_notrace();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e8bc939b56dd..dde77f72e03e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -124,7 +124,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 
 	h = id & (FIB_TABLE_HASHSZ - 1);
 	head = &net->ipv4.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb_hlist,
+				 lockdep_rtnl_is_held()) {
 		if (tb->tb_id == id)
 			return tb;
 	}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 27b7b5693ede..33c669619736 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -227,7 +227,7 @@ then
 			must_continue=yes
 		fi
 		last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
-		if test -z "last_ts"
+		if test -z "$last_ts"
 		then
 			last_ts=0
 		fi
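
Usage note (not part of the patch): the series above makes list_for_each_entry_rcu() and hlist_for_each_entry_rcu() accept an optional lockdep expression, so that update-side traversals that rely on a lock rather than rcu_read_lock() do not trigger the new CONFIG_PROVE_RCU_LIST splat. Below is a minimal sketch of the calling convention; the names my_lock, my_list and struct foo are hypothetical and only illustrate the pattern used by the converted callers in this diff.

#include <linux/mutex.h>
#include <linux/lockdep.h>
#include <linux/rculist.h>

static DEFINE_MUTEX(my_lock);		/* protects updates to my_list */
static LIST_HEAD(my_list);		/* read under RCU or my_lock */

struct foo {
	int key;
	struct list_head node;
};

/* Reader side: rcu_read_lock() alone satisfies the default check. */
static bool foo_contains(int key)
{
	struct foo *p;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(p, &my_list, node) {
		if (p->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/*
 * Update side: pass a lockdep expression so that, with
 * CONFIG_PROVE_RCU_LIST=y, traversing under my_lock instead of an RCU
 * read-side critical section is not reported as a false positive.
 */
static struct foo *foo_lookup_locked(int key)
{
	struct foo *p;

	lockdep_assert_held(&my_lock);
	list_for_each_entry_rcu(p, &my_list, node, lockdep_is_held(&my_lock)) {
		if (p->key == key)
			return p;
	}
	return NULL;
}

The same idea appears in the patch itself, e.g. pci_mmcfg_lock_held() and acpi_ioremap_lock_held() wrap lock_is_held() on the relevant mutex, and fib_get_table() passes lockdep_rtnl_is_held().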