summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.ibm.com>2019-08-13 14:41:48 -0700
committerPaul E. McKenney <paulmck@linux.ibm.com>2019-08-13 14:41:48 -0700
commit07f038a408fb215fd656de78304b6ff4c7e4e490 (patch)
tree7e78a89f7a5981382d252cafcc58d9a4d66c9957
parent6738ff85c3ee8073d5b030cb26241d0009d4ce29 (diff)
parentcfcdef5e30469f3f2d6786ad35fc3fdef2a3833f (diff)
downloadlinux-07f038a408fb215fd656de78304b6ff4c7e4e490.tar.gz
linux-07f038a408fb215fd656de78304b6ff4c7e4e490.tar.bz2
linux-07f038a408fb215fd656de78304b6ff4c7e4e490.zip
Merge LKMM and RCU commits
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html73
-rw-r--r--Documentation/RCU/stallwarn.txt6
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt17
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/arm/kernel/smp.c6
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/x86/pci/mmconfig-shared.c5
-rw-r--r--drivers/acpi/osl.c6
-rw-r--r--drivers/base/base.h1
-rw-r--r--drivers/base/core.c12
-rw-r--r--drivers/base/power/runtime.c15
-rw-r--r--include/linux/rcu_segcblist.h9
-rw-r--r--include/linux/rcu_sync.h4
-rw-r--r--include/linux/rculist.h36
-rw-r--r--include/linux/rcupdate.h9
-rw-r--r--include/trace/events/rcu.h4
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/rcu/Kconfig.debug11
-rw-r--r--kernel/rcu/rcu.h1
-rw-r--r--kernel/rcu/rcu_segcblist.c174
-rw-r--r--kernel/rcu/rcu_segcblist.h54
-rw-r--r--kernel/rcu/rcuperf.c10
-rw-r--r--kernel/rcu/rcutorture.c30
-rw-r--r--kernel/rcu/srcutree.c5
-rw-r--r--kernel/rcu/tree.c205
-rw-r--r--kernel/rcu/tree.h81
-rw-r--r--kernel/rcu/tree_exp.h8
-rw-r--r--kernel/rcu/tree_plugin.h1195
-rw-r--r--kernel/rcu/tree_stall.h9
-rw-r--r--kernel/rcu/update.c105
-rw-r--r--kernel/sched/core.c57
-rw-r--r--kernel/sched/idle.c5
-rw-r--r--kernel/torture.c2
-rw-r--r--kernel/trace/ftrace_internal.h8
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--net/ipv4/fib_frontend.c3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
38 files changed, 1439 insertions, 740 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 5a9238a2883c..467251f7fef6 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2129,6 +2129,8 @@ Some of the relevant points of interest are as follows:
<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
+<li> <a href="#Accesses to User Memory and RCU">
+Accesses to User Memory and RCU</a>.
<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
<li> <a href="#Scheduling-Clock Interrupts and RCU">
Scheduling-Clock Interrupts and RCU</a>.
@@ -2512,7 +2514,7 @@ disabled across the entire RCU read-side critical section.
<p>
It is possible to use tracing on RCU code, but tracing itself
uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+For this reason, <tt>rcu_dereference_raw_check()</tt>
is provided for use by tracing, which avoids the destructive
recursion that could otherwise ensue.
This API is also used by virtualization in some architectures,
@@ -2521,6 +2523,75 @@ cannot be used.
The tracing folks both located the requirement and provided the
needed fix, so this surprise requirement was relatively painless.
+<h3><a name="Accesses to User Memory and RCU">
+Accesses to User Memory and RCU</a></h3>
+
+<p>
+The kernel needs to access user-space memory, for example, to access
+data referenced by system-call parameters.
+The <tt>get_user()</tt> macro does this job.
+
+<p>
+However, user-space memory might well be paged out, which means
+that <tt>get_user()</tt> might well page-fault and thus block while
+waiting for the resulting I/O to complete.
+It would be a very bad thing for the compiler to reorder
+a <tt>get_user()</tt> invocation into an RCU read-side critical
+section.
+For example, suppose that the source code looked like this:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 v = p-&gt;value;
+ 4 rcu_read_unlock();
+ 5 get_user(user_v, user_p);
+ 6 do_something_with(v, user_v);
+</pre>
+</blockquote>
+
+<p>
+The compiler must not be permitted to transform this source code into
+the following:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 get_user(user_v, user_p); // BUG: POSSIBLE PAGE FAULT!!!
+ 4 v = p-&gt;value;
+ 5 rcu_read_unlock();
+ 6 do_something_with(v, user_v);
+</pre>
+</blockquote>
+
+<p>
+If the compiler did make this transformation in a
+<tt>CONFIG_PREEMPT=n</tt> kernel build, and if <tt>get_user()</tt> did
+page fault, the result would be a quiescent state in the middle
+of an RCU read-side critical section.
+This misplaced quiescent state could result in line&nbsp;4 being
+a use-after-free access, which could be bad for your kernel's
+actuarial statistics.
+Similar examples can be constructed with the call to <tt>get_user()</tt>
+preceding the <tt>rcu_read_lock()</tt>.
+
+<p>
+Unfortunately, <tt>get_user()</tt> doesn't have any particular
+ordering properties, and in some architectures the underlying <tt>asm</tt>
+isn't even marked <tt>volatile</tt>.
+And even if it was marked <tt>volatile</tt>, the above access to
+<tt>p-&gt;value</tt> is not volatile, so the compiler would not have any
+reason to keep those two accesses in order.
+
+<p>
+Therefore, the Linux-kernel definitions of <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> must act as compiler barriers,
+at least for outermost instances of <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> within a nested set of RCU read-side critical
+sections.
+
<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
<p>
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 13e88fc00f01..f48f4621ccbc 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -57,6 +57,12 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
CONFIG_PREEMPT_RCU case, you might see stall-warning
messages.
+ You can use the rcutree.kthread_prio kernel boot parameter to
+ increase the scheduling priority of RCU's kthreads, which can
+ help avoid this problem. However, please note that doing this
+ can increase your system's context-switch rate and thus degrade
+ performance.
+
o A periodic interrupt whose handler takes longer than the time
interval between successive pairs of interrupts. This can
prevent RCU's kthreads and softirq handlers from running.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7ccd158b3894..79b983bedcaa 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3837,12 +3837,13 @@
RCU_BOOST is not set, valid values are 0-99 and
the default is zero (non-realtime operation).
- rcutree.rcu_nocb_leader_stride= [KNL]
- Set the number of NOCB kthread groups, which
- defaults to the square root of the number of
- CPUs. Larger numbers reduces the wakeup overhead
- on the per-CPU grace-period kthreads, but increases
- that same overhead on each group's leader.
+ rcutree.rcu_nocb_gp_stride= [KNL]
+ Set the number of NOCB callback kthreads in
+ each group, which defaults to the square root
+ of the number of CPUs. Larger numbers reduce
+ the wakeup overhead on the global grace-period
+ kthread, but increases that same overhead on
+ each group's NOCB grace-period kthread.
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
@@ -4047,6 +4048,10 @@
rcutorture.verbose= [KNL]
Enable additional printk() statements.
+ rcupdate.rcu_cpu_stall_ftrace_dump= [KNL]
+ Dump ftrace buffer after reporting RCU CPU
+ stall warning.
+
rcupdate.rcu_cpu_stall_suppress= [KNL]
Suppress RCU CPU stall warning messages.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6426db5198f0..527317026492 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9326,7 +9326,7 @@ F: drivers/misc/lkdtm/*
LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
M: Alan Stern <stern@rowland.harvard.edu>
-M: Andrea Parri <andrea.parri@amarulasolutions.com>
+M: Andrea Parri <parri.andrea@gmail.com>
M: Will Deacon <will@kernel.org>
M: Peter Zijlstra <peterz@infradead.org>
M: Boqun Feng <boqun.feng@gmail.com>
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index aab8ba40ce38..4b0bab2607e4 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -264,15 +264,13 @@ int __cpu_disable(void)
return 0;
}
-static DECLARE_COMPLETION(cpu_died);
-
/*
* called on the thread which is asking for a CPU to be shutdown -
* waits until shutdown has completed, or it is timed out.
*/
void __cpu_die(unsigned int cpu)
{
- if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+ if (!cpu_wait_death(cpu, 5)) {
pr_err("CPU%u: cpu didn't die\n", cpu);
return;
}
@@ -319,7 +317,7 @@ void arch_cpu_idle_dead(void)
* this returns, power and/or clocks can be removed at any point
* from this CPU and its cache by platform_cpu_kill().
*/
- complete(&cpu_died);
+ (void)cpu_report_death();
/*
* Ensure that the cache lines associated with that completion are
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bb7c8cc77f1a..04b2b927bb5a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
*/
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
- return rcu_dereference_raw_notrace(kvm->memslots[0]);
+ return rcu_dereference_raw_check(kvm->memslots[0]);
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;
static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
LIST_HEAD(pci_mmcfg_list);
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
struct pci_mmcfg_region *cfg;
/* keep list sorted by segment and starting bus number */
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
if (cfg->segment > new->segment ||
(cfg->segment == new->segment &&
cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
{
struct pci_mmcfg_region *cfg;
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
if (cfg->segment == segment &&
cfg->start_bus <= bus && bus <= cfg->end_bus)
return cfg;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 9c0edf2fc0dd..2f9d0d20b836 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/lockdep.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/kmod.h>
@@ -80,6 +81,7 @@ struct acpi_ioremap {
static LIST_HEAD(acpi_ioremaps);
static DEFINE_MUTEX(acpi_ioremap_lock);
+#define acpi_ioremap_lock_held() lock_is_held(&acpi_ioremap_lock.dep_map)
static void __init acpi_request_region (struct acpi_generic_address *gas,
unsigned int length, char *desc)
@@ -206,7 +208,7 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size)
{
struct acpi_ioremap *map;
- list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+ list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
if (map->phys <= phys &&
phys + size <= map->phys + map->size)
return map;
@@ -249,7 +251,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
{
struct acpi_ioremap *map;
- list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+ list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
if (map->virt <= virt &&
virt + size <= map->virt + map->size)
return map;
diff --git a/drivers/base/base.h b/drivers/base/base.h
index b405436ee28e..0d32544b6f91 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -165,6 +165,7 @@ static inline int devtmpfs_init(void) { return 0; }
/* Device links support */
extern int device_links_read_lock(void);
extern void device_links_read_unlock(int idx);
+extern int device_links_read_lock_held(void);
extern int device_links_check_suppliers(struct device *dev);
extern void device_links_driver_bound(struct device *dev);
extern void device_links_driver_cleanup(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 636058bbf48a..eede79630ceb 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -68,6 +68,11 @@ void device_links_read_unlock(int idx)
{
srcu_read_unlock(&device_links_srcu, idx);
}
+
+int device_links_read_lock_held(void)
+{
+ return srcu_read_lock_held(&device_links_srcu);
+}
#else /* !CONFIG_SRCU */
static DECLARE_RWSEM(device_links_lock);
@@ -91,6 +96,13 @@ void device_links_read_unlock(int not_used)
{
up_read(&device_links_lock);
}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int device_links_read_lock_held(void)
+{
+ return lockdep_is_held(&device_links_lock);
+}
+#endif
#endif /* !CONFIG_SRCU */
/**
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b75335508d2c..50def99df970 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -287,7 +287,8 @@ static int rpm_get_suppliers(struct device *dev)
{
struct device_link *link;
- list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+ device_links_read_lock_held()) {
int retval;
if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
@@ -309,7 +310,8 @@ static void rpm_put_suppliers(struct device *dev)
{
struct device_link *link;
- list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+ device_links_read_lock_held()) {
if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
continue;
@@ -1640,7 +1642,8 @@ void pm_runtime_clean_up_links(struct device *dev)
idx = device_links_read_lock();
- list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
+ list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
+ device_links_read_lock_held()) {
if (link->flags & DL_FLAG_STATELESS)
continue;
@@ -1662,7 +1665,8 @@ void pm_runtime_get_suppliers(struct device *dev)
idx = device_links_read_lock();
- list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+ device_links_read_lock_held())
if (link->flags & DL_FLAG_PM_RUNTIME) {
link->supplier_preactivated = true;
refcount_inc(&link->rpm_active);
@@ -1683,7 +1687,8 @@ void pm_runtime_put_suppliers(struct device *dev)
idx = device_links_read_lock();
- list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+ device_links_read_lock_held())
if (link->supplier_preactivated) {
link->supplier_preactivated = false;
if (refcount_dec_not_one(&link->rpm_active))
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 87404cb015f1..646759042333 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -14,6 +14,9 @@
#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
#define __INCLUDE_LINUX_RCU_SEGCBLIST_H
+#include <linux/types.h>
+#include <linux/atomic.h>
+
/* Simple unsegmented callback lists. */
struct rcu_cblist {
struct rcu_head *head;
@@ -65,8 +68,14 @@ struct rcu_segcblist {
struct rcu_head *head;
struct rcu_head **tails[RCU_CBLIST_NSEGS];
unsigned long gp_seq[RCU_CBLIST_NSEGS];
+#ifdef CONFIG_RCU_NOCB_CPU
+ atomic_long_t len;
+#else
long len;
+#endif
long len_lazy;
+ u8 enabled;
+ u8 offloaded;
};
#define RCU_SEGCBLIST_INITIALIZER(n) \
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 9b83865d24f9..0027d4c8087c 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -31,9 +31,7 @@ struct rcu_sync {
*/
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
- !rcu_read_lock_bh_held() &&
- !rcu_read_lock_sched_held(),
+ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
"suspicious rcu_sync_is_idle() usage");
return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
}
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e91ec9ddcd30..4158b7212936 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -41,6 +41,24 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
/*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#define check_arg_count_one(dummy)
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, extra...) \
+ ({ \
+ check_arg_count_one(extra); \
+ RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \
+ "RCU-list traversed in non-reader section!"); \
+ })
+#else
+#define __list_check_rcu(dummy, cond, extra...) \
+ ({ check_arg_count_one(extra); })
+#endif
+
+/*
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
@@ -343,14 +361,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
* as long as the traversal is guarded by rcu_read_lock().
*/
-#define list_for_each_entry_rcu(pos, head, member) \
- for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
+#define list_for_each_entry_rcu(pos, head, member, cond...) \
+ for (__list_check_rcu(dummy, ## cond, 0), \
+ pos = list_entry_rcu((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
@@ -616,13 +636,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
* as long as the traversal is guarded by rcu_read_lock().
*/
-#define hlist_for_each_entry_rcu(pos, head, member) \
- for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...) \
+ for (__list_check_rcu(dummy, ## cond, 0), \
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
typeof(*(pos)), member); \
pos; \
pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
@@ -642,10 +664,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* not do any RCU debugging or tracing.
*/
#define hlist_for_each_entry_rcu_notrace(pos, head, member) \
- for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\
+ for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\
typeof(*(pos)), member); \
pos; \
- pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\
+ pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\
&(pos)->member)), typeof(*(pos)), member))
/**
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f7167478c1d..80d6056f5855 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -221,6 +221,7 @@ int debug_lockdep_rcu_enabled(void);
int rcu_read_lock_held(void);
int rcu_read_lock_bh_held(void);
int rcu_read_lock_sched_held(void);
+int rcu_read_lock_any_held(void);
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -241,6 +242,12 @@ static inline int rcu_read_lock_sched_held(void)
{
return !preemptible();
}
+
+static inline int rcu_read_lock_any_held(void)
+{
+ return !preemptible();
+}
+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_PROVE_RCU
@@ -476,7 +483,7 @@ do { \
* The no-tracing version of rcu_dereference_raw() must not call
* rcu_read_lock_held().
*/
-#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
+#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu)
/**
* rcu_dereference_protected() - fetch RCU pointer when updates prevented
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 02a3f78f7cd8..694bd040cf51 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -100,7 +100,6 @@ TRACE_EVENT_RCU(rcu_grace_period,
* "Startedroot": Requested a nocb grace period based on root-node data.
* "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
- * "ResumeWait": Resume waiting after signal.
* "EndWait": Complete wait.
* "Cleanup": Clean up rcu_node structure after previous GP.
* "CleanupMore": Clean up, and another GP is needed.
@@ -267,7 +266,8 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock,
* "WakeNotPoll": Don't wake rcuo kthread because it is polling.
* "DeferredWake": Carried out the "IsDeferred" wakeup.
* "Poll": Start of new polling cycle for rcu_nocb_poll.
- * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll.
+ * "Sleep": Sleep waiting for GP for !rcu_nocb_poll.
+ * "CBSleep": Sleep waiting for CBs for !rcu_nocb_poll.
* "WokeEmpty": rcuo kthread woke to find empty list.
* "WokeNonEmpty": rcuo kthread woke to find non-empty list.
* "WaitQueue": Enqueue partially done, timed wait for it to complete.
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4861cf8e274b..4aca3f4379d2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -620,7 +620,7 @@ static void print_lock(struct held_lock *hlock)
return;
}
- printk(KERN_CONT "%p", hlock->instance);
+ printk(KERN_CONT "%px", hlock->instance);
print_lock_name(lock);
printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
}
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 5ec3ea4028e2..4aa02eee8f6c 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -8,6 +8,17 @@ menu "RCU Debugging"
config PROVE_RCU
def_bool PROVE_LOCKING
+config PROVE_RCU_LIST
+ bool "RCU list lockdep debugging"
+ depends on PROVE_RCU && RCU_EXPERT
+ default n
+ help
+ Enable RCU lockdep checking for list usages. By default it is
+ turned off since there are several list RCU users that still
+ need to be converted to pass a lockdep expression. To prevent
+ false-positive splats, we keep it default disabled but once all
+ users are converted, we can remove this config option.
+
config TORTURE_TEST
tristate
default n
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 5290b01de534..8fd4f82c9b3d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -227,6 +227,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
#ifdef CONFIG_RCU_STALL_COMMON
+extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 9bd5f6023c21..495c58ce1640 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -24,6 +24,49 @@ void rcu_cblist_init(struct rcu_cblist *rclp)
}
/*
+ * Enqueue an rcu_head structure onto the specified callback list.
+ * This function assumes that the callback is non-lazy because it
+ * is intended for use by no-CBs CPUs, which do not distinguish
+ * between lazy and non-lazy RCU callbacks.
+ */
+void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
+{
+ *rclp->tail = rhp;
+ rclp->tail = &rhp->next;
+ WRITE_ONCE(rclp->len, rclp->len + 1);
+}
+
+/*
+ * Flush the second rcu_cblist structure onto the first one, obliterating
+ * any contents of the first. If rhp is non-NULL, enqueue it as the sole
+ * element of the second rcu_cblist structure, but ensuring that the second
+ * rcu_cblist structure, if initially non-empty, always appears non-empty
+ * throughout the process. If rdp is NULL, the second rcu_cblist structure
+ * is instead initialized to empty.
+ */
+void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
+ struct rcu_cblist *srclp,
+ struct rcu_head *rhp)
+{
+ drclp->head = srclp->head;
+ if (drclp->head)
+ drclp->tail = srclp->tail;
+ else
+ drclp->tail = &drclp->head;
+ drclp->len = srclp->len;
+ drclp->len_lazy = srclp->len_lazy;
+ if (!rhp) {
+ rcu_cblist_init(srclp);
+ } else {
+ rhp->next = NULL;
+ srclp->head = rhp;
+ srclp->tail = &rhp->next;
+ WRITE_ONCE(srclp->len, 1);