Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/Kconfig          |    5
-rw-r--r--  kernel/rcu/rcu.h            |   16
-rw-r--r--  kernel/rcu/rcu_segcblist.c  |  216
-rw-r--r--  kernel/rcu/rcu_segcblist.h  |   57
-rw-r--r--  kernel/rcu/rcutorture.c     |  395
-rw-r--r--  kernel/rcu/refscale.c       |   23
-rw-r--r--  kernel/rcu/srcutiny.c       |   77
-rw-r--r--  kernel/rcu/srcutree.c       |  147
-rw-r--r--  kernel/rcu/tasks.h          |  104
-rw-r--r--  kernel/rcu/tree.c           |  101
-rw-r--r--  kernel/rcu/tree.h           |    2
-rw-r--r--  kernel/rcu/tree_exp.h       |    2
-rw-r--r--  kernel/rcu/tree_plugin.h    |  367
-rw-r--r--  kernel/rcu/tree_stall.h     |   60
-rw-r--r--  kernel/rcu/update.c         |    4
15 files changed, 1339 insertions, 237 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index cdc57b4f6d48..3128b7cf8e1f 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -95,6 +95,7 @@ config TASKS_RUDE_RCU
config TASKS_TRACE_RCU
def_bool 0
+ select IRQ_WORK
help
This option enables a task-based RCU implementation that uses
explicit rcu_read_lock_trace() read-side markers, and allows
@@ -188,8 +189,8 @@ config RCU_FAST_NO_HZ
config RCU_BOOST
bool "Enable RCU priority boosting"
- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
- default n
+ depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT
+ default y if PREEMPT_RT
help
This option boosts the priority of preempted RCU readers that
block the current preemptible RCU grace period for too long.
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 59ef1ae6dc37..bf0827d4b659 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -378,7 +378,11 @@ do { \
smp_mb__after_unlock_lock(); \
} while (0)
-#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))
+#define raw_spin_unlock_rcu_node(p) \
+do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_unlock(&ACCESS_PRIVATE(p, lock)); \
+} while (0)
#define raw_spin_lock_irq_rcu_node(p) \
do { \
@@ -387,7 +391,10 @@ do { \
} while (0)
#define raw_spin_unlock_irq_rcu_node(p) \
- raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
+do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)); \
+} while (0)
#define raw_spin_lock_irqsave_rcu_node(p, flags) \
do { \
@@ -396,7 +403,10 @@ do { \
} while (0)
#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
- raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
+do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags); \
+} while (0)
#define raw_spin_trylock_rcu_node(p) \
({ \
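
The unlock-side lockdep_assert_irqs_disabled() checks added above catch callers that re-enable interrupts while still holding an rcu_node lock. A minimal sketch of the locking discipline being enforced follows; example_update_rnp() is hypothetical and not part of this patch.

/* Hypothetical caller, not part of this patch: interrupts must stay
 * disabled from the _irqsave acquisition until the matching release. */
static void example_update_rnp(struct rcu_node *rnp)
{
	unsigned long flags;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	/* ... update *rnp fields under the lock, with IRQs disabled ... */
	/* A local_irq_restore(flags) here would now trip the new assertion. */
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}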
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 2d2a6b6b9dfb..7f181c9675f7 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -7,10 +7,10 @@
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
*/
-#include <linux/types.h>
-#include <linux/kernel.h>
+#include <linux/cpu.h>
#include <linux/interrupt.h>
-#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
#include "rcu_segcblist.h"
@@ -88,23 +88,135 @@ static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
#endif
}
+/* Get the length of a segment of the rcu_segcblist structure. */
+static long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
+{
+ return READ_ONCE(rsclp->seglen[seg]);
+}
+
+/* Return number of callbacks in segmented callback list by summing seglen. */
+long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp)
+{
+ long len = 0;
+ int i;
+
+ for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
+ len += rcu_segcblist_get_seglen(rsclp, i);
+
+ return len;
+}
+
+/* Set the length of a segment of the rcu_segcblist structure. */
+static void rcu_segcblist_set_seglen(struct rcu_segcblist *rsclp, int seg, long v)
+{
+ WRITE_ONCE(rsclp->seglen[seg], v);
+}
+
+/* Increase the numeric length of a segment by a specified amount. */
+static void rcu_segcblist_add_seglen(struct rcu_segcblist *rsclp, int seg, long v)
+{
+ WRITE_ONCE(rsclp->seglen[seg], rsclp->seglen[seg] + v);
+}
+
+/* Move from's segment length to to's segment. */
+static void rcu_segcblist_move_seglen(struct rcu_segcblist *rsclp, int from, int to)
+{
+ long len;
+
+ if (from == to)
+ return;
+
+ len = rcu_segcblist_get_seglen(rsclp, from);
+ if (!len)
+ return;
+
+ rcu_segcblist_add_seglen(rsclp, to, len);
+ rcu_segcblist_set_seglen(rsclp, from, 0);
+}
+
+/* Increment segment's length. */
+static void rcu_segcblist_inc_seglen(struct rcu_segcblist *rsclp, int seg)
+{
+ rcu_segcblist_add_seglen(rsclp, seg, 1);
+}
+
/*
* Increase the numeric length of an rcu_segcblist structure by the
* specified amount, which can be negative. This can cause the ->len
* field to disagree with the actual number of callbacks on the structure.
* This increase is fully ordered with respect to the callers accesses
* both before and after.
+ *
+ * So why on earth is a memory barrier required both before and after
+ * the update to the ->len field???
+ *
+ * The reason is that rcu_barrier() locklessly samples each CPU's ->len
+ * field, and if a given CPU's field is zero, avoids IPIing that CPU.
+ * This can of course race with both queuing and invoking of callbacks.
+ * Failing to correctly handle either of these races could result in
+ * rcu_barrier() failing to IPI a CPU that actually had callbacks queued
+ * which rcu_barrier() was obligated to wait on. And if rcu_barrier()
+ * failed to wait on such a callback, unloading certain kernel modules
+ * would result in calls to functions whose code was no longer present in
+ * the kernel, for but one example.
+ *
+ * Therefore, ->len transitions from 1->0 and 0->1 have to be carefully
+ * ordered with respect to both list modifications and the rcu_barrier().
+ *
+ * The queuing case is CASE 1 and the invoking case is CASE 2.
+ *
+ * CASE 1: Suppose that CPU 0 has no callbacks queued, but invokes
+ * call_rcu() just as CPU 1 invokes rcu_barrier(). CPU 0's ->len field
+ * will transition from 0->1, which is one of the transitions that must
+ * be handled carefully. Without the full memory barriers after the ->len
+ * update and at the beginning of rcu_barrier(), the following could happen:
+ *
+ * CPU 0 CPU 1
+ *
+ * call_rcu().
+ * rcu_barrier() sees ->len as 0.
+ * set ->len = 1.
+ * rcu_barrier() does nothing.
+ * module is unloaded.
+ * callback invokes unloaded function!
+ *
+ * With the full barriers, any case where rcu_barrier() sees ->len as 0 will
+ * have unambiguously preceded the return from the racing call_rcu(), which
+ * means that this call_rcu() invocation is OK to not wait on. After all,
+ * you are supposed to make sure that any problematic call_rcu() invocations
+ * happen before the rcu_barrier().
+ *
+ *
+ * CASE 2: Suppose that CPU 0 is invoking its last callback just as
+ * CPU 1 invokes rcu_barrier(). CPU 0's ->len field will transition from
+ * 1->0, which is one of the transitions that must be handled carefully.
+ * Without the full memory barriers before the ->len update and at the
+ * end of rcu_barrier(), the following could happen:
+ *
+ * CPU 0 CPU 1
+ *
+ * start invoking last callback
+ * set ->len = 0 (reordered)
+ * rcu_barrier() sees ->len as 0
+ * rcu_barrier() does nothing.
+ * module is unloaded
+ * callback executing after unloaded!
+ *
+ * With the full barriers, any case where rcu_barrier() sees ->len as 0
+ * will be fully ordered after the completion of the callback function,
+ * so that the module unloading operation is completely safe.
+ *
*/
-static void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
+void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
- smp_mb__before_atomic(); /* Up to the caller! */
+ smp_mb__before_atomic(); // Read header comment above.
atomic_long_add(v, &rsclp->len);
- smp_mb__after_atomic(); /* Up to the caller! */
+ smp_mb__after_atomic(); // Read header comment above.
#else
- smp_mb(); /* Up to the caller! */
+ smp_mb(); // Read header comment above.
WRITE_ONCE(rsclp->len, rsclp->len + v);
- smp_mb(); /* Up to the caller! */
+ smp_mb(); // Read header comment above.
#endif
}
@@ -120,26 +232,6 @@ void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
}
/*
- * Exchange the numeric length of the specified rcu_segcblist structure
- * with the specified value. This can cause the ->len field to disagree
- * with the actual number of callbacks on the structure. This exchange is
- * fully ordered with respect to the callers accesses both before and after.
- */
-static long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
-{
-#ifdef CONFIG_RCU_NOCB_CPU
- return atomic_long_xchg(&rsclp->len, v);
-#else
- long ret = rsclp->len;
-
- smp_mb(); /* Up to the caller! */
- WRITE_ONCE(rsclp->len, v);
- smp_mb(); /* Up to the caller! */
- return ret;
-#endif
-}
-
-/*
* Initialize an rcu_segcblist structure.
*/
void rcu_segcblist_init(struct rcu_segcblist *rsclp)
@@ -149,10 +241,12 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp)
BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
rsclp->head = NULL;
- for (i = 0; i < RCU_CBLIST_NSEGS; i++)
+ for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
rsclp->tails[i] = &rsclp->head;
+ rcu_segcblist_set_seglen(rsclp, i, 0);
+ }
rcu_segcblist_set_len(rsclp, 0);
- rsclp->enabled = 1;
+ rcu_segcblist_set_flags(rsclp, SEGCBLIST_ENABLED);
}
/*
@@ -163,16 +257,21 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
{
WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
- rsclp->enabled = 0;
+ rcu_segcblist_clear_flags(rsclp, SEGCBLIST_ENABLED);
}
/*
* Mark the specified rcu_segcblist structure as offloaded. This
* structure must be empty.
*/
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
+void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
{
- rsclp->offloaded = 1;
+ if (offload) {
+ rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY);
+ rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED);
+ } else {
+ rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
+ }
}
/*
@@ -245,7 +344,7 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
struct rcu_head *rhp)
{
rcu_segcblist_inc_len(rsclp);
- smp_mb(); /* Ensure counts are updated before callback is enqueued. */
+ rcu_segcblist_inc_seglen(rsclp, RCU_NEXT_TAIL);
rhp->next = NULL;
WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
@@ -274,6 +373,7 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
if (rsclp->tails[i] != rsclp->tails[i - 1])
break;
+ rcu_segcblist_inc_seglen(rsclp, i);
WRITE_ONCE(*rsclp->tails[i], rhp);
for (; i <= RCU_NEXT_TAIL; i++)
WRITE_ONCE(rsclp->tails[i], &rhp->next);
@@ -281,21 +381,6 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
}
/*
- * Extract only the counts from the specified rcu_segcblist structure,
- * and place them in the specified rcu_cblist structure. This function
- * supports both callback orphaning and invocation, hence the separation
- * of counts and callbacks. (Callbacks ready for invocation must be
- * orphaned and adopted separately from pending callbacks, but counts
- * apply to all callbacks. Locking must be used to make sure that
- * both orphaned-callbacks lists are consistent.)
- */
-void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp)
-{
- rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
-}
-
-/*
* Extract only those callbacks ready to be invoked from the specified
* rcu_segcblist structure and place them in the specified rcu_cblist
* structure.
@@ -307,6 +392,7 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
if (!rcu_segcblist_ready_cbs(rsclp))
return; /* Nothing to do. */
+ rclp->len = rcu_segcblist_get_seglen(rsclp, RCU_DONE_TAIL);
*rclp->tail = rsclp->head;
WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
@@ -314,6 +400,7 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
WRITE_ONCE(rsclp->tails[i], &rsclp->head);
+ rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0);
}
/*
@@ -330,11 +417,15 @@ void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
if (!rcu_segcblist_pend_cbs(rsclp))
return; /* Nothing to do. */
+ rclp->len = 0;
*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
- for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
+ for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) {
+ rclp->len += rcu_segcblist_get_seglen(rsclp, i);
WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
+ rcu_segcblist_set_seglen(rsclp, i, 0);
+ }
}
/*
@@ -345,7 +436,6 @@ void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp)
{
rcu_segcblist_add_len(rsclp, rclp->len);
- rclp->len = 0;
}
/*
@@ -359,6 +449,7 @@ void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
if (!rclp->head)
return; /* No callbacks to move. */
+ rcu_segcblist_add_seglen(rsclp, RCU_DONE_TAIL, rclp->len);
*rclp->tail = rsclp->head;
WRITE_ONCE(rsclp->head, rclp->head);
for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
@@ -379,6 +470,8 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
{
if (!rclp->head)
return; /* Nothing to do. */
+
+ rcu_segcblist_add_seglen(rsclp, RCU_NEXT_TAIL, rclp->len);
WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
}
@@ -403,6 +496,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
break;
WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
+ rcu_segcblist_move_seglen(rsclp, i, RCU_DONE_TAIL);
}
/* If no callbacks moved, nothing more need be done. */
@@ -423,6 +517,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
break; /* No more callbacks. */
WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
+ rcu_segcblist_move_seglen(rsclp, i, j);
rsclp->gp_seq[j] = rsclp->gp_seq[i];
}
}
@@ -444,7 +539,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
*/
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
{
- int i;
+ int i, j;
WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
@@ -487,6 +582,10 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
if (rcu_segcblist_restempty(rsclp, i) || ++i >= RCU_NEXT_TAIL)
return false;
+ /* Accounting: everything below i is about to get merged into i. */
+ for (j = i + 1; j <= RCU_NEXT_TAIL; j++)
+ rcu_segcblist_move_seglen(rsclp, j, i);
+
/*
* Merge all later callbacks, including newly arrived callbacks,
* into the segment located by the for-loop above. Assign "seq"
@@ -514,13 +613,24 @@ void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_cblist donecbs;
struct rcu_cblist pendcbs;
+ lockdep_assert_cpus_held();
+
rcu_cblist_init(&donecbs);
rcu_cblist_init(&pendcbs);
- rcu_segcblist_extract_count(src_rsclp, &donecbs);
+
rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
+
+ /*
+ * No need for smp_mb() before setting the length to 0, because the
+ * CPU-hotplug lock excludes rcu_barrier().
+ */
+ rcu_segcblist_set_len(src_rsclp, 0);
+
rcu_segcblist_insert_count(dst_rsclp, &donecbs);
+ rcu_segcblist_insert_count(dst_rsclp, &pendcbs);
rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
+
rcu_segcblist_init(src_rsclp);
}
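
With the per-segment ->seglen counts introduced above, the segment lengths are expected to sum to rcu_segcblist_n_cbs() whenever the list is sampled under the lock serializing its updates, ignoring callbacks staged outside the segmented list (for example in a NOCB bypass list). A hypothetical debug-style check of that invariant is sketched below; example_check_seglen() is not part of this patch.

/* Hypothetical consistency check, not part of this patch.  Caller must
 * hold the lock that serializes updates to this rcu_segcblist, and no
 * callbacks may be staged outside the segments (e.g. in a bypass list). */
static void example_check_seglen(struct rcu_segcblist *rsclp)
{
	WARN_ON_ONCE(rcu_segcblist_n_segment_cbs(rsclp) !=
		     rcu_segcblist_n_cbs(rsclp));
}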
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 492262bcb591..9a19328ff251 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -15,6 +15,9 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
return READ_ONCE(rclp->len);
}
+/* Return number of callbacks in segmented callback list by summing seglen. */
+long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp);
+
void rcu_cblist_init(struct rcu_cblist *rclp);
void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp);
void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
@@ -50,19 +53,51 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
#endif
}
+static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
+ int flags)
+{
+ rsclp->flags |= flags;
+}
+
+static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
+ int flags)
+{
+ rsclp->flags &= ~flags;
+}
+
+static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
+ int flags)
+{
+ return READ_ONCE(rsclp->flags) & flags;
+}
+
/*
* Is the specified rcu_segcblist enabled, for example, not corresponding
* to an offline CPU?
*/
static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
{
- return rsclp->enabled;
+ return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED);
}
-/* Is the specified rcu_segcblist offloaded? */
+/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
{
- return IS_ENABLED(CONFIG_RCU_NOCB_CPU) && rsclp->offloaded;
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+ !rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY))
+ return true;
+
+ return false;
+}
+
+static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
+{
+ int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED;
+
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags)
+ return true;
+
+ return false;
}
/*
@@ -75,10 +110,22 @@ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
return !READ_ONCE(*READ_ONCE(rsclp->tails[seg]));
}
+/*
+ * Is the specified segment of the specified rcu_segcblist structure
+ * empty of callbacks?
+ */
+static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
+{
+ if (seg == RCU_DONE_TAIL)
+ return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
+ return rsclp->tails[seg - 1] == rsclp->tails[seg];
+}
+
void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp);
+void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v);
void rcu_segcblist_init(struct rcu_segcblist *rsclp);
void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp);
+void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload);
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
@@ -88,8 +135,6 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
struct rcu_head *rhp);
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
struct rcu_head *rhp);
-void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
- struct rcu_cblist *rclp);
void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
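
The flag helpers above replace the old ->enabled/->offloaded booleans and let the (de-)offloading state machine pass through intermediate states. A hypothetical example of how a caller might consult them; example_invoke_locally() is not part of this patch.

/* Hypothetical helper, not part of this patch: callbacks can be invoked
 * directly (e.g. from softirq) only while the list has not been fully
 * handed over to the offload kthreads. */
static bool example_invoke_locally(struct rcu_segcblist *rsclp)
{
	return !rcu_segcblist_completely_offloaded(rsclp);
}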
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 528ed10b78fd..99657ffa6688 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -85,6 +85,7 @@ torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(bool, gp_normal, false,
"Use normal (non-expedited) GP wait primitives");
+torture_param(bool, gp_poll, false, "Use polling GP wait primitives");
torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
@@ -97,6 +98,8 @@ torture_param(int, object_debug, 0,
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0,
"Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
+torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
torture_param(int, read_exit_delay, 13,
"Delay between read-then-exit episodes (s)");
torture_param(int, read_exit_burst, 16,
@@ -127,10 +130,12 @@ static char *torture_type = "rcu";
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)");
+static int nrealnocbers;
static int nrealreaders;
static struct task_struct *writer_task;
static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
+static struct task_struct **nocb_tasks;
static struct task_struct *stats_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
@@ -142,11 +147,22 @@ static struct task_struct *read_exit_task;
#define RCU_TORTURE_PIPE_LEN 10
+// Mailbox-like structure to check RCU global memory ordering.
+struct rcu_torture_reader_check {
+ unsigned long rtc_myloops;
+ int rtc_chkrdr;
+ unsigned long rtc_chkloops;
+ int rtc_ready;
+ struct rcu_torture_reader_check *rtc_assigner;
+} ____cacheline_internodealigned_in_smp;
+
+// Update-side data structure used to check RCU readers.
struct rcu_torture {
struct rcu_head rtort_rcu;
int rtort_pipe_count;
struct list_head rtort_free;
int rtort_mbtest;
+ struct rcu_torture_reader_check *rtort_chkp;
};
static LIST_HEAD(rcu_torture_freelist);
@@ -157,10 +173,13 @@ static DEFINE_SPINLOCK(rcu_torture_lock);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
+static struct rcu_torture_reader_check *rcu_torture_reader_mbchk;
static atomic_t n_rcu_torture_alloc;
static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
+static atomic_t n_rcu_torture_mbchk_fail;
+static atomic_t n_rcu_torture_mbchk_tries;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_barrier_error;
static long n_rcu_torture_boost_ktrerror;
@@ -174,6 +193,8 @@ static unsigned long n_read_exits;
static struct list_head rcu_torture_removed;
static unsigned long shutdown_jiffies;
static unsigned long start_gp_seq;
+static atomic_long_t n_nocb_offload;
+static atomic_long_t n_nocb_deoffload;
static int rcu_torture_writer_state;
#define RTWS_FIXED_DELAY 0
@@ -183,9 +204,11 @@ static int rcu_torture_writer_state;
#define RTWS_EXP_SYNC 4
#define RTWS_COND_GET 5
#define RTWS_COND_SYNC 6
-#define RTWS_SYNC 7
-#define RTWS_STUTTER 8
-#define RTWS_STOPPING 9
+#define RTWS_POLL_GET 7
+#define RTWS_POLL_WAIT 8
+#define RTWS_SYNC 9
+#define RTWS_STUTTER 10
+#define RTWS_STOPPING 11
static const char * const rcu_torture_writer_state_names[] = {
"RTWS_FIXED_DELAY",
"RTWS_DELAY",
@@ -194,6 +217,8 @@ static const char * const rcu_torture_writer_state_names[] = {
"RTWS_EXP_SYNC",
"RTWS_COND_GET",
"RTWS_COND_SYNC",
+ "RTWS_POLL_GET",
+ "RTWS_POLL_WAIT",
"RTWS_SYNC",
"RTWS_STUTTER",
"RTWS_STOPPING",
@@ -311,7 +336,9 @@ struct rcu_torture_ops {
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*exp_sync)(void);
- unsigned long (*get_state)(void);
+ unsigned long (*get_gp_state)(void);
+ unsigned long (*start_gp_poll)(void);
+ bool (*poll_gp_state)(unsigned long oldstate);
void (*cond_sync)(unsigned long oldstate);
call_rcu_func_t call;
void (*cb_barrier)(void);
@@ -386,7 +413,12 @@ static bool
rcu_torture_pipe_update_one(struct rcu_torture *rp)
{
int i;
+ struct rcu_torture_reader_check *rtrcp = READ_ONCE(rp->rtort_chkp);
+ if (rtrcp) {
+ WRITE_ONCE(rp->rtort_chkp, NULL);
+ smp_store_release(&rtrcp->rtc_ready, 1); // Pair with smp_load_acquire().
+ }
i = READ_ONCE(rp->rtort_pipe_count);
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
@@ -461,7 +493,7 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
- .get_state = get_state_synchronize_rcu,
+ .get_gp_state = get_state_synchronize_rcu,
.cond_sync = cond_synchronize_rcu,
.call = call_rcu,
.cb_barrier = rcu_barrier,
@@ -570,6 +602,21 @@ static void srcu_torture_synchronize(void)
synchronize_srcu(srcu_ctlp);
}
+static unsigned long srcu_torture_get_gp_state(void)
+{
+ return get_state_synchronize_srcu(srcu_ctlp);
+}
+
+static unsigned long srcu_torture_start_gp_poll(void)
+{
+ return start_poll_synchronize_srcu(srcu_ctlp);
+}
+
+static bool srcu_torture_poll_gp_state(unsigned long oldstate)
+{
+ return poll_state_synchronize_srcu(srcu_ctlp, oldstate);
+}
+
static void srcu_torture_call(struct rcu_head *head,
rcu_callback_t func)
{
@@ -601,6 +648,9 @@ static struct rcu_torture_ops srcu_ops = {
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
+ .get_gp_state = srcu_torture_get_gp_state,
+ .start_gp_poll = srcu_torture_start_gp_poll,
+ .poll_gp_state = srcu_torture_poll_gp_state,
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
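
The three ops added above wire rcutorture to SRCU's polling grace-period interface. For reference, a minimal sketch of how the underlying primitives are used outside of rcutorture; example_poll_srcu_gp() and the srcu_struct pointer are hypothetical, while start_poll_synchronize_srcu() and poll_state_synchronize_srcu() are the primitives exercised here.

/* Hypothetical polling-API user, not part of this patch. */
static void example_poll_srcu_gp(struct srcu_struct *my_srcu)
{
	unsigned long cookie;

	cookie = start_poll_synchronize_srcu(my_srcu);	/* Start a GP if needed. */
	while (!poll_state_synchronize_srcu(my_srcu, cookie))
		schedule_timeout_uninterruptible(1);	/* Not yet complete; wait. */
	/* A full SRCU grace period has elapsed since the cookie was obtained. */
}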
@@ -1018,42 +1068,26 @@ rcu_torture_fqs(void *arg)
return 0;
}
+// Used by writers to randomly choose from the available grace-period
+// primitives. The only purpose of the initialization is to size the array.
+static int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, RTWS_COND_GET, RTWS_POLL_GET, RTWS_SYNC };
+static int nsynctypes;
+
/*
- * RCU torture writer kthread. Repeatedly substitutes a new structure
- * for that pointed to by rcu_torture_current, freeing the old structure
- * after a series of grace periods (the "pipeline").
+ * Determine which grace-period primitives are available.
*/
-static int
-rcu_torture_writer(void *arg)
+static void rcu_torture_write_types(void)
{
- bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
- int expediting = 0;
- unsigned long gp_snap;
bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
- bool gp_sync1 = gp_sync;
- int i;
- int oldnice = task_nice(current);
- struct rcu_torture *rp;
- struct rcu_torture *old_rp;
- static DEFINE_TORTURE_RANDOM(rand);
- bool stutter_waited;
- int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
- RTWS_COND_GET, RTWS_SYNC };
- int nsynctypes = 0;
-
- VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
- if (!can_expedite)
- pr_alert("%s" TORTURE_FLAG
- " GP expediting controlled from boot/sysfs for %s.\n",
- torture_type, cur_ops->name);
+ bool gp_poll1 = gp_poll, gp_sync1 = gp_sync;
/* Initialize synctype[] array. If none set, take default. */
- if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
- gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
- if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
+ if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_poll1 && !gp_sync1)
+ gp_cond1 = gp_exp1 = gp_normal1 = gp_poll1 = gp_sync1 = true;
+ if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) {
synctype[nsynctypes++] = RTWS_COND_GET;
pr_info("%s: Testing conditional GPs.\n", __func__);
- } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
+ } else if (gp_cond && (!cur_ops->get_gp_state || !cur_ops->cond_sync)) {
pr_alert("%s: gp_cond without primitives.\n", __func__);
}
if (gp_exp1 && cur_ops->exp_sync) {
@@ -1068,12 +1102,46 @@ rcu_torture_writer(void *arg)
} else if (gp_normal && !cur_ops->deferred_free) {
pr_alert("%s: gp_normal without primitives.\n", __func__);
}
+ if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
+ synctype[nsynctypes++] = RTWS_POLL_GET;
+ pr_info("%s: Testing polling GPs.\n", __func__);
+ } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
+ pr_alert("%s: gp_poll without primitives.\n", __func__);
+ }
if (gp_sync1 && cur_ops->sync) {
synctype[nsynctypes++] = RTWS_SYNC;
pr_info("%s: Testing normal GPs.\n", __func__);
} else if (gp_sync && !cur_ops->sync) {
pr_alert("%s: gp_sync without primitives.\n", __func__);
}
+}
+
+/*
+ * RCU torture writer kthread. Repeatedly substitutes a new structure
+ * for that pointed to by rcu_torture_current, freeing the old structure
+ * after a series of grace periods (the "pipeline").
+ */
+static int
+rcu_torture_writer(void *arg)
+{
+ bool boot_ended;
+ bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
+ unsigned long cookie;
+ int expediting = 0;
+ unsigned long gp_snap;
+ int i;
+ int idx;
+ int oldnice = task_nice(current);
+ struct rcu_torture *rp;
+ struct rcu_torture *old_rp;
+ static DEFINE_TORTURE_RANDOM(rand);
+ bool stutter_waited;
+
+ VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
+ if (!can_expedite)
+ pr_alert("%s" TORTURE_FLAG
+ " GP expediting controlled from boot/sysfs for %s.\n",
+ torture_type, cur_ops->name);
if (WARN_ONCE(nsynctypes == 0,
"rcu_torture_writer: No update-side primitives.\n")) {
/*
@@ -1087,7 +1155,7 @@ rcu_torture_writer(void *arg)
do {
rcu_torture_writer_state = RTWS_FIXED_DELAY;
- schedule_timeout_uninterruptible(1);
+ torture_hrtimeout_us(500, 1000, &rand);
rp = rcu_torture_alloc();
if (rp == NULL)
continue;
@@ -1107,6 +1175,18 @@ rcu_torture_writer(void *arg)
atomic_inc(&rcu_torture_wcount[i]);
WRITE_ONCE(old_rp->rtort_pipe_count,
old_rp->rtort_pipe_count + 1);
+ if (cur_ops->get_gp_state && cur_ops->poll_gp_state) {
+ idx = cur_ops->readlock();
+ cookie = cur_ops->get_gp_state();
+ WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE &&
+ cur_ops->poll_gp_state(cookie),
+ "%s: Cookie check 1 failed %s(%d) %lu->%lu\n",
+ __func__,
+ rcu_torture_writer_state_getname(),
+ rcu_torture_writer_state,
+ cookie, cur_ops->get_gp_state());
+ cur_ops->readunlock(idx);
+ }
switch (synctype[torture_random(&rand) % nsynctypes]) {
case RTWS_DEF_FREE:
rcu_torture_writer_state = RTWS_DEF_FREE;
@@ -1119,15 +1199,21 @@ rcu_torture_writer(void *arg)
break;
case RTWS_COND_GET:
rcu_torture_writer_state = RTWS_COND_GET;
- gp_snap = cur_ops->get_state();
- i = torture_random(&rand) % 16;
- if (i != 0)
- schedule_timeout_interruptible(i);
- udelay(torture_random(&rand) % 1000);
+ gp_snap = cur_ops->get_gp_state();
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
rcu_torture_writer_state = RTWS_COND_SYNC;
cur_ops->cond_sync(gp_snap);
rcu_torture_pipe_update(old_rp);
break;
+ case RTWS_POLL_GET:
+ rcu_torture_writer_state = RTWS_POLL_GET;
+ gp_snap = cur_ops->start_gp_poll();
+ rcu_torture_writer_state = RTWS_POLL_WAIT;
+ while (!cur_ops->poll_gp_state(gp_snap))
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+ &rand);
+ rcu_torture_pipe_update(old_rp);
+ break;
case RTWS_SYNC:
rcu_torture_writer_state = RTWS_SYNC;
cur_ops->sync();
@@ -1137,6 +1223,14 @@ rcu_torture_writer(void *arg)
WARN_ON_ONCE(1);
break;
}
+ if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+ WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE &&
+ !cur_ops->poll_gp_state(cookie),
+ "%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
+ __func__,
+ rcu_torture_writer_state_getname(),
+ rcu_torture_writer_state,
+ cookie, cur_ops->get_gp_state());
}
WRITE_ONCE(rcu_torture_current_version,
rcu_torture_current_version + 1);
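
The cookie checks added above sanity-check the grace-period cookie API against whichever update primitive was chosen for the current pass. Outside of rcutorture, the conditional form of the cookie pattern (also exercised by the RTWS_COND_GET case) looks roughly like the sketch below; example_cookie_pattern() is hypothetical, while get_state_synchronize_rcu() and cond_synchronize_rcu() are the real primitives.

/* Hypothetical illustration of the cookie pattern, not part of this patch. */
static void example_cookie_pattern(void)
{
	unsigned long cookie = get_state_synchronize_rcu();

	/* ... remove an element that readers might still reference ... */
	cond_synchronize_rcu(cookie);	/* Waits only if no GP elapsed since the cookie. */
	/* Now safe to free the removed element. */
}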
@@ -1155,12 +1249,13 @@ rcu_torture_writer(void *arg)
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
+ boot_ended = rcu_inkernel_boot_has_ended();
stutter_waited = stutter_wait("rcu_torture_writer");
if (stutter_waited &&
!READ_ONCE(rcu_fwd_cb_nodela