diff options
Diffstat (limited to 'kernel/rcu')
| -rw-r--r-- | kernel/rcu/Kconfig | 5 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 16 | ||||
| -rw-r--r-- | kernel/rcu/rcu_segcblist.c | 216 | ||||
| -rw-r--r-- | kernel/rcu/rcu_segcblist.h | 57 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 395 | ||||
| -rw-r--r-- | kernel/rcu/refscale.c | 23 | ||||
| -rw-r--r-- | kernel/rcu/srcutiny.c | 77 | ||||
| -rw-r--r-- | kernel/rcu/srcutree.c | 147 | ||||
| -rw-r--r-- | kernel/rcu/tasks.h | 104 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 101 | ||||
| -rw-r--r-- | kernel/rcu/tree.h | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree_exp.h | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 367 | ||||
| -rw-r--r-- | kernel/rcu/tree_stall.h | 60 | ||||
| -rw-r--r-- | kernel/rcu/update.c | 4 |
15 files changed, 1339 insertions, 237 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index cdc57b4f6d48..3128b7cf8e1f 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -95,6 +95,7 @@ config TASKS_RUDE_RCU config TASKS_TRACE_RCU def_bool 0 + select IRQ_WORK help This option enables a task-based RCU implementation that uses explicit rcu_read_lock_trace() read-side markers, and allows @@ -188,8 +189,8 @@ config RCU_FAST_NO_HZ config RCU_BOOST bool "Enable RCU priority boosting" - depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT - default n + depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT + default y if PREEMPT_RT help This option boosts the priority of preempted RCU readers that block the current preemptible RCU grace period for too long. diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 59ef1ae6dc37..bf0827d4b659 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -378,7 +378,11 @@ do { \ smp_mb__after_unlock_lock(); \ } while (0) -#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock)) +#define raw_spin_unlock_rcu_node(p) \ +do { \ + lockdep_assert_irqs_disabled(); \ + raw_spin_unlock(&ACCESS_PRIVATE(p, lock)); \ +} while (0) #define raw_spin_lock_irq_rcu_node(p) \ do { \ @@ -387,7 +391,10 @@ do { \ } while (0) #define raw_spin_unlock_irq_rcu_node(p) \ - raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) +do { \ + lockdep_assert_irqs_disabled(); \ + raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)); \ +} while (0) #define raw_spin_lock_irqsave_rcu_node(p, flags) \ do { \ @@ -396,7 +403,10 @@ do { \ } while (0) #define raw_spin_unlock_irqrestore_rcu_node(p, flags) \ - raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) +do { \ + lockdep_assert_irqs_disabled(); \ + raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags); \ +} while (0) #define raw_spin_trylock_rcu_node(p) \ ({ \ diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 2d2a6b6b9dfb..7f181c9675f7 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -7,10 +7,10 @@ * Authors: Paul E. McKenney <paulmck@linux.ibm.com> */ -#include <linux/types.h> -#include <linux/kernel.h> +#include <linux/cpu.h> #include <linux/interrupt.h> -#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/types.h> #include "rcu_segcblist.h" @@ -88,23 +88,135 @@ static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v) #endif } +/* Get the length of a segment of the rcu_segcblist structure. */ +static long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg) +{ + return READ_ONCE(rsclp->seglen[seg]); +} + +/* Return number of callbacks in segmented callback list by summing seglen. */ +long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp) +{ + long len = 0; + int i; + + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) + len += rcu_segcblist_get_seglen(rsclp, i); + + return len; +} + +/* Set the length of a segment of the rcu_segcblist structure. */ +static void rcu_segcblist_set_seglen(struct rcu_segcblist *rsclp, int seg, long v) +{ + WRITE_ONCE(rsclp->seglen[seg], v); +} + +/* Increase the numeric length of a segment by a specified amount. */ +static void rcu_segcblist_add_seglen(struct rcu_segcblist *rsclp, int seg, long v) +{ + WRITE_ONCE(rsclp->seglen[seg], rsclp->seglen[seg] + v); +} + +/* Move from's segment length to to's segment. */ +static void rcu_segcblist_move_seglen(struct rcu_segcblist *rsclp, int from, int to) +{ + long len; + + if (from == to) + return; + + len = rcu_segcblist_get_seglen(rsclp, from); + if (!len) + return; + + rcu_segcblist_add_seglen(rsclp, to, len); + rcu_segcblist_set_seglen(rsclp, from, 0); +} + +/* Increment segment's length. */ +static void rcu_segcblist_inc_seglen(struct rcu_segcblist *rsclp, int seg) +{ + rcu_segcblist_add_seglen(rsclp, seg, 1); +} + /* * Increase the numeric length of an rcu_segcblist structure by the * specified amount, which can be negative. This can cause the ->len * field to disagree with the actual number of callbacks on the structure. * This increase is fully ordered with respect to the callers accesses * both before and after. + * + * So why on earth is a memory barrier required both before and after + * the update to the ->len field??? + * + * The reason is that rcu_barrier() locklessly samples each CPU's ->len + * field, and if a given CPU's field is zero, avoids IPIing that CPU. + * This can of course race with both queuing and invoking of callbacks. + * Failing to correctly handle either of these races could result in + * rcu_barrier() failing to IPI a CPU that actually had callbacks queued + * which rcu_barrier() was obligated to wait on. And if rcu_barrier() + * failed to wait on such a callback, unloading certain kernel modules + * would result in calls to functions whose code was no longer present in + * the kernel, for but one example. + * + * Therefore, ->len transitions from 1->0 and 0->1 have to be carefully + * ordered with respect with both list modifications and the rcu_barrier(). + * + * The queuing case is CASE 1 and the invoking case is CASE 2. + * + * CASE 1: Suppose that CPU 0 has no callbacks queued, but invokes + * call_rcu() just as CPU 1 invokes rcu_barrier(). CPU 0's ->len field + * will transition from 0->1, which is one of the transitions that must + * be handled carefully. Without the full memory barriers after the ->len + * update and at the beginning of rcu_barrier(), the following could happen: + * + * CPU 0 CPU 1 + * + * call_rcu(). + * rcu_barrier() sees ->len as 0. + * set ->len = 1. + * rcu_barrier() does nothing. + * module is unloaded. + * callback invokes unloaded function! + * + * With the full barriers, any case where rcu_barrier() sees ->len as 0 will + * have unambiguously preceded the return from the racing call_rcu(), which + * means that this call_rcu() invocation is OK to not wait on. After all, + * you are supposed to make sure that any problematic call_rcu() invocations + * happen before the rcu_barrier(). + * + * + * CASE 2: Suppose that CPU 0 is invoking its last callback just as + * CPU 1 invokes rcu_barrier(). CPU 0's ->len field will transition from + * 1->0, which is one of the transitions that must be handled carefully. + * Without the full memory barriers before the ->len update and at the + * end of rcu_barrier(), the following could happen: + * + * CPU 0 CPU 1 + * + * start invoking last callback + * set ->len = 0 (reordered) + * rcu_barrier() sees ->len as 0 + * rcu_barrier() does nothing. + * module is unloaded + * callback executing after unloaded! + * + * With the full barriers, any case where rcu_barrier() sees ->len as 0 + * will be fully ordered after the completion of the callback function, + * so that the module unloading operation is completely safe. + * */ -static void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v) +void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v) { #ifdef CONFIG_RCU_NOCB_CPU - smp_mb__before_atomic(); /* Up to the caller! */ + smp_mb__before_atomic(); // Read header comment above. atomic_long_add(v, &rsclp->len); - smp_mb__after_atomic(); /* Up to the caller! */ + smp_mb__after_atomic(); // Read header comment above. #else - smp_mb(); /* Up to the caller! */ + smp_mb(); // Read header comment above. WRITE_ONCE(rsclp->len, rsclp->len + v); - smp_mb(); /* Up to the caller! */ + smp_mb(); // Read header comment above. #endif } @@ -120,26 +232,6 @@ void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp) } /* - * Exchange the numeric length of the specified rcu_segcblist structure - * with the specified value. This can cause the ->len field to disagree - * with the actual number of callbacks on the structure. This exchange is - * fully ordered with respect to the callers accesses both before and after. - */ -static long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v) -{ -#ifdef CONFIG_RCU_NOCB_CPU - return atomic_long_xchg(&rsclp->len, v); -#else - long ret = rsclp->len; - - smp_mb(); /* Up to the caller! */ - WRITE_ONCE(rsclp->len, v); - smp_mb(); /* Up to the caller! */ - return ret; -#endif -} - -/* * Initialize an rcu_segcblist structure. */ void rcu_segcblist_init(struct rcu_segcblist *rsclp) @@ -149,10 +241,12 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp) BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); rsclp->head = NULL; - for (i = 0; i < RCU_CBLIST_NSEGS; i++) + for (i = 0; i < RCU_CBLIST_NSEGS; i++) { rsclp->tails[i] = &rsclp->head; + rcu_segcblist_set_seglen(rsclp, i, 0); + } rcu_segcblist_set_len(rsclp, 0); - rsclp->enabled = 1; + rcu_segcblist_set_flags(rsclp, SEGCBLIST_ENABLED); } /* @@ -163,16 +257,21 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) { WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); - rsclp->enabled = 0; + rcu_segcblist_clear_flags(rsclp, SEGCBLIST_ENABLED); } /* * Mark the specified rcu_segcblist structure as offloaded. This * structure must be empty. */ -void rcu_segcblist_offload(struct rcu_segcblist *rsclp) +void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { - rsclp->offloaded = 1; + if (offload) { + rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY); + rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED); + } else { + rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED); + } } /* @@ -245,7 +344,7 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, struct rcu_head *rhp) { rcu_segcblist_inc_len(rsclp); - smp_mb(); /* Ensure counts are updated before callback is enqueued. */ + rcu_segcblist_inc_seglen(rsclp, RCU_NEXT_TAIL); rhp->next = NULL; WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next); @@ -274,6 +373,7 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) if (rsclp->tails[i] != rsclp->tails[i - 1]) break; + rcu_segcblist_inc_seglen(rsclp, i); WRITE_ONCE(*rsclp->tails[i], rhp); for (; i <= RCU_NEXT_TAIL; i++) WRITE_ONCE(rsclp->tails[i], &rhp->next); @@ -281,21 +381,6 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, } /* - * Extract only the counts from the specified rcu_segcblist structure, - * and place them in the specified rcu_cblist structure. This function - * supports both callback orphaning and invocation, hence the separation - * of counts and callbacks. (Callbacks ready for invocation must be - * orphaned and adopted separately from pending callbacks, but counts - * apply to all callbacks. Locking must be used to make sure that - * both orphaned-callbacks lists are consistent.) - */ -void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rclp->len = rcu_segcblist_xchg_len(rsclp, 0); -} - -/* * Extract only those callbacks ready to be invoked from the specified * rcu_segcblist structure and place them in the specified rcu_cblist * structure. @@ -307,6 +392,7 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, if (!rcu_segcblist_ready_cbs(rsclp)) return; /* Nothing to do. */ + rclp->len = rcu_segcblist_get_seglen(rsclp, RCU_DONE_TAIL); *rclp->tail = rsclp->head; WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]); WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); @@ -314,6 +400,7 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) WRITE_ONCE(rsclp->tails[i], &rsclp->head); + rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0); } /* @@ -330,11 +417,15 @@ void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, if (!rcu_segcblist_pend_cbs(rsclp)) return; /* Nothing to do. */ + rclp->len = 0; *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); - for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) + for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) { + rclp->len += rcu_segcblist_get_seglen(rsclp, i); WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]); + rcu_segcblist_set_seglen(rsclp, i, 0); + } } /* @@ -345,7 +436,6 @@ void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp) { rcu_segcblist_add_len(rsclp, rclp->len); - rclp->len = 0; } /* @@ -359,6 +449,7 @@ void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, if (!rclp->head) return; /* No callbacks to move. */ + rcu_segcblist_add_seglen(rsclp, RCU_DONE_TAIL, rclp->len); *rclp->tail = rsclp->head; WRITE_ONCE(rsclp->head, rclp->head); for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) @@ -379,6 +470,8 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, { if (!rclp->head) return; /* Nothing to do. */ + + rcu_segcblist_add_seglen(rsclp, RCU_NEXT_TAIL, rclp->len); WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head); WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail); } @@ -403,6 +496,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) break; WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]); + rcu_segcblist_move_seglen(rsclp, i, RCU_DONE_TAIL); } /* If no callbacks moved, nothing more need be done. */ @@ -423,6 +517,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) break; /* No more callbacks. */ WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]); + rcu_segcblist_move_seglen(rsclp, i, j); rsclp->gp_seq[j] = rsclp->gp_seq[i]; } } @@ -444,7 +539,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) */ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) { - int i; + int i, j; WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) @@ -487,6 +582,10 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) if (rcu_segcblist_restempty(rsclp, i) || ++i >= RCU_NEXT_TAIL) return false; + /* Accounting: everything below i is about to get merged into i. */ + for (j = i + 1; j <= RCU_NEXT_TAIL; j++) + rcu_segcblist_move_seglen(rsclp, j, i); + /* * Merge all later callbacks, including newly arrived callbacks, * into the segment located by the for-loop above. Assign "seq" @@ -514,13 +613,24 @@ void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, struct rcu_cblist donecbs; struct rcu_cblist pendcbs; + lockdep_assert_cpus_held(); + rcu_cblist_init(&donecbs); rcu_cblist_init(&pendcbs); - rcu_segcblist_extract_count(src_rsclp, &donecbs); + rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs); rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs); + + /* + * No need smp_mb() before setting length to 0, because CPU hotplug + * lock excludes rcu_barrier. + */ + rcu_segcblist_set_len(src_rsclp, 0); + rcu_segcblist_insert_count(dst_rsclp, &donecbs); + rcu_segcblist_insert_count(dst_rsclp, &pendcbs); rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs); rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs); + rcu_segcblist_init(src_rsclp); } diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 492262bcb591..9a19328ff251 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -15,6 +15,9 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) return READ_ONCE(rclp->len); } +/* Return number of callbacks in segmented callback list by summing seglen. */ +long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp); + void rcu_cblist_init(struct rcu_cblist *rclp); void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp); void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, @@ -50,19 +53,51 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) #endif } +static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp, + int flags) +{ + rsclp->flags |= flags; +} + +static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp, + int flags) +{ + rsclp->flags &= ~flags; +} + +static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp, + int flags) +{ + return READ_ONCE(rsclp->flags) & flags; +} + /* * Is the specified rcu_segcblist enabled, for example, not corresponding * to an offline CPU? */ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) { - return rsclp->enabled; + return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED); } -/* Is the specified rcu_segcblist offloaded? */ +/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) { - return IS_ENABLED(CONFIG_RCU_NOCB_CPU) && rsclp->offloaded; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + !rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY)) + return true; + + return false; +} + +static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp) +{ + int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED; + + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags) + return true; + + return false; } /* @@ -75,10 +110,22 @@ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) return !READ_ONCE(*READ_ONCE(rsclp->tails[seg])); } +/* + * Is the specified segment of the specified rcu_segcblist structure + * empty of callbacks? + */ +static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) +{ + if (seg == RCU_DONE_TAIL) + return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; + return rsclp->tails[seg - 1] == rsclp->tails[seg]; +} + void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp); +void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v); void rcu_segcblist_init(struct rcu_segcblist *rsclp); void rcu_segcblist_disable(struct rcu_segcblist *rsclp); -void rcu_segcblist_offload(struct rcu_segcblist *rsclp); +void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload); bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); @@ -88,8 +135,6 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, struct rcu_head *rhp); bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, struct rcu_head *rhp); -void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp); void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp); void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 528ed10b78fd..99657ffa6688 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -85,6 +85,7 @@ torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); +torture_param(bool, gp_poll, false, "Use polling GP wait primitives"); torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers"); @@ -97,6 +98,8 @@ torture_param(int, object_debug, 0, torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable"); +torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable"); +torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)"); torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)"); torture_param(int, read_exit_burst, 16, @@ -127,10 +130,12 @@ static char *torture_type = "rcu"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)"); +static int nrealnocbers; static int nrealreaders; static struct task_struct *writer_task; static struct task_struct **fakewriter_tasks; static struct task_struct **reader_tasks; +static struct task_struct **nocb_tasks; static struct task_struct *stats_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; @@ -142,11 +147,22 @@ static struct task_struct *read_exit_task; #define RCU_TORTURE_PIPE_LEN 10 +// Mailbox-like structure to check RCU global memory ordering. +struct rcu_torture_reader_check { + unsigned long rtc_myloops; + int rtc_chkrdr; + unsigned long rtc_chkloops; + int rtc_ready; + struct rcu_torture_reader_check *rtc_assigner; +} ____cacheline_internodealigned_in_smp; + +// Update-side data structure used to check RCU readers. struct rcu_torture { struct rcu_head rtort_rcu; int rtort_pipe_count; struct list_head rtort_free; int rtort_mbtest; + struct rcu_torture_reader_check *rtort_chkp; }; static LIST_HEAD(rcu_torture_freelist); @@ -157,10 +173,13 @@ static DEFINE_SPINLOCK(rcu_torture_lock); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch); static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; +static struct rcu_torture_reader_check *rcu_torture_reader_mbchk; static atomic_t n_rcu_torture_alloc; static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; +static atomic_t n_rcu_torture_mbchk_fail; +static atomic_t n_rcu_torture_mbchk_tries; static atomic_t n_rcu_torture_error; static long n_rcu_torture_barrier_error; static long n_rcu_torture_boost_ktrerror; @@ -174,6 +193,8 @@ static unsigned long n_read_exits; static struct list_head rcu_torture_removed; static unsigned long shutdown_jiffies; static unsigned long start_gp_seq; +static atomic_long_t n_nocb_offload; +static atomic_long_t n_nocb_deoffload; static int rcu_torture_writer_state; #define RTWS_FIXED_DELAY 0 @@ -183,9 +204,11 @@ static int rcu_torture_writer_state; #define RTWS_EXP_SYNC 4 #define RTWS_COND_GET 5 #define RTWS_COND_SYNC 6 -#define RTWS_SYNC 7 -#define RTWS_STUTTER 8 -#define RTWS_STOPPING 9 +#define RTWS_POLL_GET 7 +#define RTWS_POLL_WAIT 8 +#define RTWS_SYNC 9 +#define RTWS_STUTTER 10 +#define RTWS_STOPPING 11 static const char * const rcu_torture_writer_state_names[] = { "RTWS_FIXED_DELAY", "RTWS_DELAY", @@ -194,6 +217,8 @@ static const char * const rcu_torture_writer_state_names[] = { "RTWS_EXP_SYNC", "RTWS_COND_GET", "RTWS_COND_SYNC", + "RTWS_POLL_GET", + "RTWS_POLL_WAIT", "RTWS_SYNC", "RTWS_STUTTER", "RTWS_STOPPING", @@ -311,7 +336,9 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*exp_sync)(void); - unsigned long (*get_state)(void); + unsigned long (*get_gp_state)(void); + unsigned long (*start_gp_poll)(void); + bool (*poll_gp_state)(unsigned long oldstate); void (*cond_sync)(unsigned long oldstate); call_rcu_func_t call; void (*cb_barrier)(void); @@ -386,7 +413,12 @@ static bool rcu_torture_pipe_update_one(struct rcu_torture *rp) { int i; + struct rcu_torture_reader_check *rtrcp = READ_ONCE(rp->rtort_chkp); + if (rtrcp) { + WRITE_ONCE(rp->rtort_chkp, NULL); + smp_store_release(&rtrcp->rtc_ready, 1); // Pair with smp_load_acquire(). + } i = READ_ONCE(rp->rtort_pipe_count); if (i > RCU_TORTURE_PIPE_LEN) i = RCU_TORTURE_PIPE_LEN; @@ -461,7 +493,7 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, - .get_state = get_state_synchronize_rcu, + .get_gp_state = get_state_synchronize_rcu, .cond_sync = cond_synchronize_rcu, .call = call_rcu, .cb_barrier = rcu_barrier, @@ -570,6 +602,21 @@ static void srcu_torture_synchronize(void) synchronize_srcu(srcu_ctlp); } +static unsigned long srcu_torture_get_gp_state(void) +{ + return get_state_synchronize_srcu(srcu_ctlp); +} + +static unsigned long srcu_torture_start_gp_poll(void) +{ + return start_poll_synchronize_srcu(srcu_ctlp); +} + +static bool srcu_torture_poll_gp_state(unsigned long oldstate) +{ + return poll_state_synchronize_srcu(srcu_ctlp, oldstate); +} + static void srcu_torture_call(struct rcu_head *head, rcu_callback_t func) { @@ -601,6 +648,9 @@ static struct rcu_torture_ops srcu_ops = { .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, .exp_sync = srcu_torture_synchronize_expedited, + .get_gp_state = srcu_torture_get_gp_state, + .start_gp_poll = srcu_torture_start_gp_poll, + .poll_gp_state = srcu_torture_poll_gp_state, .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, @@ -1018,42 +1068,26 @@ rcu_torture_fqs(void *arg) return 0; } +// Used by writers to randomly choose from the available grace-period +// primitives. The only purpose of the initialization is to size the array. +static int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, RTWS_COND_GET, RTWS_POLL_GET, RTWS_SYNC }; +static int nsynctypes; + /* - * RCU torture writer kthread. Repeatedly substitutes a new structure - * for that pointed to by rcu_torture_current, freeing the old structure - * after a series of grace periods (the "pipeline"). + * Determine which grace-period primitives are available. */ -static int -rcu_torture_writer(void *arg) +static void rcu_torture_write_types(void) { - bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal(); - int expediting = 0; - unsigned long gp_snap; bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; - bool gp_sync1 = gp_sync; - int i; - int oldnice = task_nice(current); - struct rcu_torture *rp; - struct rcu_torture *old_rp; - static DEFINE_TORTURE_RANDOM(rand); - bool stutter_waited; - int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, - RTWS_COND_GET, RTWS_SYNC }; - int nsynctypes = 0; - - VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); - if (!can_expedite) - pr_alert("%s" TORTURE_FLAG - " GP expediting controlled from boot/sysfs for %s.\n", - torture_type, cur_ops->name); + bool gp_poll1 = gp_poll, gp_sync1 = gp_sync; /* Initialize synctype[] array. If none set, take default. */ - if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) - gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; - if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) { + if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_poll1 && !gp_sync1) + gp_cond1 = gp_exp1 = gp_normal1 = gp_poll1 = gp_sync1 = true; + if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) { synctype[nsynctypes++] = RTWS_COND_GET; pr_info("%s: Testing conditional GPs.\n", __func__); - } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) { + } else if (gp_cond && (!cur_ops->get_gp_state || !cur_ops->cond_sync)) { pr_alert("%s: gp_cond without primitives.\n", __func__); } if (gp_exp1 && cur_ops->exp_sync) { @@ -1068,12 +1102,46 @@ rcu_torture_writer(void *arg) } else if (gp_normal && !cur_ops->deferred_free) { pr_alert("%s: gp_normal without primitives.\n", __func__); } + if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) { + synctype[nsynctypes++] = RTWS_POLL_GET; + pr_info("%s: Testing polling GPs.\n", __func__); + } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) { + pr_alert("%s: gp_poll without primitives.\n", __func__); + } if (gp_sync1 && cur_ops->sync) { synctype[nsynctypes++] = RTWS_SYNC; pr_info("%s: Testing normal GPs.\n", __func__); } else if (gp_sync && !cur_ops->sync) { pr_alert("%s: gp_sync without primitives.\n", __func__); } +} + +/* + * RCU torture writer kthread. Repeatedly substitutes a new structure + * for that pointed to by rcu_torture_current, freeing the old structure + * after a series of grace periods (the "pipeline"). + */ +static int +rcu_torture_writer(void *arg) +{ + bool boot_ended; + bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal(); + unsigned long cookie; + int expediting = 0; + unsigned long gp_snap; + int i; + int idx; + int oldnice = task_nice(current); + struct rcu_torture *rp; + struct rcu_torture *old_rp; + static DEFINE_TORTURE_RANDOM(rand); + bool stutter_waited; + + VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); + if (!can_expedite) + pr_alert("%s" TORTURE_FLAG + " GP expediting controlled from boot/sysfs for %s.\n", + torture_type, cur_ops->name); if (WARN_ONCE(nsynctypes == 0, "rcu_torture_writer: No update-side primitives.\n")) { /* @@ -1087,7 +1155,7 @@ rcu_torture_writer(void *arg) do { rcu_torture_writer_state = RTWS_FIXED_DELAY; - schedule_timeout_uninterruptible(1); + torture_hrtimeout_us(500, 1000, &rand); rp = rcu_torture_alloc(); if (rp == NULL) continue; @@ -1107,6 +1175,18 @@ rcu_torture_writer(void *arg) atomic_inc(&rcu_torture_wcount[i]); WRITE_ONCE(old_rp->rtort_pipe_count, old_rp->rtort_pipe_count + 1); + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) { + idx = cur_ops->readlock(); + cookie = cur_ops->get_gp_state(); + WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE && + cur_ops->poll_gp_state(cookie), + "%s: Cookie check 1 failed %s(%d) %lu->%lu\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cookie, cur_ops->get_gp_state()); + cur_ops->readunlock(idx); + } switch (synctype[torture_random(&rand) % nsynctypes]) { case RTWS_DEF_FREE: rcu_torture_writer_state = RTWS_DEF_FREE; @@ -1119,15 +1199,21 @@ rcu_torture_writer(void *arg) break; case RTWS_COND_GET: rcu_torture_writer_state = RTWS_COND_GET; - gp_snap = cur_ops->get_state(); - i = torture_random(&rand) % 16; - if (i != 0) - schedule_timeout_interruptible(i); - udelay(torture_random(&rand) % 1000); + gp_snap = cur_ops->get_gp_state(); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); rcu_torture_writer_state = RTWS_COND_SYNC; cur_ops->cond_sync(gp_snap); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET: + rcu_torture_writer_state = RTWS_POLL_GET; + gp_snap = cur_ops->start_gp_poll(); + rcu_torture_writer_state = RTWS_POLL_WAIT; + while (!cur_ops->poll_gp_state(gp_snap)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_SYNC: rcu_torture_writer_state = RTWS_SYNC; cur_ops->sync(); @@ -1137,6 +1223,14 @@ rcu_torture_writer(void *arg) WARN_ON_ONCE(1); break; } + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE && + !cur_ops->poll_gp_state(cookie), + "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cookie, cur_ops->get_gp_state()); } WRITE_ONCE(rcu_torture_current_version, rcu_torture_current_version + 1); @@ -1155,12 +1249,13 @@ rcu_torture_writer(void *arg) !rcu_gp_is_normal(); } rcu_torture_writer_state = RTWS_STUTTER; + boot_ended = rcu_inkernel_boot_has_ended(); stutter_waited = stutter_wait("rcu_torture_writer"); if (stutter_waited && !READ_ONCE(rcu_fwd_cb_nodela |
