diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-16 16:28:19 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-16 16:28:19 -0700 |
| commit | 94d18ee9340e00ee3455bb45661484093e3b2674 (patch) | |
| tree | 6274a11e0b1201e0f1b7181649af922972d46f94 /kernel | |
| parent | d75a43c645c26ab58118bd35405666a12971350d (diff) | |
| parent | 4a0fa886ab79ea85e8d1be2b0df143d8249779be (diff) | |
| download | linux-94d18ee9340e00ee3455bb45661484093e3b2674.tar.gz linux-94d18ee9340e00ee3455bb45661484093e3b2674.tar.bz2 linux-94d18ee9340e00ee3455bb45661484093e3b2674.zip | |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
"This cycle's RCU changes were:
- A few more RCU flavor consolidation cleanups.
- Updates to RCU's list-traversal macros improving lockdep usability.
- Forward-progress improvements for no-CBs CPUs: Avoid ignoring
incoming callbacks during grace-period waits.
- Forward-progress improvements for no-CBs CPUs: Use ->cblist
structure to take advantage of others' grace periods.
- Also added a small commit that avoids needlessly inflicting
scheduler-clock ticks on callback-offloaded CPUs.
- Forward-progress improvements for no-CBs CPUs: Reduce contention on
->nocb_lock guarding ->cblist.
- Forward-progress improvements for no-CBs CPUs: Add ->nocb_bypass
list to further reduce contention on ->nocb_lock guarding ->cblist.
- Miscellaneous fixes.
- Torture-test updates.
- minor LKMM updates"
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (86 commits)
MAINTAINERS: Update from paulmck@linux.ibm.com to paulmck@kernel.org
rcu: Don't include <linux/ktime.h> in rcutiny.h
rcu: Allow rcu_do_batch() to dynamically adjust batch sizes
rcu/nocb: Don't wake no-CBs GP kthread if timer posted under overload
rcu/nocb: Reduce __call_rcu_nocb_wake() leaf rcu_node ->lock contention
rcu/nocb: Reduce nocb_cb_wait() leaf rcu_node ->lock contention
rcu/nocb: Advance CBs after merge in rcutree_migrate_callbacks()
rcu/nocb: Avoid synchronous wakeup in __call_rcu_nocb_wake()
rcu/nocb: Print no-CBs diagnostics when rcutorture writer unduly delayed
rcu/nocb: EXP Check use and usefulness of ->nocb_lock_contended
rcu/nocb: Add bypass callback queueing
rcu/nocb: Atomic ->len field in rcu_segcblist structure
rcu/nocb: Unconditionally advance and wake for excessive CBs
rcu/nocb: Reduce ->nocb_lock contention with separate ->nocb_gp_lock
rcu/nocb: Reduce contention at no-CBs invocation-done time
rcu/nocb: Reduce contention at no-CBs registry-time CB advancement
rcu/nocb: Round down for number of no-CBs grace-period kthreads
rcu/nocb: Avoid ->nocb_lock capture by corresponding CPU
rcu/nocb: Avoid needless wakeups of no-CBs grace-period kthread
rcu/nocb: Make __call_rcu_nocb_wake() safe for many callbacks
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/locking/lockdep.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/Kconfig.debug | 11 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 1 | ||||
| -rw-r--r-- | kernel/rcu/rcu_segcblist.c | 174 | ||||
| -rw-r--r-- | kernel/rcu/rcu_segcblist.h | 54 | ||||
| -rw-r--r-- | kernel/rcu/rcuperf.c | 10 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 30 | ||||
| -rw-r--r-- | kernel/rcu/srcutree.c | 5 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 205 | ||||
| -rw-r--r-- | kernel/rcu/tree.h | 81 | ||||
| -rw-r--r-- | kernel/rcu/tree_exp.h | 8 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 1195 | ||||
| -rw-r--r-- | kernel/rcu/tree_stall.h | 9 | ||||
| -rw-r--r-- | kernel/rcu/update.c | 105 | ||||
| -rw-r--r-- | kernel/sched/core.c | 57 | ||||
| -rw-r--r-- | kernel/sched/idle.c | 5 | ||||
| -rw-r--r-- | kernel/torture.c | 2 | ||||
| -rw-r--r-- | kernel/trace/ftrace_internal.h | 8 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 4 |
19 files changed, 1263 insertions, 703 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4861cf8e274b..4aca3f4379d2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -620,7 +620,7 @@ static void print_lock(struct held_lock *hlock) return; } - printk(KERN_CONT "%p", hlock->instance); + printk(KERN_CONT "%px", hlock->instance); print_lock_name(lock); printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); } diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 5ec3ea4028e2..4aa02eee8f6c 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -8,6 +8,17 @@ menu "RCU Debugging" config PROVE_RCU def_bool PROVE_LOCKING +config PROVE_RCU_LIST + bool "RCU list lockdep debugging" + depends on PROVE_RCU && RCU_EXPERT + default n + help + Enable RCU lockdep checking for list usages. By default it is + turned off since there are several list RCU users that still + need to be converted to pass a lockdep expression. To prevent + false-positive splats, we keep it default disabled but once all + users are converted, we can remove this config option. + config TORTURE_TEST tristate default n diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5290b01de534..8fd4f82c9b3d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -227,6 +227,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) #ifdef CONFIG_RCU_STALL_COMMON +extern int rcu_cpu_stall_ftrace_dump; extern int rcu_cpu_stall_suppress; extern int rcu_cpu_stall_timeout; int rcu_jiffies_till_stall_check(void); diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 9bd5f6023c21..495c58ce1640 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -24,6 +24,49 @@ void rcu_cblist_init(struct rcu_cblist *rclp) } /* + * Enqueue an rcu_head structure onto the specified callback list. + * This function assumes that the callback is non-lazy because it + * is intended for use by no-CBs CPUs, which do not distinguish + * between lazy and non-lazy RCU callbacks. + */ +void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp) +{ + *rclp->tail = rhp; + rclp->tail = &rhp->next; + WRITE_ONCE(rclp->len, rclp->len + 1); +} + +/* + * Flush the second rcu_cblist structure onto the first one, obliterating + * any contents of the first. If rhp is non-NULL, enqueue it as the sole + * element of the second rcu_cblist structure, but ensuring that the second + * rcu_cblist structure, if initially non-empty, always appears non-empty + * throughout the process. If rdp is NULL, the second rcu_cblist structure + * is instead initialized to empty. + */ +void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, + struct rcu_cblist *srclp, + struct rcu_head *rhp) +{ + drclp->head = srclp->head; + if (drclp->head) + drclp->tail = srclp->tail; + else + drclp->tail = &drclp->head; + drclp->len = srclp->len; + drclp->len_lazy = srclp->len_lazy; + if (!rhp) { + rcu_cblist_init(srclp); + } else { + rhp->next = NULL; + srclp->head = rhp; + srclp->tail = &rhp->next; + WRITE_ONCE(srclp->len, 1); + srclp->len_lazy = 0; + } +} + +/* * Dequeue the oldest rcu_head structure from the specified callback * list. This function assumes that the callback is non-lazy, but * the caller can later invoke rcu_cblist_dequeued_lazy() if it @@ -44,6 +87,67 @@ struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) return rhp; } +/* Set the length of an rcu_segcblist structure. */ +void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v) +{ +#ifdef CONFIG_RCU_NOCB_CPU + atomic_long_set(&rsclp->len, v); +#else + WRITE_ONCE(rsclp->len, v); +#endif +} + +/* + * Increase the numeric length of an rcu_segcblist structure by the + * specified amount, which can be negative. This can cause the ->len + * field to disagree with the actual number of callbacks on the structure. + * This increase is fully ordered with respect to the callers accesses + * both before and after. + */ +void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v) +{ +#ifdef CONFIG_RCU_NOCB_CPU + smp_mb__before_atomic(); /* Up to the caller! */ + atomic_long_add(v, &rsclp->len); + smp_mb__after_atomic(); /* Up to the caller! */ +#else + smp_mb(); /* Up to the caller! */ + WRITE_ONCE(rsclp->len, rsclp->len + v); + smp_mb(); /* Up to the caller! */ +#endif +} + +/* + * Increase the numeric length of an rcu_segcblist structure by one. + * This can cause the ->len field to disagree with the actual number of + * callbacks on the structure. This increase is fully ordered with respect + * to the callers accesses both before and after. + */ +void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp) +{ + rcu_segcblist_add_len(rsclp, 1); +} + +/* + * Exchange the numeric length of the specified rcu_segcblist structure + * with the specified value. This can cause the ->len field to disagree + * with the actual number of callbacks on the structure. This exchange is + * fully ordered with respect to the callers accesses both before and after. + */ +long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v) +{ +#ifdef CONFIG_RCU_NOCB_CPU + return atomic_long_xchg(&rsclp->len, v); +#else + long ret = rsclp->len; + + smp_mb(); /* Up to the caller! */ + WRITE_ONCE(rsclp->len, v); + smp_mb(); /* Up to the caller! */ + return ret; +#endif +} + /* * Initialize an rcu_segcblist structure. */ @@ -56,8 +160,9 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp) rsclp->head = NULL; for (i = 0; i < RCU_CBLIST_NSEGS; i++) rsclp->tails[i] = &rsclp->head; - rsclp->len = 0; + rcu_segcblist_set_len(rsclp, 0); rsclp->len_lazy = 0; + rsclp->enabled = 1; } /* @@ -69,7 +174,16 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); - rsclp->tails[RCU_NEXT_TAIL] = NULL; + rsclp->enabled = 0; +} + +/* + * Mark the specified rcu_segcblist structure as offloaded. This + * structure must be empty. + */ +void rcu_segcblist_offload(struct rcu_segcblist *rsclp) +{ + rsclp->offloaded = 1; } /* @@ -118,6 +232,18 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) } /* + * Return false if there are no CBs awaiting grace periods, otherwise, + * return true and store the nearest waited-upon grace period into *lp. + */ +bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp) +{ + if (!rcu_segcblist_pend_cbs(rsclp)) + return false; + *lp = rsclp->gp_seq[RCU_WAIT_TAIL]; + return true; +} + +/* * Enqueue the specified callback onto the specified rcu_segcblist * structure, updating accounting as needed. Note that the ->len * field may be accessed locklessly, hence the WRITE_ONCE(). @@ -129,13 +255,13 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, struct rcu_head *rhp, bool lazy) { - WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ + rcu_segcblist_inc_len(rsclp); if (lazy) rsclp->len_lazy++; smp_mb(); /* Ensure counts are updated before callback is enqueued. */ rhp->next = NULL; - *rsclp->tails[RCU_NEXT_TAIL] = rhp; - rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; + WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); + WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next); } /* @@ -155,7 +281,7 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, if (rcu_segcblist_n_cbs(rsclp) == 0) return false; - WRITE_ONCE(rsclp->len, rsclp->len + 1); + rcu_segcblist_inc_len(rsclp); if (lazy) rsclp->len_lazy++; smp_mb(); /* Ensure counts are updated before callback is entrained. */ @@ -163,9 +289,9 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) if (rsclp->tails[i] != rsclp->tails[i - 1]) break; - *rsclp->tails[i] = rhp; + WRITE_ONCE(*rsclp->tails[i], rhp); for (; i <= RCU_NEXT_TAIL; i++) - rsclp->tails[i] = &rhp->next; + WRITE_ONCE(rsclp->tails[i], &rhp->next); return true; } @@ -182,9 +308,8 @@ void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp) { rclp->len_lazy += rsclp->len_lazy; - rclp->len += rsclp->len; rsclp->len_lazy = 0; - WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ + rclp->len = rcu_segcblist_xchg_len(rsclp, 0); } /* @@ -200,12 +325,12 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, if (!rcu_segcblist_ready_cbs(rsclp)) return; /* Nothing to do. */ *rclp->tail = rsclp->head; - rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; + WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]); + WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); rclp->tail = rsclp->tails[RCU_DONE_TAIL]; for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) - rsclp->tails[i] = &rsclp->head; + WRITE_ONCE(rsclp->tails[i], &rsclp->head); } /* @@ -224,9 +349,9 @@ void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, return; /* Nothing to do. */ *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; + WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) - rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; + WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]); } /* @@ -237,8 +362,7 @@ void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp) { rsclp->len_lazy += rclp->len_lazy; - /* ->len sampled locklessly. */ - WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); + rcu_segcblist_add_len(rsclp, rclp->len); rclp->len_lazy = 0; rclp->len = 0; } @@ -255,10 +379,10 @@ void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, if (!rclp->head) return; /* No callbacks to move. */ *rclp->tail = rsclp->head; - rsclp->head = rclp->head; + WRITE_ONCE(rsclp->head, rclp->head); for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) if (&rsclp->head == rsclp->tails[i]) - rsclp->tails[i] = rclp->tail; + WRITE_ONCE(rsclp->tails[i], rclp->tail); else break; rclp->head = NULL; @@ -274,8 +398,8 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, { if (!rclp->head) return; /* Nothing to do. */ - *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; - rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; + WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head); + WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail); rclp->head = NULL; rclp->tail = &rclp->head; } @@ -299,7 +423,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) break; - rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; + WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]); } /* If no callbacks moved, nothing more need be done. */ @@ -308,7 +432,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) /* Clean up tail pointers that might have been misordered above. */ for (j = RCU_WAIT_TAIL; j < i; j++) - rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; + WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]); /* * Callbacks moved, so clean up the misordered ->tails[] pointers @@ -319,7 +443,7 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) break; /* No more callbacks. */ - rsclp->tails[j] = rsclp->tails[i]; + WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]); rsclp->gp_seq[j] = rsclp->gp_seq[i]; } } @@ -384,7 +508,7 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) * structure other than in the RCU_NEXT_TAIL segment. */ for (; i < RCU_NEXT_TAIL; i++) { - rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; + WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]); rsclp->gp_seq[i] = seq; } return true; diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 71b64648464e..815c2fdd3fcc 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -9,6 +9,12 @@ #include <linux/rcu_segcblist.h> +/* Return number of callbacks in the specified callback list. */ +static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) +{ + return READ_ONCE(rclp->len); +} + /* * Account for the fact that a previously dequeued callback turned out * to be marked as lazy. @@ -19,6 +25,10 @@ static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) } void rcu_cblist_init(struct rcu_cblist *rclp); +void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp); +void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, + struct rcu_cblist *srclp, + struct rcu_head *rhp); struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); /* @@ -36,13 +46,17 @@ struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); */ static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) { - return !rsclp->head; + return !READ_ONCE(rsclp->head); } /* Return number of callbacks in segmented callback list. */ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) { +#ifdef CONFIG_RCU_NOCB_CPU + return atomic_long_read(&rsclp->len); +#else return READ_ONCE(rsclp->len); +#endif } /* Return number of lazy callbacks in segmented callback list. */ @@ -54,16 +68,22 @@ static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) /* Return number of lazy callbacks in segmented callback list. */ static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) { - return rsclp->len - rsclp->len_lazy; + return rcu_segcblist_n_cbs(rsclp) - rsclp->len_lazy; } /* * Is the specified rcu_segcblist enabled, for example, not corresponding - * to an offline or callback-offloaded CPU? + * to an offline CPU? */ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) { - return !!rsclp->tails[RCU_NEXT_TAIL]; + return rsclp->enabled; +} + +/* Is the specified rcu_segcblist offloaded? */ +static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) +{ + return rsclp->offloaded; } /* @@ -73,36 +93,18 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) */ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) { - return !*rsclp->tails[seg]; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) -{ - return rsclp->head; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); - return rsclp->tails[RCU_NEXT_TAIL]; + return !READ_ONCE(*READ_ONCE(rsclp->tails[seg])); } +void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp); void rcu_segcblist_init(struct rcu_segcblist *rsclp); void rcu_segcblist_disable(struct rcu_segcblist *rsclp); +void rcu_segcblist_offload(struct rcu_segcblist *rsclp); bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); +bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp); void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, struct rcu_head *rhp, bool lazy); bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 7a6890b23c5f..5a879d073c1c 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -89,7 +89,7 @@ torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable static char *perf_type = "rcu"; module_param(perf_type, charp, 0444); -MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)"); +MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)"); static int nrealreaders; static int nrealwriters; @@ -375,6 +375,14 @@ rcu_perf_writer(void *arg) if (holdoff) schedule_timeout_uninterruptible(holdoff * HZ); + /* + * Wait until rcu_end_inkernel_boot() is called for normal GP tests + * so that RCU is not always expedited for normal GP tests. + * The system_state test is approximate, but works well in practice. + */ + while (!gp_exp && system_state != SYSTEM_RUNNING) + schedule_timeout_uninterruptible(1); + t = ktime_get_mono_fast_ns(); if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) { t_rcu_perf_writer_started = t; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index fce4e7e6f502..3c9feca1eab1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -161,6 +161,7 @@ static atomic_long_t n_rcu_torture_timers; static long n_barrier_attempts; static long n_barrier_successes; /* did rcu_barrier test succeed? */ static struct list_head rcu_torture_removed; +static unsigned long shutdown_jiffies; static int rcu_torture_writer_state; #define RTWS_FIXED_DELAY 0 @@ -228,6 +229,15 @@ static u64 notrace rcu_trace_clock_local(void) } #endif /* #else #ifdef CONFIG_RCU_TRACE */ +/* + * Stop aggressive CPU-hog tests a bit before the end of the test in order + * to avoid interfering with test shutdown. + */ +static bool shutdown_time_arrived(void) +{ + return shutdown_secs && time_after(jiffies, shutdown_jiffies - 30 * HZ); +} + static unsigned long boost_starttime; /* jiffies of next boost test start. */ static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ @@ -1713,12 +1723,14 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) } // Give the scheduler a chance, even on nohz_full CPUs. -static void rcu_torture_fwd_prog_cond_resched(void) +static void rcu_torture_fwd_prog_cond_resched(unsigned long iter) { if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) { - if (need_resched()) + // Real call_rcu() floods hit userspace, so emulate that. + if (need_resched() || (iter & 0xfff)) schedule(); } else { + // No userspace emulation: CB invocation throttles call_rcu() cond_resched(); } } @@ -1746,7 +1758,7 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void) spin_unlock_irqrestore(&rcu_fwd_lock, flags); kfree(rfcp); freed++; - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(freed); } return freed; } @@ -1785,15 +1797,17 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) WRITE_ONCE(rcu_fwd_startat, jiffies); stopat = rcu_fwd_startat + dur; while (time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { idx = cur_ops->readlock(); udelay(10); cur_ops->readunlock(idx); if (!fwd_progress_need_resched || need_resched()) - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(1); } (*tested_tries)++; if (!time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { (*tested)++; cver = READ_ONCE(rcu_torture_current_version) - cver; @@ -1852,6 +1866,7 @@ static void rcu_torture_fwd_prog_cr(void) gps = cur_ops->get_gp_seq(); rcu_launder_gp_seq_start = gps; while (time_before(jiffies, stopat) && + !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { rfcp = READ_ONCE(rcu_fwd_cb_head); rfcpn = NULL; @@ -1875,7 +1890,7 @@ static void rcu_torture_fwd_prog_cr(void) rfcp->rfc_gps = 0; } cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); - rcu_torture_fwd_prog_cond_resched(); + rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs); } stoppedat = jiffies; n_launders_cb_snap = READ_ONCE(n_launders_cb); @@ -1884,7 +1899,8 @@ static void rcu_torture_fwd_prog_cr(void) cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */ (void)rcu_torture_fwd_prog_cbfree(); - if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) { + if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop) && + !shutdown_time_arrived()) { WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED); pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n", __func__, @@ -2160,6 +2176,7 @@ rcu_torture_cleanup(void) return; } + show_rcu_gp_kthreads(); rcu_torture_barrier_cleanup(); torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task); torture_stop_kthread(rcu_torture_stall, stall_task); @@ -2465,6 +2482,7 @@ rcu_torture_init(void) goto unwind; rcutor_hp = firsterr; } + shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); if (firsterr) goto unwind; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index cf0e886314f2..5dffade2d7cd 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1279,8 +1279,9 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) c0 = l0 - u0; c1 = l1 - u1; - pr_cont(" %d(%ld,%ld %1p)", - cpu, c0, c1, rcu_segcblist_head(&sdp->srcu_cblist)); + pr_cont(" %d(%ld,%ld %c)", + cpu, c0, c1, + "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]); s0 += c0; s1 += c1; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a14e5fbbea46..71395e91b876 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -56,6 +56,7 @@ #include <linux/smpboot.h> #include <linux/jiffies.h> #include <linux/sched/isolation.h> +#include <linux/sched/clock.h> #include "../time/tick-internal.h" #include "tree.h" @@ -210,9 +211,9 @@ static long rcu_get_n_cbs_cpu(int cpu) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - if (rcu_segcblist_is_enabled(&rdp->cblist)) /* Online normal CPU? */ + if (rcu_segcblist_is_enabled(&rdp->cblist)) return rcu_segcblist_n_cbs(&rdp->cblist); - return rcu_get_n_cbs_nocb_cpu(rdp); /* Works for offline, too. */ + return 0; } void rcu_softirq_qs(void) @@ -416,6 +417,12 @@ module_param(qlowmark, long, 0444); static ulong jiffies_till_first_fqs = ULONG_MAX; static ulong jiffies_till_next_fqs = ULONG_MAX; static bool rcu_kick_kthreads; +static int rcu_divisor = 7; +module_param(rcu_divisor, int, 0644); + +/* Force an exit from rcu_do_batch() after 3 milliseconds. */ +static long rcu_resched_ns = 3 * NSEC_PER_MSEC; +module_param(rcu_resched_ns, long, 0644); /* * How long the grace period must be before we start recruiting @@ -1251,6 +1258,7 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp) unsigned long gp_seq_req; bool ret = false; + rcu_lockdep_assert_cblist_protected(rdp); raw_lockdep_assert_held_rcu_node(rnp); /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ @@ -1292,7 +1300,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp, unsigned long c; bool needwake; - lockdep_assert_irqs_disabled(); + rcu_lockdep_assert_cblist_protected(rdp); c = rcu_seq_snap(&rcu_state.gp_seq); if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { /* Old request still live, so mark recent callbacks. */ @@ -1318,6 +1326,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp, */ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) { + rcu_lockdep_assert_cblist_protected(rdp); raw_lockdep_assert_held_rcu_node(rnp); /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ @@ -1335,6 +1344,21 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) } /* + * Move and classify callbacks, but only if doing so won't require + * that the RCU grace-period kthread be awakened. + */ +static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp, + struct rcu_data *rdp) +{ + rcu_lockdep_assert_cblist_protected(rdp); + if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || + !raw_spin_trylock_rcu_node(rnp)) + return; + WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); + raw_spin_unlock_rcu_node(rnp); +} + +/* * Update CPU-local rcu_data state to record the beginnings and ends of * grace periods. The caller must hold the ->lock of the leaf rcu_node * structure corresponding to the current CPU, and must have irqs disabled. @@ -1342,8 +1366,10 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) */ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) { - bool ret; + bool ret = false; bool need_gp; + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + rcu_segcblist_is_offloaded(&rdp->cblist); raw_lockdep_assert_held_rcu_node(rnp); @@ -1353,10 +1379,12 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) /* Handle the ends of any preceding grace periods first. */ if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || unlikely(READ_ONCE(rdp->gpwrap))) { - ret = rcu_advance_cbs(rnp, rdp); /* Advance callbacks. */ + if (!offloaded) + ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */ trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend")); } else { - ret = rcu_accelerate_cbs(rnp, rdp); /* Recent callbacks. */ + if (!offloaded) + ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */ } /* Now handle the beginnings of any new-to-this-CPU grace periods. */ @@ -1657,6 +1685,7 @@ static void rcu_gp_cleanup(void) unsigned long gp_duration; bool needgp = false; unsigned long new_gp_seq; + bool offloaded; struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(); struct swait_queue_head *sq; @@ -1722,7 +1751,9 @@ static void rcu_gp_cleanup(void) needgp = true; } /* Advance CBs to reduce false positives below. */ - if (!rcu_accelerate_cbs(rnp, rdp) && needgp) { + offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + rcu_segcblist_is_offloaded(&rdp->cblist); + if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) { WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT); rcu_state.gp_req_activity = jiffies; trace_rcu_grace_period(rcu_state.name, @@ -1916,7 +1947,9 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) { unsigned long flags; unsigned long mask; - bool needwake; + bool needwake = false; + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + rcu_segcblist_is_offloaded(&rdp->cblist); struct rcu_node *rnp; rnp = rdp->mynode; @@ -1943,7 +1976,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. */ - needwake = rcu_accelerate_cbs(rnp, rdp); + if (!offloaded) + needwake = rcu_accelerate_cbs(rnp, rdp); rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); /* ^^^ Released rnp->lock */ |
