diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 12:00:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 12:00:13 -0700 |
commit | 9a45da9270b64b14e154093c28f746d861ab8c61 (patch) | |
tree | bbe75582da2e9d5f9cce4bc39a6cc32fcc171ee1 | |
parent | 68a32ba14177d4a21c4a9a941cf1d7aea86d436f (diff) | |
parent | 120b566d1df22a0a4543ac0e8aef875c49dd2c21 (diff) | |
download | linux-9a45da9270b64b14e154093c28f746d861ab8c61.tar.gz linux-9a45da9270b64b14e154093c28f746d861ab8c61.tar.bz2 linux-9a45da9270b64b14e154093c28f746d861ab8c61.zip |
Merge tag 'core-rcu-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
- Support for "N" as alias for last bit in bitmap parsing library (eg
using syntax like "nohz_full=2-N")
- kvfree_rcu updates
- mm_dump_obj() updates. (One of these is to mm, but was suggested by
Andrew Morton.)
- RCU callback offloading update
- Polling RCU grace-period interfaces
- Realtime-related RCU updates
- Tasks-RCU updates
- Torture-test updates
- Torture-test scripting updates
- Miscellaneous fixes
* tag 'core-rcu-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (77 commits)
rcutorture: Test start_poll_synchronize_rcu() and poll_state_synchronize_rcu()
rcu: Provide polling interfaces for Tiny RCU grace periods
torture: Fix kvm.sh --datestamp regex check
torture: Consolidate qemu-cmd duration editing into kvm-transform.sh
torture: Print proper vmlinux path for kvm-again.sh runs
torture: Make TORTURE_TRUST_MAKE available in kvm-again.sh environment
torture: Make kvm-transform.sh update jitter commands
torture: Add --duration argument to kvm-again.sh
torture: Add kvm-again.sh to rerun a previous torture-test
torture: Create a "batches" file for build reuse
torture: De-capitalize TORTURE_SUITE
torture: Make upper-case-only no-dot no-slash scenario names official
torture: Rename SRCU-t and SRCU-u to avoid lowercase characters
torture: Remove no-mpstat error message
torture: Record kvm-test-1-run.sh and kvm-test-1-run-qemu.sh PIDs
torture: Record jitter start/stop commands
torture: Extract kvm-test-1-run-qemu.sh from kvm-test-1-run.sh
torture: Record TORTURE_KCONFIG_GDB_ARG in qemu-cmd
torture: Abstract jitter.sh start/stop into scripts
rcu: Provide polling interfaces for Tree RCU grace periods
...
54 files changed, 1304 insertions, 422 deletions
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 3b0876c77355..588d97366a46 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -847,7 +847,7 @@ Symposium on Distributed Computing} 'It's entirely possible that the current user could be replaced by RCU and/or seqlocks, and we could get rid of brlocks entirely.' . - Steve Hemminger responds by replacing them with RCU. + Stephen Hemminger responds by replacing them with RCU. } } diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 24302cad174a..3996b54158bf 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -68,6 +68,13 @@ For example one can add to the command line following parameter: where the final item represents CPUs 100,101,125,126,150,151,... +The value "N" can be used to represent the numerically last CPU on the system, +i.e "foo_cpus=16-N" would be equivalent to "16-31" on a 32 core system. + +Keep in mind that "N" is dynamic, so if system changes cause the bitmap width +to change, such as less cores in the CPU list, then N and any ranges using N +will also change. Use the same on a small 4 core system, and "16-N" becomes +"16-3" and now the same boot input will be flagged as invalid (start > end). This document may not be entirely up to date and comprehensive. The command diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 70a30f65bfca..fa08ec0dfbe7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4077,9 +4077,7 @@ see CONFIG_RAS_CEC help text. rcu_nocbs= [KNL] - The argument is a cpu list, as described above, - except that the string "all" can be used to - specify every CPU on the system. + The argument is a cpu list, as described above. In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. @@ -4268,6 +4266,18 @@ rcuscale.kfree_rcu_test= [KNL] Set to measure performance of kfree_rcu() flooding. + rcuscale.kfree_rcu_test_double= [KNL] + Test the double-argument variant of kfree_rcu(). + If this parameter has the same value as + rcuscale.kfree_rcu_test_single, both the single- + and double-argument variants are tested. + + rcuscale.kfree_rcu_test_single= [KNL] + Test the single-argument variant of kfree_rcu(). + If this parameter has the same value as + rcuscale.kfree_rcu_test_double, both the single- + and double-argument variants are tested. + rcuscale.kfree_nthreads= [KNL] The number of threads running loops of kfree_rcu(). diff --git a/include/linux/mm.h b/include/linux/mm.h index 06094627f1c1..21115933b9b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3180,7 +3180,11 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, extern int sysctl_nr_trim_pages; +#ifdef CONFIG_PRINTK void mem_dump_obj(void *object); +#else +static inline void mem_dump_obj(void *object) {} +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 8afe886e85f1..3db96c4f45fd 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -109,7 +109,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | - * | handling callbacks. | + * | handling callbacks. Enable bypass queueing. | * ---------------------------------------------------------------------------- */ @@ -125,7 +125,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | - * | ignores callbacks. | + * | ignores callbacks. Bypass enqueue is enabled. | * ---------------------------------------------------------------------------- * | * v @@ -134,7 +134,8 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | + * | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable | + * | bypass enqueue. | * ---------------------------------------------------------------------------- * | * v diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index ff3e94779e73..d8afdb8784c1 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -161,7 +161,7 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) * * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] - * [1] Documentation/core-api/atomic_ops.rst around line 114 + * [1] Documentation/memory-barriers.txt around line 1533 * [2] Documentation/RCU/rculist_nulls.rst around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6d855ef091ba..9455476c5ba2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -882,7 +882,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ -#define kfree_rcu kvfree_rcu +#define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf) /** * kvfree_rcu() - kvfree an object after a grace period. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2a97334eb786..35e0be326ffc 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -17,10 +17,9 @@ /* Never flag non-existent other CPUs! */ static inline bool rcu_eqs_special_set(int cpu) { return false; } -static inline unsigned long get_state_synchronize_rcu(void) -{ - return 0; -} +unsigned long get_state_synchronize_rcu(void); +unsigned long start_poll_synchronize_rcu(void); +bool poll_state_synchronize_rcu(unsigned long oldstate); static inline void cond_synchronize_rcu(unsigned long oldstate) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index df578b73960f..b89b54130f49 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -41,6 +41,8 @@ void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); unsigned long get_state_synchronize_rcu(void); +unsigned long start_poll_synchronize_rcu(void); +bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); void rcu_idle_enter(void); diff --git a/include/linux/slab.h b/include/linux/slab.h index 7ae604076767..0c97d788762c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -186,8 +186,10 @@ void kfree(const void *); void kfree_sensitive(const void *); size_t __ksize(const void *); size_t ksize(const void *); +#ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); +#endif #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR void __check_heap_object(const void *ptr, unsigned long n, struct page *page, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index df92211cf771..3de7be6dd17c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -241,7 +241,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 5fc29400e1a2..c7711e9b6900 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -432,6 +432,34 @@ TRACE_EVENT_RCU(rcu_fqs, __entry->cpu, __entry->qsevent) ); +/* + * Tracepoint for RCU stall events. Takes a string identifying the RCU flavor + * and a string identifying which function detected the RCU stall as follows: + * + * "StallDetected": Scheduler-tick detects other CPU's stalls. + * "SelfDetected": Scheduler-tick detects a current CPU's stall. + * "ExpeditedStall": Expedited grace period detects stalls. + */ +TRACE_EVENT(rcu_stall_warning, + + TP_PROTO(const char *rcuname, const char *msg), + + TP_ARGS(rcuname, msg), + + TP_STRUCT__entry( + __field(const char *, rcuname) + __field(const char *, msg) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->msg = msg; + ), + + TP_printk("%s %s", + __entry->rcuname, __entry->msg) +); + #endif /* #if defined(CONFIG_TREE_RCU) */ /* diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 7f181c9675f7..aaa111237b60 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -261,8 +261,7 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) } /* - * Mark the specified rcu_segcblist structure as offloaded. This - * structure must be empty. + * Mark the specified rcu_segcblist structure as offloaded. */ void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 06491d5530db..dca51fe9c73f 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -625,6 +625,8 @@ rcu_scale_shutdown(void *arg) torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()."); torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration."); torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees."); +torture_param(bool, kfree_rcu_test_double, false, "Do we run a kfree_rcu() double-argument scale test?"); +torture_param(bool, kfree_rcu_test_single, false, "Do we run a kfree_rcu() single-argument scale test?"); static struct task_struct **kfree_reader_tasks; static int kfree_nrealthreads; @@ -644,10 +646,13 @@ kfree_scale_thread(void *arg) struct kfree_obj *alloc_ptr; u64 start_time, end_time; long long mem_begin, mem_during = 0; + bool kfree_rcu_test_both; + DEFINE_TORTURE_RANDOM(tr); VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started"); set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); set_user_nice(current, MAX_NICE); + kfree_rcu_test_both = (kfree_rcu_test_single == kfree_rcu_test_double); start_time = ktime_get_mono_fast_ns(); @@ -670,7 +675,15 @@ kfree_scale_thread(void *arg) if (!alloc_ptr) return -ENOMEM; - kfree_rcu(alloc_ptr, rh); + // By default kfree_rcu_test_single and kfree_rcu_test_double are + // initialized to false. If both have the same value (false or true) + // both are randomly tested, otherwise only the one with value true + // is tested. + if ((kfree_rcu_test_single && !kfree_rcu_test_double) || + (kfree_rcu_test_both && torture_random(&tr) & 0x800)) + kfree_rcu(alloc_ptr); + else + kfree_rcu(alloc_ptr, rh); } cond_resched(); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 99657ffa6688..29d2f4c647d3 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -245,11 +245,11 @@ static const char *rcu_torture_writer_state_getname(void) return rcu_torture_writer_state_names[i]; } -#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) -#define rcu_can_boost() 1 -#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ -#define rcu_can_boost() 0 -#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ +#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_PREEMPT_RT) +# define rcu_can_boost() 1 +#else +# define rcu_can_boost() 0 +#endif #ifdef CONFIG_RCU_TRACE static u64 notrace rcu_trace_clock_local(void) @@ -494,6 +494,8 @@ static struct rcu_torture_ops rcu_ops = { .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .get_gp_state = get_state_synchronize_rcu, + .start_gp_poll = start_poll_synchronize_rcu, + .poll_gp_state = poll_state_synchronize_rcu, .cond_sync = cond_synchronize_rcu, .call = call_rcu, .cb_barrier = rcu_barrier, @@ -923,9 +925,13 @@ static void rcu_torture_enable_rt_throttle(void) static bool rcu_torture_boost_failed(unsigned long start, unsigned long end) { + static int dbg_done; + if (end - start > test_boost_duration * HZ - HZ / 2) { VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed"); n_rcu_torture_boost_failure++; + if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) + cur_ops->gp_kthread_dbg(); return true; /* failed */ } @@ -948,8 +954,8 @@ static int rcu_torture_boost(void *arg) init_rcu_head_on_stack(&rbi.rcu); /* Each pass through the following loop does one boost-test cycle. */ do { - /* Track if the test failed already in this test interval? */ - bool failed = false; + bool failed = false; // Test failed already in this test interval + bool firsttime = true; /* Increment n_rcu_torture_boosts once per boost-test */ while (!kthread_should_stop()) { @@ -975,18 +981,17 @@ static int rcu_torture_boost(void *arg) /* Do one boost-test interval. */ endtime = oldstarttime + test_boost_duration * HZ; - call_rcu_time = jiffies; while (time_before(jiffies, endtime)) { /* If we don't have a callback in flight, post one. */ if (!smp_load_acquire(&rbi.inflight)) { /* RCU core before ->inflight = 1. */ smp_store_release(&rbi.inflight, 1); - call_rcu(&rbi.rcu, rcu_torture_boost_cb); + cur_ops->call(&rbi.rcu, rcu_torture_boost_cb); /* Check if the boost test failed */ - failed = failed || - rcu_torture_boost_failed(call_rcu_time, - jiffies); + if (!firsttime && !failed) + failed = rcu_torture_boost_failed(call_rcu_time, jiffies); call_rcu_time = jiffies; + firsttime = false; } if (stutter_wait("rcu_torture_boost")) sched_set_fifo_low(current); @@ -999,7 +1004,7 @@ static int rcu_torture_boost(void *arg) * this case the boost check would never happen in the above * loop so do another one here. */ - if (!failed && smp_load_acquire(&rbi.inflight)) + if (!firsttime && !failed && smp_load_acquire(&rbi.inflight)) rcu_torture_boost_failed(call_rcu_time, jiffies); /* @@ -1025,6 +1030,9 @@ checkwait: if (stutter_wait("rcu_torture_boost")) sched_set_fifo_low(current); } while (!torture_must_stop()); + while (smp_load_acquire(&rbi.inflight)) + schedule_timeout_uninterruptible(1); // rcu_barrier() deadlocks. + /* Clean up and exit. */ while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) { torture_shutdown_absorb("rcu_torture_boost"); @@ -1223,14 +1231,6 @@ rcu_torture_writer(void *arg) WARN_ON_ONCE(1); break; } - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE && - !cur_ops->poll_gp_state(cookie), - "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", - __func__, - rcu_torture_writer_state_getname(), - rcu_torture_writer_state, - cookie, cur_ops->get_gp_state()); } WRITE_ONCE(rcu_torture_current_version, rcu_torture_current_version + 1); @@ -1589,7 +1589,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) preempt_enable(); if (cur_ops->get_gp_state && cur_ops->poll_gp_state) WARN_ONCE(cur_ops->poll_gp_state(cookie), - "%s: Cookie check 3 failed %s(%d) %lu->%lu\n", + "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", __func__, rcu_torture_writer_state_getname(), rcu_torture_writer_state, @@ -1797,7 +1797,7 @@ rcu_torture_stats_print(void) WARN_ON_ONCE(n_rcu_torture_barrier_error); // rcu_barrier() WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread WARN_ON_ONCE(n_rcu_torture_boost_rterror); // can't set RT prio - WARN_ON_ONCE(n_rcu_torture_boost_failure); // RCU boost failed + WARN_ON_ONCE(n_rcu_torture_boost_failure); // boost failed (TIMER_SOFTIRQ RT prio?) WARN_ON_ONCE(i > 1); // Too-short grace period } pr_cont("Reader Pipe: "); @@ -1861,6 +1861,45 @@ rcu_torture_stats(void *arg) torture_shutdown_absorb("rcu_torture_stats"); } while (!torture_must_stop()); torture_kthread_stopping("rcu_torture_stats"); + + { + struct rcu_head *rhp; + struct kmem_cache *kcp; + static int z; + + kcp = kmem_cache_create("rcuscale", 136, 8, SLAB_STORE_USER, NULL); + rhp = kmem_cache_alloc(kcp, GFP_KERNEL); + pr_alert("mem_dump_obj() slab test: rcu_torture_stats = %px, &rhp = %px, rhp = %px, &z = %px\n", stats_task, &rhp, rhp, &z); + pr_alert("mem_dump_obj(ZERO_SIZE_PTR):"); + mem_dump_obj(ZERO_SIZE_PTR); + pr_alert("mem_dump_obj(NULL):"); + mem_dump_obj(NULL); + pr |