From 095777c417db142970adeb776fa0cb10810b8122 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 22 Jul 2015 14:07:27 -0700
Subject: locktorture: Support rtmutex torturing

Real-time mutexes are one of the few general primitives
that we do not have in locktorture. Address this -- a few
considerations:

o To spice things up, enable competing thread(s) to become
rt, such that we can stress different prio boosting paths
in the rtmutex code. Introduce a ->task_boost callback,
only used by rtmutex-torturer. Tasks will boost/deboost
around every 50k (arbitrarily) lock/unlock operations.

o Hold times are similar to what we have for other locks:
only occasionally having longer hold times (per ~200k ops).
So we roughly do two full rt boost+deboosting ops with
short hold times.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/locktorture.c | 114 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f1f2..e1ca7a2fae91 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -17,12 +17,14 @@
  *
  * Copyright (C) IBM Corporation, 2014
  *
- * Author: Paul E. McKenney <paulmck@us.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ *          Davidlohr Bueso <dave@stgolabs.net>
  *	Based on kernel/rcu/torture.c.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/rwlock.h>
 #include <linux/mutex.h>
@@ -91,11 +93,13 @@ struct lock_torture_ops {
 	void (*init)(void);
 	int (*writelock)(void);
 	void (*write_delay)(struct torture_random_state *trsp);
+	void (*task_boost)(struct torture_random_state *trsp);
 	void (*writeunlock)(void);
 	int (*readlock)(void);
 	void (*read_delay)(struct torture_random_state *trsp);
 	void (*readunlock)(void);
-	unsigned long flags;
+
+	unsigned long flags; /* for irq spinlocks */
 	const char *name;
 };
 
@@ -139,9 +143,15 @@ static void torture_lock_busted_write_unlock(void)
 	  /* BUGGY, do not use in real life!!! */
 }
 
+static void torture_boost_dummy(struct torture_random_state *trsp)
+{
+	/* Only rtmutexes care about priority */
+}
+
 static struct lock_torture_ops lock_busted_ops = {
 	.writelock	= torture_lock_busted_write_lock,
 	.write_delay	= torture_lock_busted_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_lock_busted_write_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -185,6 +195,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_ops = {
 	.writelock	= torture_spin_lock_write_lock,
 	.write_delay	= torture_spin_lock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_spin_lock_write_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -211,6 +222,7 @@ __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_irq_ops = {
 	.writelock	= torture_spin_lock_write_lock_irq,
 	.write_delay	= torture_spin_lock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_lock_spin_write_unlock_irq,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -275,6 +287,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_ops = {
 	.writelock	= torture_rwlock_write_lock,
 	.write_delay	= torture_rwlock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwlock_write_unlock,
 	.readlock       = torture_rwlock_read_lock,
 	.read_delay     = torture_rwlock_read_delay,
@@ -315,6 +328,7 @@ __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_irq_ops = {
 	.writelock	= torture_rwlock_write_lock_irq,
 	.write_delay	= torture_rwlock_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwlock_write_unlock_irq,
 	.readlock       = torture_rwlock_read_lock_irq,
 	.read_delay     = torture_rwlock_read_delay,
@@ -354,6 +368,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex)
 static struct lock_torture_ops mutex_lock_ops = {
 	.writelock	= torture_mutex_lock,
 	.write_delay	= torture_mutex_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_mutex_unlock,
 	.readlock       = NULL,
 	.read_delay     = NULL,
@@ -361,6 +376,90 @@ static struct lock_torture_ops mutex_lock_ops = {
 	.name		= "mutex_lock"
 };
 
+#ifdef CONFIG_RT_MUTEXES
+static DEFINE_RT_MUTEX(torture_rtmutex);
+
+static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+{
+	rt_mutex_lock(&torture_rtmutex);
+	return 0;
+}
+
+static void torture_rtmutex_boost(struct torture_random_state *trsp)
+{
+	int policy;
+	struct sched_param param;
+	const unsigned int factor = 50000; /* yes, quite arbitrary */
+
+	if (!rt_task(current)) {
+		/*
+		 * (1) Boost priority once every ~50k operations. When the
+		 * task tries to take the lock, the rtmutex code will account
+		 * for the new priority, and do any corresponding pi-dance.
+		 */
+		if (!(torture_random(trsp) %
+		      (cxt.nrealwriters_stress * factor))) {
+			policy = SCHED_FIFO;
+			param.sched_priority = MAX_RT_PRIO - 1;
+		} else /* common case, do nothing */
+			return;
+	} else {
+		/*
+		 * The task will remain boosted for another ~100k operations,
+		 * then restored back to its original prio, and so forth.
+		 *
+		 * When @trsp is nil, we want to force-reset the task for
+		 * stopping the kthread.
+		 */
+		if (!trsp || !(torture_random(trsp) %
+			       (cxt.nrealwriters_stress * factor * 2))) {
+			policy = SCHED_NORMAL;
+			param.sched_priority = 0;
+		} else /* common case, do nothing */
+			return;
+	}
+
+	sched_setscheduler_nocheck(current, policy, &param);
+}
+
+static void torture_rtmutex_delay(struct torture_random_state *trsp)
+{
+	const unsigned long shortdelay_us = 2;
+	const unsigned long longdelay_ms = 100;
+
+	/*
+	 * We want a short delay mostly to emulate likely code, and
+	 * we want a long delay occasionally to force massive contention.
+	 */
+	if (!(torture_random(trsp) %
+	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+		mdelay(longdelay_ms);
+	if (!(torture_random(trsp) %
+	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+		udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
+		preempt_schedule();  /* Allow test to be preempted. */
+#endif
+}
+
+static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+{
+	rt_mutex_unlock(&torture_rtmutex);
+}
+
+static struct lock_torture_ops rtmutex_lock_ops = {
+	.writelock	= torture_rtmutex_lock,
+	.write_delay	= torture_rtmutex_delay,
+	.task_boost     = torture_rtmutex_boost,
+	.writeunlock	= torture_rtmutex_unlock,
+	.readlock       = NULL,
+	.read_delay     = NULL,
+	.readunlock     = NULL,
+	.name		= "rtmutex_lock"
+};
+#endif
+
 static DECLARE_RWSEM(torture_rwsem);
 static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
 {
@@ -419,6 +518,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem)
 static struct lock_torture_ops rwsem_lock_ops = {
 	.writelock	= torture_rwsem_down_write,
 	.write_delay	= torture_rwsem_write_delay,
+	.task_boost     = torture_boost_dummy,
 	.writeunlock	= torture_rwsem_up_write,
 	.readlock       = torture_rwsem_down_read,
 	.read_delay     = torture_rwsem_read_delay,
@@ -442,6 +542,7 @@ static int lock_torture_writer(void *arg)
 		if ((torture_random(&rand) & 0xfffff) == 0)
 			schedule_timeout_uninterruptible(1);
 
+		cxt.cur_ops->task_boost(&rand);
 		cxt.cur_ops->writelock();
 		if (WARN_ON_ONCE(lock_is_write_held))
 			lwsp->n_lock_fail++;
@@ -456,6 +557,8 @@ static int lock_torture_writer(void *arg)
 
 		stutter_wait("lock_torture_writer");
 	} while (!torture_must_stop());
+
+	cxt.cur_ops->task_boost(NULL); /* reset prio */
 	torture_kthread_stopping("lock_torture_writer");
 	return 0;
 }
@@ -642,6 +745,9 @@ static int __init lock_torture_init(void)
 		&spin_lock_ops, &spin_lock_irq_ops,
 		&rw_lock_ops, &rw_lock_irq_ops,
 		&mutex_lock_ops,
+#ifdef CONFIG_RT_MUTEXES
+		&rtmutex_lock_ops,
+#endif
 		&rwsem_lock_ops,
 	};
 
@@ -676,6 +782,10 @@ static int __init lock_torture_init(void)
 	if (strncmp(torture_type, "mutex", 5) == 0)
 		cxt.debug_lock = true;
 #endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+	if (strncmp(torture_type, "rtmutex", 7) == 0)
+		cxt.debug_lock = true;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if ((strncmp(torture_type, "spin", 4) == 0) ||
 	    (strncmp(torture_type, "rw_lock", 7) == 0))
-- 
cgit v1.2.3


From 302707fd7cd341a23dbc448a335d432ad0069c20 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 31 Aug 2015 20:21:59 -0700
Subject: locking/percpu-rwsem: Export symbols for locktorture

This commit exports percpu_down_read(), percpu_down_write(),
__percpu_init_rwsem(), percpu_up_read(), and percpu_up_write() to allow
locktorture to test them when built as a module.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/percpu-rwsem.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f32567254867..e2621fbbcbd1 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -22,6 +22,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
 
 void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 {
@@ -87,6 +88,7 @@ void percpu_down_read(struct percpu_rw_semaphore *brw)
 	/* avoid up_read()->rwsem_release() */
 	__up_read(&brw->rw_sem);
 }
+EXPORT_SYMBOL_GPL(percpu_down_read);
 
 int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
 {
@@ -112,6 +114,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw)
 	if (atomic_dec_and_test(&brw->slow_read_ctr))
 		wake_up_all(&brw->write_waitq);
 }
+EXPORT_SYMBOL_GPL(percpu_up_read);
 
 static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 {
@@ -163,6 +166,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
+EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
@@ -176,3 +180,4 @@ void percpu_up_write(struct percpu_rw_semaphore *brw)
 	/* the last writer unblocks update_fast_ctr() */
 	atomic_dec(&brw->write_ctr);
 }
+EXPORT_SYMBOL_GPL(percpu_up_write);
-- 
cgit v1.2.3


From 617783dd99704331e22636388c932450e02ee636 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 29 Aug 2015 14:46:29 -0700
Subject: locktorture: Add torture tests for percpu_rwsem

This commit adds percpu_rwsem tests based on the earlier rwsem tests.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/locktorture.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index e1ca7a2fae91..8545e12598ce 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -36,6 +36,7 @@
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
 #include <linux/torture.h>
 
 MODULE_LICENSE("GPL");
@@ -526,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = {
 	.name		= "rwsem_lock"
 };
 
+#include <linux/percpu-rwsem.h>
+static struct percpu_rw_semaphore pcpu_rwsem;
+
+void torture_percpu_rwsem_init(void)
+{
+	BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
+}
+
+static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
+{
+	percpu_down_write(&pcpu_rwsem);
+	return 0;
+}
+
+static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem)
+{
+	percpu_up_write(&pcpu_rwsem);
+}
+
+static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem)
+{
+	percpu_down_read(&pcpu_rwsem);
+	return 0;
+}
+
+static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
+{
+	percpu_up_read(&pcpu_rwsem);
+}
+
+static struct lock_torture_ops percpu_rwsem_lock_ops = {
+	.init		= torture_percpu_rwsem_init,
+	.writelock	= torture_percpu_rwsem_down_write,
+	.write_delay	= torture_rwsem_write_delay,
+	.task_boost     = torture_boost_dummy,
+	.writeunlock	= torture_percpu_rwsem_up_write,
+	.readlock       = torture_percpu_rwsem_down_read,
+	.read_delay     = torture_rwsem_read_delay,
+	.readunlock     = torture_percpu_rwsem_up_read,
+	.name		= "percpu_rwsem_lock"
+};
+
 /*
  * Lock torture writer kthread.  Repeatedly acquires and releases
  * the lock, checking for duplicate acquisitions.
@@ -749,6 +792,7 @@ static int __init lock_torture_init(void)
 		&rtmutex_lock_ops,
 #endif
 		&rwsem_lock_ops,
+		&percpu_rwsem_lock_ops,
 	};
 
 	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
-- 
cgit v1.2.3


From 3836f5337f74fedc15981688c3c31dbf4293ae84 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 30 Aug 2015 03:29:58 -0700
Subject: torture: Consolidate cond_resched_rcu_qs() into stutter_wait()

This commit moves cond_resched_rcu_qs() into stutter_wait(), saving
a line and also avoiding RCU CPU stall warnings from all torture
loops containing a stutter_wait().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/rcutorture.c | 2 --
 kernel/torture.c        | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 77192953dee5..8a65b7d471a0 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -768,7 +768,6 @@ static int rcu_torture_boost(void *arg)
 				}
 				call_rcu_time = jiffies;
 			}
-			cond_resched_rcu_qs();
 			stutter_wait("rcu_torture_boost");
 			if (torture_must_stop())
 				goto checkwait;
@@ -1208,7 +1207,6 @@ rcu_torture_reader(void *arg)
 		__this_cpu_inc(rcu_torture_batch[completed]);
 		preempt_enable();
 		cur_ops->readunlock(idx);
-		cond_resched_rcu_qs();
 		stutter_wait("rcu_torture_reader");
 	} while (!torture_must_stop());
 	if (irqreader && cur_ops->irq_capable) {
diff --git a/kernel/torture.c b/kernel/torture.c
index 3e4840633d3e..44aa462d033f 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -523,6 +523,7 @@ static int stutter;
  */
 void stutter_wait(const char *title)
 {
+	cond_resched_rcu_qs();
 	while (READ_ONCE(stutter_pause_test) ||
 	       (torture_runnable && !READ_ONCE(*torture_runnable))) {
 		if (stutter_pause_test)
-- 
cgit v1.2.3


From cc44ca848f5e517aeca9f5eabbe13609a3f71450 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:44 +0200
Subject: rcu: Create rcu_sync infrastructure

The rcu_sync infrastructure can be thought of as infrastructure to be
used to implement reader-writer primitives having extremely lightweight
readers during times when there are no writers.  The first use is in
the percpu_rwsem used by the VFS subsystem.

This infrastructure is functionally equivalent to

        struct rcu_sync_struct {
                atomic_t counter;
        };

	/* Check possibility of fast-path read-side operations. */
        static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
        {
                return atomic_read(&rss->counter) == 0;
        }

	/* Tell readers to use slowpaths. */
        static inline void rcu_sync_enter(struct rcu_sync_struct *rss)
        {
                atomic_inc(&rss->counter);
                synchronize_sched();
        }

	/* Allow readers to once again use fastpaths. */
        static inline void rcu_sync_exit(struct rcu_sync_struct *rss)
        {
                synchronize_sched();
                atomic_dec(&rss->counter);
        }

The main difference is that it records the state and only calls
synchronize_sched() if required.  At least some of the calls to
synchronize_sched() will be optimized away when rcu_sync_enter() and
rcu_sync_exit() are invoked repeatedly in quick succession.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/Makefile |   2 +-
 kernel/rcu/sync.c   | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 1 deletion(-)
 create mode 100644 kernel/rcu/sync.c

(limited to 'kernel')

diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 50a808424b06..61a16569ffbf 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,4 +1,4 @@
-obj-y += update.o
+obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += tree.o
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
new file mode 100644
index 000000000000..0a11df43be23
--- /dev/null
+++ b/kernel/rcu/sync.c
@@ -0,0 +1,175 @@
+/*
+ * RCU-based infrastructure for lightweight reader-writer locking
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (c) 2015, Red Hat, Inc.
+ *
+ * Author: Oleg Nesterov <oleg@redhat.com>
+ */
+
+#include <linux/rcu_sync.h>
+#include <linux/sched.h>
+
+enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
+enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
+
+#define	rss_lock	gp_wait.lock
+
+/**
+ * rcu_sync_init() - Initialize an rcu_sync structure
+ * @rsp: Pointer to rcu_sync structure to be initialized
+ * @type: Flavor of RCU with which to synchronize rcu_sync structure
+ */
+void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
+{
+	memset(rsp, 0, sizeof(*rsp));
+	init_waitqueue_head(&rsp->gp_wait);
+
+	switch (type) {
+	case RCU_SYNC:
+		rsp->sync = synchronize_rcu;
+		rsp->call = call_rcu;
+		break;
+
+	case RCU_SCHED_SYNC:
+		rsp->sync = synchronize_sched;
+		rsp->call = call_rcu_sched;
+		break;
+
+	case RCU_BH_SYNC:
+		rsp->sync = synchronize_rcu_bh;
+		rsp->call = call_rcu_bh;
+		break;
+	}
+}
+
+/**
+ * rcu_sync_enter() - Force readers onto slowpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who need readers to make use of
+ * a slowpath during the update.  After this function returns, all
+ * subsequent calls to rcu_sync_is_idle() will return false, which
+ * tells readers to stay off their fastpaths.  A later call to
+ * rcu_sync_exit() re-enables reader fastpaths.
+ *
+ * When called in isolation, rcu_sync_enter() must wait for a grace
+ * period, however, closely spaced calls to rcu_sync_enter() can
+ * optimize away the grace-period wait via a state machine implemented
+ * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
+ */
+void rcu_sync_enter(struct rcu_sync *rsp)
+{
+	bool need_wait, need_sync;
+
+	spin_lock_irq(&rsp->rss_lock);
+	need_wait = rsp->gp_count++;
+	need_sync = rsp->gp_state == GP_IDLE;
+	if (need_sync)
+		rsp->gp_state = GP_PENDING;
+	spin_unlock_irq(&rsp->rss_lock);
+
+	BUG_ON(need_wait && need_sync);
+
+	if (need_sync) {
+		rsp->sync();
+		rsp->gp_state = GP_PASSED;
+		wake_up_all(&rsp->gp_wait);
+	} else if (need_wait) {
+		wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
+	} else {
+		/*
+		 * Possible when there's a pending CB from a rcu_sync_exit().
+		 * Nobody has yet been allowed the 'fast' path and thus we can
+		 * avoid doing any sync(). The callback will get 'dropped'.
+		 */
+		BUG_ON(rsp->gp_state != GP_PASSED);
+	}
+}
+
+/**
+ * rcu_sync_func() - Callback function managing reader access to fastpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is passed to one of the call_rcu() functions by
+ * rcu_sync_exit(), so that it is invoked after a grace period following
+ * that invocation of rcu_sync_exit().  It takes action based on events that
+ * have taken place in the meantime, so that closely spaced rcu_sync_enter()
+ * and rcu_sync_exit() pairs need not wait for a grace period.
+ *
+ * If another rcu_sync_enter() is invoked before the grace period
+ * ended, reset state to allow the next rcu_sync_exit() to let the
+ * readers back onto their fastpaths (after a grace period).  If both
+ * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked
+ * before the grace period ended, re-invoke call_rcu() on behalf of that
+ * rcu_sync_exit().  Otherwise, set all state back to idle so that readers
+ * can again use their fastpaths.
+ */
+static void rcu_sync_func(struct rcu_head *rcu)
+{
+	struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head);
+	unsigned long flags;
+
+	BUG_ON(rsp->gp_state != GP_PASSED);
+	BUG_ON(rsp->cb_state == CB_IDLE);
+
+	spin_lock_irqsave(&rsp->rss_lock, flags);
+	if (rsp->gp_count) {
+		/*
+		 * A new rcu_sync_enter() has happened; drop the callback.
+		 */
+		rsp->cb_state = CB_IDLE;
+	} else if (rsp->cb_state == CB_REPLAY) {
+		/*
+		 * A new rcu_sync_exit() has happened; requeue the callback
+		 * to catch a later GP.
+		 */
+		rsp->cb_state = CB_PENDING;
+		rsp->call(&rsp->cb_head, rcu_sync_func);
+	} else {
+		/*
+		 * We're at least a GP after rcu_sync_exit(); eveybody will now
+		 * have observed the write side critical section. Let 'em rip!.
+		 */
+		rsp->cb_state = CB_IDLE;
+		rsp->gp_state = GP_IDLE;
+	}
+	spin_unlock_irqrestore(&rsp->rss_lock, flags);
+}
+
+/**
+ * rcu_sync_exit() - Allow readers back onto fastpath after grace period
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who have completed, and can therefore
+ * now allow readers to make use of their fastpaths after a grace period
+ * has elapsed.  After this grace period has completed, all subsequent
+ * calls to rcu_sync_is_idle() will return true, which tells readers that
+ * they can once again use their fastpaths.
+ */
+void rcu_sync_exit(struct rcu_sync *rsp)
+{
+	spin_lock_irq(&rsp->rss_lock);
+	if (!--rsp->gp_count) {
+		if (rsp->cb_state == CB_IDLE) {
+			rsp->cb_state = CB_PENDING;
+			rsp->call(&rsp->cb_head, rcu_sync_func);
+		} else if (rsp->cb_state == CB_PENDING) {
+			rsp->cb_state = CB_REPLAY;
+		}
+	}
+	spin_unlock_irq(&rsp->rss_lock);
+}
-- 
cgit v1.2.3


From 82e8c565be8a72957570d7da8dd9b441db7bb648 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:47 +0200
Subject: rcu_sync: Simplify rcu_sync using new rcu_sync_ops structure

This commit adds the new struct rcu_sync_ops which holds sync/call
methods, and turns the function pointers in rcu_sync_struct into an array
of struct rcu_sync_ops.  This simplifies the "init" helpers by collapsing
a switch statement and explicit multiple definitions into a simple
assignment and a helper macro, respectively.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/sync.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 0a11df43be23..5a9aa4c394f1 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -23,6 +23,24 @@
 #include <linux/rcu_sync.h>
 #include <linux/sched.h>
 
+static const struct {
+	void (*sync)(void);
+	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+} gp_ops[] = {
+	[RCU_SYNC] = {
+		.sync = synchronize_rcu,
+		.call = call_rcu,
+	},
+	[RCU_SCHED_SYNC] = {
+		.sync = synchronize_sched,
+		.call = call_rcu_sched,
+	},
+	[RCU_BH_SYNC] = {
+		.sync = synchronize_rcu_bh,
+		.call = call_rcu_bh,
+	},
+};
+
 enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
 enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
@@ -37,23 +55,7 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
 {
 	memset(rsp, 0, sizeof(*rsp));
 	init_waitqueue_head(&rsp->gp_wait);
-
-	switch (type) {
-	case RCU_SYNC:
-		rsp->sync = synchronize_rcu;
-		rsp->call = call_rcu;
-		break;
-
-	case RCU_SCHED_SYNC:
-		rsp->sync = synchronize_sched;
-		rsp->call = call_rcu_sched;
-		break;
-
-	case RCU_BH_SYNC:
-		rsp->sync = synchronize_rcu_bh;
-		rsp->call = call_rcu_bh;
-		break;
-	}
+	rsp->gp_type = type;
 }
 
 /**
@@ -85,7 +87,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
 	BUG_ON(need_wait && need_sync);
 
 	if (need_sync) {
-		rsp->sync();
+		gp_ops[rsp->gp_type].sync();
 		rsp->gp_state = GP_PASSED;
 		wake_up_all(&rsp->gp_wait);
 	} else if (need_wait) {
@@ -138,7 +140,7 @@ static void rcu_sync_func(struct rcu_head *rcu)
 		 * to catch a later GP.
 		 */
 		rsp->cb_state = CB_PENDING;
-		rsp->call(&rsp->cb_head, rcu_sync_func);
+		gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
 	} else {
 		/*
 		 * We're at least a GP after rcu_sync_exit(); eveybody will now
@@ -166,7 +168,7 @@ void rcu_sync_exit(struct rcu_sync *rsp)
 	if (!--rsp->gp_count) {
 		if (rsp->cb_state == CB_IDLE) {
 			rsp->cb_state = CB_PENDING;
-			rsp->call(&rsp->cb_head, rcu_sync_func);
+			gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
 		} else if (rsp->cb_state == CB_PENDING) {
 			rsp->cb_state = CB_REPLAY;
 		}
-- 
cgit v1.2.3


From 3a518b76af7bb411efe6dd090fbf098e29accb2e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:50 +0200
Subject: rcu_sync: Add CONFIG_PROVE_RCU checks

This commit validates that the caller of rcu_sync_is_idle() holds the
corresponding type of RCU read-side lock, but only in kernels built
with CONFIG_PROVE_RCU=y.  This validation is carried out via a new
rcu_sync_ops->held() method that is checked within rcu_sync_is_idle().

Note that although this does add code to the fast path, it only does so
in kernels built with CONFIG_PROVE_RCU=y.

Suggested-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/sync.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 5a9aa4c394f1..01c9807a7f73 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -23,21 +23,33 @@
 #include <linux/rcu_sync.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_PROVE_RCU
+#define __INIT_HELD(func)	.held = func,
+#else
+#define __INIT_HELD(func)
+#endif
+
 static const struct {
 	void (*sync)(void);
 	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+#ifdef CONFIG_PROVE_RCU
+	int  (*held)(void);
+#endif
 } gp_ops[] = {
 	[RCU_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
+		__INIT_HELD(rcu_read_lock_held)
 	},
 	[RCU_SCHED_SYNC] = {
 		.sync = synchronize_sched,
 		.call = call_rcu_sched,
+		__INIT_HELD(rcu_read_lock_sched_held)
 	},
 	[RCU_BH_SYNC] = {
 		.sync = synchronize_rcu_bh,
 		.call = call_rcu_bh,
+		__INIT_HELD(rcu_read_lock_bh_held)
 	},
 };
 
@@ -46,6 +58,14 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
 #define	rss_lock	gp_wait.lock
 
+#ifdef CONFIG_PROVE_RCU
+bool __rcu_sync_is_idle(struct rcu_sync *rsp)
+{
+	WARN_ON(!gp_ops[rsp->gp_type].held());
+	return rsp->gp_state == GP_IDLE;
+}
+#endif
+
 /**
  * rcu_sync_init() - Initialize an rcu_sync structure
  * @rsp: Pointer to rcu_sync structure to be initialized
-- 
cgit v1.2.3


From 07899a6e5f56136028c44a57ad0451e797365ac3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:52 +0200
Subject: rcu_sync: Introduce rcu_sync_dtor()

This commit allows rcu_sync structures to be safely deallocated.
The trick is to add a new ->wait field to the gp_ops array.
This field is a pointer to the rcu_barrier() function corresponding
to the flavor of RCU in question.  This allows a new rcu_sync_dtor()
to wait for any outstanding callbacks before freeing the rcu_sync
structure.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/sync.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 01c9807a7f73..1e353f0a2b66 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -32,6 +32,7 @@
 static const struct {
 	void (*sync)(void);
 	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+	void (*wait)(void);
 #ifdef CONFIG_PROVE_RCU
 	int  (*held)(void);
 #endif
@@ -39,16 +40,19 @@ static const struct {
 	[RCU_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
+		.wait = rcu_barrier,
 		__INIT_HELD(rcu_read_lock_held)
 	},
 	[RCU_SCHED_SYNC] = {
 		.sync = synchronize_sched,
 		.call = call_rcu_sched,
+		.wait = rcu_barrier_sched,
 		__INIT_HELD(rcu_read_lock_sched_held)
 	},
 	[RCU_BH_SYNC] = {
 		.sync = synchronize_rcu_bh,
 		.call = call_rcu_bh,
+		.wait = rcu_barrier_bh,
 		__INIT_HELD(rcu_read_lock_bh_held)
 	},
 };
@@ -195,3 +199,25 @@ void rcu_sync_exit(struct rcu_sync *rsp)
 	}
 	spin_unlock_irq(&rsp->rss_lock);
 }
+
+/**
+ * rcu_sync_dtor() - Clean up an rcu_sync structure
+ * @rsp: Pointer to rcu_sync structure to be cleaned up
+ */
+void rcu_sync_dtor(struct rcu_sync *rsp)
+{
+	int cb_state;
+
+	BUG_ON(rsp->gp_count);
+
+	spin_lock_irq(&rsp->rss_lock);
+	if (rsp->cb_state == CB_REPLAY)
+		rsp->cb_state = CB_PENDING;
+	cb_state = rsp->cb_state;
+	spin_unlock_irq(&rsp->rss_lock);
+
+	if (cb_state != CB_IDLE) {
+		gp_ops[rsp->gp_type].wait();
+		BUG_ON(rsp->cb_state != CB_IDLE);
+	}
+}
-- 
cgit v1.2.3


From 95b19f684c61ffc9b039e02c5d1113c2d8cd7105 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:55 +0200
Subject: locking/percpu-rwsem: Make percpu_free_rwsem() after kzalloc() safe

This is a temporary ugly hack that will be reverted later. We only
need it to ensure that the next patch will not break "change sb_writers
to use percpu_rw_semaphore" patches routed via the VFS tree.

The alloc_super()->destroy_super() error path assumes that it is safe
to call percpu_free_rwsem() after kzalloc() without percpu_init_rwsem(),
so let's not disappoint it.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/percpu-rwsem.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel')

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index e2621fbbcbd1..9529a30ec57b 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -26,6 +26,13 @@ EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
 
 void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 {
+	/*
+	 * XXX: temporary kludge. The error path in alloc_super()
+	 * assumes that percpu_free_rwsem() is safe after kzalloc().
+	 */
+	if (!brw->fast_read_ctr)
+		return;
+
 	free_percpu(brw->fast_read_ctr);
 	brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
-- 
cgit v1.2.3


From 001dac627ff37433d5528ffb0d897cd19c2b1e43 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:42:57 +0200
Subject: locking/percpu-rwsem: Make use of the rcu_sync infrastructure

Currently down_write/up_write calls synchronize_sched_expedited()
twice, which is evil.  Change this code to rely on rcu-sync primitives.
This avoids the _expedited "big hammer", and this can be faster in
the contended case or even in the case when a single thread does
down_write/up_write in a loop.

Of course, a single down_write() will take more time, but otoh it
will be much more friendly to the whole system.

To simplify the review this patch doesn't update the comments; they
are fixed by the next change.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/percpu-rwsem.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 9529a30ec57b..183a71151ac0 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 
 	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	__init_rwsem(&brw->rw_sem, name, rwsem_key);
-	atomic_set(&brw->write_ctr, 0);
+	rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
@@ -33,6 +33,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 	if (!brw->fast_read_ctr)
 		return;
 
+	rcu_sync_dtor(&brw->rss);
 	free_percpu(brw->fast_read_ctr);
 	brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
@@ -62,13 +63,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
-	bool success = false;
+	bool success;
 
 	preempt_disable();
-	if (likely(!atomic_read(&brw->write_ctr))) {
+	success = rcu_sync_is_idle(&brw->rss);
+	if (likely(success))
 		__this_cpu_add(*brw->fast_read_ctr, val);
-		success = true;
-	}
 	preempt_enable();
 
 	return success;
@@ -149,8 +149,6 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* tell update_fast_ctr() there is a pending writer */
-	atomic_inc(&brw->write_ctr);
 	/*
 	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 	 *    so that update_fast_ctr() can't succeed.
@@ -162,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	 *    fast-path, it executes a full memory barrier before we return.
 	 *    See R_W case in the comment above update_fast_ctr().
 	 */
-	synchronize_sched_expedited();
+	rcu_sync_enter(&brw->rss);
 
 	/* exclude other writers, and block the new readers completely */
 	down_write(&brw->rw_sem);
@@ -183,8 +181,6 @@ void percpu_up_write(struct percpu_rw_semaphore *brw)
 	 * Insert the barrier before the next fast-path in down_read,
 	 * see W_R case in the comment above update_fast_ctr().
 	 */
-	synchronize_sched_expedited();
-	/* the last writer unblocks update_fast_ctr() */
-	atomic_dec(&brw->write_ctr);
+	rcu_sync_exit(&brw->rss);
 }
 EXPORT_SYMBOL_GPL(percpu_up_write);
-- 
cgit v1.2.3


From f324a76324c97e81a6ba66a8efac20cdbffd759e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:43:00 +0200
Subject: locking/percpu-rwsem: Fix the comments outdated by rcu_sync

Update the comments broken by the previous change.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/percpu-rwsem.c | 50 ++++++++++---------------------------------
 1 file changed, 11 insertions(+), 39 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 183a71151ac0..02a726dd9adc 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -39,27 +39,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- *	R_W: down_write() comes after up_read(), the writer should see all
- *	     changes done by the reader
- * or
- *	W_R: down_read() comes after up_write(), the reader should see all
- *	     changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
  *
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
@@ -136,29 +121,15 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 	return sum;
 }
 
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
 	/*
-	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
-	 *    so that update_fast_ctr() can't succeed.
-	 *
-	 * 2. Ensures we see the result of every previous this_cpu_add() in
-	 *    update_fast_ctr().
+	 * Make rcu_sync_is_idle() == F and thus disable the fast-path in
+	 * percpu_down_read() and percpu_up_read(), and wait for gp pass.
 	 *
-	 * 3. Ensures that if any reader has exited its critical section via
-	 *    fast-path, it executes a full memory barrier before we return.
-	 *    See R_W case in the comment above update_fast_ctr().
+	 * The latter synchronises us with the preceding readers which used
+	 * the fast-path, so we can not miss the result of __this_cpu_add()
+	 * or anything else inside their critical sections.
 	 */
 	rcu_sync_enter(&brw->rss);
 
@@ -178,8 +149,9 @@ void percpu_up_write(struct percpu_rw_semaphore *brw)
 	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);
 	/*
-	 * Insert the barrier before the next fast-path in down_read,
-	 * see W_R case in the comment above update_fast_ctr().
+	 * Enable the fast-path in percpu_down_read() and percpu_up_read()
+	 * but only after another gp pass; this adds the necessary barrier
+	 * to ensure the reader can't miss the changes done by us.
 	 */
 	rcu_sync_exit(&brw->rss);
 }
-- 
cgit v1.2.3


From cc5f730b41506d37a5c2826b2e801d0a59853d11 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 21 Aug 2015 19:43:03 +0200
Subject: locking/percpu-rwsem: Clean up the lockdep annotations in
 percpu_down_read()

Based on Peter Zijlstra's earlier patch.

Change percpu_down_read() to use __down_read(), this way we can
do rwsem_acquire_read() unconditionally at the start to make this
code more symmetric and clean.

Originally-From: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/percpu-rwsem.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 02a726dd9adc..f231e0bb311c 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -70,14 +70,14 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
 	might_sleep();
-	if (likely(update_fast_ctr(brw, +1))) {
-		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+	rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+	if (likely(update_fast_ctr(brw, +1)))
 		return;
-	}
 
-	down_read(&brw->rw_sem);
+	/* Avoid rwsem_acquire_read() and rwsem_release() */
+	__down_read(&brw->rw_sem);
 	atomic_inc(&brw->slow_read_ctr);
-	/* avoid up_read()->rwsem_release() */
 	__up_read(&brw->rw_sem);
 }
 EXPORT_SYMBOL_GPL(percpu_down_read);
-- 
cgit v1.2.3


From 4bace7344d6dbd7a1b0b801abf24ea9878064317 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 Sep 2015 17:59:18 +0200
Subject: rcu_sync: Cleanup the CONFIG_PROVE_RCU checks

1. Rename __rcu_sync_is_idle() to rcu_sync_lockdep_assert() and
   change it to use RCU_LOCKDEP_WARN().

2. Change rcu_sync_is_idle() to return rsp->gp_state == GP_IDLE
   unconditionally, this way we can remove the same check from
   rcu_sync_lockdep_assert() and clearly isolate the debugging
   code.

Note: rcu_sync_enter()->wait_event(gp_state == GP_PASSED) needs
another CONFIG_PROVE_RCU check, the same as is done in ->sync(); but
this needs some simple preparations in the core RCU code to avoid the
code duplication.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/sync.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 1e353f0a2b66..be922c9f3d37 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -63,10 +63,10 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 #define	rss_lock	gp_wait.lock
 
 #ifdef CONFIG_PROVE_RCU
-bool __rcu_sync_is_idle(struct rcu_sync *rsp)
+void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
 {
-	WARN_ON(!gp_ops[rsp->gp_type].held());
-	return rsp->gp_state == GP_IDLE;
+	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
+			 "suspicious rcu_sync_is_idle() usage");
 }
 #endif
 
-- 
cgit v1.2.3


From 889d487a26de4bcd1a0a668754bcbce893969edf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 24 Aug 2015 11:37:58 -0700
Subject: rcutorture: Fix module unwind when bad torture_type specified

The rcutorture module has a list of torture types, and specifying a
type not on this list is supposed to cleanly fail the module load.
Unfortunately, the "fail" happens without the "cleanly".  This commit
therefore adds the needed clean-up after an incorrect torture_type.

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/rcutorture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 77192953dee5..b74b56474e17 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1742,15 +1742,15 @@ rcu_torture_init(void)
 		for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
 			pr_alert(" %s", torture_ops[i]->name);
 		pr_alert("\n");
-		torture_init_end();
-		return -EINVAL;
+		firsterr = -EINVAL;
+		goto unwind;
 	}
 	if (cur_ops->fqs == NULL && fqs_duration != 0) {
 		pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
 		fqs_duration = 0;
 	}
 	if (cur_ops->init)
-		cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+		cur_ops->init();
 
 	if (nreaders >= 0) {
 		nrealreaders = nreaders;
-- 
cgit v1.2.3


From 4f441a258f7badf752b3d9b04b675869ca4e751c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 24 Aug 2015 13:13:51 -0700
Subject: rcutorture: Fix unused-function warning for torturing_tasks()

The torturing_tasks() function is used only in kernels built with
CONFIG_PROVE_RCU=y, so the second definition can result in unused-function
compiler warnings.  This commit adds __maybe_unused to suppress these
warnings.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcu/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index b74b56474e17..009b62c76dfa 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -695,7 +695,7 @@ static bool __maybe_unused torturing_tasks(void)
 
 #define RCUTORTURE_TASKS_OPS
 
-static bool torturing_tasks(void)
+static bool __maybe_unused torturing_tasks(void)
 {
 	return false;
 }
-- 
cgit v1.2.3


From a36a99618b1adb2d6ca0b7e08e3a656a04e477fe Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 30 Aug 2015 20:01:48 -0700
Subject: locktorture: Fix module unwind when bad torture_type specified

The locktorture module has a list of torture types, and specifying
a type not on this list is supposed to cleanly fail the module load.
Unfortunately, the "fail" happens without the "cleanly".  This commit
therefore adds the needed clean-up after an incorrect torture_type.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/locking/locktorture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f1f2..820852f69858 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -661,11 +661,11 @@ static int __init lock_torture_init(void)
 		for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
 			pr_alert(" %s", torture_ops[i]->name);
 		pr_alert("\n");
-		torture_init_end();
-		return -EINVAL;
+		firsterr = -EINVAL;
+		goto unwind;
 	}
 	if (cxt.cur_ops->init)
-		cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+		cxt.cur_ops->init();
 
 	if (nwriters_stress >= 0)
 		cxt.nrealwriters_stress = nwriters_stress;
-- 
cgit v1.2.3