// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
*
* Written by David Howells (dhowells@redhat.com).
* Derived from asm-i386/semaphore.h
*
* Writer lock-stealing by Alex Shi <alex.shi@intel.com>
* and Michel Lespinasse <walken@google.com>
*
* Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
* and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
*
* Rwsem count bit fields re-definition and rwsem rearchitecture by
* Waiman Long <longman@redhat.com> and
* Peter Zijlstra <peterz@infradead.org>.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include "lock_events.h"
/*
* The least significant 2 bits of the owner value has the following
* meanings when set.
* - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
* - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
*
* When the rwsem is reader-owned and a spinning writer has timed out,
* the nonspinnable bit will be set to disable optimistic spinning.
* When a writer acquires a rwsem, it puts its task_struct pointer
* into the owner field. It is cleared after an unlock.
*
* When a reader acquires a rwsem, it will also puts its task_struct
* pointer into the owner field with the RWSEM_READER_OWNED bit set.
* On unlock, the owner field will largely be left untouched. So
* for a free or reader-owned rwsem, the owner value may contain
* information about the last reader that acquires the rwsem.
*
* That information may be helpful in debugging cases where the system
* seems to hang on a reader owned rwsem especially if only one reader
* is involved. Ideally we would like to track all the readers that own
* a rwsem, but the overhead is simply too big.
*
* A fast path reader optimistic lock stealing is supported when the rwsem
* is previously owned by a writer and the following conditions are met:
* - OSQ is empty
* - rwsem is not currently writer owned
* - the handoff isn't set.
*/
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_NONSPINNABLE (1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
if (!debug_locks_silent && \
WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
#c, atomic_long_read(&(sem)->count), \
(unsigned long) sem->magic, \
atomic_long_read(&(sem)->owner), (long)current, \
list_empty(&(sem)->wait_list) ? "" : "not ")) \
debug_locks_off(); \
} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif
/*
* On 64-bit architectures, the bit definitions of the count are:
*
* Bit 0 - writer locked bit
* Bit 1 - waiters present bit
* Bit 2 - lock handoff bit
* Bits 3-7 - reserved
* Bits 8-62 - 55-bit reader count
* Bit 63 - read fail bit
*
* On 32-bit architectures, the bit definitions of the count are:
*
* Bit 0 - writer locked bit
* Bit 1 - waiters present bit
* Bit 2 - lock handoff bit
* Bits 3-7 - reserved
* Bits 8-30 - 23-bit reader count
* Bit 31 - read fail bit
*
* It is not likely that the most significant bit (read fail bit) will ever
* be set. This guard bit is still checked anyway in the down_read() fastpath
* just in case we need to use up more of the reader bits for other purpose
* in the future.
*
* atomic_long_fetch_add() is used to obtain reader lock, whereas
* atomic_long_cmpxchg() will be used to obtain writer lock.
*
* There are three places where the lock handoff bit may be set or cleared.
* 1) rwsem_mark_wake() for readers.
* 2) rwsem_try_write_lock() for writers.
* 3) Error path of rwsem_down_write_slowpath().
*
* For all the above cases, wait_lock will be held. A writer must also
* be the first one in the wait_list to be eligible for setting the handoff
* bit. So concurrent setting/clearing of handoff bit is not possible.
*/
#define RWSEM_WRITER_LOCKED (1UL << 0)
#define RWSEM_FLAG_WAITERS (1UL << 1)
#define RWSEM_FLAG_HANDOFF (1UL << 2)
#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
#define RWSEM_READER_SHIFT 8
#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
/*
* All writes to owner are protected by WRITE_ONCE() to make sure that
* store tearing can't happen as optimistic spinners may read and use
* the owner value concurrently without lock. Read from owner, however,
* may not need READ_ONCE() as long as the pointer value is only used
* for comparison and isn't being dereferenced.
*/
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
atomic_long_set(&sem->owner, (long)current);
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
atomic_long_set(&sem->owner, 0);
}
/*
* Test the flags in the owner field.
*/
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
return atomic_long_read(&sem->owner) & flags;
}
/*
* The task_struct pointer of the last owning reader will be left in
* the owner field.
*
* Note that the owner value just indicates the task has owned the rwsem
* previously, it may not be the real owner or one of the real owners
* anymore when that field is examined, so take it with a grain of salt.
*
* The reader non-spinnable bit is preserved.
*/
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
struct task_struct *owner)
{
unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);
atomic_long_set(&sem->owner, val);
}
static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
__rwsem_set_reader_owned(sem, current);
}
/*
* Return true if the rwsem is owned by a reader.
*/
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
/*
* Check the count to see if it is write-locked.
*/
long count = atomic_long_read(&sem->count);
if (count & RWSEM_WRITER_MASK)
return false;
#endif
return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}
#ifdef CONFIG_DEBUG_RWSEMS
/*
* With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
* is a task pointer in owner of a reader-owned rwsem, it will be the
* real owner or one of the real owners. The only exception is when the
* unlock is done by up_read_non_owner().
*/
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
unsigned long val = atomic_long_read(&sem->owner);
while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
if (atomic_long_try_cmpxchg(&sem->owner, &val,
val & RWSEM_OWNER_FLAGS_MASK))
return;
}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif
/*
* Set the RWSEM_N
|