diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-02-28 23:12:55 -0800 |
| commit | 1136fa0c07de570dc17858745af8be169d1440ba (patch) | |
| tree | 3221b003517dd3cb13df5ba4b85637cd9ed82692 /kernel/locking | |
| parent | ba115adf61b36b8c167126425a62b0efc23f72c0 (diff) | |
| parent | 754e0b0e35608ed5206d6a67a791563c631cec07 (diff) | |
| download | linux-1136fa0c07de570dc17858745af8be169d1440ba.tar.gz linux-1136fa0c07de570dc17858745af8be169d1440ba.tar.bz2 linux-1136fa0c07de570dc17858745af8be169d1440ba.zip | |
Merge tag 'v5.17-rc4' into for-linus
Merge with mainline to get the Intel ASoC generic helpers header and
other changes.
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/lockdep.c | 26 | ||||
| -rw-r--r-- | kernel/locking/locktorture.c | 18 | ||||
| -rw-r--r-- | kernel/locking/mutex.c | 70 | ||||
| -rw-r--r-- | kernel/locking/rtmutex.c | 29 | ||||
| -rw-r--r-- | kernel/locking/rtmutex_api.c | 30 | ||||
| -rw-r--r-- | kernel/locking/rwbase_rt.c | 11 | ||||
| -rw-r--r-- | kernel/locking/rwsem.c | 255 | ||||
| -rw-r--r-- | kernel/locking/spinlock.c | 13 | ||||
| -rw-r--r-- | kernel/locking/spinlock_rt.c | 35 | ||||
| -rw-r--r-- | kernel/locking/test-ww_mutex.c | 87 | ||||
| -rw-r--r-- | kernel/locking/ww_rt_mutex.c | 25 |
11 files changed, 391 insertions, 208 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bf1c00c881e4..4a882f83aeb9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -788,6 +788,21 @@ static int very_verbose(struct lock_class *class) * Is this the address of a static object: */ #ifdef __KERNEL__ +/* + * Check if an address is part of freed initmem. After initmem is freed, + * memory can be allocated from it, and such allocations would then have + * addresses within the range [_stext, _end]. + */ +#ifndef arch_is_kernel_initmem_freed +static int arch_is_kernel_initmem_freed(unsigned long addr) +{ + if (system_state < SYSTEM_FREEING_INITMEM) + return 0; + + return init_section_contains((void *)addr, 1); +} +#endif + static int static_obj(const void *obj) { unsigned long start = (unsigned long) &_stext, @@ -803,9 +818,6 @@ static int static_obj(const void *obj) if ((addr >= start) && (addr < end)) return 1; - if (arch_is_kernel_data(addr)) - return 1; - /* * in-kernel percpu var? */ @@ -888,7 +900,7 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return NULL; - hlist_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -4671,7 +4683,7 @@ print_lock_invalid_wait_context(struct task_struct *curr, /* * Verify the wait_type context. * - * This check validates we takes locks in the right wait-type order; that is it + * This check validates we take locks in the right wait-type order; that is it * ensures that we do not take mutexes inside spinlocks and do not attempt to * acquire spinlocks inside raw_spinlocks and the sort. * @@ -5366,7 +5378,7 @@ int __lock_is_held(const struct lockdep_map *lock, int read) struct held_lock *hlock = curr->held_locks + i; if (match_held_lock(hlock, lock)) { - if (read == -1 || hlock->read == read) + if (read == -1 || !!hlock->read == read) return LOCK_STATE_HELD; return LOCK_STATE_NOT_HELD; @@ -5473,6 +5485,7 @@ static noinstr void check_flags(unsigned long flags) } } +#ifndef CONFIG_PREEMPT_RT /* * We dont accurately track softirq state in e.g. * hardirq contexts (such as on 4KSTACKS), so only @@ -5487,6 +5500,7 @@ static noinstr void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } +#endif if (!debug_locks) print_irqtrace_events(current); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 7c5a4a087cc7..9c2fb613a55d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -1022,23 +1022,23 @@ static int __init lock_torture_init(void) if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ, NULL); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (shuffle_interval > 0) { firsterr = torture_shuffle_init(shuffle_interval); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (shutdown_secs > 0) { firsterr = torture_shutdown_init(shutdown_secs, lock_torture_cleanup); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (stutter > 0) { firsterr = torture_stutter_init(stutter, stutter); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } @@ -1047,7 +1047,7 @@ static int __init lock_torture_init(void) sizeof(writer_tasks[0]), GFP_KERNEL); if (writer_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + TOROUT_ERRSTRING("writer_tasks: Out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -1058,7 +1058,7 @@ static int __init lock_torture_init(void) sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); + TOROUT_ERRSTRING("reader_tasks: Out of memory"); kfree(writer_tasks); writer_tasks = NULL; firsterr = -ENOMEM; @@ -1082,7 +1082,7 @@ static int __init lock_torture_init(void) /* Create writer. */ firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i], writer_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; create_reader: @@ -1091,13 +1091,13 @@ static int __init lock_torture_init(void) /* Create reader. */ firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j], reader_tasks[j]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, stats_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } torture_init_end(); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d456579d0952..5e3585950ec8 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -94,6 +94,9 @@ static inline unsigned long __owner_flags(unsigned long owner) return owner & MUTEX_FLAGS; } +/* + * Returns: __mutex_owner(lock) on failure or NULL on success. + */ static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff) { unsigned long owner, curr = (unsigned long)current; @@ -348,21 +351,23 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, { bool ret = true; - rcu_read_lock(); + lockdep_assert_preemption_disabled(); + while (__mutex_owner(lock) == owner) { /* * Ensure we emit the owner->on_cpu, dereference _after_ - * checking lock->owner still matches owner. If that fails, - * owner might point to freed memory. If it still matches, - * the rcu_read_lock() ensures the memory stays valid. + * checking lock->owner still matches owner. And we already + * disabled preemption which is equal to the RCU read-side + * crital section in optimistic spinning code. Thus the + * task_strcut structure won't go away during the spinning + * period */ barrier(); /* * Use vcpu_is_preempted to detect lock holder preemption issue. */ - if (!owner->on_cpu || need_resched() || - vcpu_is_preempted(task_cpu(owner))) { + if (!owner_on_cpu(owner) || need_resched()) { ret = false; break; } @@ -374,7 +379,6 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, cpu_relax(); } - rcu_read_unlock(); return ret; } @@ -387,19 +391,19 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) struct task_struct *owner; int retval = 1; + lockdep_assert_preemption_disabled(); + if (need_resched()) return 0; - rcu_read_lock(); - owner = __mutex_owner(lock); - /* - * As lock holder preemption issue, we both skip spinning if task is not - * on cpu or its cpu is preempted + * We already disabled preemption which is equal to the RCU read-side + * crital section in optimistic spinning code. Thus the task_strcut + * structure won't go away during the spinning period. */ + owner = __mutex_owner(lock); if (owner) - retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); - rcu_read_unlock(); + retval = owner_on_cpu(owner); /* * If lock->owner is not set, the mutex has been released. Return true @@ -736,6 +740,44 @@ __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true); } +/** + * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire context + * @ww: mutex to lock + * @ww_ctx: optional w/w acquire context + * + * Trylocks a mutex with the optional acquire context; no deadlock detection is + * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. + * + * Unlike ww_mutex_lock, no deadlock handling is performed. However, if a @ctx is + * specified, -EALREADY handling may happen in calls to ww_mutex_trylock. + * + * A mutex acquired with this function must be released with ww_mutex_unlock. + */ +int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + if (!ww_ctx) + return mutex_trylock(&ww->base); + + MUTEX_WARN_ON(ww->base.magic != &ww->base); + + /* + * Reset the wounded flag after a kill. No other process can + * race and wound us here, since they can't have a valid owner + * pointer if we don't have any locks held. + */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = 0; + + if (__mutex_trylock(&ww->base)) { + ww_mutex_set_context_fastpath(ww, ww_ctx); + mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); + return 1; + } + + return 0; +} +EXPORT_SYMBOL(ww_mutex_trylock); + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6bb116c559b4..8555c4efe97c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -446,19 +446,26 @@ static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) } /* RT mutex specific wake_q wrappers */ -static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, - struct rt_mutex_waiter *w) +static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh, + struct task_struct *task, + unsigned int wake_state) { - if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) { + if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) { if (IS_ENABLED(CONFIG_PROVE_LOCKING)) WARN_ON_ONCE(wqh->rtlock_task); - get_task_struct(w->task); - wqh->rtlock_task = w->task; + get_task_struct(task); + wqh->rtlock_task = task; } else { - wake_q_add(&wqh->head, w->task); + wake_q_add(&wqh->head, task); } } +static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, + struct rt_mutex_waiter *w) +{ + rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state); +} + static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) { if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) { @@ -1096,8 +1103,11 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, * the other will detect the deadlock and return -EDEADLOCK, * which is wrong, as the other waiter is not in a deadlock * situation. + * + * Except for ww_mutex, in that case the chain walk must already deal + * with spurious cycles, see the comments at [3] and [6]. */ - if (owner == task) + if (owner == task && !(build_ww_mutex() && ww_ctx)) return -EDEADLK; raw_spin_lock(&task->pi_lock); @@ -1372,9 +1382,8 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * for CONFIG_PREEMPT_RCU=y) * - the VCPU on which owner runs is preempted */ - if (!owner->on_cpu || need_resched() || - rt_mutex_waiter_is_top_waiter(lock, waiter) || - vcpu_is_preempted(task_cpu(owner))) { + if (!owner_on_cpu(owner) || need_resched() || + !rt_mutex_waiter_is_top_waiter(lock, waiter)) { res = false; break; } diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 5c9299aaabae..900220941caa 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -21,12 +21,13 @@ int max_lock_depth = 1024; */ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, unsigned int state, + struct lockdep_map *nest_lock, unsigned int subclass) { int ret; might_sleep(); - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_); ret = __rt_mutex_lock(&lock->rtmutex, state); if (ret) mutex_release(&lock->dep_map, _RET_IP_); @@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init); */ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass); } EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock) +{ + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0); +} +EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); + #else /* !CONFIG_DEBUG_LOCK_ALLOC */ /** @@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock); #endif @@ -77,11 +84,26 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { - return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); + return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); /** + * rt_mutex_lock_killable - lock a rt_mutex killable + * + * @lock: the rt_mutex to be locked + * + * Returns: + * 0 on success + * -EINTR when interrupted by a signal + */ +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) +{ + return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); + +/** * rt_mutex_trylock - try to lock a rt_mutex * * @lock: the rt_mutex to be locked diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 88191f6e252c..6fd3162e4098 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -59,8 +59,7 @@ static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) * set. */ for (r = atomic_read(&rwb->readers); r < 0;) { - /* Fully-ordered if cmpxchg() succeeds, provides ACQUIRE */ - if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1))) + if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1))) return 1; } return 0; @@ -148,6 +147,7 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, { struct rt_mutex_base *rtm = &rwb->rtmutex; struct task_struct *owner; + DEFINE_RT_WAKE_Q(wqh); raw_spin_lock_irq(&rtm->wait_lock); /* @@ -158,9 +158,12 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, */ owner = rt_mutex_owner(rtm); if (owner) - wake_up_state(owner, state); + rt_mutex_wake_q_add_task(&wqh, owner, state); + /* Pairs with the preempt_enable in rt_mutex_wake_up_q() */ + preempt_disable(); raw_spin_unlock_irq(&rtm->wait_lock); + rt_mutex_wake_up_q(&wqh); } static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, @@ -183,7 +186,7 @@ static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, /* * _release() is needed in case that reader is in fast path, pairing - * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE + * with atomic_try_cmpxchg_acquire() in rwbase_read_trylock(). */ (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers); raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 000e8d5a2884..69aba4abe104 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -56,7 +56,6 @@ * * A fast path reader optimistic lock stealing is supported when the rwsem * is previously owned by a writer and the following conditions are met: - * - OSQ is empty * - rwsem is not currently writer owned * - the handoff isn't set. */ @@ -106,9 +105,9 @@ * atomic_long_cmpxchg() will be used to obtain writer lock. * * There are three places where the lock handoff bit may be set or cleared. - * 1) rwsem_mark_wake() for readers. - * 2) rwsem_try_write_lock() for writers. - * 3) Error path of rwsem_down_write_slowpath(). + * 1) rwsem_mark_wake() for readers -- set, clear + * 2) rwsem_try_write_lock() for writers -- set, clear + * 3) rwsem_del_waiter() -- clear * * For all the above cases, wait_lock will be held. A writer must also * be the first one in the wait_list to be eligible for setting the handoff @@ -335,6 +334,9 @@ struct rwsem_waiter { struct task_struct *task; enum rwsem_waiter_type type; unsigned long timeout; + + /* Writer only, not initialized in reader */ + bool handoff_set; }; #define rwsem_first_waiter(sem) \ list_first_entry(&sem->wait_list, struct rwsem_waiter, list) @@ -345,12 +347,6 @@ enum rwsem_wake_type { RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ }; -enum writer_wait_state { - WRITER_NOT_FIRST, /* Writer is not first in wait list */ - WRITER_FIRST, /* Writer is first in wait list */ - WRITER_HANDOFF /* Writer is first & handoff needed */ -}; - /* * The typical HZ value is either 250 or 1000. So set the minimum waiting * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait @@ -366,6 +362,31 @@ enum writer_wait_state { */ #define MAX_READERS_WAKEUP 0x100 +static inline void +rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) +{ + lockdep_assert_held(&sem->wait_lock); + list_add_tail(&waiter->list, &sem->wait_list); + /* caller will set RWSEM_FLAG_WAITERS */ +} + +/* + * Remove a waiter from the wait_list and clear flags. + * + * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of + * this function. Modify with care. + */ +static inline void +rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) +{ + lockdep_assert_held(&sem->wait_lock); + list_del(&waiter->list); + if (likely(!list_empty(&sem->wait_list))) + return; + + atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count); +} + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must @@ -377,6 +398,8 @@ enum writer_wait_state { * preferably when the wait_lock is released * - woken process blocks are discarded from the list after having task zeroed * - writers are only marked woken if downgrading is false + * + * Implies rwsem_del_waiter() for all woken readers. */ static void rwsem_mark_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type, @@ -485,24 +508,31 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, /* * Limit # of readers that can be woken up per wakeup call. */ - if (woken >= MAX_READERS_WAKEUP) + if (unlikely(woken >= MAX_READERS_WAKEUP)) break; } adjustment = woken * RWSEM_READER_BIAS - adjustment; lockevent_cond_inc(rwsem_wake_reader, woken); + + oldcount = atomic_long_read(&sem->count); if (list_empty(&sem->wait_list)) { - /* hit end of list above */ + /* + * Combined with list_move_tail() above, this implies + * rwsem_del_waiter(). + */ adjustment -= RWSEM_FLAG_WAITERS; + if (oldcount & RWSEM_FLAG_HANDOFF) + adjustment -= RWSEM_FLAG_HANDOFF; + } else if (woken) { + /* + * When we've woken a reader, we no longer need to force + * writers to give up the lock and we can clear HANDOFF. + */ + if (oldcount & RWSEM_FLAG_HANDOFF) + adjustment -= RWSEM_FLAG_HANDOFF; } - /* - * When we've woken a reader, we no longer need to force writers - * to give up the lock and we can clear HANDOFF. - */ - if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF)) - adjustment -= RWSEM_FLAG_HANDOFF; - if (adjustment) atomic_long_add(adjustment, &sem->count); @@ -533,12 +563,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * race conditions between checking the rwsem wait list and setting the * sem->count accordingly. * - * If wstate is WRITER_HANDOFF, it will make sure that either the handoff - * bit is set or the lock is acquired with handoff bit cleared. + * Implies rwsem_del_waiter() on success. */ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, - enum writer_wait_state wstate) + struct rwsem_waiter *waiter) { + bool first = rwsem_first_waiter(sem) == waiter; long count, new; lockdep_assert_held(&sem->wait_lock); @@ -547,13 +577,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, do { bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF); - if (has_handoff && wstate == WRITER_NOT_FIRST) - return false; + if (has_handoff) { + if (!first) + return false; + + /* First waiter inherits a previously set handoff bit */ + waiter->handoff_set = true; + } new = count; if (count & RWSEM_LOCK_MASK) { - if (has_handoff || (wstate != WRITER_HANDOFF)) + if (has_handoff || (!rt_task(waiter->task) && + !time_after(jiffies, waiter->timeout))) return false; new |= RWSEM_FLAG_HANDOFF; @@ -570,13 +606,39 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, * We have either acquired the lock with handoff bit cleared or * set the handoff bit. */ - if (new & RWSEM_FLAG_HANDOFF) + if (new & RWSEM_FLAG_HANDOFF) { + waiter->handoff_set = true; + lockevent_inc(rwsem_wlock_handoff); return false; + } + /* + * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on + * success. + */ + list_del(&waiter->list); rwsem_set_owner(sem); return true; } +/* + * The rwsem_spin_on_owner() function returns the following 4 values + * depending on the lock owner state. + * OWNER_NULL : owner is currently NULL + * OWNER_WRITER: when owner changes and is a writer + * OWNER_READER: when owner changes and the new owner may be a reader. + * OWNER_NONSPINNABLE: + * when optimistic spinning has to stop because either the + * owner stops running, is unknown, or its timeslice has + * been used up. + */ +enum owner_state { + OWNER_NULL = 1 << 0, + OWNER_WRITER = 1 << 1, + OWNER_READER = 1 << 2, + OWNER_NONSPINNABLE = 1 << 3, +}; + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * Try to acquire write lock before the writer has been put on wait queue. @@ -596,15 +658,6 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) return false; } -static inline bool owner_on_cpu(struct task_struct *owner) -{ - /* - * As lock holder preemption issue, we both skip spinning if - * task is not on cpu or its cpu is preempted - */ - return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); -} - static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; @@ -617,7 +670,10 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) } preempt_disable(); - rcu_read_lock(); + /* + * Disable preemption is equal to the RCU read-side crital section, + * thus the task_strcut structure won't go away. + */ owner = rwsem_owner_flags(sem, &flags); /* * Don't check the read-owner as the entry may be stale. @@ -625,30 +681,12 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) if ((flags & RWSEM_NONSPINNABLE) || (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner))) ret = false; - rcu_read_unlock(); preempt_enable(); lockevent_cond_inc(rwsem_opt_fail, !ret); return ret; } -/* - * The rwsem_spin_on_owner() function returns the following 4 values - * depending on the lock owner state. - * OWNER_NULL : owner is currently NULL - * OWNER_WRITER: when owner changes and is a writer - * OWNER_READER: when owner changes and the new owner may be a reader. - * OWNER_NONSPINNABLE: - * when optimistic spinning has to stop because either the - * owner stops running, is unknown, or its timeslice has - * been used up. - */ -enum owner_state { - OWNER_NULL = 1 << 0, - OWNER_WRITER = 1 << 1, - OWNER_READER = 1 << 2, - OWNER_NONSPINNABLE = 1 << 3, -}; #define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER) static inline enum owner_state @@ -670,12 +708,13 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) unsigned long flags, new_flags; enum owner_state state; + lockdep_assert_preemption_disabled(); + owner = rwsem_owner_flags(sem, &flags); state = rwsem_owner_state(owner, flags); if (state != OWNER_WRITER) return state; - rcu_read_lock(); for (;;) { /* * When a waiting writer set the handoff flag, it may spin @@ -693,7 +732,9 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) * Ensure we emit the owner->on_cpu, dereference _after_ * checking sem->owner still matches owner, if that fails, * owner might point to free()d memory, if it still matches, - * the rcu_read_lock() ensures the memory stays valid. + * our spinning context already disabled preemption which is + * equal to RCU read-side crital section ensures the memory + * stays valid. */ barrier(); @@ -704,7 +745,6 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) cpu_relax(); } - rcu_read_unlock(); return state; } @@ -878,12 +918,11 @@ static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem) static inline void clear_nonspinnable(struct rw_semaphore *sem) { } -static inline int +static inline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem) { - return 0; + return OWNER_NONSPINNABLE; } -#define OWNER_NULL 1 #endif /* @@ -953,7 +992,7 @@ queue: } adjustment += RWSEM_FLAG_WAITERS; } - list_add_tail(&waiter.list, &sem->wait_list); + rwsem_add_waiter(sem, &waiter); /* we're now waiting on the lock, but no longer actively locking */ count = atomic_long_add_return(adjustment, &sem->count); @@ -999,11 +1038,7 @@ queue: return sem; out_nolock: - list_del(&waiter.list); - if (list_empty(&sem->wait_list)) { - atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF, - &sem->count); - } + rwsem_del_waiter(sem, &waiter); raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock_fail); @@ -1017,9 +1052,7 @@ static struct rw_semaphore * rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { long count; - enum writer_wait_state wstate; struct rwsem_waiter waiter; - struct rw_semaphore *ret = sem; DEFINE_WAKE_Q(wake_q); /* do optimistic spinning and steal lock if possible */ @@ -1035,16 +1068,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) waiter.task = current; waiter.type = RWSEM_WAITING_FOR_WRITE; waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT; + waiter.handoff_set = false; raw_spin_lock_irq(&sem->wait_lock); - - /* account for this before adding a new element to the list */ - wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST; - - list_add_tail(&waiter.list, &sem->wait_list); + rwsem_add_waiter(sem, &waiter); /* we're now waiting on the lock */ - if (wstate == WRITER_NOT_FIRST) { + if (rwsem_first_waiter(sem) != &waiter) { count = atomic_long_read(&sem->count); /* @@ -1080,13 +1110,16 @@ wait: /* wait until we successfully acquire the lock */ set_current_state(state); for (;;) { - if (rwsem_try_write_lock(sem, wstate)) { + if (rwsem_try_write_lock(sem, &waiter)) { /* rwsem_try_write_lock() implies ACQUIRE on success */ break; } raw_spin_unlock_irq(&sem->wait_lock); + if (signal_pending_state(state, current)) + goto out_nolock; + /* * After setting the handoff bit and failing to acquire * the lock, attempt to spin on owner to accelerate lock @@ -1095,70 +1128,37 @@ wait: * In this case, we attempt to acquire the lock again * without sleeping. */ - if (wstate == WRITER_HANDOFF && - rwsem_spin_on_owner(sem) == OWNER_NULL) - goto trylock_again; + if (waiter.handoff_set) { + enum owner_state owner_state; - /* Block until there are no active lockers. */ - for (;;) { - if (signal_pending_state(state, current)) - goto out_nolock; + preempt_disable(); + owner_state = rwsem_spin_on_owner(sem); + preempt_enable(); - schedule(); - lockevent_inc(rwsem_sleep_writer); - set_current_state(state); - /* - * If HANDOFF bit is set, unconditionally do - * a trylock. - */ - if (wstate == WRITER_HANDOFF) - break; - - if ((wstate == WRITER_NOT_FIRST) && - (rwsem_first_waiter(sem) == &waiter)) - wstate = WRITER_FIRST; - - count = atomic_long_read(&sem->count); - if (!(count & RWSEM_LOCK_MASK)) - break; - - /* - * The setting of the handoff bit is deferred - * until rwsem_try_write_lock() is called. - */ - if ((wstate == WRITER_FIRST) && (rt_task(current) || - time_after(jiffies, waiter.timeout))) { - wstate = WRITER_HANDOFF; - lockevent_inc(rwsem_wlock_handoff); - break; - } + if (owner_state == OWNER_NULL) + goto trylock_again; } + + schedule(); + lockevent_inc(rwsem_sleep_writer); + set_current_state(state); trylock_again: raw_spin_lock_irq(&sem->wait_lock); } __set_current_state(TASK_RUNNING); - list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); lockevent_inc(rwsem_wlock); - - return ret; + return sem; out_nolock: __set_current_state(TASK_RUNNING); raw_spin_lock_irq(&sem->wait_lock); - list_del(&waiter.list); - - if (unlikely(wstate == WRITER_HANDOFF)) - atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count); - - if (list_empty(&sem->wait_list)) - atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count); - else + rwsem_del_waiter(sem, &waiter); + if (!list_empty(&sem->wait_list)) rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); wake_up_q(&wake_q); lockevent_inc(rwsem_wlock_fail); - return ERR_PTR(-EINTR); } @@ -1239,17 +1239,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); - /* - * Optimize for the case when the rwsem is not locked at all. - */ - tmp = RWSEM_UNLOCKED_VALUE; - do { + tmp = atomic_long_read(&sem->count); + while (!(tmp & RWSEM_READ_FAILED_MASK)) { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, - tmp + RWSEM_READER_BIAS)) { + |
