// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
*
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
* Robust futex support started by Ingo Molnar
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
*
* PI-futex support started by Ingo Molnar and Thomas Gleixner
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* PRIVATE futexes by Eric Dumazet
* Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
*
* Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
* Copyright (C) IBM Corporation, 2009
* Thanks to Thomas Gleixner for conceptual design and careful reviews.
*
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*/
#include <linux/compat.h>
#include <linux/jhash.h>
#include <linux/pagemap.h>
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/slab.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
/*
* The base of the bucket array and its size are always used together
* (after initialization only in futex_hash()), so ensure that they
* reside in the same cacheline.
*/
static struct {
struct futex_hash_bucket *queues;
unsigned long hashsize;
} __futex_data __read_mostly __aligned(2*sizeof(long));
#define futex_queues (__futex_data.queues)
#define futex_hashsize (__futex_data.hashsize)
/*
* Fault injections for futexes.
*/
#ifdef CONFIG_FAIL_FUTEX
static struct {
struct fault_attr attr;
bool ignore_private;
} fail_futex = {
.attr = FAULT_ATTR_INITIALIZER,
.ignore_private = false,
};
static int __init setup_fail_futex(char *str)
{
return setup_fault_attr(&fail_futex.attr, str);
}
__setup("fail_futex=", setup_fail_futex);
bool should_fail_futex(bool fshared)
{
if (fail_futex.ignore_private && !fshared)
return false;
return should_fail(&fail_futex.attr, 1);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
static int __init fail_futex_debugfs(void)
{
umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
dir = fault_create_debugfs_attr("fail_futex", NULL,
&fail_futex.attr);
if (IS_ERR(dir))
return PTR_ERR(dir);
debugfs_create_bool("ignore-private", mode, dir,
&fail_futex.ignore_private);
return 0;
}
late_initcall(fail_futex_debugfs);
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
#endif /* CONFIG_FAIL_FUTEX */
/**
* futex_hash - Return the hash bucket in the global hash
* @key: Pointer to the futex key for which the hash is calculated
*
* We hash on the keys returned from get_futex_key (see below) and return the
* corresponding hash bucket in the global hash.
*/
struct futex_hash_bucket *futex_hash(union futex_key *key)
{
u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
key->both.offset);
return &futex_queues[hash & (futex_hashsize - 1)];
}
/**
* futex_setup_timer - set up the sleeping hrtimer.
* @time: ptr to the given timeout value
* @timeout: the hrtimer_sleeper structure to be set up
* @flags: futex flags
* @range_ns: optional range in ns
*
* Return: Initialized hrtimer_sleeper structure or NULL if no timeout
* value given
*/
struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns)
{
if (!time)
return NULL;
hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
CLOCK_REALTIME : CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
/*
* If range_ns is 0, calling hrtimer_set_expires_range_ns() is
* effectively the same as calling hrtimer_set_expires().
*/
hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
return timeout;
}
/*
* Generate a machine wide unique identifier for this inode.
*
* This relies on u64 not wrapping in the life-time of the machine; which with
* 1ns resolution means almost 585 years.
*
* This further relies on the fact that a well formed program will not unmap
* the file while it has a (shared) futex waiting on it. This mapping will have
* a file reference which pins the mount and inode.
*
* If for some reason an inode gets evicted and read back in again, it will get
* a new sequence number and will _NOT_ match, even though it is the exact same
* file.
*
* It is important that futex_match() will never have a false-positive, esp.
* for PI futexes that can mess up the state. The above argues that false-negatives
* are only possible for malformed programs.
*/
static u64 get_inode_sequence_number(struct inode *inode)
{
static atomic64_t i_seq;
u64 old;
/* Does the inode already have a sequence number? */
old = atomic64_read(&inode->i_sequence);
if (likely(old))
return old;
for (;;) {
u64 new = atomic64_add_return(1, &i_seq);
if (WARN_ON_ONCE(!new))
continue;
old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
if (old)
return old;
return new;
}
}
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
* @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
* @key: address where result is stored.
* @rw: mapping needs to be read/write (values: FUTEX_READ,
*