summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-07-30 11:15:13 +0200
committerIngo Molnar <mingo@kernel.org>2017-07-30 11:15:13 +0200
commitf5db340f19f14a8df9dfd22d71fba1513e9f1f7e (patch)
tree131d3345bc987aee3c922624de816492e7f323a4 /lib
parentee438ec8f33c5af0d4a4ffb935c5b9272e8c2680 (diff)
parent38115f2f8cec8087d558c062e779c443a01f87d6 (diff)
downloadlinux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.tar.gz
linux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.tar.bz2
linux-f5db340f19f14a8df9dfd22d71fba1513e9f1f7e.zip
Merge branch 'perf/urgent' into perf/core, to pick up latest fixes and refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig11
-rw-r--r--lib/Kconfig.debug115
-rw-r--r--lib/Kconfig.kgdb2
-rw-r--r--lib/Makefile5
-rw-r--r--lib/atomic64_test.c7
-rw-r--r--lib/bitmap.c8
-rw-r--r--lib/bsearch.c22
-rw-r--r--lib/crc4.c46
-rw-r--r--lib/dma-noop.c21
-rw-r--r--lib/dma-virt.c12
-rw-r--r--lib/errseq.c208
-rw-r--r--lib/extable.c41
-rw-r--r--lib/fault-inject.c10
-rw-r--r--lib/flex_proportions.c6
-rw-r--r--lib/interval_tree_test.c93
-rw-r--r--lib/iov_iter.c116
-rw-r--r--lib/kobject_uevent.c167
-rw-r--r--lib/kstrtox.c12
-rw-r--r--lib/nlattr.c11
-rw-r--r--lib/nmi_backtrace.c3
-rw-r--r--lib/percpu_counter.c11
-rw-r--r--lib/raid6/mktables.c20
-rw-r--r--lib/rhashtable.c9
-rw-r--r--lib/string.c7
-rw-r--r--lib/strnlen_user.c34
-rw-r--r--lib/test_bitmap.c29
-rw-r--r--lib/test_bpf.c59
-rw-r--r--lib/test_kmod.c1246
-rw-r--r--lib/test_sysctl.c148
-rw-r--r--lib/usercopy.c10
-rw-r--r--lib/vsprintf.c136
31 files changed, 2411 insertions, 214 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 0c8b78a9ae2e..6762529ad9e4 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -158,6 +158,14 @@ config CRC32_BIT
endchoice
+config CRC4
+ tristate "CRC4 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC4 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC4
+ functions require M here.
+
config CRC7
tristate "CRC7 functions"
help
@@ -548,6 +556,9 @@ config ARCH_HAS_SG_CHAIN
config ARCH_HAS_PMEM_API
bool
+config ARCH_HAS_UACCESS_FLUSHCACHE
+ bool
+
config ARCH_HAS_MMIO_FLUSH
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9c5d40a50930..98fe715522e8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -286,7 +286,7 @@ config DEBUG_FS
write to these files.
For detailed documentation on the debugfs API, see
- Documentation/DocBook/filesystems.
+ Documentation/filesystems/.
If unsure, say N.
@@ -778,34 +778,45 @@ config DEBUG_SHIRQ
menu "Debug Lockups and Hangs"
config LOCKUP_DETECTOR
- bool "Detect Hard and Soft Lockups"
+ bool
+
+config SOFTLOCKUP_DETECTOR
+ bool "Detect Soft Lockups"
depends on DEBUG_KERNEL && !S390
+ select LOCKUP_DETECTOR
help
Say Y here to enable the kernel to act as a watchdog to detect
- hard and soft lockups.
+ soft lockups.
Softlockups are bugs that cause the kernel to loop in kernel
mode for more than 20 seconds, without giving other tasks a
chance to run. The current stack trace is displayed upon
detection and the system will stay locked up.
+config HARDLOCKUP_DETECTOR_PERF
+ bool
+ select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+ bool "Detect Hard Lockups"
+ depends on DEBUG_KERNEL && !S390
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select LOCKUP_DETECTOR
+ select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+ select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+ help
+ Say Y here to enable the kernel to act as a watchdog to detect
+ hard lockups.
+
Hardlockups are bugs that cause the CPU to loop in kernel mode
for more than 10 seconds, without letting other interrupts have a
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
- The overhead should be minimal. A periodic hrtimer runs to
- generate interrupts and kick the watchdog task every 4 seconds.
- An NMI is generated every 10 seconds or so to check for hardlockups.
-
- The frequency of hrtimer and NMI events and the soft and hard lockup
- thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
- def_bool y
- depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
- depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
- depends on LOCKUP_DETECTOR
+ depends on SOFTLOCKUP_DETECTOR
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
int
- depends on LOCKUP_DETECTOR
+ depends on SOFTLOCKUP_DETECTOR
range 0 1
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
config DETECT_HUNG_TASK
bool "Detect Hung Tasks"
depends on DEBUG_KERNEL
- default LOCKUP_DETECTOR
+ default SOFTLOCKUP_DETECTOR
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
@@ -1212,6 +1223,34 @@ config STACKTRACE
It is also used by various kernel debugging features that require
stack trace generation.
+config WARN_ALL_UNSEEDED_RANDOM
+ bool "Warn for all uses of unseeded randomness"
+ default n
+ help
+ Some parts of the kernel contain bugs relating to their use of
+ cryptographically secure random numbers before it's actually possible
+ to generate those numbers securely. This setting ensures that these
+ flaws don't go unnoticed, by enabling a message, should this ever
+ occur. This will allow people with obscure setups to know when things
+ are going wrong, so that they might contact developers about fixing
+ it.
+
+ Unfortunately, on some models of some architectures getting
+ a fully seeded CRNG is extremely difficult, and so this can
+ result in dmesg getting spammed for a surprisingly long
+ time. This is really bad from a security perspective, and
+ so architecture maintainers really need to do what they can
+ to get the CRNG seeded sooner after the system is booted.
+ However, since users can not do anything actionble to
+ address this, by default the kernel will issue only a single
+ warning for the first use of unseeded randomness.
+
+ Say Y here if you want to receive warnings for all uses of
+ unseeded randomness. This will be of use primarily for
+ those developers interersted in improving the security of
+ Linux kernels running on their architecture (or
+ subarchitecture).
+
config DEBUG_KOBJECT
bool "kobject debugging"
depends on DEBUG_KERNEL
@@ -1594,7 +1633,7 @@ config RBTREE_TEST
config INTERVAL_TREE_TEST
tristate "Interval tree test"
- depends on m && DEBUG_KERNEL
+ depends on DEBUG_KERNEL
select INTERVAL_TREE
help
A benchmark measuring the performance of the interval tree library
@@ -1785,6 +1824,17 @@ config TEST_FIRMWARE
If unsure, say N.
+config TEST_SYSCTL
+ tristate "sysctl test driver"
+ default n
+ depends on PROC_SYSCTL
+ help
+ This builds the "test_sysctl" module. This driver enables to test the
+ proc sysctl interfaces available to drivers safely without affecting
+ production knobs which might alter system functionality.
+
+ If unsure, say N.
+
config TEST_UDELAY
tristate "udelay test driver"
default n
@@ -1825,6 +1875,33 @@ config BUG_ON_DATA_CORRUPTION
If unsure, say N.
+config TEST_KMOD
+ tristate "kmod stress tester"
+ default n
+ depends on m
+ depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
+ depends on NETDEVICES && NET_CORE && INET # for TUN
+ select TEST_LKM
+ select XFS_FS
+ select TUN
+ select BTRFS_FS
+ help
+ Test the kernel's module loading mechanism: kmod. kmod implements
+ support to load modules using the Linux kernel's usermode helper.
+ This test provides a series of tests against kmod.
+
+ Although technically you can either build test_kmod as a module or
+ into the kernel we disallow building it into the kernel since
+ it stress tests request_module() and this will very likely cause
+ some issues by taking over precious threads available from other
+ module load requests, ultimately this could be fatal.
+
+ To run tests run:
+
+ tools/testing/selftests/kmod/kmod.sh --help
+
+ If unsure, say N.
+
source "samples/Kconfig"
source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 533f912638ed..ab4ff0eea776 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -13,7 +13,7 @@ menuconfig KGDB
CONFIG_FRAME_POINTER to aid in producing more reliable stack
backtraces in the external debugger. Documentation of
kernel debugger is available at http://kgdb.sourceforge.net
- as well as in DocBook form in Documentation/DocBook/. If
+ as well as in Documentation/dev-tools/kgdb.rst. If
unsure, say N.
if KGDB
diff --git a/lib/Makefile b/lib/Makefile
index 07fbe6a75692..40c18372b301 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -38,7 +38,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
- once.o refcount.o usercopy.o
+ once.o refcount.o usercopy.o errseq.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += hexdump.o
@@ -46,6 +46,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
@@ -60,6 +61,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
+obj-$(CONFIG_TEST_KMOD) += test_kmod.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
@@ -96,6 +98,7 @@ obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
+obj-$(CONFIG_CRC4) += crc4.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index fd70c0e0e673..62ab629f51ca 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -153,8 +153,10 @@ static __init void test_atomic64(void)
long long v0 = 0xaaa31337c001d00dLL;
long long v1 = 0xdeadbeefdeafcafeLL;
long long v2 = 0xfaceabadf00df001LL;
+ long long v3 = 0x8000000000000000LL;
long long onestwos = 0x1111111122222222LL;
long long one = 1LL;
+ int r_int;
atomic64_t v = ATOMIC64_INIT(v0);
long long r = v0;
@@ -240,6 +242,11 @@ static __init void test_atomic64(void)
BUG_ON(!atomic64_inc_not_zero(&v));
r += one;
BUG_ON(v.counter != r);
+
+ /* Confirm the return value fits in an int, even if the value doesn't */
+ INIT(v3);
+ r_int = atomic64_inc_not_zero(&v);
+ BUG_ON(!r_int);
}
static __init int test_atomics_init(void)
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 08c6ef3a2b6f..9a532805364b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
}
EXPORT_SYMBOL(__bitmap_weight);
-void bitmap_set(unsigned long *map, unsigned int start, int len)
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
const unsigned int size = start + len;
@@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
*p |= mask_to_set;
}
}
-EXPORT_SYMBOL(bitmap_set);
+EXPORT_SYMBOL(__bitmap_set);
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
const unsigned int size = start + len;
@@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len)
*p &= ~mask_to_clear;
}
}
-EXPORT_SYMBOL(bitmap_clear);
+EXPORT_SYMBOL(__bitmap_clear);
/**
* bitmap_find_next_zero_area_off - find a contiguous aligned zero area
diff --git a/lib/bsearch.c b/lib/bsearch.c
index e33c179089db..18b445b010c3 100644
--- a/lib/bsearch.c
+++ b/lib/bsearch.c
@@ -33,19 +33,21 @@
void *bsearch(const void *key, const void *base, size_t num, size_t size,
int (*cmp)(const void *key, const void *elt))
{
- size_t start = 0, end = num;
+ const char *pivot;
int result;
- while (start < end) {
- size_t mid = start + (end - start) / 2;
+ while (num > 0) {
+ pivot = base + (num >> 1) * size;
+ result = cmp(key, pivot);
- result = cmp(key, base + mid * size);
- if (result < 0)
- end = mid;
- else if (result > 0)
- start = mid + 1;
- else
- return (void *)base + mid * size;
+ if (result == 0)
+ return (void *)pivot;
+
+ if (result > 0) {
+ base = pivot + size;
+ num--;
+ }
+ num >>= 1;
}
return NULL;
diff --git a/lib/crc4.c b/lib/crc4.c
new file mode 100644
index 000000000000..cf6db46661be
--- /dev/null
+++ b/lib/crc4.c
@@ -0,0 +1,46 @@
+/*
+ * crc4.c - simple crc-4 calculations.
+ *
+ * This source code is licensed under the GNU General Public License, Version
+ * 2. See the file COPYING for more details.
+ */
+
+#include <linux/crc4.h>
+#include <linux/module.h>
+
+static const uint8_t crc4_tab[] = {
+ 0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2,
+ 0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3,
+};
+
+/**
+ * crc4 - calculate the 4-bit crc of a value.
+ * @crc: starting crc4
+ * @x: value to checksum
+ * @bits: number of bits in @x to checksum
+ *
+ * Returns the crc4 value of @x, using polynomial 0b10111.
+ *
+ * The @x value is treated as left-aligned, and bits above @bits are ignored
+ * in the crc calculations.
+ */
+uint8_t crc4(uint8_t c, uint64_t x, int bits)
+{
+ int i;
+
+ /* mask off anything above the top bit */
+ x &= (1ull << bits) - 1;
+
+ /* Align to 4-bits */
+ bits = (bits + 3) & ~0x3;
+
+ /* Calculate crc4 over four-bit nibbles, starting at the MSbit */
+ for (i = bits - 4; i >= 0; i -= 4)
+ c = crc4_tab[c ^ ((x >> i) & 0xf)];
+
+ return c;
+}
+EXPORT_SYMBOL_GPL(crc4);
+
+MODULE_DESCRIPTION("CRC4 calculations");
+MODULE_LICENSE("GPL");
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
index de26c8b68f34..acc4190e2731 100644
--- a/lib/dma-noop.c
+++ b/lib/dma-noop.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
+#include <linux/pfn.h>
static void *dma_noop_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
@@ -16,7 +17,8 @@ static void *dma_noop_alloc(struct device *dev, size_t size,
ret = (void *)__get_free_pages(gfp, get_order(size));
if (ret)
- *dma_handle = virt_to_phys(ret);
+ *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
+
return ret;
}
@@ -32,7 +34,7 @@ static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- return page_to_phys(page) + offset;
+ return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset);
}
static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
@@ -43,34 +45,23 @@ static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nent
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
+ dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
void *va;
BUG_ON(!sg_page(sg));
va = sg_virt(sg);
- sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va);
+ sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset;
sg_dma_len(sg) = sg->length;
}
return nents;
}
-static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
-static int dma_noop_supported(struct device *dev, u64 mask)
-{
- return 1;
-}
-
const struct dma_map_ops dma_noop_ops = {
.alloc = dma_noop_alloc,
.free = dma_noop_free,
.map_page = dma_noop_map_page,
.map_sg = dma_noop_map_sg,
- .mapping_error = dma_noop_mapping_error,
- .dma_supported = dma_noop_supported,
};
EXPORT_SYMBOL(dma_noop_ops);
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
index dcd4df1f7174..5c4f11329721 100644
--- a/lib/dma-virt.c
+++ b/lib/dma-virt.c
@@ -51,22 +51,10 @@ static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
return nents;
}
-static int dma_virt_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return false;
-}
-
-static int dma_virt_supported(struct device *dev, u64 mask)
-{
- return true;
-}
-
const struct dma_map_ops dma_virt_ops = {
.alloc = dma_virt_alloc,
.free = dma_virt_free,
.map_page = dma_virt_map_page,
.map_sg = dma_virt_map_sg,
- .mapping_error = dma_virt_mapping_error,
- .dma_supported = dma_virt_supported,
};
EXPORT_SYMBOL(dma_virt_ops);
diff --git a/lib/errseq.c b/lib/errseq.c
new file mode 100644
index 000000000000..841fa24e6e00
--- /dev/null
+++ b/lib/errseq.c
@@ -0,0 +1,208 @@
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/atomic.h>
+#include <linux/errseq.h>
+
+/*
+ * An errseq_t is a way of recording errors in one place, and allowing any
+ * number of "subscribers" to tell whether it has changed since a previous
+ * point where it was sampled.
+ *
+ * It's implemented as an unsigned 32-bit value. The low order bits are
+ * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits
+ * are used as a counter. This is done with atomics instead of locking so that
+ * these functions can be called from any context.
+ *
+ * The general idea is for consumers to sample an errseq_t value. That value
+ * can later be used to tell whether any new errors have occurred since that
+ * sampling was done.
+ *
+ * Note that there is a risk of collisions if new errors are being recorded
+ * frequently, since we have so few bits to use as a counter.
+ *
+ * To mitigate this, one bit is used as a flag to tell whether the value has
+ * been sampled since a new value was recorded. That allows us to avoid bumping
+ * the counter if no one has sampled it since the last time an error was
+ * recorded.
+ *
+ * A new errseq_t should always be zeroed out. A errseq_t value of all zeroes
+ * is the special (but common) case where there has never been an error. An all
+ * zero value thus serves as the "epoch" if one wishes to know whether there
+ * has ever been an error set since it was first initialized.
+ */
+
+/* The low bits are designated for error code (max of MAX_ERRNO) */
+#define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1)
+
+/* This bit is used as a flag to indicate whether the value has been seen */
+#define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT)
+
+/* The lowest bit of the counter */
+#define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1))
+
+/**
+ * __errseq_set - set a errseq_t for later reporting
+ * @eseq: errseq_t field that should be set
+ * @err: error to set
+ *
+ * This function sets the error in *eseq, and increments the sequence counter
+ * if the last sequence was sampled at some point in the past.
+ *
+ * Any error set will always overwrite an existing error.
+ *
+ * Most callers will want to use the errseq_set inline wrapper to efficiently
+ * handle the common case where err is 0.
+ *
+ * We do return an errseq_t here, primarily for debugging purposes. The return
+ * value should not be used as a previously sampled value in later calls as it
+ * will not have the SEEN flag set.
+ */
+errseq_t __errseq_set(errseq_t *eseq, int err)
+{
+ errseq_t cur, old;
+
+ /* MAX_ERRNO must be able to serve as a mask */
+ BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1);
+
+ /*
+ * Ensure the error code actually fits where we want it to go. If it
+ * doesn't then just throw a warning and don't record anything. We
+ * also don't accept zero here as that would effectively clear a
+ * previous error.
+ */
+ old = READ_ONCE(*eseq);
+
+ if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO),
+ "err = %d\n", err))
+ return old;
+
+ for (;;) {
+ errseq_t new;
+
+ /* Clear out error bits and set new error */
+ new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err;
+
+ /* Only increment if someone has looked at it */
+ if (old & ERRSEQ_SEEN)
+ new += ERRSEQ_CTR_INC;
+
+ /* If there would be no change, then call it done */
+ if (new == old) {
+ cur = new;
+ break;
+ }
+
+ /* Try to swap the new value into place */
+ cur = cmpxchg(eseq, old, new);
+
+ /*
+ * Call it success if we did the swap or someone else beat us
+ * to it for the same value.
+ */
+ if (likely(cur == old || cur == new))
+ break;
+
+ /* Raced with an update, try again */
+ old = cur;
+ }
+ return cur;
+}
+EXPORT_SYMBOL(__errseq_set);
+
+/**
+ * errseq_sample - grab current errseq_t value
+ * @eseq: pointer to errseq_t to be sampled
+ *
+ * This function allows callers to sample an errseq_t value, marking it as
+ * "seen" if required.
+ */
+errseq_t errseq_sample(errseq_t *eseq)
+{
+ errseq_t old = READ_ONCE(*eseq);
+ errseq_t new = old;
+
+ /*
+ * For the common case of no errors ever having been set, we can skip
+ * marking the SEEN bit. Once an error has been set, the value will
+ * never go back to zero.
+ */
+ if (old != 0) {
+ new |= ERRSEQ_SEEN;
+ if (old != new)
+ cmpxchg(eseq, old, new);
+ }
+ return new;
+}
+EXPORT_SYMBOL(errseq_sample);
+
+/**
+ * errseq_check - has an error occurred since a particular sample point?
+ * @eseq: pointer to errseq_t value to be checked
+ * @since: previously-sampled errseq_t from which to check
+ *
+ * Grab the value that eseq points to, and see if it has changed "since"
+ * the given value was sampled. The "since" value is not advanced, so there
+ * is no need to mark the value as seen.
+ *
+ * Returns the latest error set in the errseq_t or 0 if it hasn't changed.
+ */
+int errseq_check(errseq_t *eseq, errseq_t since)
+{
+ errseq_t cur = READ_ONCE(*eseq);
+
+ if (likely(cur == since))
+ return 0;
+ return -(cur & MAX_ERRNO);
+}
+EXPORT_SYMBOL(errseq_check);
+
+/**
+ * errseq_check_and_advance - check an errseq_t and advance to current value
+ * @eseq: pointer to value being checked and reported
+ * @since: pointer to previously-sampled errseq_t to check against and advance
+ *
+ * Grab the eseq value, and see whether it matches the value that "since"
+ * points to. If it does, then just return 0.
+ *
+ * If it doesn't, then the value has changed. Set the "seen" flag, and try to
+ * swap it into place as the new eseq value. Then, set that value as the new
+ * "since" value, and return whatever the error portion is set to.
+ *
+ * Note that no locking is provided here for concurrent updates to the "since"
+ * value. The caller must provide that if necessary. Because of this, callers
+ * may want to do a lockless errseq_check before taking the lock and calling
+ * this.
+ */
+int errseq_check_and_advance(errseq_t *eseq, errseq_t *since)
+{
+ int err = 0;
+ errseq_t old, new;
+
+ /*
+ * Most callers will want to use the inline wrapper to check this,
+ * so that the common case of no error is handled without needing
+ * to take the lock that protects the "since" value.
+ */
+ old = READ_ONCE(*eseq);
+ if (old != *since) {
+ /*
+ * Set the flag and try to swap it into place if it has
+ * changed.
+ *
+ * We don't care about the outcome of the swap here. If the
+ * swap doesn't occur, then it has either been updated by a
+ * writer who is altering the value in some way (updating
+ * counter or resetting the error), or another reader who is
+ * just setting the "seen" flag. Either outcome is OK, and we
+ * can advance "since" and return an error based on what we
+ * have.
+ */
+ new = old | ERRSEQ_SEEN;
+ if (new != old)
+ cmpxchg(eseq, old, new);
+ *since = new;
+ err = -(new & MAX_ERRNO);
+ }
+ return err;
+}
+EXPORT_SYMBOL(errseq_check_and_advance);
diff --git a/lib/extable.c b/lib/extable.c
index 62968daa66a9..f54996fdd0b8 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -9,6 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sort.h>
@@ -51,7 +52,7 @@ static void swap_ex(void *a, void *b, int size)
* This is used both for the kernel exception table and for
* the exception tables of modules that get loaded.
*/
-static int cmp_ex(const void *a, const void *b)
+static int cmp_ex_sort(const void *a, const void *b)
{
const struct exception_table_entry *x = a, *y = b;
@@ -67,7 +68,7 @@ void sort_extable(struct exception_table_entry *start,
struct exception_table_entry *finish)
{
sort(start, finish - start, sizeof(struct exception_table_entry),
- cmp_ex, swap_ex);
+ cmp_ex_sort, swap_ex);
}
#ifdef CONFIG_MODULES
@@ -93,6 +94,20 @@ void trim_init_extable(struct module *m)
#endif /* !ARCH_HAS_SORT_EXTABLE */
#ifndef ARCH_HAS_SEARCH_EXTABLE
+
+static int cmp_ex_search(const void *key, const void *elt)
+{
+ const struct exception_table_entry *_elt = elt;
+ unsigned long _key = *(unsigned long *)key;
+
+ /* avoid overflow */
+ if (_key > ex_to_insn(_elt))
+ return 1;
+ if (_key < ex_to_insn(_elt))
+ return -1;
+ return 0;
+}
+
/*
* Search one exception table for an entry corresponding to the
* given instruction address, and return the address of the entry,
@@ -101,25 +116,11 @@ void trim_init_extable(struct module *m)
* already sorted.
*/
const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+ const size_t num,
unsigned long value)
{
- while (first <= last) {
- const struct exception_table_entry *mid;
-
- mid = ((last - first) >> 1) + first;
- /*
- * careful, the distance between value and insn
- * can be larger than MAX_LONG:
- */
- if (ex_to_insn(mid) < value)
- first = mid + 1;
- else if (ex_to_insn(mid) > value)
- last = mid - 1;
- else
- return mid;
- }
- return NULL;
+ return bsearch(&value, base, num,
+ sizeof(struct exception_table_entry), cmp_ex_search);
}
#endif
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4ff157159a0d..7d315fdb9f13 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -107,6 +107,15 @@ static inline bool fail_stacktrace(struct fault_attr *attr)