summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-13 19:15:14 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-13 19:15:14 -0800
commitd080827f850ba4df5b955d5ca8c8c0fc92fe18c0 (patch)
tree37262315200bbbe50bdd64ce3011951a92855159
parentcbd88cd4c07f9361914ab7fd7e21c9227986fe68 (diff)
parent8b63b6bfc1a551acf154061699028c7032d7890c (diff)
downloadlinux-d080827f850ba4df5b955d5ca8c8c0fc92fe18c0.tar.gz
linux-d080827f850ba4df5b955d5ca8c8c0fc92fe18c0.tar.bz2
linux-d080827f850ba4df5b955d5ca8c8c0fc92fe18c0.zip
Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: "The bulk of this has appeared in -next and independently received a build success notification from the kbuild robot. The 'for-4.5/block- dax' topic branch was rebased over the weekend to drop the "block device end-of-life" rework that Al would like to see re-implemented with a notifier, and to address bug reports against the badblocks integration. There is pending feedback against "libnvdimm: Add a poison list and export badblocks" received last week. Linda identified some localized fixups that we will handle incrementally. Summary: - Media error handling: The 'badblocks' implementation that originated in md-raid is up-levelled to a generic capability of a block device. This initial implementation is limited to being consulted in the pmem block-i/o path. Later, 'badblocks' will be consulted when creating dax mappings. - Raw block device dax: For virtualization and other cases that want large contiguous mappings of persistent memory, add the capability to dax-mmap a block device directly. - Increased /dev/mem restrictions: Add an option to treat all io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access while a driver is actively using an address range. This behavior is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be overridden by the existing "iomem=relaxed" kernel command line option. - Miscellaneous fixes include a 'pfn'-device huge page alignment fix, block device shutdown crash fix, and other small libnvdimm fixes" * tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits) block: kill disk_{check|set|clear|alloc}_badblocks libnvdimm, pmem: nvdimm_read_bytes() badblocks support pmem, dax: disable dax in the presence of bad blocks pmem: fail io-requests to known bad blocks libnvdimm: convert to statically allocated badblocks libnvdimm: don't fail init for full badblocks list block, badblocks: introduce devm_init_badblocks block: clarify badblocks lifetime badblocks: rename badblocks_free to badblocks_exit libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h libnvdimm: Add a poison list and export badblocks nfit_test: Enable DSMs for all test NFITs md: convert to use the generic badblocks code block: Add badblock management for gendisks badblocks: Add core badblock management code block: fix del_gendisk() vs blkdev_ioctl crash block: enable dax for raw block devices block: introduce bdev_file_inode() restrict /dev/mem to idle io memory ranges arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug ...
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/Kconfig.debug14
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/Kconfig.debug14
-rw-r--r--arch/frv/Kconfig1
-rw-r--r--arch/m32r/Kconfig1
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/Kconfig.debug12
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/Kconfig.debug12
-rw-r--r--arch/tile/Kconfig4
-rw-r--r--arch/unicore32/Kconfig1
-rw-r--r--arch/unicore32/Kconfig.debug14
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Kconfig.debug17
-rw-r--r--block/Makefile2
-rw-r--r--block/badblocks.c585
-rw-r--r--block/genhd.c30
-rw-r--r--block/ioctl.c71
-rw-r--r--drivers/acpi/nfit.c203
-rw-r--r--drivers/md/md.c516
-rw-r--r--drivers/md/md.h40
-rw-r--r--drivers/nvdimm/core.c169
-rw-r--r--drivers/nvdimm/namespace_devs.c132
-rw-r--r--drivers/nvdimm/nd-core.h2
-rw-r--r--drivers/nvdimm/nd.h16
-rw-r--r--drivers/nvdimm/pfn_devs.c93
-rw-r--r--drivers/nvdimm/pmem.c90
-rw-r--r--drivers/nvdimm/region_devs.c66
-rw-r--r--fs/block_dev.c122
-rw-r--r--include/linux/badblocks.h65
-rw-r--r--include/linux/fs.h11
-rw-r--r--include/linux/genhd.h2
-rw-r--r--include/linux/libnvdimm.h1
-rw-r--r--include/uapi/linux/fs.h2
-rw-r--r--kernel/resource.c11
-rw-r--r--lib/Kconfig.debug39
-rw-r--r--tools/testing/nvdimm/Kbuild2
-rw-r--r--tools/testing/nvdimm/test/iomap.c93
-rw-r--r--tools/testing/nvdimm/test/nfit.c11
40 files changed, 1673 insertions, 796 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 426115f7cb63..84b1b21b08ae 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2,6 +2,7 @@ config ARM
bool
default y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_CUSTOM_GPIO_H
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 259c0ca9c99a..e356357d86bb 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -15,20 +15,6 @@ config ARM_PTDUMP
kernel.
If in doubt, say "N"
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to memory mapped peripherals.
-
- If in doubt, say Y.
-
# RMK wants arm kernels compiled with frame pointers or stack unwinding.
# If you know what you are doing and are willing to live without stack
# traces, you can get a slightly smaller kernel by setting this option to
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d6ebffdc6bb1..4d5b416e2e4b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -3,6 +3,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 04fb73b973f1..e13c4bf84d9e 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -14,20 +14,6 @@ config ARM64_PTDUMP
kernel.
If in doubt, say "N"
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU
- help
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to memory mapped peripherals.
-
- If in doubt, say Y.
-
config PID_IN_CONTEXTIDR
bool "Write the current PID to the CONTEXTIDR register"
help
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 34aa19352dc1..03bfd6bf03e7 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -10,6 +10,7 @@ config FRV
select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CPU_DEVICES
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_WANT_IPC_PARSE_VERSION
select OLD_SIGSUSPEND3
select OLD_SIGACTION
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 9e44bbd8051e..836ac5a963c8 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -13,6 +13,7 @@ config M32R
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_ATOMIC64
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_USES_GETTIMEOFFSET
select MODULES_USE_ELF_RELA
select HAVE_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db49e0d796b1..85eabc49de61 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select ARCH_HAS_DMA_SET_COHERENT_MASK
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
config GENERIC_CSUM
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3a510f4a6b68..a0e44a9c456f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -335,18 +335,6 @@ config PPC_EARLY_DEBUG_CPM_ADDR
platform probing is done, all platforms selected must
share the same address.
-config STRICT_DEVMEM
- def_bool y
- prompt "Filter access to /dev/mem"
- help
- This option restricts access to /dev/mem. If this option is
- disabled, you allow userspace access to all memory, including
- kernel and userspace memory. Accidental memory access is likely
- to be disastrous.
- Memory access is required for experts who want to debug the kernel.
-
- If you are unsure, say Y.
-
config FAIL_IOMMU
bool "Fault-injection capability for IOMMU"
depends on FAULT_INJECTION
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6d0ae7d30724..24490344c30f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -66,6 +66,7 @@ config S390
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SG_CHAIN
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c56878e1245f..26c5d5beb4be 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
-config STRICT_DEVMEM
- def_bool y
- prompt "Filter access to /dev/mem"
- ---help---
- This option restricts access to /dev/mem. If this option is
- disabled, you allow userspace access to all memory, including
- kernel and userspace memory. Accidental memory access is likely
- to be disastrous.
- Memory access is required for experts who want to debug the kernel.
-
- If you are unsure, say Y.
-
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 8ec7a4599c08..be05b4ae02b6 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -19,6 +19,7 @@ config TILE
select VIRT_TO_BUS
select SYS_HYPERVISOR
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
@@ -116,9 +117,6 @@ config ARCH_DISCONTIGMEM_DEFAULT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config STRICT_DEVMEM
- def_bool y
-
# SMP is required for Tilera Linux.
config SMP
def_bool y
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c9faddc61100..5dc4c0a43ccd 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -1,5 +1,6 @@
config UNICORE32
def_bool y
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select HAVE_MEMBLOCK
diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug
index 1a3626239843..f075bbe1d46f 100644
--- a/arch/unicore32/Kconfig.debug
+++ b/arch/unicore32/Kconfig.debug
@@ -2,20 +2,6 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to memory mapped peripherals.
-
- If in doubt, say Y.
-
config EARLY_PRINTK
def_bool DEBUG_OCD
help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ccfededfe470..5d2293417946 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 110253ce83af..9b18ed97a8a2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,23 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
source "lib/Kconfig.debug"
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel. Note that with PAT support
- enabled, even in this case there are restrictions on /dev/mem
- use due to the cache aliasing requirements.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to PCI space and the BIOS code and data regions.
- This is sufficient for dosemu and X and all common users of
- /dev/mem.
-
- If in doubt, say Y.
-
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
diff --git a/block/Makefile b/block/Makefile
index 00ecc97629db..db5f622c9d67 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
- partitions/
+ badblocks.o partitions/
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
diff --git a/block/badblocks.c b/block/badblocks.c
new file mode 100644
index 000000000000..7be53cb1cc3c
--- /dev/null
+++ b/block/badblocks.c
@@ -0,0 +1,585 @@
+/*
+ * Bad block management
+ *
+ * - Heavily based on MD badblocks code from Neil Brown
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/badblocks.h>
+#include <linux/seqlock.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+/**
+ * badblocks_check() - check a given range for bad sectors
+ * @bb: the badblocks structure that holds all badblock information
+ * @s: sector (start) at which to check for badblocks
+ * @sectors: number of sectors to check for badblocks
+ * @first_bad: pointer to store location of the first badblock
+ * @bad_sectors: pointer to store number of badblocks after @first_bad
+ *
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64bits wide. This comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ * A 'shift' can be set so that larger blocks are tracked and
+ * consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit. - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so badblocks_check
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad. So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ *
+ * Return:
+ * 0: there are no known bad blocks in the range
+ * 1: there are known bad block which are all acknowledged
+ * -1: there are bad blocks which have not yet been acknowledged in metadata.
+ * plus the start/length of the first bad section we overlap.
+ */
+int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
+ sector_t *first_bad, int *bad_sectors)
+{
+ int hi;
+ int lo;
+ u64 *p = bb->page;
+ int rv;
+ sector_t target = s + sectors;
+ unsigned seq;
+
+ if (bb->shift > 0) {
+ /* round the start down, and the end up */
+ s >>= bb->shift;
+ target += (1<<bb->shift) - 1;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+ /* 'target' is now the first block after the bad range */
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+ lo = 0;
+ rv = 0;
+ hi = bb->count;
+
+ /* Binary search between lo and hi for 'target'
+ * i.e. for the last range that starts before 'target'
+ */
+ /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+ * are known not to be the last range before target.
+ * VARIANT: hi-lo is the number of possible
+ * ranges, and decreases until it reaches 1
+ */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+
+ if (a < target)
+ /* This could still be the one, earlier ranges
+ * could not.
+ */
+ lo = mid;
+ else
+ /* This and later ranges are definitely out. */
+ hi = mid;
+ }
+ /* 'lo' might be the last that started before target, but 'hi' isn't */
+ if (hi > lo) {
+ /* need to check all range that end after 's' to see if
+ * any are unacknowledged.
+ */
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ if (BB_OFFSET(p[lo]) < target) {
+ /* starts before the end, and finishes after
+ * the start, so they must overlap
+ */
+ if (rv != -1 && BB_ACK(p[lo]))
+ rv = 1;
+ else
+ rv = -1;
+ *first_bad = BB_OFFSET(p[lo]);
+ *bad_sectors = BB_LEN(p[lo]);
+ }
+ lo--;
+ }
+ }
+
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_check);
+
+/**
+ * badblocks_set() - Add a range of bad blocks to the table.
+ * @bb: the badblocks structure that holds all badblock information
+ * @s: first sector to mark as bad
+ * @sectors: number of sectors to mark as bad
+ * @acknowledged: weather to mark the bad sectors as acknowledged
+ *
+ * This might extend the table, or might contract it if two adjacent ranges
+ * can be merged. We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ *
+ * Return:
+ * 0: success
+ * 1: failed to set badblocks (out of space)
+ */
+int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
+ int acknowledged)
+{
+ u64 *p;
+ int lo, hi;
+ int rv = 0;
+ unsigned long flags;
+
+ if (bb->shift < 0)
+ /* badblocks are disabled */
+ return 0;
+
+ if (bb->shift) {
+ /* round the start down, and the end up */
+ sector_t next = s + sectors;
+
+ s >>= bb->shift;
+ next += (1<<bb->shift) - 1;
+ next >>= bb->shift;
+ sectors = next - s;
+ }
+
+ write_seqlock_irqsave(&bb->lock, flags);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts at-or-before 's' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+
+ if (a <= s)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi > lo && BB_OFFSET(p[lo]) > s)
+ hi = lo;
+
+ if (hi > lo) {
+ /* we found a range that might merge with the start
+ * of our new range
+ */
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t e = a + BB_LEN(p[lo]);
+ int ack = BB_ACK(p[lo]);
+
+ if (e >= s) {
+ /* Yes, we can merge with a previous range */
+ if (s == a && s + sectors >= e)
+ /* new range covers old */
+ ack = acknowledged;
+ else
+ ack = ack && acknowledged;
+
+ if (e < s + sectors)
+ e = s + sectors;
+ if (e - a <= BB_MAX_LEN) {
+ p[lo] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ /* does not all fit in one range,
+ * make p[lo] maximal
+ */
+ if (BB_LEN(p[lo]) != BB_MAX_LEN)
+ p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ }
+ }
+ if (sectors && hi < bb->count) {
+ /* 'hi' points to the first range that starts after 's'.
+ * Maybe we can merge with the start of that range
+ */
+ sector_t a = BB_OFFSET(p[hi]);
+ sector_t e = a + BB_LEN(p[hi]);
+ int ack = BB_ACK(p[hi]);
+
+ if (a <= s + sectors) {
+ /* merging is possible */
+ if (e <= s + sectors) {
+ /* full overlap */
+ e = s + sectors;
+ ack = acknowledged;
+ } else
+ ack = ack && acknowledged;
+
+ a = s;
+ if (e - a <= BB_MAX_LEN) {
+ p[hi] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ lo = hi;
+ hi++;
+ }
+ }
+ if (sectors == 0 && hi < bb->count) {
+ /* we might be able to combine lo and hi */
+ /* Note: 's' is at the end of 'lo' */
+ sector_t a = BB_OFFSET(p[hi]);
+ int lolen = BB_LEN(p[lo]);
+ int hilen = BB_LEN(p[hi]);
+ int newlen = lolen + hilen - (s - a);
+
+ if (s >= a && newlen < BB_MAX_LEN) {
+ /* yes, we can combine them */
+ int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+
+ p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+ memmove(p + hi, p + hi + 1,
+ (bb->count - hi - 1) * 8);
+ bb->count--;
+ }
+ }
+ while (sectors) {
+ /* didn't merge (it all).
+ * Need to add a range just before 'hi'
+ */
+ if (bb->count >= MAX_BADBLOCKS) {
+ /* No room for more */
+ rv = 1;
+ break;
+ } else {
+ int this_sectors = sectors;
+
+ memmove(p + hi + 1, p + hi,
+ (bb->count - hi) * 8);
+ bb->count++;
+
+ if (this_sectors > BB_MAX_LEN)
+ this_sectors = BB_MAX_LEN;
+ p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+ sectors -= this_sectors;
+ s += this_sectors;
+ }
+ }
+
+ bb->changed = 1;
+ if (!acknowledged)
+ bb->unacked_exist = 1;
+ write_sequnlock_irqrestore(&bb->lock, flags);
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_set);
+
+/**
+ * badblocks_clear() - Remove a range of bad blocks to the table.
+ * @bb: the badblocks structure that holds all badblock information
+ * @s: first sector to mark as bad
+ * @sectors: number of sectors to mark as bad
+ *
+ * This may involve extending the table if we spilt a region,
+ * but it must not fail. So if the table becomes full, we just
+ * drop the remove request.
+ *
+ * Return:
+ * 0: success
+ * 1: failed to clear badblocks
+ */
+int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
+{
+ u64 *p;
+ int lo, hi;
+ sector_t target = s + sectors;
+ int rv = 0;
+
+ if (bb->shift > 0) {
+ /* When clearing we round the start up and the end down.
+ * This should not matter as the shift should align with
+ * the block size and no rounding should ever be needed.
+ * However it is better the think a block is bad when it
+ * isn't than to think a block is not bad when it is.
+ */
+ s += (1<<bb->shift) - 1;
+ s >>= bb->shift;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+
+ write_seqlock_irq(&bb->lock);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts before 'target' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+
+ if (a < target)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi > lo) {
+ /* p[lo] is the last range that could overlap the
+ * current range. Earlier ranges could also overlap,
+ * but only this one can overlap the end of the range.
+ */
+ if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+ /* Partial overlap, leave the tail of this range */
+ int ack = BB_ACK(p[lo]);
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t end = a + BB_LEN(p[lo]);
+
+ if (a < s) {
+ /* we need to split this range */
+ if (bb->count >= MAX_BADBLOCKS) {
+ rv = -ENOSPC;
+ goto out;
+ }
+ memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+ bb->count++;
+ p[lo] = BB_MAKE(a, s-a, ack);
+ lo++;
+ }
+ p[lo] = BB_MAKE(target, end - target, ack);
+ /* there is no longer an overlap */
+ hi = lo;
+ lo--;
+ }
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ /* This range does overlap */
+ if (BB_OFFSET(p[lo]) < s) {
+ /* Keep the early parts of this range. */
+ int ack = BB_ACK(p[lo]);
+ sector_t start = BB_OFFSET(p[lo]);
+
+ p[lo] = BB_MAKE(start, s - start, ack);
+ /* now low doesn't overlap, so.. */
+ break;
+ }
+ lo--;
+ }
+ /* 'lo' is strictly before, 'hi' is strictly after,
+ * anything between needs to be discarded
+ */
+ if (hi - lo > 1) {
+ memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+ bb->count -= (hi - lo - 1);
+ }
+ }
+
+ bb->changed = 1;
+out:
+ write_sequnlock_irq(&bb->lock);
+ return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_clear);
+
+/**
+ * ack_all_badblocks() - Acknowledge all bad blocks in a list.
+ * @bb: the badblocks structure that holds all badblock information
+ *
+ * This only succeeds if ->changed is clear. It is used by
+ * in-kernel metadata updates
+ */
+void ack_all_badblocks(struct badblocks *bb)
+{
+ if (bb->page == NULL || bb->changed)
+ /* no point even trying */
+ return;