summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 13:10:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 13:10:57 -0700
commit89fd915c402113528750353ad6de9ea68a787e5c (patch)
tree03ba8e8e6400e43ef518393259941eae39ee32d8
parent66c9457df3926efff65529dab1a8c742df756790 (diff)
parent04c3c982fcc0151ed3574d7ae4f1e62278054d72 (diff)
downloadlinux-89fd915c402113528750353ad6de9ea68a787e5c.tar.gz
linux-89fd915c402113528750353ad6de9ea68a787e5c.tar.bz2
linux-89fd915c402113528750353ad6de9ea68a787e5c.zip
Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm from Dan Williams: "A rework of media error handling in the BTT driver and other updates. It has appeared in a few -next releases and collected some late- breaking build-error and warning fixups as a result. Summary: - Media error handling support in the Block Translation Table (BTT) driver is reworked to address sleeping-while-atomic locking and memory-allocation-context conflicts. - The dax_device lookup overhead for xfs and ext4 is moved out of the iomap hot-path to a mount-time lookup. - A new 'ecc_unit_size' sysfs attribute is added to advertise the read-modify-write boundary property of a persistent memory range. - Preparatory fix-ups for arm and powerpc pmem support are included along with other miscellaneous fixes" * tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits) libnvdimm, btt: fix format string warnings libnvdimm, btt: clean up warning and error messages ext4: fix null pointer dereference on sbi libnvdimm, nfit: move the check on nd_reserved2 to the endpoint dax: fix FS_DAX=n BLOCK=y compilation libnvdimm: fix integer overflow static analysis warning libnvdimm, nd_blk: remove mmio_flush_range() libnvdimm, btt: rework error clearing libnvdimm: fix potential deadlock while clearing errors libnvdimm, btt: cache sector_size in arena_info libnvdimm, btt: ensure that flags were also unchanged during a map_read libnvdimm, btt: refactor map entry operations with macros libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute ext4: perform dax_device lookup at mount ext2: perform dax_device lookup at mount xfs: perform dax_device lookup at mount dax: introduce a fs_dax_get_by_bdev() helper libnvdimm, btt: check memory allocation failure libnvdimm, label: fix index block size calculation ...
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/cacheflush.h2
-rw-r--r--drivers/acpi/nfit/Kconfig2
-rw-r--r--drivers/acpi/nfit/core.c50
-rw-r--r--drivers/dax/super.c12
-rw-r--r--drivers/nvdimm/btt.c197
-rw-r--r--drivers/nvdimm/btt.h11
-rw-r--r--drivers/nvdimm/btt_devs.c4
-rw-r--r--drivers/nvdimm/bus.c27
-rw-r--r--drivers/nvdimm/claim.c9
-rw-r--r--drivers/nvdimm/core.c10
-rw-r--r--drivers/nvdimm/label.c30
-rw-r--r--drivers/nvdimm/namespace_devs.c6
-rw-r--r--drivers/nvdimm/nd.h16
-rw-r--r--drivers/nvdimm/pfn_devs.c53
-rw-r--r--drivers/nvdimm/pmem.h14
-rw-r--r--drivers/nvdimm/region_devs.c6
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/xfs/xfs_aops.c13
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_buf.c4
-rw-r--r--fs/xfs/xfs_buf.h3
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_super.c25
-rw-r--r--include/linux/dax.h6
-rw-r--r--include/linux/libnvdimm.h16
-rw-r--r--lib/Kconfig3
-rw-r--r--tools/testing/nvdimm/test/nfit.c4
33 files changed, 397 insertions, 170 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a3e6e6136a47..971feac13506 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -53,7 +53,6 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
- select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64
# Causing hangs/crashes, see the commit that added this change for details.
select ARCH_HAS_REFCOUNT if BROKEN
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 8b4140f6724f..cb9a1af109b4 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -7,6 +7,4 @@
void clflush_cache_range(void *addr, unsigned int size);
-#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
index 6d3351452ea2..929ba4da0b30 100644
--- a/drivers/acpi/nfit/Kconfig
+++ b/drivers/acpi/nfit/Kconfig
@@ -2,7 +2,7 @@ config ACPI_NFIT
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV
- depends on ARCH_HAS_MMIO_FLUSH
+ depends on ARCH_HAS_PMEM_API
select LIBNVDIMM
help
Infrastructure to probe ACPI 6 compliant platforms for
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 1893e416e7c0..9c2c49b6a240 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -228,6 +228,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
if (cmd == ND_CMD_CALL) {
call_pkg = buf;
func = call_pkg->nd_command;
+
+ for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
+ if (call_pkg->nd_reserved2[i])
+ return -EINVAL;
}
if (nvdimm) {
@@ -1674,8 +1678,19 @@ static ssize_t range_index_show(struct device *dev,
}
static DEVICE_ATTR_RO(range_index);
+static ssize_t ecc_unit_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+ struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+
+ return sprintf(buf, "%d\n", nfit_spa->clear_err_unit);
+}
+static DEVICE_ATTR_RO(ecc_unit_size);
+
static struct attribute *acpi_nfit_region_attributes[] = {
&dev_attr_range_index.attr,
+ &dev_attr_ecc_unit_size.attr,
NULL,
};
@@ -1804,6 +1819,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
spa->range_index, i);
+ struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
if (!memdev || !nfit_mem->dcr) {
dev_err(dev, "%s: failed to find DCR\n", __func__);
@@ -1811,13 +1827,13 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
}
map->region_offset = memdev->region_offset;
- map->serial_number = nfit_mem->dcr->serial_number;
+ map->serial_number = dcr->serial_number;
map2->region_offset = memdev->region_offset;
- map2->serial_number = nfit_mem->dcr->serial_number;
- map2->vendor_id = nfit_mem->dcr->vendor_id;
- map2->manufacturing_date = nfit_mem->dcr->manufacturing_date;
- map2->manufacturing_location = nfit_mem->dcr->manufacturing_location;
+ map2->serial_number = dcr->serial_number;
+ map2->vendor_id = dcr->vendor_id;
+ map2->manufacturing_date = dcr->manufacturing_date;
+ map2->manufacturing_location = dcr->manufacturing_location;
}
/* v1.1 namespaces */
@@ -1835,6 +1851,28 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
cmp_map_compat, NULL);
nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+ /* record the result of the sort for the mapping position */
+ for (i = 0; i < nr; i++) {
+ struct nfit_set_info_map2 *map2 = &info2->mapping[i];
+ int j;
+
+ for (j = 0; j < nr; j++) {
+ struct nd_mapping_desc *mapping = &ndr_desc->mapping[j];
+ struct nvdimm *nvdimm = mapping->nvdimm;
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
+
+ if (map2->serial_number == dcr->serial_number &&
+ map2->vendor_id == dcr->vendor_id &&
+ map2->manufacturing_date == dcr->manufacturing_date &&
+ map2->manufacturing_location
+ == dcr->manufacturing_location) {
+ mapping->position = i;
+ break;
+ }
+ }
+ }
+
ndr_desc->nd_set = nd_set;
devm_kfree(dev, info);
devm_kfree(dev, info2);
@@ -1930,7 +1968,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
else {
if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
- mmio_flush_range((void __force *)
+ arch_invalidate_pmem((void __force *)
mmio->addr.aperture + offset, c);
memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 938eb4868f7f..3600ff786646 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -46,6 +46,8 @@ void dax_read_unlock(int id)
EXPORT_SYMBOL_GPL(dax_read_unlock);
#ifdef CONFIG_BLOCK
+#include <linux/blkdev.h>
+
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
{
@@ -59,6 +61,16 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
}
EXPORT_SYMBOL(bdev_dax_pgoff);
+#if IS_ENABLED(CONFIG_FS_DAX)
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
+{
+ if (!blk_queue_dax(bdev->bd_queue))
+ return NULL;
+ return fs_dax_get_by_host(bdev->bd_disk->disk_name);
+}
+EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
+#endif
+
/**
* __bdev_dax_supported() - Check if the device supports dax for filesystem
* @sb: The superblock of the device
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 60491641a8d6..d5612bd1cc81 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -31,6 +31,16 @@ enum log_ent_request {
LOG_OLD_ENT
};
+static struct device *to_dev(struct arena_info *arena)
+{
+ return &arena->nd_btt->dev;
+}
+
+static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
+{
+ return offset + nd_btt->initial_offset;
+}
+
static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
void *buf, size_t n, unsigned long flags)
{
@@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
- offset += arena->nd_btt->initial_offset;
+ offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}
@@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
- offset += arena->nd_btt->initial_offset;
+ offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}
@@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
* We rely on that to make sure rw_bytes does error clearing
* correctly, so make sure that is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
- WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
+ "arena->infooff: %#llx is unaligned\n", arena->infooff);
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
+ "arena->info2off: %#llx is unaligned\n", arena->info2off);
ret = arena_write_bytes(arena, arena->info2off, super,
sizeof(struct btt_sb), 0);
@@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{
- WARN_ON(!super);
return arena_read_bytes(arena, arena->infooff, super,
sizeof(struct btt_sb), 0);
}
@@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
{
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
- WARN_ON(lba >= arena->external_nlba);
+ if (unlikely(lba >= arena->external_nlba))
+ dev_err_ratelimited(to_dev(arena),
+ "%s: lba %#x out of range (max: %#x)\n",
+ __func__, lba, arena->external_nlba);
return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
}
@@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* This 'mapping' is supposed to be just the LBA mapping, without
* any flags set, so strip the flag bits.
*/
- mapping &= MAP_LBA_MASK;
+ mapping = ent_lba(mapping);
ze = (z_flag << 1) + e_flag;
switch (ze) {
@@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* construed as a valid 'normal' case, but we decide not to,
* to avoid confusion
*/
- WARN_ONCE(1, "Invalid use of Z and E flags\n");
+ dev_err_ratelimited(to_dev(arena),
+ "Invalid use of Z and E flags\n");
return -EIO;
}
@@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
u32 raw_mapping, postmap, ze, z_flag, e_flag;
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
- WARN_ON(lba >= arena->external_nlba);
+ if (unlikely(lba >= arena->external_nlba))
+ dev_err_ratelimited(to_dev(arena),
+ "%s: lba %#x out of range (max: %#x)\n",
+ __func__, lba, arena->external_nlba);
ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
if (ret)
@@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
raw_mapping = le32_to_cpu(in);
- z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
- e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
+ z_flag = ent_z_flag(raw_mapping);
+ e_flag = ent_e_flag(raw_mapping);
ze = (z_flag << 1) + e_flag;
- postmap = raw_mapping & MAP_LBA_MASK;
+ postmap = ent_lba(raw_mapping);
/* Reuse the {z,e}_flag variables for *trim and *error */
z_flag = 0;
@@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
static int btt_log_read_pair(struct arena_info *arena, u32 lane,
struct log_entry *ent)
{
- WARN_ON(!ent);
return arena_read_bytes(arena,
arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
2 * LOG_ENT_SIZE, 0);
@@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent)
return old;
}
-static struct device *to_dev(struct arena_info *arena)
-{
- return &arena->nd_btt->dev;
-}
-
/*
* This function copies the desired (old/new) log entry into ent if
* it is not NULL. It returns the sub-slot number (0 or 1)
@@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
if (++(arena->freelist[lane].seq) == 4)
arena->freelist[lane].seq = 1;
- arena->freelist[lane].block = le32_to_cpu(ent->old_map);
+ if (ent_e_flag(ent->old_map))
+ arena->freelist[lane].has_err = 1;
+ arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
return ret;
}
@@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
+ "arena->mapoff: %#llx is unaligned\n", arena->mapoff);
while (mapsize) {
size_t size = min(mapsize, chunk_size);
- WARN_ON_ONCE(size < 512);
+ dev_WARN_ONCE(to_dev(arena), size < 512,
+ "chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
size, 0);
if (ret)
@@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
- WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+ dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
+ "arena->logoff: %#llx is unaligned\n", arena->logoff);
while (logsize) {
size_t size = min(logsize, chunk_size);
- WARN_ON_ONCE(size < 512);
+ dev_WARN_ONCE(to_dev(arena), size < 512,
+ "chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
size, 0);
if (ret)
@@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena)
return ret;
}
+static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
+{
+ return arena->dataoff + ((u64)lba * arena->internal_lbasize);
+}
+
+static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
+{
+ int ret = 0;
+
+ if (arena->freelist[lane].has_err) {
+ void *zero_page = page_address(ZERO_PAGE(0));
+ u32 lba = arena->freelist[lane].block;
+ u64 nsoff = to_namespace_offset(arena, lba);
+ unsigned long len = arena->sector_size;
+
+ mutex_lock(&arena->err_lock);
+
+ while (len) {
+ unsigned long chunk = min(len, PAGE_SIZE);
+
+ ret = arena_write_bytes(arena, nsoff, zero_page,
+ chunk, 0);
+ if (ret)
+ break;
+ len -= chunk;
+ nsoff += chunk;
+ if (len == 0)
+ arena->freelist[lane].has_err = 0;
+ }
+ mutex_unlock(&arena->err_lock);
+ }
+ return ret;
+}
+
static int btt_freelist_init(struct arena_info *arena)
{
int old, new, ret;
@@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena)
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
arena->freelist[i].block = le32_to_cpu(log_new.old_map);
+ /*
+ * FIXME: if error clearing fails during init, we want to make
+ * the BTT read-only
+ */
+ if (ent_e_flag(log_new.old_map)) {
+ ret = arena_clear_freelist_error(arena, i);
+ if (ret)
+ dev_err_ratelimited(to_dev(arena),
+ "Unable to clear known errors\n");
+ }
+
/* This implies a newly created or untouched flog entry */
if (log_new.old_map == log_new.new_map)
continue;
@@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena)
if (ret)
return ret;
}
-
}
return 0;
@@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
if (!arena)
return NULL;
arena->nd_btt = btt->nd_btt;
+ arena->sector_size = btt->sector_size;
if (!size)
return arena;
@@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
goto out;
@@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap)
spin_unlock(&arena->map_locks[idx].lock);
}
-static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
-{
- return arena->dataoff + ((u64)lba * arena->internal_lbasize);
-}
-
static int btt_data_read(struct arena_info *arena, struct page *page,
unsigned int off, u32 lba, u32 len)
{
@@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
while (1) {
u32 new_map;
+ int new_t, new_e;
if (t_flag) {
zero_fill_data(page, off, cur_len);
@@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
barrier();
- ret = btt_map_read(arena, premap, &new_map, &t_flag,
- &e_flag, NVDIMM_IO_ATOMIC);
+ ret = btt_map_read(arena, premap, &new_map, &new_t,
+ &new_e, NVDIMM_IO_ATOMIC);
if (ret)
goto out_rtt;
- if (postmap == new_map)
+ if ((postmap == new_map) && (t_flag == new_t) &&
+ (e_flag == new_e))
break;
postmap = new_map;
+ t_flag = new_t;
+ e_flag = new_e;
}
ret = btt_data_read(arena, page, off, postmap, cur_len);
- if (ret)
+ if (ret) {
+ int rc;
+
+ /* Media error - set the e_flag */
+ rc = btt_map_write(arena, premap, postmap, 0, 1,
+ NVDIMM_IO_ATOMIC);
goto out_rtt;
+ }
if (bip) {
ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
@@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
return ret;
}
+/*
+ * Normally, arena_{read,write}_bytes will take care of the initial offset
+ * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
+ * we need the final, raw namespace offset here
+ */
+static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
+ u32 postmap)
+{
+ u64 nsoff = adjust_initial_offset(arena->nd_btt,
+ to_namespace_offset(arena, postmap));
+ sector_t phys_sector = nsoff >> 9;
+
+ return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
+}
+
static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
sector_t sector, struct page *page, unsigned int off,
unsigned int len)
@@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
while (len) {
u32 cur_len;
+ int e_flag;
+ retry:
lane = nd_region_acquire_lane(btt->nd_region);
ret = lba_to_arena(btt, sector, &premap, &arena);
@@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
goto out_lane;
}
+ if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
+ arena->freelist[lane].has_err = 1;
+
+ if (mutex_is_locked(&arena->err_lock)
+ || arena->freelist[lane].has_err) {
+ nd_region_release_lane(btt->nd_region, lane);
+
+ ret = arena_clear_freelist_error(arena, lane);
+ if (ret)
+ return ret;
+
+ /* OK to acquire a different lane/free block */
+ goto retry;
+ }
+
new_postmap = arena->freelist[lane].block;
/* Wait if the new block is being read from */
@@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
}
lock_map(arena, premap);
- ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+ ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
@@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
ret = -EIO;
goto out_map;
}
+ if (e_flag)
+ set_e_flag(old_postmap);
log.lba = cpu_to_le32(premap);
log.old_map = cpu_to_le32(old_postmap);
@@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
if (ret)
goto out_map;
- ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
+ ret = btt_map_write(arena, premap, new_postmap, 0, 0,
+ NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
unlock_map(arena, premap);
nd_region_release_lane(btt->nd_region, lane);
+ if (e_flag) {
+ ret = arena_clear_freelist_error(arena, lane);
+ if (ret)
+ return ret;
+ }
+
len -= cur_len;
off += cur_len;
sector += btt->sector_size >> SECTOR_SHIFT;
@@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
- BUG_ON(len > PAGE_SIZE);
- /* Make sure len is in multiples of sector size. */
- /* XXX is this right? */
- BUG_ON(len < btt->sector_size);
- BUG_ON(len % btt->sector_size);
+ if (len > PAGE_SIZE || len < btt->sector_size ||
+ len % btt->sector_size) {
+ dev_err_ratelimited(&btt->nd_btt->dev,
+ "unaligned bio segment (len: %d)\n", len);
+ bio->bi_status = BLK_STS_IOERR;
+ break;
+ }
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector);
@@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
{
int ret;
struct btt *btt;
+ struct nd_namespace_io *nsio;
struct device *dev = &nd_btt->dev;
btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
@@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
INIT_LIST_HEAD(&btt->arena_list);
mutex_init(&btt->init_lock);
btt->nd_region = nd_region;
+ nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
+ btt->phys_bb = &nsio->bb;
ret = discover_arenas(btt);
if (ret) {
@@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
}
btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
+ if (!btt_sb)
+ return -ENOMEM;
/*
* If this returns < 0, that is ok as it just means there wasn't
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 888e862907a0..578c2057524d 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -15,6 +15,7 @@
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
+#include <linux/badblocks.h>
#include <linux/types.h>
#define BTT_SIG_LEN 16
@@ -38,6 +39,11 @@
#define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001
+#define ent_lba(ent) (ent & MAP_LBA_MASK)
+#define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK))
+#define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK))
+#define set_e_flag(ent) (ent |= MAP_ERR_MASK)
+
enum btt_init_state {
INIT_UNCHECKED = 0,
INIT_NOTFOUND,
@@ -78,6 +84,7 @@ struct free_entry {
u32 block;
u8 sub;
u8 seq;
+ u8 has_err;
};
struct aligned_lock {
@@ -104,6 +111,7 @@ struct aligned_lock {
* handle incoming writes.
* @version_major: Metadata layout version major.
* @version_minor: Metadata layout version minor.
+ * @sector_size: The Linux sector size - 512 or 4096
* @nextoff: Offset in bytes to the start of the next arena.
* @infooff: Offset in bytes to the info block of this arena.
* @dataoff: Offset in bytes to the data area of this arena.
@@ -131,6 +139,7 @@ struct arena_info {
u32 nfree;
u16 version_major;
u16 version_minor;
+ u32 sector_size;
/* Byte offsets to the different on-media structures */
u64 nextoff;
u64 infooff;
@@ -147,6 +156,7 @@ struct arena_info {
struct dentry *debugfs_dir;
/* Arena flags */
u32 flags;
+ struct mutex err_lock;
};
/**
@@ -181,6 +191,7 @@ struct btt {
struct mutex init_lock;
int init_state;
int num_arenas;
+ struct badblocks *phys_bb;
};
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 3e359d282f8e..d58925295aa7 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -61,7 +61,7 @@ static ssize_t sector_size_show(struct device *dev,
{
struct nd_btt *nd_btt = to_nd_btt(dev);
- return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+ return nd_size_select_show(nd_btt->lbasize, btt_lbasize_supported, buf);
}
static ssize_t sector_size_store(struct device *dev,
@@ -72,7 +72,7 @@ static ssize_t sector_size_store(struct device *dev,
device_lock(dev);
nvdimm_bus_lock(dev);
- rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+ rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n");
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 937fafa1886a..baf283986a7e 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,6 +11,7 @@
* General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/sched/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
@@ -234,6 +235,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
struct nd_cmd_clear_error clear_err;
struct nd_cmd_ars_cap ars_cap;
u32 clear_err_unit, mask;
+ unsigned int noio_flag;
int cmd_rc, rc;
if (!nvdimm_bus)
@@ -250,8 +252,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&ars_cap, 0, sizeof(ars_cap));
ars_cap.address = phys;
ars_cap.length = len;
+ noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
sizeof(ars_cap), &cmd_rc);
+ memalloc_noio_restore(noio_flag);
if (rc < 0)
return rc;
if (cmd_rc < 0)
@@ -266,8 +270,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
memset(&clear_err, 0, sizeof(clear_err));
clear_err.address = phys;
clear_err.length = len;
+ noio_flag = memalloc_noio_save();
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
sizeof(clear_err), &cmd_rc);