diff options
| -rw-r--r-- | drivers/acpi/nfit/core.c | 210 | ||||
| -rw-r--r-- | drivers/acpi/nfit/mce.c | 24 | ||||
| -rw-r--r-- | drivers/acpi/nfit/nfit.h | 17 | ||||
| -rw-r--r-- | drivers/dax/Kconfig | 5 | ||||
| -rw-r--r-- | drivers/dax/dax.c | 577 | ||||
| -rw-r--r-- | drivers/dax/dax.h | 5 | ||||
| -rw-r--r-- | drivers/dax/pmem.c | 7 | ||||
| -rw-r--r-- | drivers/nvdimm/Kconfig | 2 | ||||
| -rw-r--r-- | drivers/nvdimm/bus.c | 2 | ||||
| -rw-r--r-- | drivers/nvdimm/core.c | 73 | ||||
| -rw-r--r-- | drivers/nvdimm/dimm.c | 11 | ||||
| -rw-r--r-- | drivers/nvdimm/dimm_devs.c | 226 | ||||
| -rw-r--r-- | drivers/nvdimm/label.c | 192 | ||||
| -rw-r--r-- | drivers/nvdimm/namespace_devs.c | 792 | ||||
| -rw-r--r-- | drivers/nvdimm/nd-core.h | 24 | ||||
| -rw-r--r-- | drivers/nvdimm/nd.h | 29 | ||||
| -rw-r--r-- | drivers/nvdimm/pmem.c | 28 | ||||
| -rw-r--r-- | drivers/nvdimm/region_devs.c | 75 | ||||
| -rw-r--r-- | fs/char_dev.c | 1 | ||||
| -rw-r--r-- | include/linux/libnvdimm.h | 28 | ||||
| -rw-r--r-- | include/linux/nd.h | 8 | ||||
| -rw-r--r-- | include/uapi/linux/magic.h | 1 | ||||
| -rw-r--r-- | include/uapi/linux/ndctl.h | 30 | ||||
| -rw-r--r-- | tools/testing/nvdimm/Kbuild | 1 | ||||
| -rw-r--r-- | tools/testing/nvdimm/test/iomap.c | 151 | ||||
| -rw-r--r-- | tools/testing/nvdimm/test/nfit.c | 160 | ||||
| -rw-r--r-- | tools/testing/nvdimm/test/nfit_test.h | 12 |
27 files changed, 1940 insertions, 751 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index e1d5ea6d5e40..71a7d07c28c9 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -886,6 +886,58 @@ static ssize_t revision_show(struct device *dev, } static DEVICE_ATTR_RO(revision); +static ssize_t hw_error_scrub_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%d\n", acpi_desc->scrub_mode); +} + +/* + * The 'hw_error_scrub' attribute can have the following values written to it: + * '0': Switch to the default mode where an exception will only insert + * the address of the memory error into the poison and badblocks lists. + * '1': Enable a full scrub to happen if an exception for a memory error is + * received. + */ +static ssize_t hw_error_scrub_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct nvdimm_bus_descriptor *nd_desc; + ssize_t rc; + long val; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nd_desc = dev_get_drvdata(dev); + if (nd_desc) { + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + switch (val) { + case HW_ERROR_SCRUB_ON: + acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON; + break; + case HW_ERROR_SCRUB_OFF: + acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF; + break; + default: + rc = -EINVAL; + break; + } + } + device_unlock(dev); + if (rc) + return rc; + return size; +} +static DEVICE_ATTR_RW(hw_error_scrub); + /* * This shows the number of full Address Range Scrubs that have been * completed since driver load time. Userspace can wait on this using @@ -958,6 +1010,7 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) static struct attribute *acpi_nfit_attributes[] = { &dev_attr_revision.attr, &dev_attr_scrub.attr, + &dev_attr_hw_error_scrub.attr, NULL, }; @@ -1256,6 +1309,44 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, return NULL; } +void __acpi_nvdimm_notify(struct device *dev, u32 event) +{ + struct nfit_mem *nfit_mem; + struct acpi_nfit_desc *acpi_desc; + + dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__, + event); + + if (event != NFIT_NOTIFY_DIMM_HEALTH) { + dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev), + event); + return; + } + + acpi_desc = dev_get_drvdata(dev->parent); + if (!acpi_desc) + return; + + /* + * If we successfully retrieved acpi_desc, then we know nfit_mem data + * is still valid. + */ + nfit_mem = dev_get_drvdata(dev); + if (nfit_mem && nfit_mem->flags_attr) + sysfs_notify_dirent(nfit_mem->flags_attr); +} +EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify); + +static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) +{ + struct acpi_device *adev = data; + struct device *dev = &adev->dev; + + device_lock(dev->parent); + __acpi_nvdimm_notify(dev, event); + device_unlock(dev->parent); +} + static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, u32 device_handle) { @@ -1280,6 +1371,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return force_enable_dimms ? 0 : -ENODEV; } + if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle, + ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) { + dev_err(dev, "%s: notification registration failed\n", + dev_name(&adev_dimm->dev)); + return -ENXIO; + } + /* * Until standardization materializes we need to consider 4 * different command sets. Note, that checking for function0 (bit0) @@ -1318,18 +1416,41 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } +static void shutdown_dimm_notify(void *data) +{ + struct acpi_nfit_desc *acpi_desc = data; + struct nfit_mem *nfit_mem; + + mutex_lock(&acpi_desc->init_mutex); + /* + * Clear out the nfit_mem->flags_attr and shut down dimm event + * notifications. + */ + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct acpi_device *adev_dimm = nfit_mem->adev; + + if (nfit_mem->flags_attr) { + sysfs_put(nfit_mem->flags_attr); + nfit_mem->flags_attr = NULL; + } + if (adev_dimm) + acpi_remove_notify_handler(adev_dimm->handle, + ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + } + mutex_unlock(&acpi_desc->init_mutex); +} + static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; - int dimm_count = 0; + int dimm_count = 0, rc; + struct nvdimm *nvdimm; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { struct acpi_nfit_flush_address *flush; unsigned long flags = 0, cmd_mask; - struct nvdimm *nvdimm; u32 device_handle; u16 mem_flags; - int rc; device_handle = __to_nfit_memdev(nfit_mem)->device_handle; nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); @@ -1382,7 +1503,30 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) } - return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); + rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); + if (rc) + return rc; + + /* + * Now that dimms are successfully registered, and async registration + * is flushed, attempt to enable event notification. + */ + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct kernfs_node *nfit_kernfs; + + nvdimm = nfit_mem->nvdimm; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); + if (nfit_kernfs) + nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, + "flags"); + sysfs_put(nfit_kernfs); + if (!nfit_mem->flags_attr) + dev_warn(acpi_desc->dev, "%s: notifications disabled\n", + nvdimm_name(nvdimm)); + } + + return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify, + acpi_desc); } static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) @@ -1491,9 +1635,9 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, if (!info) return -ENOMEM; for (i = 0; i < nr; i++) { - struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; struct nfit_set_info_map *map = &info->mapping[i]; - struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nvdimm *nvdimm = mapping->nvdimm; struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, spa->range_index, i); @@ -1917,7 +2061,7 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, } static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, - struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, + struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, struct nfit_spa *nfit_spa) { @@ -1934,12 +2078,12 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, return -ENODEV; } - nd_mapping->nvdimm = nvdimm; + mapping->nvdimm = nvdimm; switch (nfit_spa_type(spa)) { case NFIT_SPA_PM: case NFIT_SPA_VOLATILE: - nd_mapping->start = memdev->address; - nd_mapping->size = memdev->region_size; + mapping->start = memdev->address; + mapping->size = memdev->region_size; break; case NFIT_SPA_DCR: nfit_mem = nvdimm_provider_data(nvdimm); @@ -1947,13 +2091,13 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", spa->range_index, nvdimm_name(nvdimm)); } else { - nd_mapping->size = nfit_mem->bdw->capacity; - nd_mapping->start = nfit_mem->bdw->start_address; + mapping->size = nfit_mem->bdw->capacity; + mapping->start = nfit_mem->bdw->start_address; ndr_desc->num_lanes = nfit_mem->bdw->windows; blk_valid = 1; } - ndr_desc->nd_mapping = nd_mapping; + ndr_desc->mapping = mapping; ndr_desc->num_mappings = blk_valid; ndbr_desc = to_blk_region_desc(ndr_desc); ndbr_desc->enable = acpi_nfit_blk_region_enable; @@ -1979,7 +2123,7 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { - static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS]; + static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS]; struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc ndbr_desc; struct nd_region_desc *ndr_desc; @@ -1998,7 +2142,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } memset(&res, 0, sizeof(res)); - memset(&nd_mappings, 0, sizeof(nd_mappings)); + memset(&mappings, 0, sizeof(mappings)); memset(&ndbr_desc, 0, sizeof(ndbr_desc)); res.start = spa->address; res.end = res.start + spa->length - 1; @@ -2014,7 +2158,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; - struct nd_mapping *nd_mapping; + struct nd_mapping_desc *mapping; if (memdev->range_index != spa->range_index) continue; @@ -2023,14 +2167,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, spa->range_index, ND_MAX_MAPPINGS); return -ENXIO; } - nd_mapping = &nd_mappings[count++]; - rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, + mapping = &mappings[count++]; + rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc, memdev, nfit_spa); if (rc) goto out; } - ndr_desc->nd_mapping = nd_mappings; + ndr_desc->mapping = mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) @@ -2678,29 +2822,30 @@ static int acpi_nfit_remove(struct acpi_device *adev) return 0; } -static void acpi_nfit_notify(struct acpi_device *adev, u32 event) +void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) { - struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; - struct device *dev = &adev->dev; union acpi_object *obj; acpi_status status; int ret; dev_dbg(dev, "%s: event: %d\n", __func__, event); - device_lock(dev); + if (event != NFIT_NOTIFY_UPDATE) + return; + if (!dev->driver) { /* dev->driver may be null if we're being removed */ dev_dbg(dev, "%s: no driver found for dev\n", __func__); - goto out_unlock; + return; } if (!acpi_desc) { acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); if (!acpi_desc) - goto out_unlock; - acpi_nfit_desc_init(acpi_desc, &adev->dev); + return; + acpi_nfit_desc_init(acpi_desc, dev); } else { /* * Finish previous registration before considering new @@ -2710,10 +2855,10 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } /* Evaluate _FIT */ - status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); + status = acpi_evaluate_object(handle, "_FIT", NULL, &buf); if (ACPI_FAILURE(status)) { dev_err(dev, "failed to evaluate _FIT\n"); - goto out_unlock; + return; } obj = buf.pointer; @@ -2725,9 +2870,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } else dev_err(dev, "Invalid _FIT\n"); kfree(buf.pointer); +} +EXPORT_SYMBOL_GPL(__acpi_nfit_notify); - out_unlock: - device_unlock(dev); +static void acpi_nfit_notify(struct acpi_device *adev, u32 event) +{ + device_lock(&adev->dev); + __acpi_nfit_notify(&adev->dev, adev->handle, event); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 161f91539ae6..e5ce81c38eed 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -14,6 +14,7 @@ */ #include <linux/notifier.h> #include <linux/acpi.h> +#include <linux/nd.h> #include <asm/mce.h> #include "nfit.h" @@ -62,12 +63,25 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, } mutex_unlock(&acpi_desc->init_mutex); - /* - * We can ignore an -EBUSY here because if an ARS is already - * in progress, just let that be the last authoritative one - */ - if (found_match) + if (!found_match) + continue; + + /* If this fails due to an -ENOMEM, there is little we can do */ + nvdimm_bus_add_poison(acpi_desc->nvdimm_bus, + ALIGN(mce->addr, L1_CACHE_BYTES), + L1_CACHE_BYTES); + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + + if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { + /* + * We can ignore an -EBUSY here because if an ARS is + * already in progress, just let that be the last + * authoritative one + */ acpi_nfit_ars_rescan(acpi_desc); + } + break; } mutex_unlock(&acpi_desc_lock); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index e894ded24d99..14296f5267c8 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -78,6 +78,14 @@ enum { NFIT_ARS_TIMEOUT = 90, }; +enum nfit_root_notifiers { + NFIT_NOTIFY_UPDATE = 0x80, +}; + +enum nfit_dimm_notifiers { + NFIT_NOTIFY_DIMM_HEALTH = 0x81, +}; + struct nfit_spa { struct list_head list; struct nd_region *nd_region; @@ -124,6 +132,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct kernfs_node *flags_attr; struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; @@ -152,6 +161,7 @@ struct acpi_nfit_desc { struct list_head list; struct kernfs_node *scrub_count_state; unsigned int scrub_count; + unsigned int scrub_mode; unsigned int cancel:1; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; @@ -159,6 +169,11 @@ struct acpi_nfit_desc { void *iobuf, u64 len, int rw); }; +enum scrub_mode { + HW_ERROR_SCRUB_OFF, + HW_ERROR_SCRUB_ON, +}; + enum nd_blk_mmio_selector { BDW, DCR, @@ -223,5 +238,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); +void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event); +void __acpi_nvdimm_notify(struct device *dev, u32 event); void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index cedab7572de3..daadd20aa936 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -23,4 +23,9 @@ config DEV_DAX_PMEM Say Y if unsure +config NR_DEV_DAX + int "Maximum number of Device-DAX instances" + default 32768 + range 256 2147483647 + endif diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 29f600f2c447..0e499bfca41c 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -13,15 +13,25 @@ #include <linux/pagemap.h> #include <linux/module.h> #include <linux/device.h> +#include <linux/mount.h> #include <linux/pfn_t.h> +#include <linux/hash.h> +#include <linux/cdev.h> #include <linux/slab.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> +#include "dax.h" -static int dax_major; +static dev_t dax_devt; static struct class *dax_class; static DEFINE_IDA(dax_minor_ida); +static int nr_dax = CONFIG_NR_DEV_DAX; +module_param(nr_dax, int, S_IRUGO); +static struct vfsmount *dax_mnt; +static struct kmem_cache *dax_cache __read_mostly; +static struct super_block *dax_superblock __read_mostly; +MODULE_PARM_DESC(nr_dax, "max number of device-dax instances"); /** * struct dax_region - mapping infrastructure for dax devices @@ -48,7 +58,7 @@ struct dax_region { * struct dax_dev - subdivision of a dax region * @region - parent region * @dev - device backing the character device - * @kref - enable this data to be tracked in filp->private_data + * @cdev - core chardev data * @alive - !alive + rcu grace period == no new mappings can be established * @id - child id in the region * @num_resources - number of physical address extents in this device @@ -56,41 +66,139 @@ struct dax_region { */ struct dax_dev { struct dax_region *region; - struct device *dev; - struct kref kref; + struct inode *inode; + struct device dev; + struct cdev cdev; bool alive; int id; int num_resources; struct resource res[0]; }; -static void dax_region_free(struct kref *kref) +static struct inode *dax_alloc_inode(struct super_block *sb) { - struct dax_region *dax_region; + return kmem_cache_alloc(dax_cache, GFP_KERNEL); +} - dax_region = container_of(kref, struct dax_region, kref); - kfree(dax_region); +static void dax_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + kmem_cache_free(dax_cache, inode); } -void dax_region_put(struct dax_region *dax_region) +static void dax_destroy_inode(struct inode *inode) { - kref_put(&dax_region->kref, dax_region_free); + call_rcu(&inode->i_rcu, dax_i_callback); } -EXPORT_SYMBOL_GPL(dax_region_put); -static void dax_dev_free(struct kref *kref) +static const struct super_operations dax_sops = { + .statfs = simple_statfs, + .alloc_inode = dax_alloc_inode, + .destroy_inode = dax_destroy_inode, + .drop_inode = generic_delete_inode, +}; + +static struct dentry *dax_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - struct dax_dev *dax_dev; + return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); +} - dax_dev = container_of(kref, struct dax_dev, kref); - dax_region_put(dax_dev->region); - kfree(dax_dev); +static struct file_system_type dax_type = { + .name = "dax", + .mount = dax_mount, + .kill_sb = kill_anon_super, +}; + +static int dax_test(struct inode *inode, void *data) +{ + return inode->i_cdev == data; +} + +static int dax_set(struct inode *inode, void *data) +{ + inode->i_cdev = data; + return 0; +} + +static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt) +{ + struct inode *inode; + + inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), + dax_test, dax_set, cdev); + + if (!inode) + return NULL; + + if (inode->i_state & I_NEW) { + inode->i_mode = S_IFCHR; + inode->i_flags = S_DAX; + inode->i_rdev = devt; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); + unlock_new_inode(inode); + } + return inode; +} + +static void init_once(void *inode) +{ + inode_init_once(inode); +} + +static int dax_inode_init(void) +{ + int rc; + + dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0, + (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_ACCOUNT), + init_once); + if (!dax_cache) + return -ENOMEM; + + rc = register_filesystem(&dax_type); + if (rc) + goto err_register_fs; + + dax_mnt = kern_mount(&dax_type); + if (IS_ERR(dax_mnt)) { + rc = PTR_ERR(dax_mnt); + goto err_mount; + } + dax_superblock = dax_mnt->mnt_sb; + + return 0; + + err_mount: + unregister_filesystem(&dax_type); + err_register_fs: + kmem_cache_destroy(dax_cache); + + return rc; } -static void dax_dev_put(struct dax_dev *dax_dev) +static void dax_inode_exit(void) +{ + kern_unmount(dax_mnt); + unregister_filesystem(&dax_type); + kmem_cache_destroy(dax_cache); +} + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) { - kref_put(&dax_dev->kref, dax_dev_free); + kref_put(&dax_region->kref, dax_region_free); } +EXPORT_SYMBOL_GPL(dax_region_put); struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct resource *res, unsigned int align, void *addr, @@ -98,8 +206,11 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, { struct dax_region *dax_region; - dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + if (!IS_ALIGNED(res->start, align) + || !IS_ALIGNED(resource_size(res), align)) + return NULL; + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); if (!dax_region) return NULL; @@ -116,10 +227,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, } EXPORT_SYMBOL_GPL(alloc_dax_region); +static struct dax_dev *to_dax_dev(struct device *dev) +{ + return container_of(dev, struct dax_dev, dev); +} + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dax_dev *dax_dev = dev_get_drvdata(dev); + struct dax_dev *dax_dev = to_dax_dev(dev); unsigned long long size = 0; int i; @@ -144,180 +260,11 @@ static const struct attribute_group *dax_attribute_groups[] = { NULL, }; -static void unregister_dax_dev(void *_dev) -{ - struct device *dev = _dev; - struct dax_dev *dax_dev = dev_get_drvdata(dev); - struct dax_region *dax_region = dax_dev->region; - - dev_dbg(dev, "%s\n", __func__); - - /* - * Note, rcu is not protecting the liveness of dax_dev, rcu is - * ensuring that any fault handlers that might have seen - * dax_dev->alive == true, have completed. Any fault handlers - * that start after synchronize_rcu() has started will abort - * upon seeing dax_dev->alive == false. - */ - dax_dev->alive = false; - synchronize_rcu(); - - get_device(dev); - device_unregister(dev); - ida_simple_remove(&dax_region->ida, dax_dev->id); - ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); - put_device(dev); - dax_dev_put(dax_dev); -} - -int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, - int count) -{ - struct device *parent = dax_region->dev; - struct dax_dev *dax_dev; - struct device *dev; - int rc, minor; - dev_t dev_t; - - dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); - if (!dax_dev) - return -ENOMEM; - memcpy(dax_dev->res, res, sizeof(*res) * count); - dax_dev->num_resources = count; - kref_init(&dax_dev->kref); - dax_dev->alive = true; - dax_dev->region = dax_region; - kref_get(&dax_region->kref); - - dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); - if (dax_dev->id < 0) { - rc = dax_dev->id; - goto err_id; - } - - minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); - if (minor < 0) { - rc = minor; - goto err_minor; - } - - dev_t = MKDEV(dax_major, minor); - dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev, - dax_attribute_groups, "dax%d.%d", dax_region->id, - dax_dev->id); - if (IS_ERR(dev)) { - rc = PTR_ERR(dev); - goto err_create; - } - dax_dev->dev = dev; - - rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev); - if (rc) - return rc; - - return 0; - - err_create: - ida_simple_remove(&dax_minor_ida, minor); - err_minor: - ida_simple_remove(&dax_region->ida, dax_dev->id); - err_id: - dax_dev_put(dax_dev); - - return rc; -} -EXPORT_SYMBOL_GPL(devm_create_dax_dev); - -/* return an unmapped area aligned to the dax region specified alignment */ -static unsigned long dax_dev_get_unmapped_area(struct file *filp, - unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - unsigned long off, off_end, off_align, len_align, addr_align, align; - struct dax_dev *dax_dev = filp ? filp->private_data : NULL; - struct dax_region *dax_region; - - if (!dax_dev || addr) - goto out; - - dax_region = dax_dev->region; - align = dax_region->align; - off = pgoff << PAGE_SHIFT; - off_end = off + len; - off_align = round_up(off, align); - - if ((off_end <= off_align) || ((off_end - off_align) < align)) - goto out; - - len_align = len + align; - if ((off + len_align) < off) - goto out; - - addr_align = current->mm->get_unmapped_area(filp, addr, len_align, - pgoff, flags); - if (!IS_ERR_VALUE(addr_align)) { - addr_align += (off - addr_align) & (align - 1); - return addr_align; - } - out: - return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); -} - -static int __match_devt(struct device *dev, const void *data) -{ - const dev_t *devt = data; - - return dev->devt == *devt; -} - -static struct device *dax_dev_find(dev_t dev_t) -{ - return class_find_device(dax_class, NULL, &dev_t, __match_devt); -} - -static int dax_dev_open(struct inode *inode, struct file *filp) -{ - struct dax_dev *dax_dev = NULL; - struct device *dev; - - dev = dax_dev_find(inode->i_rdev); - if (!dev) - return -ENXIO; - - device_lock(dev); - dax_dev = dev_get_drvdata(dev); - if (dax_dev) { - dev_dbg(dev, "%s\n", __func__); - filp->private_data = dax_dev; - kref_get(&dax_dev->kref); - inode->i_flags = S_DAX; - } - device_unlock(dev); - - if (!dax_dev) { - put_device(dev); - return -ENXIO; - } - return 0; -} - -static int dax_dev_release(struct inode *inode, struct file *filp) -{ - struct dax_dev *dax_dev = filp->private_data; - struct device *dev = dax_dev->dev; - - dev_dbg(dax_dev->dev, "%s\n", __func__); - dax_dev_put(dax_dev); - put_device(dev); - - return 0; -} - static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, const char *func) { struct dax_region *dax_region = dax_dev->region; - struct device *dev = dax_dev->dev; + struct device *dev = &dax_dev->dev; unsigned long mask; if (!dax_dev->alive) @@ -382,7 +329,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, struct vm_fault *vmf) { unsigned long vaddr = (unsigned long) vmf->virtual_address; - struct device *dev = dax_dev->dev; + struct device *dev = &dax_dev->dev; struct dax_region *dax_region; int rc = VM_FAULT_SIGBUS; phys_addr_t phys; @@ -422,7 +369,7 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct file *filp = vma->vm_file; struct dax_dev *dax_dev = filp->private_data; - dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, + dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, current->comm, (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read", vma->vm_start, vma->vm_end); rcu_read_lock(); @@ -437,7 +384,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, unsigned int flags) { unsigned long pmd_addr = addr & PMD_MASK; - struct device *dev = dax_dev->dev; + struct device *dev = &dax_dev->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; @@ -479,7 +426,7 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, struct file *filp = vma->vm_file; struct dax_dev *dax_dev = filp->private_data; - dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, + dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, current->comm, (flags & FAULT_FLAG_WRITE) ? "write" : "read", vma->vm_start, vma->vm_end); @@ -490,81 +437,2 |
