summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/obsolete/sysfs-class-dax22
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c1
-rw-r--r--drivers/acpi/nfit/core.c8
-rw-r--r--drivers/acpi/numa.c1
-rw-r--r--drivers/base/memory.c1
-rw-r--r--drivers/dax/Kconfig28
-rw-r--r--drivers/dax/Makefile6
-rw-r--r--drivers/dax/bus.c503
-rw-r--r--drivers/dax/bus.h61
-rw-r--r--drivers/dax/dax-private.h34
-rw-r--r--drivers/dax/dax.h18
-rw-r--r--drivers/dax/device-dax.h25
-rw-r--r--drivers/dax/device.c363
-rw-r--r--drivers/dax/kmem.c108
-rw-r--r--drivers/dax/pmem.c153
-rw-r--r--drivers/dax/pmem/Makefile7
-rw-r--r--drivers/dax/pmem/compat.c73
-rw-r--r--drivers/dax/pmem/core.c71
-rw-r--r--drivers/dax/pmem/pmem.c40
-rw-r--r--drivers/dax/super.c41
-rw-r--r--drivers/nvdimm/e820.c1
-rw-r--r--drivers/nvdimm/nd.h2
-rw-r--r--drivers/nvdimm/of_pmem.c1
-rw-r--r--drivers/nvdimm/region_devs.c1
-rw-r--r--include/linux/acpi.h5
-rw-r--r--include/linux/libnvdimm.h1
-rw-r--r--kernel/resource.c18
-rw-r--r--mm/memory_hotplug.c32
-rw-r--r--tools/testing/nvdimm/Kbuild7
-rw-r--r--tools/testing/nvdimm/dax-dev.c16
30 files changed, 1111 insertions, 537 deletions
diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax
new file mode 100644
index 000000000000..2cb9fc5e8bd1
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-class-dax
@@ -0,0 +1,22 @@
+What: /sys/class/dax/
+Date: May, 2016
+KernelVersion: v4.7
+Contact: linux-nvdimm@lists.01.org
+Description: Device DAX is the device-centric analogue of Filesystem
+ DAX (CONFIG_FS_DAX). It allows memory ranges to be
+ allocated and mapped without need of an intervening file
+ system. Device DAX is strict, precise and predictable.
+ Specifically this interface:
+
+ 1/ Guarantees fault granularity with respect to a given
+ page size (pte, pmd, or pud) set at configuration time.
+
+ 2/ Enforces deterministic behavior by being strict about
+ what fault scenarios are supported.
+
+ The /sys/class/dax/ interface enumerates all the
+ device-dax instances in the system. The ABI is
+ deprecated and will be removed after 2020. It is
+ replaced with the DAX bus interface /sys/bus/dax/ where
+ device-dax instances can be found under
+ /sys/bus/dax/devices/
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index bba281b1fe1b..96c53b23e58f 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -239,6 +239,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.attr_groups = region_attr_groups;
ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+ ndr_desc.target_node = ndr_desc.numa_node;
ndr_desc.res = &p->res;
ndr_desc.of_node = p->dn;
ndr_desc.provider_data = p;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index df8979008dd4..5a389a4f4f65 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2956,11 +2956,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc->res = &res;
ndr_desc->provider_data = nfit_spa;
ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
- if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+ if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
ndr_desc->numa_node = acpi_map_pxm_to_online_node(
spa->proximity_domain);
- else
+ ndr_desc->target_node = acpi_map_pxm_to_node(
+ spa->proximity_domain);
+ } else {
ndr_desc->numa_node = NUMA_NO_NODE;
+ ndr_desc->target_node = NUMA_NO_NODE;
+ }
/*
* Persistence domain bits are hierarchical, if
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 7bbbf8256a41..867f6e3f2b4f 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm)
return node;
}
+EXPORT_SYMBOL(acpi_map_pxm_to_node);
/**
* acpi_map_pxm_to_online_node - Map proximity ID to online node
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 048cbf7d5233..cb8347500ce2 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -88,6 +88,7 @@ unsigned long __weak memory_block_size_bytes(void)
{
return MIN_MEMORY_BLOCK_SIZE;
}
+EXPORT_SYMBOL_GPL(memory_block_size_bytes);
static unsigned long get_memory_block_size(void)
{
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index e0700bf4893a..5ef624fe3934 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -23,12 +23,38 @@ config DEV_DAX
config DEV_DAX_PMEM
tristate "PMEM DAX: direct access to persistent memory"
depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
+ depends on m # until we can kill DEV_DAX_PMEM_COMPAT
default DEV_DAX
help
Support raw access to persistent memory. Note that this
driver consumes memory ranges allocated and exported by the
libnvdimm sub-system.
- Say Y if unsure
+ Say M if unsure
+
+config DEV_DAX_KMEM
+ tristate "KMEM DAX: volatile-use of persistent memory"
+ default DEV_DAX
+ depends on DEV_DAX
+ depends on MEMORY_HOTPLUG # for add_memory() and friends
+ help
+ Support access to persistent memory as if it were RAM. This
+ allows easier use of persistent memory by unmodified
+ applications.
+
+ To use this feature, a DAX device must be unbound from the
+ device_dax driver (PMEM DAX) and bound to this kmem driver
+ on each boot.
+
+ Say N if unsure.
+
+config DEV_DAX_PMEM_COMPAT
+ tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
+ depends on DEV_DAX_PMEM
+ default DEV_DAX_PMEM
+ help
+ Older versions of the libdaxctl library expect to find all
+ device-dax instances under /sys/class/dax. If libdaxctl in
+ your distribution is older than v58 say M, otherwise say N.
endif
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 574286fac87c..81f7d54dadfb 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
dax-y := super.o
-dax_pmem-y := pmem.o
+dax-y += bus.o
device_dax-y := device.o
+
+obj-y += pmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
new file mode 100644
index 000000000000..2109cfe80219
--- /dev/null
+++ b/drivers/dax/bus.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include "dax-private.h"
+#include "bus.h"
+
+static struct class *dax_class;
+
+static DEFINE_MUTEX(dax_bus_lock);
+
+#define DAX_NAME_LEN 30
+struct dax_id {
+ struct list_head list;
+ char dev_name[DAX_NAME_LEN];
+};
+
+static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ /*
+ * We only ever expect to handle device-dax instances, i.e. the
+ * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
+ */
+ return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
+}
+
+static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
+{
+ return container_of(drv, struct dax_device_driver, drv);
+}
+
+static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
+ const char *dev_name)
+{
+ struct dax_id *dax_id;
+
+ lockdep_assert_held(&dax_bus_lock);
+
+ list_for_each_entry(dax_id, &dax_drv->ids, list)
+ if (sysfs_streq(dax_id->dev_name, dev_name))
+ return dax_id;
+ return NULL;
+}
+
+static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
+{
+ int match;
+
+ mutex_lock(&dax_bus_lock);
+ match = !!__dax_match_id(dax_drv, dev_name(dev));
+ mutex_unlock(&dax_bus_lock);
+
+ return match;
+}
+
+enum id_action {
+ ID_REMOVE,
+ ID_ADD,
+};
+
+static ssize_t do_id_store(struct device_driver *drv, const char *buf,
+ size_t count, enum id_action action)
+{
+ struct dax_device_driver *dax_drv = to_dax_drv(drv);
+ unsigned int region_id, id;
+ char devname[DAX_NAME_LEN];
+ struct dax_id *dax_id;
+ ssize_t rc = count;
+ int fields;
+
+ fields = sscanf(buf, "dax%d.%d", &region_id, &id);
+ if (fields != 2)
+ return -EINVAL;
+ sprintf(devname, "dax%d.%d", region_id, id);
+ if (!sysfs_streq(buf, devname))
+ return -EINVAL;
+
+ mutex_lock(&dax_bus_lock);
+ dax_id = __dax_match_id(dax_drv, buf);
+ if (!dax_id) {
+ if (action == ID_ADD) {
+ dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
+ if (dax_id) {
+ strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
+ list_add(&dax_id->list, &dax_drv->ids);
+ } else
+ rc = -ENOMEM;
+ } else
+ /* nothing to remove */;
+ } else if (action == ID_REMOVE) {
+ list_del(&dax_id->list);
+ kfree(dax_id);
+ } else
+ /* dax_id already added */;
+ mutex_unlock(&dax_bus_lock);
+
+ if (rc < 0)
+ return rc;
+ if (action == ID_ADD)
+ rc = driver_attach(drv);
+ if (rc)
+ return rc;
+ return count;
+}
+
+static ssize_t new_id_store(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ return do_id_store(drv, buf, count, ID_ADD);
+}
+static DRIVER_ATTR_WO(new_id);
+
+static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ return do_id_store(drv, buf, count, ID_REMOVE);
+}
+static DRIVER_ATTR_WO(remove_id);
+
+static struct attribute *dax_drv_attrs[] = {
+ &driver_attr_new_id.attr,
+ &driver_attr_remove_id.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(dax_drv);
+
+static int dax_bus_match(struct device *dev, struct device_driver *drv);
+
+static struct bus_type dax_bus_type = {
+ .name = "dax",
+ .uevent = dax_bus_uevent,
+ .match = dax_bus_match,
+ .drv_groups = dax_drv_groups,
+};
+
+static int dax_bus_match(struct device *dev, struct device_driver *drv)
+{
+ struct dax_device_driver *dax_drv = to_dax_drv(drv);
+
+ /*
+ * All but the 'device-dax' driver, which has 'match_always'
+ * set, requires an exact id match.
+ */
+ if (dax_drv->match_always)
+ return 1;
+
+ return dax_match_id(dax_drv, dev);
+}
+
+/*
+ * Rely on the fact that drvdata is set before the attributes are
+ * registered, and that the attributes are unregistered before drvdata
+ * is cleared to assume that drvdata is always valid.
+ */
+static ssize_t id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dax_region *dax_region = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", dax_region->id);
+}
+static DEVICE_ATTR_RO(id);
+
+static ssize_t region_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dax_region *dax_region = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%llu\n", (unsigned long long)
+ resource_size(&dax_region->res));
+}
+static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
+ region_size_show, NULL);
+
+static ssize_t align_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dax_region *dax_region = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%u\n", dax_region->align);
+}
+static DEVICE_ATTR_RO(align);
+
+static struct attribute *dax_region_attributes[] = {
+ &dev_attr_region_size.attr,
+ &dev_attr_align.attr,
+ &dev_attr_id.attr,
+ NULL,
+};
+
+static const struct attribute_group dax_region_attribute_group = {
+ .name = "dax_region",
+ .attrs = dax_region_attributes,
+};
+
+static const struct attribute_group *dax_region_attribute_groups[] = {
+ &dax_region_attribute_group,
+ NULL,
+};
+
+static void dax_region_free(struct kref *kref)
+{
+ struct dax_region *dax_region;
+
+ dax_region = container_of(kref, struct dax_region, kref);
+ kfree(dax_region);
+}
+
+void dax_region_put(struct dax_region *dax_region)
+{
+ kref_put(&dax_region->kref, dax_region_free);
+}
+EXPORT_SYMBOL_GPL(dax_region_put);
+
+static void dax_region_unregister(void *region)
+{
+ struct dax_region *dax_region = region;
+
+ sysfs_remove_groups(&dax_region->dev->kobj,
+ dax_region_attribute_groups);
+ dax_region_put(dax_region);
+}
+
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+ struct resource *res, int target_node, unsigned int align,
+ unsigned long pfn_flags)
+{
+ struct dax_region *dax_region;
+
+ /*
+ * The DAX core assumes that it can store its private data in
+ * parent->driver_data. This WARN is a reminder / safeguard for
+ * developers of device-dax drivers.
+ */
+ if (dev_get_drvdata(parent)) {
+ dev_WARN(parent, "dax core failed to setup private data\n");
+ return NULL;
+ }
+
+ if (!IS_ALIGNED(res->start, align)
+ || !IS_ALIGNED(resource_size(res), align))
+ return NULL;
+
+ dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
+ if (!dax_region)
+ return NULL;
+
+ dev_set_drvdata(parent, dax_region);
+ memcpy(&dax_region->res, res, sizeof(*res));
+ dax_region->pfn_flags = pfn_flags;
+ kref_init(&dax_region->kref);
+ dax_region->id = region_id;
+ dax_region->align = align;
+ dax_region->dev = parent;
+ dax_region->target_node = target_node;
+ if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
+ kfree(dax_region);
+ return NULL;
+ }
+
+ kref_get(&dax_region->kref);
+ if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
+ return NULL;
+ return dax_region;
+}
+EXPORT_SYMBOL_GPL(alloc_dax_region);
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+ unsigned long long size = resource_size(&dev_dax->region->res);
+
+ return sprintf(buf, "%llu\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static int dev_dax_target_node(struct dev_dax *dev_dax)
+{
+ struct dax_region *dax_region = dev_dax->region;
+
+ return dax_region->target_node;
+}
+
+static ssize_t target_node_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
+}
+static DEVICE_ATTR_RO(target_node);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ /*
+ * We only ever expect to handle device-dax instances, i.e. the
+ * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
+ */
+ return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
+ return 0;
+ return a->mode;
+}
+
+static struct attribute *dev_dax_attributes[] = {
+ &dev_attr_modalias.attr,
+ &dev_attr_size.attr,
+ &dev_attr_target_node.attr,
+ NULL,
+};
+
+static const struct attribute_group dev_dax_attribute_group = {
+ .attrs = dev_dax_attributes,
+ .is_visible = dev_dax_visible,
+};
+
+static const struct attribute_group *dax_attribute_groups[] = {
+ &dev_dax_attribute_group,
+ NULL,
+};
+
+void kill_dev_dax(struct dev_dax *dev_dax)
+{
+ struct dax_device *dax_dev = dev_dax->dax_dev;
+ struct inode *inode = dax_inode(dax_dev);
+
+ kill_dax(dax_dev);
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+}
+EXPORT_SYMBOL_GPL(kill_dev_dax);
+
+static void dev_dax_release(struct device *dev)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+ struct dax_region *dax_region = dev_dax->region;
+ struct dax_device *dax_dev = dev_dax->dax_dev;
+
+ dax_region_put(dax_region);
+ put_dax(dax_dev);
+ kfree(dev_dax);
+}
+
+static void unregister_dev_dax(void *dev)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ kill_dev_dax(dev_dax);
+ device_del(dev);
+ put_device(dev);
+}
+
+struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
+ struct dev_pagemap *pgmap, enum dev_dax_subsys subsys)
+{
+ struct device *parent = dax_region->dev;
+ struct dax_device *dax_dev;
+ struct dev_dax *dev_dax;
+ struct inode *inode;
+ struct device *dev;
+ int rc = -ENOMEM;
+
+ if (id < 0)
+ return ERR_PTR(-EINVAL);
+
+ dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
+ if (!dev_dax)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
+
+ /*
+ * No 'host' or dax_operations since there is no access to this
+ * device outside of mmap of the resulting character device.
+ */
+ dax_dev = alloc_dax(dev_dax, NULL, NULL);
+ if (!dax_dev)
+ goto err;
+
+ /* a device_dax instance is dead while the driver is not attached */
+ kill_dax(dax_dev);
+
+ /* from here on we're committed to teardown via dax_dev_release() */
+ dev = &dev_dax->dev;
+ device_initialize(dev);
+
+ dev_dax->dax_dev = dax_dev;
+ dev_dax->region = dax_region;
+ dev_dax->target_node = dax_region->target_node;
+ kref_get(&dax_region->kref);
+
+ inode = dax_inode(dax_dev);
+ dev->devt = inode->i_rdev;
+ if (subsys == DEV_DAX_BUS)
+ dev->bus = &dax_bus_type;
+ else
+ dev->class = dax_class;
+ dev->parent = parent;
+ dev->groups = dax_attribute_groups;
+ dev->release = dev_dax_release;
+ dev_set_name(dev, "dax%d.%d", dax_region->id, id);
+
+ rc = device_add(dev);
+ if (rc) {
+ kill_dev_dax(dev_dax);
+ put_device(dev);
+ return ERR_PTR(rc);
+ }
+
+ rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return dev_dax;
+
+ err:
+ kfree(dev_dax);
+
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(__devm_create_dev_dax);
+
+static int match_always_count;
+
+int __dax_driver_register(struct dax_device_driver *dax_drv,
+ struct module *module, const char *mod_name)
+{
+ struct device_driver *drv = &dax_drv->drv;
+ int rc = 0;
+
+ INIT_LIST_HEAD(&dax_drv->ids);
+ drv->owner = module;
+ drv->name = mod_name;
+ drv->mod_name = mod_name;
+ drv->bus = &dax_bus_type;
+
+ /* there can only be one default driver */
+ mutex_lock(&dax_bus_lock);
+ match_always_count += dax_drv->match_always;
+ if (match_always_count > 1) {
+ match_always_count--;
+ WARN_ON(1);
+ rc = -EINVAL;
+ }
+ mutex_unlock(&dax_bus_lock);
+ if (rc)
+ return rc;
+ return driver_register(drv);
+}
+EXPORT_SYMBOL_GPL(__dax_driver_register);
+
+void dax_driver_unregister(struct dax_device_driver *dax_drv)
+{
+ struct device_driver *drv = &dax_drv->drv;
+ struct dax_id *dax_id, *_id;
+
+ mutex_lock(&dax_bus_lock);
+ match_always_count -= dax_drv->match_always;
+ list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
+ list_del(&dax_id->list);
+ kfree(dax_id);
+ }
+ mutex_unlock(&dax_bus_lock);
+ driver_unregister(drv);
+}
+EXPORT_SYMBOL_GPL(dax_driver_unregister);
+
+int __init dax_bus_init(void)
+{
+ int rc;
+
+ if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
+ dax_class = class_create(THIS_MODULE, "dax");
+ if (IS_ERR(dax_class))
+ return PTR_ERR(dax_class);
+ }
+
+ rc = bus_register(&dax_bus_type);
+ if (rc)
+ class_destroy(dax_class);
+ return rc;
+}
+
+void __exit dax_bus_exit(void)
+{
+ bus_unregister(&dax_bus_type);
+ class_destroy(dax_class);
+}
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
new file mode 100644
index 000000000000..8619e3299943
--- /dev/null
+++ b/drivers/dax/bus.h
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
+#ifndef __DAX_BUS_H__
+#define __DAX_BUS_H__
+#include <linux/device.h>
+
+struct dev_dax;
+struct resource;
+struct dax_device;
+struct dax_region;
+void dax_region_put(struct dax_region *dax_region);
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+ struct resource *res, int target_node, unsigned int align,
+ unsigned long flags);
+
+enum dev_dax_subsys {
+ DEV_DAX_BUS,
+ DEV_DAX_CLASS,
+};
+
+struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
+ struct dev_pagemap *pgmap, enum dev_dax_subsys subsys);
+
+static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
+ int id, struct dev_pagemap *pgmap)
+{
+ return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
+}
+
+/* to be deleted when DEV_DAX_CLASS is removed */
+struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
+
+struct dax_device_driver {
+ struct device_driver drv;
+ struct list_head ids;
+ int match_always;
+};
+
+int __dax_driver_register(struct dax_device_driver *dax_drv,
+ struct module *module, const char *mod_name);
+#define dax_driver_register(driver) \
+ __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+void dax_driver_unregister(struct dax_device_driver *dax_drv);
+void kill_dev_dax(struct dev_dax *dev_dax);
+
+#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
+int dev_dax_probe(struct device *dev);
+#endif
+
+/*
+ * While run_dax() is potentially a generic operation that could be
+ * defined in include/linux/dax.h we don't want to grow any users
+ * outside of drivers/dax/
+ */
+void run_dax(struct dax_device *dax_dev);
+
+#define MODULE_ALIAS_DAX_DEVICE(type) \
+ MODULE_ALIAS("dax:t" __stringify(type) "*")
+#define DAX_DEVICE_MODALIAS_FMT "dax:t%d"
+
+#endif /* __DAX_BUS_H__ */
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index b6fc4f04636d..a45612148ca0 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -16,10 +16,17 @@
#include <linux/device.h>
#include <linux/cdev.h>
+/* private routines between core files */
+struct dax_device;
+struct dax_device *inode_dax(struct inode *inode);
+struct inode *dax_inode(struct dax_device *dax_dev);
+int dax_bus_init(void);
+void dax_bus_exit(void);
+
/**
* struct dax_region - mapping infrastructure for dax devices
* @id: kernel-wide unique region for a memory range
- * @base: linear address corresponding to @res
+ * @target_node: effective numa node if this memory range is onlined
* @kref: to pin while other agents have a need to do lookups
* @dev: parent device backing this region
* @align: allocation and mapping alignment for child dax devices
@@ -28,8 +35,7 @@
*/
struct dax_region {
int id;
- struct ida ida;
- void *base;
+ int target_node;
struct kref kref;
struct device *dev;
unsigned int align;
@@ -38,20 +44,28 @@ struct dax_region {
};
/**
- * struct dev_dax - instance data for a subdivision of a dax region
+ * struct dev_dax - instance data for a subdivision of a dax region, and
+ * data while the device is activated in the driver.
* @region - parent region
* @dax_dev - core dax functionality
+ * @target_node: effective numa node if dev_dax memory range is onlined
* @dev - device core
- * @id - child id in the region
- * @num_resources - number of physical address extents in this device
- * @res - array of physical address ranges
+ * @pgmap - pgmap for memmap setup / lifetime (driver owned)
+ * @ref: pgmap reference count (driver owned)
+ * @cmp: @ref final put completion (driver owned)
*/
struct dev_dax {
struct dax_region *region;
struct dax_device *dax_dev;
+ int target_node;
struct device dev;
- int id;
- int num_resources;
- struct resource res[0];
+ struct dev_pagemap pgmap;
+ struct percpu_ref ref;
+ struct completion cmp;
};
+
+static inline struct dev_dax *to_dev_dax(struct device *dev)
+{
+ return container_of(dev, struct dev_dax, dev);
+}
#endif
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
deleted file mode 100644
index f9e5feea742c..000000000000
--- a/drivers/dax/dax.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#ifndef __DAX_H__
-#define __DAX_H__
-struct dax_device;
-struct dax_device *inode_dax(struct inode *inode);
-struct inode *dax_inode(struct dax_device *dax_dev);
-#endif /* __DAX_H__ */
diff --git a/drivers/dax/device-dax.h b/drivers/dax/device-dax.h
deleted file mode 100644
index 688b051750bd..000000000000
--- a/drivers/dax/device-dax.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#ifndef __DEVICE_DAX_H__
-#define __DEVICE_DAX_H__
-struct device;
-struct dev_dax;
-struct resource;
-struct dax_region;
-void dax_region_put(struct dax_region *dax_region);
-struct dax_region *alloc_dax_region(struct device *parent,
- int region_id, struct resource *res, unsigned int align,
- void *addr, unsigned long flags);
-struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
- int id, struct resource *res, int count);
-#endif /* __DEVICE_DAX_H__ */
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 948806e57cee..e428468ab661 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -1,15 +1,6 @@
-/*
- * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
+#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
@@ -21,161 +12,39 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include "dax-private.h"
-#include "dax.h"
+#include "bus.h"
-static struct class *dax_class;
-
-/*
- * Rely on the fact that drvdata is set before the attributes are
- * registered, and that the attributes are unregistered before drvdata
- * is cleared to assume that drvdata is always valid.
- */
-static ssize_t id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dax_region *dax_region = dev_get_drvdata(dev);
-
- return sprintf(buf, "%d\n", dax_region->id);
-}
-static DEVICE_ATTR_RO(id);
-
-static ssize_t region_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dax_region *dax_region = dev_get_drvdata(dev);
-
- return sprintf(buf, "%llu\n", (unsigned long long)
- resource_size(&dax_region->res));
-}
-static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
- region_size_show, NULL);
-
-static ssize_t align_show(struct device *dev,
-