diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 11:07:26 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 11:07:26 -0700 |
| commit | c235698355fa94df7073b51befda7d4be00a0e23 (patch) | |
| tree | 3cb9d2fa40862f2484eda9c148b4326505a8534d /drivers | |
| parent | 5e2e7383b57fa03ec2b00c82bb7f49a4a707c1f7 (diff) | |
| parent | 1cd8a2537eb07751d405ab7e2223f20338a90506 (diff) | |
| download | linux-c235698355fa94df7073b51befda7d4be00a0e23.tar.gz linux-c235698355fa94df7073b51befda7d4be00a0e23.tar.bz2 linux-c235698355fa94df7073b51befda7d4be00a0e23.zip | |
Merge tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
"Compute Express Link (CXL) updates for 6.0:
- Introduce a 'struct cxl_region' object with support for
provisioning and assembling persistent memory regions.
- Introduce alloc_free_mem_region() to accompany the existing
request_free_mem_region() as a method to allocate physical memory
capacity out of an existing resource.
- Export insert_resource_expand_to_fit() for the CXL subsystem to
late-publish CXL platform windows in iomem_resource.
- Add a polled mode PCI DOE (Data Object Exchange) driver service and
use it in cxl_pci to retrieve the CDAT (Coherent Device Attribute
Table)"
* tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (74 commits)
cxl/hdm: Fix skip allocations vs multiple pmem allocations
cxl/region: Disallow region granularity != window granularity
cxl/region: Fix x1 interleave to greater than x1 interleave routing
cxl/region: Move HPA setup to cxl_region_attach()
cxl/region: Fix decoder interleave programming
Documentation: cxl: remove dangling kernel-doc reference
cxl/region: describe targets and nr_targets members of cxl_region_params
cxl/regions: add padding for cxl_rr_ep_add nested lists
cxl/region: Fix IS_ERR() vs NULL check
cxl/region: Fix region reference target accounting
cxl/region: Fix region commit uninitialized variable warning
cxl/region: Fix port setup uninitialized variable warnings
cxl/region: Stop initializing interleave granularity
cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime
cxl/acpi: Minimize granularity for x1 interleaves
cxl/region: Delete 'region' attribute from root decoders
cxl/acpi: Autoload driver for 'cxl_acpi' test devices
cxl/region: decrement ->nr_targets on error in cxl_region_attach()
cxl/region: prevent underflow in ways_to_cxl()
cxl/region: uninitialized variable in alloc_hpa()
...
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/cxl/Kconfig | 9 | ||||
| -rw-r--r-- | drivers/cxl/acpi.c | 243 | ||||
| -rw-r--r-- | drivers/cxl/core/Makefile | 1 | ||||
| -rw-r--r-- | drivers/cxl/core/core.h | 51 | ||||
| -rw-r--r-- | drivers/cxl/core/hdm.c | 691 | ||||
| -rw-r--r-- | drivers/cxl/core/mbox.c | 95 | ||||
| -rw-r--r-- | drivers/cxl/core/memdev.c | 4 | ||||
| -rw-r--r-- | drivers/cxl/core/pci.c | 181 | ||||
| -rw-r--r-- | drivers/cxl/core/pmem.c | 4 | ||||
| -rw-r--r-- | drivers/cxl/core/port.c | 738 | ||||
| -rw-r--r-- | drivers/cxl/core/region.c | 1896 | ||||
| -rw-r--r-- | drivers/cxl/cxl.h | 312 | ||||
| -rw-r--r-- | drivers/cxl/cxlmem.h | 42 | ||||
| -rw-r--r-- | drivers/cxl/cxlpci.h | 1 | ||||
| -rw-r--r-- | drivers/cxl/mem.c | 49 | ||||
| -rw-r--r-- | drivers/cxl/pci.c | 46 | ||||
| -rw-r--r-- | drivers/cxl/pmem.c | 259 | ||||
| -rw-r--r-- | drivers/cxl/port.c | 53 | ||||
| -rw-r--r-- | drivers/nvdimm/region_devs.c | 28 | ||||
| -rw-r--r-- | drivers/pci/Kconfig | 3 | ||||
| -rw-r--r-- | drivers/pci/Makefile | 1 | ||||
| -rw-r--r-- | drivers/pci/doe.c | 536 | ||||
| -rw-r--r-- | drivers/pci/probe.c | 2 |
23 files changed, 4828 insertions, 417 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index f64e3984689f..768ced3d6fe8 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -2,6 +2,7 @@ menuconfig CXL_BUS tristate "CXL (Compute Express Link) Devices Support" depends on PCI + select PCI_DOE help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -102,4 +103,12 @@ config CXL_SUSPEND def_bool y depends on SUSPEND && CXL_MEM +config CXL_REGION + bool + default CXL_BUS + # For MAX_PHYSMEM_BITS + depends on SPARSEMEM + select MEMREGION + select GET_FREE_REGION + endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40286f5df812..fb649683dd3a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -9,10 +9,6 @@ #include "cxlpci.h" #include "cxl.h" -/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ -#define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways) -#define CFMWS_INTERLEAVE_GRANULARITY(x) ((x)->granularity + 8) - static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; @@ -34,7 +30,8 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) static int cxl_acpi_cfmws_verify(struct device *dev, struct acpi_cedt_cfmws *cfmws) { - int expected_len; + int rc, expected_len; + unsigned int ways; if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); @@ -51,14 +48,14 @@ static int cxl_acpi_cfmws_verify(struct device *dev, return -EINVAL; } - if (CFMWS_INTERLEAVE_WAYS(cfmws) > CXL_DECODER_MAX_INTERLEAVE) { - dev_err(dev, "CFMWS Interleave Ways (%d) too large\n", - CFMWS_INTERLEAVE_WAYS(cfmws)); + rc = cxl_to_ways(cfmws->interleave_ways, &ways); + if (rc) { + dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n", + cfmws->interleave_ways); return -EINVAL; } - expected_len = struct_size((cfmws), interleave_targets, - CFMWS_INTERLEAVE_WAYS(cfmws)); + expected_len = struct_size(cfmws, interleave_targets, ways); if (cfmws->header.length < expected_len) { dev_err(dev, "CFMWS length %d less than expected %d\n", @@ -76,6 +73,8 @@ static int cxl_acpi_cfmws_verify(struct device *dev, struct cxl_cfmws_context { struct device *dev; struct cxl_port *root_port; + struct resource *cxl_res; + int id; }; static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, @@ -84,10 +83,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; + struct resource *cxl_res = ctx->cxl_res; + struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; struct acpi_cedt_cfmws *cfmws; struct cxl_decoder *cxld; - int rc, i; + unsigned int ways, i, ig; + struct resource *res; + int rc; cfmws = (struct acpi_cedt_cfmws *) header; @@ -99,19 +102,51 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return 0; } - for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++) + rc = cxl_to_ways(cfmws->interleave_ways, &ways); + if (rc) + return rc; + rc = cxl_to_granularity(cfmws->granularity, &ig); + if (rc) + return rc; + for (i = 0; i < ways; i++) target_map[i] = cfmws->interleave_targets[i]; - cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); - if (IS_ERR(cxld)) + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++); + if (!res->name) + goto err_name; + + res->start = cfmws->base_hpa; + res->end = cfmws->base_hpa + cfmws->window_size - 1; + res->flags = IORESOURCE_MEM; + + /* add to the local resource tracking to establish a sort order */ + rc = insert_resource(cxl_res, res); + if (rc) + goto err_insert; + + cxlrd = cxl_root_decoder_alloc(root_port, ways); + if (IS_ERR(cxlrd)) return 0; + cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); cxld->target_type = CXL_DECODER_EXPANDER; - cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa, - cfmws->window_size); - cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws); - cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws); + cxld->hpa_range = (struct range) { + .start = res->start, + .end = res->end, + }; + cxld->interleave_ways = ways; + /* + * Minimize the x1 granularity to advertise support for any + * valid region granularity + */ + if (ways == 1) + ig = CXL_DECODER_MIN_GRANULARITY; + cxld->interleave_granularity = ig; rc = cxl_decoder_add(cxld, target_map); if (rc) @@ -119,15 +154,22 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, else rc = cxl_decoder_autoremove(dev, cxld); if (rc) { - dev_err(dev, "Failed to add decoder for %pr\n", - &cxld->platform_res); + dev_err(dev, "Failed to add decode range [%#llx - %#llx]\n", + cxld->hpa_range.start, cxld->hpa_range.end); return 0; } - dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev), - phys_to_target_node(cxld->platform_res.start), - &cxld->platform_res); + dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", + dev_name(&cxld->dev), + phys_to_target_node(cxld->hpa_range.start), + cxld->hpa_range.start, cxld->hpa_range.end); return 0; + +err_insert: + kfree(res->name); +err_name: + kfree(res); + return -ENOMEM; } __mock struct acpi_device *to_cxl_host_bridge(struct device *host, @@ -175,8 +217,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) if (rc) return rc; - port = devm_cxl_add_port(host, match, dport->component_reg_phys, - root_port); + port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport); if (IS_ERR(port)) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); @@ -282,9 +323,127 @@ static void cxl_acpi_lock_reset_class(void *dev) device_lock_reset_class(dev); } +static void del_cxl_resource(struct resource *res) +{ + kfree(res->name); + kfree(res); +} + +static void cxl_set_public_resource(struct resource *priv, struct resource *pub) +{ + priv->desc = (unsigned long) pub; +} + +static struct resource *cxl_get_public_resource(struct resource *priv) +{ + return (struct resource *) priv->desc; +} + +static void remove_cxl_resources(void *data) +{ + struct resource *res, *next, *cxl = data; + + for (res = cxl->child; res; res = next) { + struct resource *victim = cxl_get_public_resource(res); + + next = res->sibling; + remove_resource(res); + + if (victim) { + remove_resource(victim); + kfree(victim); + } + + del_cxl_resource(res); + } +} + +/** + * add_cxl_resources() - reflect CXL fixed memory windows in iomem_resource + * @cxl_res: A standalone resource tree where each CXL window is a sibling + * + * Walk each CXL window in @cxl_res and add it to iomem_resource potentially + * expanding its boundaries to ensure that any conflicting resources become + * children. If a window is expanded it may then conflict with a another window + * entry and require the window to be truncated or trimmed. Consider this + * situation: + * + * |-- "CXL Window 0" --||----- "CXL Window 1" -----| + * |--------------- "System RAM" -------------| + * + * ...where platform firmware has established as System RAM resource across 2 + * windows, but has left some portion of window 1 for dynamic CXL region + * provisioning. In this case "Window 0" will span the entirety of the "System + * RAM" span, and "CXL Window 1" is truncated to the remaining tail past the end + * of that "System RAM" resource. + */ +static int add_cxl_resources(struct resource *cxl_res) +{ + struct resource *res, *new, *next; + + for (res = cxl_res->child; res; res = next) { + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->name = res->name; + new->start = res->start; + new->end = res->end; + new->flags = IORESOURCE_MEM; + new->desc = IORES_DESC_CXL; + + /* + * Record the public resource in the private cxl_res tree for + * later removal. + */ + cxl_set_public_resource(res, new); + + insert_resource_expand_to_fit(&iomem_resource, new); + + next = res->sibling; + while (next && resource_overlaps(new, next)) { + if (resource_contains(new, next)) { + struct resource *_next = next->sibling; + + remove_resource(next); + del_cxl_resource(next); + next = _next; + } else + next->start = new->end + 1; + } + } + return 0; +} + +static int pair_cxl_resource(struct device *dev, void *data) +{ + struct resource *cxl_res = data; + struct resource *p; + + if (!is_root_decoder(dev)) + return 0; + + for (p = cxl_res->child; p; p = p->sibling) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct resource res = { + .start = cxld->hpa_range.start, + .end = cxld->hpa_range.end, + .flags = IORESOURCE_MEM, + }; + + if (resource_contains(p, &res)) { + cxlrd->res = cxl_get_public_resource(p); + break; + } + } + + return 0; +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; + struct resource *cxl_res; struct cxl_port *root_port; struct device *host = &pdev->dev; struct acpi_device *adev = ACPI_COMPANION(host); @@ -296,6 +455,14 @@ static int cxl_acpi_probe(struct platform_device *pdev) if (rc) return rc; + cxl_res = devm_kzalloc(host, sizeof(*cxl_res), GFP_KERNEL); + if (!cxl_res) + return -ENOMEM; + cxl_res->name = "CXL mem"; + cxl_res->start = 0; + cxl_res->end = -1; + cxl_res->flags = IORESOURCE_MEM; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); @@ -306,11 +473,28 @@ static int cxl_acpi_probe(struct platform_device *pdev) if (rc < 0) return rc; + rc = devm_add_action_or_reset(host, remove_cxl_resources, cxl_res); + if (rc) + return rc; + ctx = (struct cxl_cfmws_context) { .dev = host, .root_port = root_port, + .cxl_res = cxl_res, }; - acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx); + rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx); + if (rc < 0) + return -ENXIO; + + rc = add_cxl_resources(cxl_res); + if (rc) + return rc; + + /* + * Populate the root decoders with their related iomem resource, + * if present + */ + device_for_each_child(&root_port->dev, cxl_res, pair_cxl_resource); /* * Root level scanned with host-bridge as dports, now scan host-bridges @@ -337,12 +521,19 @@ static const struct acpi_device_id cxl_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, cxl_acpi_ids); +static const struct platform_device_id cxl_test_ids[] = { + { "cxl_acpi" }, + { }, +}; +MODULE_DEVICE_TABLE(platform, cxl_test_ids); + static struct platform_driver cxl_acpi_driver = { .probe = cxl_acpi_probe, .driver = { .name = KBUILD_MODNAME, .acpi_match_table = cxl_acpi_ids, }, + .id_table = cxl_test_ids, }; module_platform_driver(cxl_acpi_driver); diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9d35085d25af..79c7257f4107 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -10,3 +10,4 @@ cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o +cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1a50c0fc399c..1d8f87be283f 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -9,6 +9,36 @@ extern const struct device_type cxl_nvdimm_type; extern struct attribute_group cxl_base_attribute_group; +#ifdef CONFIG_CXL_REGION +extern struct device_attribute dev_attr_create_pmem_region; +extern struct device_attribute dev_attr_delete_region; +extern struct device_attribute dev_attr_region; +extern const struct device_type cxl_pmem_region_type; +extern const struct device_type cxl_region_type; +void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); +#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) +#define CXL_REGION_TYPE(x) (&cxl_region_type) +#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), +#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) +int cxl_region_init(void); +void cxl_region_exit(void); +#else +static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) +{ +} +static inline int cxl_region_init(void) +{ + return 0; +} +static inline void cxl_region_exit(void) +{ +} +#define CXL_REGION_ATTR(x) NULL +#define CXL_REGION_TYPE(x) NULL +#define SET_CXL_REGION_ATTR(x) +#define CXL_PMEM_REGION_TYPE(x) NULL +#endif + struct cxl_send_command; struct cxl_mem_query_commands; int cxl_query_cmd(struct cxl_memdev *cxlmd, @@ -17,9 +47,28 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, resource_size_t length); +struct dentry *cxl_debugfs_create_dir(const char *dir); +int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, + enum cxl_decoder_mode mode); +int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size); +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); +resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); +resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); +extern struct rw_semaphore cxl_dpa_rwsem; + +bool is_switch_decoder(struct device *dev); +struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); +static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, + struct cxl_memdev *cxlmd) +{ + if (!port) + return NULL; + + return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); +} + int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); -void cxl_mbox_exit(void); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index bfc8ee876278..d1d2caea5c62 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/seq_file.h> #include <linux/device.h> #include <linux/delay.h> @@ -16,6 +17,8 @@ * for enumerating these registers and capabilities. */ +DECLARE_RWSEM(cxl_dpa_rwsem); + static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map) { @@ -46,20 +49,22 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, */ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { - struct cxl_decoder *cxld; - struct cxl_dport *dport; + struct cxl_switch_decoder *cxlsd; + struct cxl_dport *dport = NULL; int single_port_map[1]; + unsigned long index; - cxld = cxl_switch_decoder_alloc(port, 1); - if (IS_ERR(cxld)) - return PTR_ERR(cxld); + cxlsd = cxl_switch_decoder_alloc(port, 1); + if (IS_ERR(cxlsd)) + return PTR_ERR(cxlsd); device_lock_assert(&port->dev); - dport = list_first_entry(&port->dports, typeof(*dport), list); + xa_for_each(&port->dports, index, dport) + break; single_port_map[0] = dport->port_id; - return add_hdm_decoder(port, cxld, single_port_map); + return add_hdm_decoder(port, &cxlsd->cxld, single_port_map); } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL); @@ -124,47 +129,577 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) return ERR_PTR(-ENXIO); } + dev_set_drvdata(dev, cxlhdm); + return cxlhdm; } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); -static int to_interleave_granularity(u32 ctrl) +static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth) +{ + unsigned long long start = r->start, end = r->end; + + seq_printf(file, "%*s%08llx-%08llx : %s\n", depth * 2, "", start, end, + r->name); +} + +void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) +{ + struct resource *p1, *p2; + + down_read(&cxl_dpa_rwsem); + for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { + __cxl_dpa_debug(file, p1, 0); + for (p2 = p1->child; p2; p2 = p2->sibling) + __cxl_dpa_debug(file, p2, 1); + } + up_read(&cxl_dpa_rwsem); +} +EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, CXL); + +/* + * Must be called in a context that synchronizes against this decoder's + * port ->remove() callback (like an endpoint decoder sysfs attribute) + */ +static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxled_to_port(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct resource *res = cxled->dpa_res; + resource_size_t skip_start; + + lockdep_assert_held_write(&cxl_dpa_rwsem); + + /* save @skip_start, before @res is released */ + skip_start = res->start - cxled->skip; + __release_region(&cxlds->dpa_res, res->start, resource_size(res)); + if (cxled->skip) + __release_region(&cxlds->dpa_res, skip_start, cxled->skip); + cxled->skip = 0; + cxled->dpa_res = NULL; + put_device(&cxled->cxld.dev); + port->hdm_end--; +} + +static void cxl_dpa_release(void *cxled) +{ + down_write(&cxl_dpa_rwsem); + __cxl_dpa_release(cxled); + up_write(&cxl_dpa_rwsem); +} + +/* + * Must be called from context that will not race port device + * unregistration, like decoder sysfs attribute methods + */ +static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *port = cxled_to_port(cxled); + + lockdep_assert_held_write(&cxl_dpa_rwsem); + devm_remove_action(&port->dev, cxl_dpa_release, cxled); + __cxl_dpa_release(cxled); +} + +static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, + resource_size_t base, resource_size_t len, + resource_size_t skipped) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxled_to_port(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct device *dev = &port->dev; + struct resource *res; + + lockdep_assert_held_write(&cxl_dpa_rwsem); + + if (!len) + goto success; + + if (cxled->dpa_res) { + dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n", + port->id, cxled->cxld.id, cxled->dpa_res); + return -EBUSY; + } + + if (port->hdm_end + 1 != cxled->cxld.id) { + /* + * Assumes alloc and commit order is always in hardware instance + * order per expectations from 8.2.5.12.20 Committing Decoder + * Programming that enforce decoder[m] committed before + * decoder[m+1] commit start. + */ + dev_dbg(dev, "decoder%d.%d: expected decoder%d.%d\n", port->id, + cxled->cxld.id, port->id, port->hdm_end + 1); + return -EBUSY; + } + + if (skipped) { + res = __request_region(&cxlds->dpa_res, base - skipped, skipped, + dev_name(&cxled->cxld.dev), 0); + if (!res) { + dev_dbg(dev, + "decoder%d.%d: failed to reserve skipped space\n", + port->id, cxled->cxld.id); + return -EBUSY; + } + } + res = __request_region(&cxlds->dpa_res, base, len, + dev_name(&cxled->cxld.dev), 0); + if (!res) { + dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n", + port->id, cxled->cxld.id); + if (skipped) + __release_region(&cxlds->dpa_res, base - skipped, + skipped); + return -EBUSY; + } + cxled->dpa_res = res; + cxled->skip = skipped; + + if (resource_contains(&cxlds->pmem_res, res)) + cxled->mode = CXL_DECODER_PMEM; + else if (resource_contains(&cxlds->ram_res, res)) + cxled->mode = CXL_DECODER_RAM; + else { + dev_dbg(dev, "decoder%d.%d: %pr mixed\n", port->id, + cxled->cxld.id, cxled->dpa_res); + cxled->mode = CXL_DECODER_MIXED; + } + +success: + port->hdm_end++; + get_device(&cxled->cxld.dev); + return 0; +} + +static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, + resource_size_t base, resource_size_t len, + resource_size_t skipped) { - int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl); + struct cxl_port *port = cxled_to_port(cxled); + int rc; + + down_write(&cxl_dpa_rwsem); + rc = __cxl_dpa_reserve(cxled, base, len, skipped); + up_write(&cxl_dpa_rwsem); + + if (rc) + return rc; - return 256 << val; + return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } -static int to_interleave_ways(u32 ctrl) +resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { - int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl); + resource_size_t size = 0; + + down_read(&cxl_dpa_rwsem); + if (cxled->dpa_res) + size = resource_size(cxled->dpa_res); + up_read(&cxl_dpa_rwsem); - switch (val) { - case 0 ... 4: - return 1 << val; - case 8 ... 10: - return 3 << (val - 8); + return size; +} + +resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) +{ + resource_size_t base = -1; + + down_read(&cxl_dpa_rwsem); + if (cxled->dpa_res) + base = cxled->dpa_res->start; + up_read(&cxl_dpa_rwsem); + + return base; +} + +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *port = cxled_to_port(cxled); + struct device *dev = &cxled->cxld.dev; + int rc; + + down_write(&cxl_dpa_rwsem); + if (!cxled->dpa_res) { + rc = 0; + goto out; + } + if (cxled->cxld.region) { + dev_dbg(dev, "decoder assigned to: %s\n", + dev_name(&cxled->cxld.region->dev)); + rc = -EBUSY; + goto out; + } + if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { + dev_dbg(dev, "decoder enabled\n"); + rc = -EBUSY; + goto out; + } + if (cxled->cxld.id != port->hdm_end) { + dev_dbg(dev, "expected decoder%d.%d\n", port->id, + port->hdm_end); + rc = -EBUSY; + goto out; + } + devm_cxl_dpa_release(cxled); + rc = 0; +out: + up_write(&cxl_dpa_rwsem); + return rc; +} + +int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, + enum cxl_decoder_mode mode) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct device *dev = &cxled->cxld.dev; + int rc; + + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; default: + dev_dbg(dev, "unsupported mode: %d\n", mode); + return -EINVAL; + } + + down_write(&cxl_dpa_rwsem); + if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { + rc = -EBUSY; + goto out; + } + + /* + * Only allow modes that are supported by the current partition + * configuration + */ + if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) { + dev_dbg(dev, "no available pmem capacity\n"); + rc = -ENXIO; + goto out; + } + if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) { + dev_dbg(dev, "no available ram capacity\n"); + rc = -ENXIO; + goto out; + } + + cxled->mode = mode; + rc = 0; +out: + up_write(&cxl_dpa_rwsem); + + return rc; +} + +int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + resource_size_t free_ram_start, free_pmem_start; + struct cxl_port *port = cxled_to_port(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct device *dev = &cxled->cxld.dev; + resource_size_t start, avail, skip; + struct resource *p, *last; + int rc; + + down_write(&cxl_dpa_rwsem); + if (cxled->cxld.region) { + dev_dbg(dev, "decoder attached to %s\n", + dev_name(&cxled->cxld.region->dev)); + rc = -EBUSY; + goto out; + } + + if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { + dev_dbg(dev, "decoder enabled\n"); + rc = -EBUSY; + goto out; + } + + for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling) + last = p; + if (last) + free_ram_start = last->end + 1; + else + free_ram_start = cxlds->ram_res.start; + + for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling) + last = p; + if (last) + free_pmem_start = last->end + 1; + else + free_pmem_start = cxlds->pmem_res.start; + + if (cxled->mode == CXL_DECODER_RAM) { + start = free_ram_start; + avail = cxlds->ram_res.end - start + 1; + skip = 0; + } else if (cxled->mode == CXL_DECODER_PMEM) { + resource_size_t skip_start, skip_end; + + start = free_pmem_start; + avail = cxlds->pmem_res.end - start + 1; + skip_start = free_ram_start; + + /* + * If some pmem is already allocated, then that allocation + * already handled the skip. + */ + if (cxlds->pmem_res.child && + skip_start == cxlds->pmem_res.child->start) + skip_end = skip_start - 1; + else + skip_end = start - 1; + skip = skip_end - skip_start + 1; + } else { + dev_dbg(dev, "mode not set\n"); + rc = -EINVAL; + goto out; + } + + if (size > avail) { + dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, + cxled->mode == CXL_DECODER_RAM ? "ram" : "pmem", + &avail); + rc = -ENOSPC; + goto out; + } + + rc = __cxl_dpa_reserve(cxled, start, size, skip); +out: + up_write(&cxl_dpa_rwsem); + + if (rc) + return rc; + + return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); +} + +static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) +{ + u16 eig; + u8 eiw; + + /* + * Input validation ensures these warns never fire, but otherwise + * suppress unititalized variable usage warnings. + */ + if (WARN_ONCE(ways_to_cxl(cxld->inte |
