diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 11:07:26 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 11:07:26 -0700 |
| commit | c235698355fa94df7073b51befda7d4be00a0e23 (patch) | |
| tree | 3cb9d2fa40862f2484eda9c148b4326505a8534d /drivers/cxl/core/region.c | |
| parent | 5e2e7383b57fa03ec2b00c82bb7f49a4a707c1f7 (diff) | |
| parent | 1cd8a2537eb07751d405ab7e2223f20338a90506 (diff) | |
| download | linux-c235698355fa94df7073b51befda7d4be00a0e23.tar.gz linux-c235698355fa94df7073b51befda7d4be00a0e23.tar.bz2 linux-c235698355fa94df7073b51befda7d4be00a0e23.zip | |
Merge tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
"Compute Express Link (CXL) updates for 6.0:
- Introduce a 'struct cxl_region' object with support for
provisioning and assembling persistent memory regions.
- Introduce alloc_free_mem_region() to accompany the existing
request_free_mem_region() as a method to allocate physical memory
capacity out of an existing resource.
- Export insert_resource_expand_to_fit() for the CXL subsystem to
late-publish CXL platform windows in iomem_resource.
- Add a polled mode PCI DOE (Data Object Exchange) driver service and
use it in cxl_pci to retrieve the CDAT (Coherent Device Attribute
Table)"
* tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (74 commits)
cxl/hdm: Fix skip allocations vs multiple pmem allocations
cxl/region: Disallow region granularity != window granularity
cxl/region: Fix x1 interleave to greater than x1 interleave routing
cxl/region: Move HPA setup to cxl_region_attach()
cxl/region: Fix decoder interleave programming
Documentation: cxl: remove dangling kernel-doc reference
cxl/region: describe targets and nr_targets members of cxl_region_params
cxl/regions: add padding for cxl_rr_ep_add nested lists
cxl/region: Fix IS_ERR() vs NULL check
cxl/region: Fix region reference target accounting
cxl/region: Fix region commit uninitialized variable warning
cxl/region: Fix port setup uninitialized variable warnings
cxl/region: Stop initializing interleave granularity
cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime
cxl/acpi: Minimize granularity for x1 interleaves
cxl/region: Delete 'region' attribute from root decoders
cxl/acpi: Autoload driver for 'cxl_acpi' test devices
cxl/region: decrement ->nr_targets on error in cxl_region_attach()
cxl/region: prevent underflow in ways_to_cxl()
cxl/region: uninitialized variable in alloc_hpa()
...
Diffstat (limited to 'drivers/cxl/core/region.c')
| -rw-r--r-- | drivers/cxl/core/region.c | 1896 |
1 files changed, 1896 insertions, 0 deletions
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c new file mode 100644 index 000000000000..401148016978 --- /dev/null +++ b/drivers/cxl/core/region.c @@ -0,0 +1,1896 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <linux/memregion.h> +#include <linux/genalloc.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/uuid.h> +#include <linux/idr.h> +#include <cxlmem.h> +#include <cxl.h> +#include "core.h" + +/** + * DOC: cxl core region + * + * CXL Regions represent mapped memory capacity in system physical address + * space. Whereas the CXL Root Decoders identify the bounds of potential CXL + * Memory ranges, Regions represent the active mapped capacity by the HDM + * Decoder Capability structures throughout the Host Bridges, Switches, and + * Endpoints in the topology. + * + * Region configuration has ordering constraints. UUID may be set at any time + * but is only visible for persistent regions. + * 1. Interleave granularity + * 2. Interleave size + * 3. Decoder targets + */ + +/* + * All changes to the interleave configuration occur with this lock held + * for write. + */ +static DECLARE_RWSEM(cxl_region_rwsem); + +static struct cxl_region *to_cxl_region(struct device *dev); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + rc = sysfs_emit(buf, "%pUb\n", &p->uuid); + up_read(&cxl_region_rwsem); + + return rc; +} + +static int is_dup(struct device *match, void *data) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + uuid_t *uuid = data; + + if (!is_cxl_region(match)) + return 0; + + lockdep_assert_held(&cxl_region_rwsem); + cxlr = to_cxl_region(match); + p = &cxlr->params; + + if (uuid_equal(&p->uuid, uuid)) { + dev_dbg(match, "already has uuid: %pUb\n", uuid); + return -EBUSY; + } + + return 0; +} + +static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + uuid_t temp; + ssize_t rc; + + if (len != UUID_STRING_LEN + 1) + return -EINVAL; + + rc = uuid_parse(buf, &temp); + if (rc) + return rc; + + if (uuid_is_null(&temp)) + return -EINVAL; + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + + if (uuid_equal(&p->uuid, &temp)) + goto out; + + rc = -EBUSY; + if (p->state >= CXL_CONFIG_ACTIVE) + goto out; + + rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup); + if (rc < 0) + goto out; + + uuid_copy(&p->uuid, &temp); +out: + up_write(&cxl_region_rwsem); + + if (rc) + return rc; + return len; +} +static DEVICE_ATTR_RW(uuid); + +static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port, + struct cxl_region *cxlr) +{ + return xa_load(&port->regions, (unsigned long)cxlr); +} + +static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) +{ + struct cxl_region_params *p = &cxlr->params; + int i; + + for (i = count - 1; i >= 0; i--) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *iter = cxled_to_port(cxled); + struct cxl_ep *ep; + int rc; + + while (!is_cxl_root(to_cxl_port(iter->dev.parent))) + iter = to_cxl_port(iter->dev.parent); + + for (ep = cxl_ep_load(iter, cxlmd); iter; + iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { + struct cxl_region_ref *cxl_rr; + struct cxl_decoder *cxld; + + cxl_rr = cxl_rr_load(iter, cxlr); + cxld = cxl_rr->decoder; + rc = cxld->reset(cxld); + if (rc) + return rc; + } + + rc = cxled->cxld.reset(&cxled->cxld); + if (rc) + return rc; + } + + return 0; +} + +static int cxl_region_decode_commit(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int i, rc = 0; + + for (i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_region_ref *cxl_rr; + struct cxl_decoder *cxld; + struct cxl_port *iter; + struct cxl_ep *ep; + + /* commit bottom up */ + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + cxl_rr = cxl_rr_load(iter, cxlr); + cxld = cxl_rr->decoder; + rc = cxld->commit(cxld); + if (rc) + break; + } + + if (rc) { + /* programming @iter failed, teardown */ + for (ep = cxl_ep_load(iter, cxlmd); ep && iter; + iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { + cxl_rr = cxl_rr_load(iter, cxlr); + cxld = cxl_rr->decoder; + cxld->reset(cxld); + } + + cxled->cxld.reset(&cxled->cxld); + goto err; + } + } + + return 0; + +err: + /* undo the targets that were successfully committed */ + cxl_region_decode_reset(cxlr, i); + return rc; +} + +static ssize_t commit_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + bool commit; + ssize_t rc; + + rc = kstrtobool(buf, &commit); + if (rc) + return rc; + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + + /* Already in the requested state? */ + if (commit && p->state >= CXL_CONFIG_COMMIT) + goto out; + if (!commit && p->state < CXL_CONFIG_COMMIT) + goto out; + + /* Not ready to commit? */ + if (commit && p->state < CXL_CONFIG_ACTIVE) { + rc = -ENXIO; + goto out; + } + + if (commit) + rc = cxl_region_decode_commit(cxlr); + else { + p->state = CXL_CONFIG_RESET_PENDING; + up_write(&cxl_region_rwsem); + device_release_driver(&cxlr->dev); + down_write(&cxl_region_rwsem); + + /* + * The lock was dropped, so need to revalidate that the reset is + * still pending. + */ + if (p->state == CXL_CONFIG_RESET_PENDING) + rc = cxl_region_decode_reset(cxlr, p->interleave_ways); + } + + if (rc) + goto out; + + if (commit) + p->state = CXL_CONFIG_COMMIT; + else if (p->state == CXL_CONFIG_RESET_PENDING) + p->state = CXL_CONFIG_ACTIVE; + +out: + up_write(&cxl_region_rwsem); + + if (rc) + return rc; + return len; +} + +static ssize_t commit_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT); + up_read(&cxl_region_rwsem); + + return rc; +} +static DEVICE_ATTR_RW(commit); + +static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_region *cxlr = to_cxl_region(dev); + + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) + return 0; + return a->mode; +} + +static ssize_t interleave_ways_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + rc = sysfs_emit(buf, "%d\n", p->interleave_ways); + up_read(&cxl_region_rwsem); + + return rc; +} + +static const struct attribute_group *get_cxl_region_target_group(void); + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + unsigned int val, save; + int rc; + u8 iw; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + rc = ways_to_cxl(val, &iw); + if (rc) + return rc; + + /* + * Even for x3, x9, and x12 interleaves the region interleave must be a + * power of 2 multiple of the host bridge interleave. + */ + if (!is_power_of_2(val / cxld->interleave_ways) || + (val % cxld->interleave_ways)) { + dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val); + return -EINVAL; + } + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + rc = -EBUSY; + goto out; + } + + save = p->interleave_ways; + p->interleave_ways = val; + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); + if (rc) + p->interleave_ways = save; +out: + up_write(&cxl_region_rwsem); + if (rc) + return rc; + return len; +} +static DEVICE_ATTR_RW(interleave_ways); + +static ssize_t interleave_granularity_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + rc = sysfs_emit(buf, "%d\n", p->interleave_granularity); + up_read(&cxl_region_rwsem); + + return rc; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int rc, val; + u16 ig; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + rc = granularity_to_cxl(val, &ig); + if (rc) + return rc; + + /* + * When the host-bridge is interleaved, disallow region granularity != + * root granularity. Regions with a granularity less than the root + * interleave result in needing multiple endpoints to support a single + * slot in the interleave (possible to suport in the future). Regions + * with a granularity greater than the root interleave result in invalid + * DPA translations (invalid to support). + */ + if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) + return -EINVAL; + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + rc = -EBUSY; + goto out; + } + + p->interleave_granularity = val; +out: + up_write(&cxl_region_rwsem); + if (rc) + return rc; + return len; +} +static DEVICE_ATTR_RW(interleave_granularity); + +static ssize_t resource_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + u64 resource = -1ULL; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + if (p->res) + resource = p->res->start; + rc = sysfs_emit(buf, "%#llx\n", resource); + up_read(&cxl_region_rwsem); + + return rc; +} +static DEVICE_ATTR_RO(resource); + +static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_region_params *p = &cxlr->params; + struct resource *res; + u32 remainder = 0; + + lockdep_assert_held_write(&cxl_region_rwsem); + + /* Nothing to do... */ + if (p->res && resource_size(p->res) == size) + return 0; + + /* To change size the old size must be freed first */ + if (p->res) + return -EBUSY; + + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) + return -EBUSY; + + /* ways, granularity and uuid (if PMEM) need to be set before HPA */ + if (!p->interleave_ways || !p->interleave_granularity || + (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) + return -ENXIO; + + div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + if (remainder) + return -EINVAL; + + res = alloc_free_mem_region(cxlrd->res, size, SZ_256M, + dev_name(&cxlr->dev)); + if (IS_ERR(res)) { + dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n", + PTR_ERR(res)); + return PTR_ERR(res); + } + + p->res = res; + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; + + return 0; +} + +static void cxl_region_iomem_release(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + + if (device_is_registered(&cxlr->dev)) + lockdep_assert_held_write(&cxl_region_rwsem); + if (p->res) { + remove_resource(p->res); + kfree(p->res); + p->res = NULL; + } +} + +static int free_hpa(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + + lockdep_assert_held_write(&cxl_region_rwsem); + + if (!p->res) + return 0; + + if (p->state >= CXL_CONFIG_ACTIVE) + return -EBUSY; + + cxl_region_iomem_release(cxlr); + p->state = CXL_CONFIG_IDLE; + return 0; +} + +static ssize_t size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + u64 val; + int rc; + + rc = kstrtou64(buf, 0, &val); + if (rc) + return rc; + + rc = down_write_killable(&cxl_region_rwsem); + if (rc) + return rc; + + if (val) + rc = alloc_hpa(cxlr, val); + else + rc = free_hpa(cxlr); + up_write(&cxl_region_rwsem); + + if (rc) + return rc; + + return len; +} + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + u64 size = 0; + ssize_t rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + if (p->res) + size = resource_size(p->res); + rc = sysfs_emit(buf, "%#llx\n", size); + up_read(&cxl_region_rwsem); + + return rc; +} +static DEVICE_ATTR_RW(size); + +static struct attribute *cxl_region_attrs[] = { + &dev_attr_uuid.attr, + &dev_attr_commit.attr, + &dev_attr_interleave_ways.attr, + &dev_attr_interleave_granularity.attr, + &dev_attr_resource.attr, + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group cxl_region_group = { + .attrs = cxl_region_attrs, + .is_visible = cxl_region_visible, +}; + +static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled; + int rc; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + + if (pos >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, + p->interleave_ways); + rc = -ENXIO; + goto out; + } + + cxled = p->targets[pos]; + if (!cxled) + rc = sysfs_emit(buf, "\n"); + else + rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev)); +out: + up_read(&cxl_region_rwsem); + + return rc; +} + +static int match_free_decoder(struct device *dev, void *data) +{ + struct cxl_decoder *cxld; + int *id = data; + + if (!is_switch_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + + /* enforce ordered allocation */ + if (cxld->id != *id) + return 0; + + if (!cxld->region) + return 1; + + (*id)++; + + return 0; +} + +static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, + struct cxl_region *cxlr) +{ + struct device *dev; + int id = 0; + + dev = device_find_child(&port->dev, &id, match_free_decoder); + if (!dev) + return NULL; + /* + * This decoder is pinned registered as long as the endpoint decoder is + * registered, and endpoint decoder unregistration holds the + * cxl_region_rwsem over unregister events, so no need to hold on to + * this extra reference. + */ + put_device(dev); + return to_cxl_decoder(dev); +} + +static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, + struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_region_ref *cxl_rr, *iter; + unsigned long index; + int rc; + + xa_for_each(&port->regions, index, iter) { + struct cxl_region_params *ip = &iter->region->params; + + if (ip->res->start > p->res->start) { + dev_dbg(&cxlr->dev, + "%s: HPA order violation %s:%pr vs %pr\n", + dev_name(&port->dev), + dev_name(&iter->region->dev), ip->res, p->res); + return ERR_PTR(-EBUSY); + } + } + + cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL); + if (!cxl_rr) + return ERR_PTR(-ENOMEM); + cxl_rr->port = port; + cxl_rr->region = cxlr; + cxl_rr->nr_targets = 1; + xa_init(&cxl_rr->endpoints); + + rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL); + if (rc) { + dev_dbg(&cxlr->dev, + "%s: failed to track region reference: %d\n", + dev_name(&port->dev), rc); + kfree(cxl_rr); + return ERR_PTR(rc); + } + + return cxl_rr; +} + +static void free_region_ref(struct cxl_region_ref *cxl_rr) +{ + struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + struct cxl_decoder *cxld = cxl_rr->decoder; + + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); + if (cxld->region == cxlr) { + cxld->region = NULL; + put_device(&cxlr->dev); + } + + xa_erase(&port->regions, (unsigned long)cxlr); + xa_destroy(&cxl_rr->endpoints); + kfree(cxl_rr); +} + +static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, + struct cxl_endpoint_decoder *cxled) +{ + int rc; + struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + struct cxl_decoder *cxld = cxl_rr->decoder; + struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled)); + + if (ep) { + rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep, + GFP_KERNEL); + if (rc) + return rc; + } + cxl_rr->nr_eps++; + + if (!cxld->region) { + cxld->region = cxlr; + get_device(&cxlr->dev); + } + + return 0; +} + +/** + * cxl_port_attach_region() - track a region's interest in a port by endpoint + * @port: port to add a new region reference 'struct cxl_region_ref' + * @cxlr: region to attach to @port + * @cxled: endpoint decoder used to create or further pin a region reference + * @pos: interleave position of @cxled in @cxlr + * + * The attach event is an opportunity to validate CXL decode setup + * constraints and record metadata needed for programming HDM decoders, + * in particular decoder target lists. + * + * The steps are: + * + * - validate that there are no other regions with a higher HPA already + * associated with @port + * - establish a region reference if one is not already present + * + * - additionally allocate a decoder instance that will host @cxlr on + * @port + * + * - pin the region reference by the endpoint + * - account for how many entries in @port's target list are needed to + * cover all of the added endpoints. + */ +static int cxl_port_attach_region(struct cxl_port *port, + struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_ep *ep = cxl_ep_load(port, cxlmd); + struct cxl_region_ref *cxl_rr; + bool nr_targets_inc = false; + struct cxl_decoder *cxld; + unsigned long index; + int rc = -EBUSY; + + lockdep_assert_held_write(&cxl_region_rwsem); + + cxl_rr = cxl_rr_load(port, cxlr); + if (cxl_rr) { + struct cxl_ep *ep_iter; + int found = 0; + + /* + * Walk the existing endpoints that have been attached to + * @cxlr at @port and see if they share the same 'next' port + * in the downstream direction. I.e. endpoints that share common + * upstream switch. + */ + xa_for_each(&cxl_rr->endpoints, index, ep_iter) { + if (ep_iter == ep) + continue; + if (ep_iter->next == ep->next) { + found++; + break; + } + } + + /* + * New target port, or @port is an endpoint port that always + * accounts its own local decode as a target. + */ + if (!found || !ep->next) { + cxl_rr->nr_targets++; + nr_targets_inc = true; + } + + /* + * The decoder for @cxlr was allocated when the region was first + * attached to @port. + */ + cxld = cxl_rr->decoder; + } else { + cxl_rr = alloc_region_ref(port, cxlr); + if (IS_ERR(cxl_rr)) { + dev_dbg(&cxlr->dev, + "%s: failed to allocate region reference\n", + dev_name(&port->dev)); + return PTR_ERR(cxl_rr); + } + nr_targets_inc = true; + + if (port == cxled_to_port(cxled)) + cxld = &cxled->cxld; + else + cxld = cxl_region_find_decoder(port, cxlr); + if (!cxld) { + dev_dbg(&cxlr->dev, "%s: no decoder available\n", + dev_name(&port->dev)); + goto out_erase; + } + + if (cxld->region) { + dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", + dev_name(&port->dev), dev_name(&cxld->dev), + dev_name(&cxld->region->dev)); + rc = -EBUSY; + goto out_erase; + } + + cxl_rr->decoder = cxld; + } + + rc = cxl_rr_ep_add(cxl_rr, cxled); + if (rc) { + dev_dbg(&cxlr->dev, + "%s: failed to track endpoint %s:%s reference\n", + dev_name(&port->dev), dev_name(&cxlmd->dev), + dev_name(&cxld->dev)); + goto out_erase; + } + + dev_dbg(&cxlr->dev, + "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxld->dev), dev_name(&cxlmd->dev), + dev_name(&cxled->cxld.dev), pos, + ep ? ep->next ? dev_name(ep->next->uport) : + dev_name(&cxlmd->dev) : + "none", + cxl_rr->nr_eps, cxl_rr->nr_targets); + + return 0; +out_erase: + if (nr_targets_inc) + cxl_rr->nr_targets--; + if (cxl_rr->nr_eps == 0) + free_region_ref(cxl_rr); + return rc; +} + +static void cxl_port_detach_region(struct cxl_port *port, + struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_region_ref *cxl_rr; + struct cxl_ep *ep = NULL; + + lockdep_assert_held_write(&cxl_region_rwsem); + + cxl_rr = cxl_rr_load(port, cxlr); + if (!cxl_rr) + return; + + /* + * Endpoint ports do not carry cxl_ep references, and they + * never target more than one endpoint by definition + */ + if (cxl_rr->decoder == &cxled->cxld) + cxl_rr->nr_eps--; + else + ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled); + if (ep) { + struct cxl_ep *ep_iter; + unsigned long index; + int found = 0; + + cxl_rr->nr_eps--; + xa_for_each(&cxl_rr->endpoints, index, ep_iter) { + if (ep_iter->next == ep->next) { + found++; + break; + } + } + if (!found) + cxl_rr->nr_targets--; + } + + if (cxl_rr->nr_eps == 0) + free_region_ref(cxl_rr); +} + +static int check_last_peer(struct cxl_endpoint_decoder *cxled, + struct cxl_ep *ep, struct cxl_region_ref *cxl_rr, + int distance) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_region *cxlr = cxl_rr->region; + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled_peer; + struct cxl_port *port = cxl_rr->port; + struct cxl_memdev *cxlmd_peer; + struct cxl_ep *ep_peer; + int pos = cxled->pos; + + /* + * If this position wants to share a dport with the last endpoint mapped + * then that endpoint, at index 'position - distance', must also be + * mapped by this dport. + */ + if (pos < distance) { + dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); + return -ENXIO; + } + cxled_peer = p->targets[pos - distance]; + cxlmd_peer = cxled_to_memdev(cxled_peer); + ep_peer = cxl_ep_load(port, cxlmd_peer); + if (ep->dport != ep_peer->dport) { + dev_dbg(&cxlr->dev, + "%s:%s: %s:%s pos %d mismatched peer %s:%s\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos, + dev_name(&cxlmd_peer->dev), + dev_name(&cxled_peer->cxld.dev)); + return -ENXIO; + } + + return 0; +} + +static int cxl_port_setup_targets(struct cxl_port *port, + struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos; + struct cxl_port *parent_port = to_cxl_port(port->dev.parent); + struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_ep *ep = cxl_ep_load(port, cxlmd); + struct cxl_region_params *p = &cxlr->params; + struct cxl_decoder *cxld = cxl_rr->decoder; + struct cxl_switch_decoder *cxlsd; + u16 eig, peig; + u8 eiw, peiw; + + /* + * While root level decoders support x3, x6, x12, switch level + * decoders only support powers of 2 up to x16. + */ + if (!is_power_of_2(cxl_rr->nr_targets)) { + dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n", + dev_name(port->uport), dev_name(&port->dev), + cxl_rr->nr_targets); + return -EINVAL; + } + + cxlsd = to_cxl_switch_decoder(&cxld->dev); + if (cxl_rr->nr_targets_set) { + int i, distance; + + distance = p->nr_targets / cxl_rr->nr_targets; + for (i = 0; i < cxl_rr->nr_targets_set; i++) + if (ep->dport == cxlsd->target[i]) { + rc = check_last_peer(cxled, ep, cxl_rr, + distance); + if (rc) + return rc; + goto out_target_set; + } + goto add_target; + } + + if (is_cxl_root(parent_port)) { + parent_ig = cxlrd->cxlsd.cxld.interleave_granularity; + parent_iw = cxlrd->cxlsd.cxld.interleave_ways; + /* + * For purposes of address bit routing, use power-of-2 math for + * switch ports. + */ + if (!is_power_of_2(parent_iw)) + parent_iw /= 3; + } else { + struct cxl_region_ref *parent_rr; + struct cxl_decoder *parent_cxld; + + parent_rr = cxl_rr_load(parent_port, cxlr); + parent_cxld = parent_rr->decoder; + parent_ig = parent_cxld->interleave_granularity; + parent_iw = parent_cxld->interleave_ways; + } + + rc = granularity_to_cxl(parent_ig, &peig); + if (rc) { + dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n", + dev_name(parent_port->uport), + dev_name(&parent_port->dev), parent_ig); + return rc; + } + + rc = ways_to_cxl(parent_iw, &peiw); + if (rc) { + dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n", + dev_name(parent_port->uport), + dev_name(&parent_port->dev), parent_iw); + return rc; + } + + iw = cxl_rr->nr_targets; + rc = ways_to_cxl(iw, &eiw); + if (rc) { + dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n", + dev_name(port->uport), dev_name(&port->dev), iw); + return rc; + } + + /* + * If @parent_port is masking address bits, pick the next unused address + * bit to route @port's targets. + */ + if (parent_iw > 1 && cxl_rr->nr_targets > 1) { + u32 address_bit = max(peig + peiw, eiw + peig); + + eig = address_bit - eiw + 1; + } else { + eiw = peiw; + eig = peig; + } + + rc = cxl_to_granularity(eig, &ig); + if (rc) { + dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n", + dev_name(port->uport), dev_name(&port->dev), + 256 << eig); + return rc; + } + + cxld->interleave_ways = iw; + cxld->interleave_granularity = ig; + cxld->hpa_range = (struct range) { + .start = p->res->start, + .end = p->res->end, + }; + dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport), + dev_name(&port->dev), iw, ig); +add_target: + if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s: targets full trying to add %s:%s at %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); + return -ENXIO; + } + cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + inc = 1; +out_target_set: + cxl_rr->nr_targets_set += inc; + dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n", + dev_name(port->uport), dev_name(&port->dev), + cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport), + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); + + return 0; +} + +static void cxl_port_reset_targets(struct cxl_port *port, + struct cxl_region *cxlr) +{ + struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); + struct cxl_decoder *cxld; + + /* + * After the last endpoint has been detached the entire cxl_rr may now + * be gone. + */ + if (!cxl_rr) + return; + cxl_rr->nr_targets_set = 0; + + cxld = cxl_rr->decoder; + cxld->hpa_range = (struct range) { + .start = 0, + .end = -1, + }; +} + +static void cxl_region_teardown_targets(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled; + struct cxl_memdev *cxlmd; + struct cxl_port *iter; + struct cxl_ep *ep; + int i; + + for (i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + cxlmd = cxled_to_memdev(cxled); + + iter = cxled_to_port(cxled); + while (!is_cxl_root(to_cxl_por |
