diff options
39 files changed, 5498 insertions, 576 deletions
diff --git a/.clang-format b/.clang-format index 9b87ea1fc16e..1247d54f9e49 100644 --- a/.clang-format +++ b/.clang-format @@ -516,6 +516,7 @@ ForEachMacros: - 'of_property_for_each_string' - 'of_property_for_each_u32' - 'pci_bus_for_each_resource' + - 'pci_doe_for_each_off' - 'pcl_for_each_chunk' - 'pcl_for_each_segment' - 'pcm_for_each_format' diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 7c2b846521f3..8494ef27e8d2 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -7,6 +7,7 @@ Description: all descendant memdevs for unbind. Writing '1' to this attribute flushes that work. + What: /sys/bus/cxl/devices/memX/firmware_version Date: December, 2020 KernelVersion: v5.12 @@ -16,6 +17,7 @@ Description: Memory Device Output Payload in the CXL-2.0 specification. + What: /sys/bus/cxl/devices/memX/ram/size Date: December, 2020 KernelVersion: v5.12 @@ -25,6 +27,7 @@ Description: identically named field in the Identify Memory Device Output Payload in the CXL-2.0 specification. + What: /sys/bus/cxl/devices/memX/pmem/size Date: December, 2020 KernelVersion: v5.12 @@ -34,6 +37,7 @@ Description: identically named field in the Identify Memory Device Output Payload in the CXL-2.0 specification. + What: /sys/bus/cxl/devices/memX/serial Date: January, 2022 KernelVersion: v5.18 @@ -43,6 +47,7 @@ Description: capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2 Memory Device PCIe Capabilities and Extended Capabilities. + What: /sys/bus/cxl/devices/memX/numa_node Date: January, 2022 KernelVersion: v5.18 @@ -52,114 +57,334 @@ Description: host PCI device for this memory device, emit the CPU node affinity for this device. + What: /sys/bus/cxl/devices/*/devtype Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - CXL device objects export the devtype attribute which mirrors - the same value communicated in the DEVTYPE environment variable - for uevents for devices on the "cxl" bus. + (RO) CXL device objects export the devtype attribute which + mirrors the same value communicated in the DEVTYPE environment + variable for uevents for devices on the "cxl" bus. + What: /sys/bus/cxl/devices/*/modalias Date: December, 2021 KernelVersion: v5.18 Contact: linux-cxl@vger.kernel.org Description: - CXL device objects export the modalias attribute which mirrors - the same value communicated in the MODALIAS environment variable - for uevents for devices on the "cxl" bus. + (RO) CXL device objects export the modalias attribute which + mirrors the same value communicated in the MODALIAS environment + variable for uevents for devices on the "cxl" bus. + What: /sys/bus/cxl/devices/portX/uport Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - CXL port objects are enumerated from either a platform firmware - device (ACPI0017 and ACPI0016) or PCIe switch upstream port with - CXL component registers. The 'uport' symlink connects the CXL - portX object to the device that published the CXL port + (RO) CXL port objects are enumerated from either a platform + firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream + port with CXL component registers. The 'uport' symlink connects + the CXL portX object to the device that published the CXL port capability. + What: /sys/bus/cxl/devices/portX/dportY Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - CXL port objects are enumerated from either a platform firmware - device (ACPI0017 and ACPI0016) or PCIe switch upstream port with - CXL component registers. The 'dportY' symlink identifies one or - more downstream ports that the upstream port may target in its - decode of CXL memory resources. The 'Y' integer reflects the - hardware port unique-id used in the hardware decoder target - list. + (RO) CXL port objects are enumerated from either a platform + firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream + port with CXL component registers. The 'dportY' symlink + identifies one or more downstream ports that the upstream port + may target in its decode of CXL memory resources. The 'Y' + integer reflects the hardware port unique-id used in the + hardware decoder target list. + What: /sys/bus/cxl/devices/decoderX.Y Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - CXL decoder objects are enumerated from either a platform + (RO) CXL decoder objects are enumerated from either a platform firmware description, or a CXL HDM decoder register set in a PCIe device (see CXL 2.0 section 8.2.5.12 CXL HDM Decoder Capability Structure). The 'X' in decoderX.Y represents the cxl_port container of this decoder, and 'Y' represents the instance id of a given decoder resource. + What: /sys/bus/cxl/devices/decoderX.Y/{start,size} Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - The 'start' and 'size' attributes together convey the physical - address base and number of bytes mapped in the decoder's decode - window. For decoders of devtype "cxl_decoder_root" the address - range is fixed. For decoders of devtype "cxl_decoder_switch" the - address is bounded by the decode range of the cxl_port ancestor - of the decoder's cxl_port, and dynamically updates based on the - active memory regions in that address space. + (RO) The 'start' and 'size' attributes together convey the + physical address base and number of bytes mapped in the + decoder's decode window. For decoders of devtype + "cxl_decoder_root" the address range is fixed. For decoders of + devtype "cxl_decoder_switch" the address is bounded by the + decode range of the cxl_port ancestor of the decoder's cxl_port, + and dynamically updates based on the active memory regions in + that address space. + What: /sys/bus/cxl/devices/decoderX.Y/locked Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - CXL HDM decoders have the capability to lock the configuration - until the next device reset. For decoders of devtype - "cxl_decoder_root" there is no standard facility to unlock them. - For decoders of devtype "cxl_decoder_switch" a secondary bus - reset, of the PCIe bridge that provides the bus for this - decoders uport, unlocks / resets the decoder. + (RO) CXL HDM decoders have the capability to lock the + configuration until the next device reset. For decoders of + devtype "cxl_decoder_root" there is no standard facility to + unlock them. For decoders of devtype "cxl_decoder_switch" a + secondary bus reset, of the PCIe bridge that provides the bus + for this decoders uport, unlocks / resets the decoder. + What: /sys/bus/cxl/devices/decoderX.Y/target_list Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - Display a comma separated list of the current decoder target - configuration. The list is ordered by the current configured - interleave order of the decoder's dport instances. Each entry in - the list is a dport id. + (RO) Display a comma separated list of the current decoder + target configuration. The list is ordered by the current + configured interleave order of the decoder's dport instances. + Each entry in the list is a dport id. + What: /sys/bus/cxl/devices/decoderX.Y/cap_{pmem,ram,type2,type3} Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - When a CXL decoder is of devtype "cxl_decoder_root", it + (RO) When a CXL decoder is of devtype "cxl_decoder_root", it represents a fixed memory window identified by platform firmware. A fixed window may only support a subset of memory types. The 'cap_*' attributes indicate whether persistent memory, volatile memory, accelerator memory, and / or expander memory may be mapped behind this decoder's memory window. + What: /sys/bus/cxl/devices/decoderX.Y/target_type Date: June, 2021 KernelVersion: v5.14 Contact: linux-cxl@vger.kernel.org Description: - When a CXL decoder is of devtype "cxl_decoder_switch", it can - optionally decode either accelerator memory (type-2) or expander - memory (type-3). The 'target_type' attribute indicates the - current setting which may dynamically change based on what + (RO) When a CXL decoder is of devtype "cxl_decoder_switch", it + can optionally decode either accelerator memory (type-2) or + expander memory (type-3). The 'target_type' attribute indicates + the current setting which may dynamically change based on what memory regions are activated in this decode hierarchy. + + +What: /sys/bus/cxl/devices/endpointX/CDAT +Date: July, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) If this sysfs entry is not present no DOE mailbox was + found to support CDAT data. If it is present and the length of + the data is 0 reading the CDAT data failed. Otherwise the CDAT + data is reported. + + +What: /sys/bus/cxl/devices/decoderX.Y/mode +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it + translates from a host physical address range, to a device local + address range. Device-local address ranges are further split + into a 'ram' (volatile memory) range and 'pmem' (persistent + memory) range. The 'mode' attribute emits one of 'ram', 'pmem', + 'mixed', or 'none'. The 'mixed' indication is for error cases + when a decoder straddles the volatile/persistent partition + boundary, and 'none' indicates the decoder is not actively + decoding, or no DPA allocation policy has been set. + + 'mode' can be written, when the decoder is in the 'disabled' + state, with either 'ram' or 'pmem' to set the boundaries for the + next allocation. + + +What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) When a CXL decoder is of devtype "cxl_decoder_endpoint", + and its 'dpa_size' attribute is non-zero, this attribute + indicates the device physical address (DPA) base address of the + allocation. + + +What: /sys/bus/cxl/devices/decoderX.Y/dpa_size +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it + translates from a host physical address range, to a device local + address range. The range, base address plus length in bytes, of + DPA allocated to this decoder is conveyed in these 2 attributes. + Allocations can be mutated as long as the decoder is in the + disabled state. A write to 'dpa_size' releases the previous DPA + allocation and then attempts to allocate from the free capacity + in the device partition referred to by 'decoderX.Y/mode'. + Allocate and free requests can only be performed on the highest + instance number disabled decoder with non-zero size. I.e. + allocations are enforced to occur in increasing 'decoderX.Y/id' + order and frees are enforced to occur in decreasing + 'decoderX.Y/id' order. + + +What: /sys/bus/cxl/devices/decoderX.Y/interleave_ways +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The number of targets across which this decoder's host + physical address (HPA) memory range is interleaved. The device + maps every Nth block of HPA (of size == + 'interleave_granularity') to consecutive DPA addresses. The + decoder's position in the interleave is determined by the + device's (endpoint or switch) switch ancestry. For root + decoders their interleave is specified by platform firmware and + they only specify a downstream target order for host bridges. + + +What: /sys/bus/cxl/devices/decoderX.Y/interleave_granularity +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The number of consecutive bytes of host physical address + space this decoder claims at address N before the decode rotates + to the next target in the interleave at address N + + interleave_granularity (assuming N is aligned to + interleave_granularity). + + +What: /sys/bus/cxl/devices/decoderX.Y/create_pmem_region +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Write a string in the form 'regionZ' to start the process + of defining a new persistent memory region (interleave-set) + within the decode range bounded by root decoder 'decoderX.Y'. + The value written must match the current value returned from + reading this attribute. An atomic compare exchange operation is + done on write to assign the requested id to a region and + allocate the region-id for the next creation attempt. EBUSY is + returned if the region name written does not match the current + cached value. + + +What: /sys/bus/cxl/devices/decoderX.Y/delete_region +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) Write a string in the form 'regionZ' to delete that region, + provided it is currently idle / not bound to a driver. + + +What: /sys/bus/cxl/devices/regionZ/uuid +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Write a unique identifier for the region. This field must + be set for persistent regions and it must not conflict with the + UUID of another region. + + +What: /sys/bus/cxl/devices/regionZ/interleave_granularity +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Set the number of consecutive bytes each device in the + interleave set will claim. The possible interleave granularity + values are determined by the CXL spec and the participating + devices. + + +What: /sys/bus/cxl/devices/regionZ/interleave_ways +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Configures the number of devices participating in the + region is set by writing this value. Each device will provide + 1/interleave_ways of storage for the region. + + +What: /sys/bus/cxl/devices/regionZ/size +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) System physical address space to be consumed by the region. + When written trigger the driver to allocate space out of the + parent root decoder's address space. When read the size of the + address space is reported and should match the span of the + region's resource attribute. Size shall be set after the + interleave configuration parameters. Once set it cannot be + changed, only freed by writing 0. The kernel makes no guarantees + that data is maintained over an address space freeing event, and + there is no guarantee that a free followed by an allocate + results in the same address being allocated. + + +What: /sys/bus/cxl/devices/regionZ/resource +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) A region is a contiguous partition of a CXL root decoder + address space. Region capacity is allocated by writing to the + size attribute, the resulting physical address space determined + by the driver is reflected here. It is therefore not useful to + read this before writing a value to the size attribute. + + +What: /sys/bus/cxl/devices/regionZ/target[0..N] +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Write an endpoint decoder object name to 'targetX' where X + is the intended position of the endpoint device in the region + interleave and N is the 'interleave_ways' setting for the + region. ENXIO is returned if the write results in an impossible + to map decode scenario, like the endpoint is unreachable at that + position relative to the root decoder interleave. EBUSY is + returned if the position in the region is already occupied, or + if the region is not in a state to accept interleave + configuration changes. EINVAL is returned if the object name is + not an endpoint decoder. Once all positions have been + successfully written a final validation for decode conflicts is + performed before activating the region. + + +What: /sys/bus/cxl/devices/regionZ/commit +Date: May, 2022 +KernelVersion: v5.20 +Contact: linux-cxl@vger.kernel.org +Description: + (RW) Write a boolean 'true' string value to this attribute to + trigger the region to transition from the software programmed + state to the actively decoding in hardware state. The commit + operation in addition to validating that the region is in proper + configured state, validates that the decoders are being + committed in spec mandated order (last committed decoder id + + 1), and checks that the hardware accepts the commit request. + Reading this value indicates whether the region is committed or + not. diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index db476bb170b6..5149ecdc53c7 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -362,6 +362,14 @@ CXL Core .. kernel-doc:: drivers/cxl/core/mbox.c :doc: cxl mbox +CXL Regions +----------- +.. kernel-doc:: drivers/cxl/core/region.c + :doc: cxl core region + +.. kernel-doc:: drivers/cxl/core/region.c + :identifiers: + External Interfaces =================== diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7b0d286bf9ba..01772e79fd93 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -55,6 +55,7 @@ int memory_add_physaddr_to_nid(u64 start) { return hot_add_scn_to_nid(start); } +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif int __weak create_section_mapping(unsigned long start, unsigned long end, diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index f64e3984689f..768ced3d6fe8 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -2,6 +2,7 @@ menuconfig CXL_BUS tristate "CXL (Compute Express Link) Devices Support" depends on PCI + select PCI_DOE help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -102,4 +103,12 @@ config CXL_SUSPEND def_bool y depends on SUSPEND && CXL_MEM +config CXL_REGION + bool + default CXL_BUS + # For MAX_PHYSMEM_BITS + depends on SPARSEMEM + select MEMREGION + select GET_FREE_REGION + endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40286f5df812..fb649683dd3a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -9,10 +9,6 @@ #include "cxlpci.h" #include "cxl.h" -/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ -#define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways) -#define CFMWS_INTERLEAVE_GRANULARITY(x) ((x)->granularity + 8) - static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; @@ -34,7 +30,8 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) static int cxl_acpi_cfmws_verify(struct device *dev, struct acpi_cedt_cfmws *cfmws) { - int expected_len; + int rc, expected_len; + unsigned int ways; if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); @@ -51,14 +48,14 @@ static int cxl_acpi_cfmws_verify(struct device *dev, return -EINVAL; } - if (CFMWS_INTERLEAVE_WAYS(cfmws) > CXL_DECODER_MAX_INTERLEAVE) { - dev_err(dev, "CFMWS Interleave Ways (%d) too large\n", - CFMWS_INTERLEAVE_WAYS(cfmws)); + rc = cxl_to_ways(cfmws->interleave_ways, &ways); + if (rc) { + dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n", + cfmws->interleave_ways); return -EINVAL; } - expected_len = struct_size((cfmws), interleave_targets, - CFMWS_INTERLEAVE_WAYS(cfmws)); + expected_len = struct_size(cfmws, interleave_targets, ways); if (cfmws->header.length < expected_len) { dev_err(dev, "CFMWS length %d less than expected %d\n", @@ -76,6 +73,8 @@ static int cxl_acpi_cfmws_verify(struct device *dev, struct cxl_cfmws_context { struct device *dev; struct cxl_port *root_port; + struct resource *cxl_res; + int id; }; static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, @@ -84,10 +83,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; + struct resource *cxl_res = ctx->cxl_res; + struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; struct acpi_cedt_cfmws *cfmws; struct cxl_decoder *cxld; - int rc, i; + unsigned int ways, i, ig; + struct resource *res; + int rc; cfmws = (struct acpi_cedt_cfmws *) header; @@ -99,19 +102,51 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return 0; } - for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++) + rc = cxl_to_ways(cfmws->interleave_ways, &ways); + if (rc) + return rc; + rc = cxl_to_granularity(cfmws->granularity, &ig); + if (rc) + return rc; + for (i = 0; i < ways; i++) target_map[i] = cfmws->interleave_targets[i]; - cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); - if (IS_ERR(cxld)) + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++); + if (!res->name) + goto err_name; + + res->start = cfmws->base_hpa; + res->end = cfmws->base_hpa + cfmws->window_size - 1; + res->flags = IORESOURCE_MEM; + + /* add to the local resource tracking to establish a sort order */ + rc = insert_resource(cxl_res, res); + if (rc) + goto err_insert; + + cxlrd = cxl_root_decoder_alloc(root_port, ways); + if (IS_ERR(cxlrd)) return 0; + cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); cxld->target_type = CXL_DECODER_EXPANDER; - cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa, - cfmws->window_size); - cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws); - cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws); + cxld->hpa_range = (struct range) { + .start = res->start, + .end = res->end, + }; + cxld->interleave_ways = ways; + /* + * Minimize the x1 granularity to advertise support for any + * valid region granularity + */ + if (ways == 1) + ig = CXL_DECODER_MIN_GRANULARITY; + cxld->interleave_granularity = ig; rc = cxl_decoder_add(cxld, target_map); if (rc) @@ -119,15 +154,22 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, else rc = cxl_decoder_autoremove(dev, cxld); if (rc) { - dev_err(dev, "Failed to add decoder for %pr\n", - &cxld->platform_res); + dev_err(dev, "Failed to add decode range [%#llx - %#llx]\n", + cxld->hpa_range.start, cxld->hpa_range.end); return 0; } - dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev), - phys_to_target_node(cxld->platform_res.start), - &cxld->platform_res); + dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", + dev_name(&cxld->dev), + phys_to_target_node(cxld->hpa_range.start), + cxld->hpa_range.start, cxld->hpa_range.end); return 0; + +err_insert: + kfree(res->name); +err_name: + kfree(res); + return -ENOMEM; } __mock struct acpi_device *to_cxl_host_bridge(struct device *host, @@ -175,8 +217,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) if (rc) return rc; - port = devm_cxl_add_port(host, match, dport->component_reg_phys, - root_port); + port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport); if (IS_ERR(port)) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); @@ -282,9 +323,127 @@ static void cxl_acpi_lock_reset_class(void *dev) device_lock_reset_class(dev); } +static void del_cxl_resource(struct resource *res) +{ + kfree(res->name); + kfree(res); +} + +static void cxl_set_public_resource(struct resource *priv, struct resource *pub) +{ + priv->desc = (unsigned long) pub; +} + +static struct resource *cxl_get_public_resource(struct resource *priv) +{ + return (struct resource *) priv->desc; +} + +static void remove_cxl_resources(void *data) +{ + struct resource *res, *next, *cxl = data; + + for (res = cxl->child; res; res = next) { + struct resource *victim = cxl_get_public_resource(res); + + next = res->sibling; + remove_resource(res); + + if (victim) { + remove_resource(victim); + kfree(victim); + } + + del_cxl_resource(res); + } +} + +/** + * add_cxl_resources() - reflect CXL fixed memory windows in iomem_resource + * @cxl_res: A standalone resource tree where each CXL window is a sibling + * + * Walk each CXL window in @cxl_res and add it to iomem_resource potentially + * expanding its boundaries to ensure that any conflicting resources become + * children. If a window is expanded it may then conflict with a another window + * entry and require the window to be truncated or trimmed. Consider this + * situation: + * + * |-- "CXL Window 0" --||----- "CXL Window 1" -----| + * |--------------- "System RAM" -------------| + * + * ...where platform firmware has established as System RAM resource across 2 + * windows, but has left some portion of window 1 for dynamic CXL region + * provisioning. In this case "Window 0" will span the entirety of the "System + * RAM" span, and "CXL Window 1" is truncated to the remaining tail past the end + * of that "System RAM" resource. + */ +static int add_cxl_resources(struct resource *cxl_res) +{ + struct resource *res, *new, *next; + + for (res = cxl_res->child; res; res = next) { + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->name = res->name; + new->start = res->start; + new->end = res->end; + new->flags = IORESOURCE_MEM; + new->desc = IORES_DESC_CXL; + + /* + * Record the public resource in the private cxl_res tree for + * later removal. + */ + cxl_set_public_resource(res, new); + + insert_resource_expand_to_fit(&iomem_resource, new); + + next = res->sibling; + while (next && resource_overlaps(new, next)) { + if (resource_contains(new, next)) { + struct resource *_next = next->sibling; + + remove_resource(next); + del_cxl_resource(next); + next = _next; + } else |
