| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 13:55:31 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 13:55:31 -0800 |
| commit | c1f0fcd85d3d66f002fc1a4986363840fcca766d (patch) | |
| tree | 414ad8ea3b38a33585de78686528d4c99e89598e /drivers/cxl/pci.c | |
| parent | 691806e977a3a64895bd891878ed726cdbd282c0 (diff) | |
| parent | f04facfb993de47e2133b2b842d72b97b1c50162 (diff) | |
Merge tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
"Compute Express Link (CXL) updates for 6.2.
While it may seem backwards, the CXL update this time around includes
some focus on CXL 1.x enabling, where the work to date had been done
with CXL 2.0 (VH topologies) in mind.
First-generation CXL can mostly be supported via BIOS, similar to DDR;
however, it became clear that there are use cases for OS-native CXL
error handling, and that some CXL 3.0 endpoint features can be deployed
on CXL 1.x hosts (Restricted CXL Host (RCH) topologies). So, this
update brings RCH topologies into the Linux CXL device model.
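That enabling shows up concretely in the drivers/cxl/pci.c diff at the bottom of this page; the detection check is small enough to quote. A device is treated as a Restricted CXL Device (RCD) when it enumerates as a Root Complex Integrated Endpoint (RCIEP), and cxl_pci_probe() records the result in cxlds->rcd:

```c
#include <linux/pci.h>

/*
 * From the diff below: assume that any RCIEP that emits the CXL
 * memory expander class code is an RCD.
 */
static bool is_cxl_restricted(struct pci_dev *pdev)
{
    return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}
```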
In support of the ongoing CXL 2.0+ enabling, two new core kernel
facilities are added.
One is the ability for the kernel to flag collisions between userspace
and kernel accesses to PCI configuration registers. This is brought on
by the PCIe Data Object Exchange (DOE) facility, a hardware mailbox
accessed over config cycles.
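The consumer side of that facility is visible in the diff below, where each DOE mailbox's capability window is claimed before use. A minimal sketch of the pattern (the wrapper function is hypothetical; pci_request_config_region_exclusive() and PCI_DOE_CAP_SIZEOF are the names used in the diff, and the request call returns non-zero on success):

```c
#include <linux/pci.h>
#include <linux/pci-doe.h>

/*
 * Hypothetical wrapper: claim a DOE capability's config-space window
 * for the kernel so that userspace config accesses to the same range
 * are flagged as collisions.
 */
static void claim_doe_config_region(struct pci_dev *pdev, u16 off)
{
    if (!pci_request_config_region_exclusive(pdev, off,
                                             PCI_DOE_CAP_SIZEOF,
                                             dev_name(&pdev->dev)))
        pci_err(pdev, "Failed to exclude DOE registers\n");
}
```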
The other is a cpu_cache_invalidate_memregion() API that maps to
wbinvd_on_all_cpus() on x86. To prevent abuse, it is disabled in guest
VMs and on architectures that do not yet support it. The CXL paths that
need it, dynamic memory region creation and security commands (erase /
unlock), are disabled when it is not present.
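Callers are expected to check for support up front and fail closed. A minimal, hypothetical sketch of that pattern (the function name and the mailbox step are illustrative; cpu_cache_has_invalidate_memregion() is the availability check added alongside the invalidate call, and IORES_DESC_PERSISTENT_MEMORY is the resource descriptor the pmem security path uses):

```c
#include <linux/memregion.h>
#include <linux/ioport.h>
#include <linux/errno.h>

/* Hypothetical unlock path modeled on the behavior described above */
static int unlock_then_invalidate(void)
{
    /* Fail closed where invalidation is unavailable (e.g. guest VMs) */
    if (!cpu_cache_has_invalidate_memregion())
        return -ENXIO;

    /* ... issue the unlock/erase mailbox command here ... */

    /* Drop stale CPU cachelines; on x86 this is wbinvd_on_all_cpus() */
    return cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}
```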
As for CXL 2.0+ enabling, this cycle the subsystem gains support for
Persistent Memory Security commands, error handling in response to PCIe
AER notifications, and the "XOR" host bridge interleave algorithm.
Summary:
- Add the cpu_cache_invalidate_memregion() API for cache flushing in
response to physical memory reconfiguration, or memory-side data
invalidation from operations like secure erase or memory-device
unlock.
- Add a facility for the kernel to warn about collisions between
kernel and userspace access to PCI configuration registers
- Add support for Restricted CXL Host (RCH) topologies (formerly CXL
1.1)
- Add handling and reporting of CXL errors reported via the PCIe AER
mechanism
- Add support for CXL Persistent Memory Security commands
- Add support for the "XOR" algorithm for CXL host bridge interleave
- Rework / simplify CXL to NVDIMM interactions
- Miscellaneous cleanups and fixes"
* tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (71 commits)
cxl/region: Fix memdev reuse check
cxl/pci: Remove endian confusion
cxl/pci: Add some type-safety to the AER trace points
cxl/security: Drop security command ioctl uapi
cxl/mbox: Add variable output size validation for internal commands
cxl/mbox: Enable cxl_mbox_send_cmd() users to validate output size
cxl/security: Fix Get Security State output payload endian handling
cxl: update names for interleave ways conversion macros
cxl: update names for interleave granularity conversion macros
cxl/acpi: Warn about an invalid CHBCR in an existing CHBS entry
tools/testing/cxl: Require cache invalidation bypass
cxl/acpi: Fail decoder add if CXIMS for HBIG is missing
cxl/region: Fix spelling mistake "memergion" -> "memregion"
cxl/regs: Fix sparse warning
cxl/acpi: Set ACPI's CXL _OSC to indicate RCD mode support
tools/testing/cxl: Add an RCH topology
cxl/port: Add RCD endpoint port enumeration
cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem
tools/testing/cxl: Add XOR Math support to cxl_test
cxl/acpi: Support CXL XOR Interleave Math (CXIMS)
...
Diffstat (limited to 'drivers/cxl/pci.c')
| -rw-r--r-- | drivers/cxl/pci.c | 228 |
1 file changed, 186 insertions(+), 42 deletions(-)
```diff
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index faeb5d9d7a7a..33083a522fd1 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -9,10 +9,13 @@
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <linux/pci-doe.h>
+#include <linux/aer.h>
 #include <linux/io.h>
 #include "cxlmem.h"
 #include "cxlpci.h"
 #include "cxl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/cxl.h>
 
 /**
  * DOC: cxl pci
@@ -276,35 +279,22 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 
 static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
 {
-    void __iomem *addr;
-    int bar = map->barno;
     struct device *dev = &pdev->dev;
-    resource_size_t offset = map->block_offset;
 
-    /* Basic sanity check that BAR is big enough */
-    if (pci_resource_len(pdev, bar) < offset) {
-        dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
-            &pdev->resource[bar], &offset);
-        return -ENXIO;
-    }
-
-    addr = pci_iomap(pdev, bar, 0);
-    if (!addr) {
+    map->base = ioremap(map->resource, map->max_size);
+    if (!map->base) {
         dev_err(dev, "failed to map registers\n");
         return -ENOMEM;
     }
 
-    dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
-        bar, &offset);
-
-    map->base = addr + map->block_offset;
+    dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
     return 0;
 }
 
 static void cxl_unmap_regblock(struct pci_dev *pdev,
                    struct cxl_register_map *map)
 {
-    pci_iounmap(pdev, map->base - map->block_offset);
+    iounmap(map->base);
     map->base = NULL;
 }
 
@@ -324,6 +314,9 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
             return -ENXIO;
         }
 
+        if (!comp_map->ras.valid)
+            dev_dbg(dev, "RAS registers not found\n");
+
         dev_dbg(dev, "Set up component registers\n");
         break;
     case CXL_REGLOC_RBI_MEMDEV:
@@ -347,27 +340,6 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
     return 0;
 }
 
-static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
-{
-    struct device *dev = cxlds->dev;
-    struct pci_dev *pdev = to_pci_dev(dev);
-
-    switch (map->reg_type) {
-    case CXL_REGLOC_RBI_COMPONENT:
-        cxl_map_component_regs(pdev, &cxlds->regs.component, map);
-        dev_dbg(dev, "Mapping component registers...\n");
-        break;
-    case CXL_REGLOC_RBI_MEMDEV:
-        cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
-        dev_dbg(dev, "Probing device registers...\n");
-        break;
-    default:
-        break;
-    }
-
-    return 0;
-}
-
 static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
               struct cxl_register_map *map)
 {
@@ -418,6 +390,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
             continue;
         }
 
+        if (!pci_request_config_region_exclusive(pdev, off,
+                                 PCI_DOE_CAP_SIZEOF,
+                                 dev_name(dev)))
+            pci_err(pdev, "Failed to exclude DOE registers\n");
+
         if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
             dev_err(dev, "xa_insert failed to insert MB @ %x\n",
                 off);
@@ -428,6 +405,20 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
     }
 }
 
+/*
+ * Assume that any RCIEP that emits the CXL memory expander class code
+ * is an RCD
+ */
+static bool is_cxl_restricted(struct pci_dev *pdev)
+{
+    return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
+}
+
+static void disable_aer(void *pdev)
+{
+    pci_disable_pcie_error_reporting(pdev);
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
     struct cxl_register_map map;
@@ -449,7 +440,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
     cxlds = cxl_dev_state_create(&pdev->dev);
     if (IS_ERR(cxlds))
         return PTR_ERR(cxlds);
+    pci_set_drvdata(pdev, cxlds);
 
+    cxlds->rcd = is_cxl_restricted(pdev);
     cxlds->serial = pci_get_dsn(pdev);
     cxlds->cxl_dvsec = pci_find_dvsec_capability(
         pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
@@ -461,7 +454,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
     if (rc)
         return rc;
 
-    rc = cxl_map_regs(cxlds, &map);
+    rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
     if (rc)
         return rc;
 
@@ -474,10 +467,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
     if (rc)
         dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
 
-    cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+    cxlds->component_reg_phys = map.resource;
 
     devm_cxl_pci_create_doe(cxlds);
 
+    rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
+                    &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
+    if (rc)
+        dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
+
     rc = cxl_pci_setup_mailbox(cxlds);
     if (rc)
         return rc;
@@ -498,8 +496,13 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
     if (IS_ERR(cxlmd))
         return PTR_ERR(cxlmd);
 
-    if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
-        rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
+    if (cxlds->regs.ras) {
+        pci_enable_pcie_error_reporting(pdev);
+        rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
+        if (rc)
+            return rc;
+    }
+    pci_save_state(pdev);
 
     return rc;
 }
@@ -511,10 +514,151 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
 
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+    void __iomem *addr;
+    u32 *log_addr;
+    int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+    addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+    log_addr = log;
+
+    for (i = 0; i < log_u32_size; i++) {
+        *log_addr = readl(addr);
+        log_addr++;
+        addr += sizeof(u32);
+    }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+    struct cxl_memdev *cxlmd = cxlds->cxlmd;
+    struct device *dev = &cxlmd->dev;
+    u32 hl[CXL_HEADERLOG_SIZE_U32];
+    void __iomem *addr;
+    u32 status;
+    u32 fe;
+
+    if (!cxlds->regs.ras)
+        return false;
+
+    addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+    status = readl(addr);
+    if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+        return false;
+
+    /* If multiple errors, log header points to first error from ctrl reg */
+    if (hweight32(status) > 1) {
+        addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+        fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+    } else {
+        fe = status;
+    }
+
+    header_log_copy(cxlds, hl);
+    trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+    writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+    return true;
+}
+
+static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+                       pci_channel_state_t state)
+{
+    struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+    struct cxl_memdev *cxlmd = cxlds->cxlmd;
+    struct device *dev = &cxlmd->dev;
+    bool ue;
+
+    /*
+     * A frozen channel indicates an impending reset which is fatal to
+     * CXL.mem operation, and will likely crash the system. On the off
+     * chance the situation is recoverable dump the status of the RAS
+     * capability registers and bounce the active state of the memdev.
+     */
+    ue = cxl_report_and_clear(cxlds);
+
+    switch (state) {
+    case pci_channel_io_normal:
+        if (ue) {
+            device_release_driver(dev);
+            return PCI_ERS_RESULT_NEED_RESET;
+        }
+        return PCI_ERS_RESULT_CAN_RECOVER;
+    case pci_channel_io_frozen:
+        dev_warn(&pdev->dev,
+             "%s: frozen state error detected, disable CXL.mem\n",
+             dev_name(dev));
+        device_release_driver(dev);
+        return PCI_ERS_RESULT_NEED_RESET;
+    case pci_channel_io_perm_failure:
+        dev_warn(&pdev->dev,
+             "failure state error detected, request disconnect\n");
+        return PCI_ERS_RESULT_DISCONNECT;
+    }
+    return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
+{
+    struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+    struct cxl_memdev *cxlmd = cxlds->cxlmd;
+    struct device *dev = &cxlmd->dev;
+
+    dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
+         dev_name(dev));
+    pci_restore_state(pdev);
+    if (device_attach(dev) <= 0)
+        return PCI_ERS_RESULT_DISCONNECT;
+    return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void cxl_error_resume(struct pci_dev *pdev)
+{
+    struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+    struct cxl_memdev *cxlmd = cxlds->cxlmd;
+    struct device *dev = &cxlmd->dev;
+
+    dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
+         dev->driver ? "successful" : "failed");
+}
+
+static void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+    struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+    struct cxl_memdev *cxlmd = cxlds->cxlmd;
+    struct device *dev = &cxlmd->dev;
+    void __iomem *addr;
+    u32 status;
+
+    if (!cxlds->regs.ras)
+        return;
+
+    addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+    status = readl(addr);
+    if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+        writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+        trace_cxl_aer_correctable_error(dev, status);
+    }
+}
+
+static const struct pci_error_handlers cxl_error_handlers = {
+    .error_detected     = cxl_error_detected,
+    .slot_reset     = cxl_slot_reset,
+    .resume         = cxl_error_resume,
+    .cor_error_detected = cxl_cor_error_detected,
+};
+
 static struct pci_driver cxl_pci_driver = {
     .name           = KBUILD_MODNAME,
     .id_table       = cxl_mem_pci_tbl,
     .probe          = cxl_pci_probe,
+    .err_handler        = &cxl_error_handlers,
     .driver = {
         .probe_type = PROBE_PREFER_ASYNCHRONOUS,
     },
```
