path: root/drivers/cxl/pci.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2022-12-12 13:55:31 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2022-12-12 13:55:31 -0800
commit    c1f0fcd85d3d66f002fc1a4986363840fcca766d (patch)
tree      414ad8ea3b38a33585de78686528d4c99e89598e /drivers/cxl/pci.c
parent    691806e977a3a64895bd891878ed726cdbd282c0 (diff)
parent    f04facfb993de47e2133b2b842d72b97b1c50162 (diff)
download  linux-c1f0fcd85d3d66f002fc1a4986363840fcca766d.tar.gz
          linux-c1f0fcd85d3d66f002fc1a4986363840fcca766d.tar.bz2
          linux-c1f0fcd85d3d66f002fc1a4986363840fcca766d.zip
Merge tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.2.

  While it may seem backwards, the CXL update this time around includes
  some focus on CXL 1.x enabling, where the work to date had been with
  CXL 2.0 (VH topologies) in mind. First-generation CXL can mostly be
  supported via BIOS, similar to DDR; however, it became clear that
  there are use cases for OS-native CXL error handling and that some
  CXL 3.0 endpoint features can be deployed on CXL 1.x hosts
  (Restricted CXL Host (RCH) topologies). So, this update brings RCH
  topologies into the Linux CXL device model.

  In support of the ongoing CXL 2.0+ enabling, two new core kernel
  facilities are added.

  One is the ability for the kernel to flag collisions between
  userspace access to PCI configuration registers and kernel accesses.
  This is brought on by the PCIe Data-Object-Exchange (DOE) facility, a
  hardware mailbox over config-cycles.

  The other is a cpu_cache_invalidate_memregion() API that maps to
  wbinvd_on_all_cpus() on x86. To prevent abuse, it is disabled in
  guest VMs and on architectures that do not support it yet. The CXL
  paths that need it, dynamic memory region creation and security
  commands (erase / unlock), are disabled when it is not present.

  As for CXL 2.0+, this cycle the subsystem gains support for
  Persistent Memory Security commands, error handling in response to
  PCIe AER notifications, and the "XOR" host bridge interleave
  algorithm.

  Summary:

   - Add the cpu_cache_invalidate_memregion() API for cache flushing in
     response to physical memory reconfiguration, or memory-side data
     invalidation from operations like secure erase or memory-device
     unlock

   - Add a facility for the kernel to warn about collisions between
     kernel and userspace access to PCI configuration registers

   - Add support for Restricted CXL Host (RCH) topologies (formerly
     CXL 1.1)

   - Add handling and reporting of CXL errors reported via the PCIe AER
     mechanism

   - Add support for CXL Persistent Memory Security commands

   - Add support for the "XOR" algorithm for CXL host bridge interleave

   - Rework / simplify CXL to NVDIMM interactions

   - Miscellaneous cleanups and fixes"

* tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (71 commits)
  cxl/region: Fix memdev reuse check
  cxl/pci: Remove endian confusion
  cxl/pci: Add some type-safety to the AER trace points
  cxl/security: Drop security command ioctl uapi
  cxl/mbox: Add variable output size validation for internal commands
  cxl/mbox: Enable cxl_mbox_send_cmd() users to validate output size
  cxl/security: Fix Get Security State output payload endian handling
  cxl: update names for interleave ways conversion macros
  cxl: update names for interleave granularity conversion macros
  cxl/acpi: Warn about an invalid CHBCR in an existing CHBS entry
  tools/testing/cxl: Require cache invalidation bypass
  cxl/acpi: Fail decoder add if CXIMS for HBIG is missing
  cxl/region: Fix spelling mistake "memergion" -> "memregion"
  cxl/regs: Fix sparse warning
  cxl/acpi: Set ACPI's CXL _OSC to indicate RCD mode support
  tools/testing/cxl: Add an RCH topology
  cxl/port: Add RCD endpoint port enumeration
  cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem
  tools/testing/cxl: Add XOR Math support to cxl_test
  cxl/acpi: Support CXL XOR Interleave Math (CXIMS)
  ...
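For reference, a minimal sketch of how a caller might use the new
invalidation API, assuming the 6.2 declarations in <linux/memregion.h>;
the helper name and the use of IORES_DESC_PERSISTENT_MEMORY here are
illustrative, not taken from this diff:

    #include <linux/memregion.h>
    #include <linux/ioport.h>

    /*
     * Illustrative sketch (not from this diff): flush CPU caches after
     * a memory-side invalidation event such as secure erase, so stale
     * cachelines cannot alias the new contents.
     */
    static int flush_after_secure_erase(void)
    {
            /* false in guest VMs and on architectures without support */
            if (!cpu_cache_has_invalidate_memregion())
                    return -ENXIO;

            /* on x86 this resolves to wbinvd_on_all_cpus() */
            return cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
    }

This is why the security (erase / unlock) and dynamic region-creation
paths are gated on the API's availability rather than failing silently.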
Diffstat (limited to 'drivers/cxl/pci.c')
-rw-r--r--  drivers/cxl/pci.c | 228
1 file changed, 186 insertions(+), 42 deletions(-)
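The config-register collision facility mentioned in the pull message
appears in the DOE hunk below as pci_request_config_region_exclusive().
A minimal sketch of that pattern, mirroring the diff and assuming the
6.2 behavior that a zero return means the reservation failed (the
helper name is hypothetical):

    #include <linux/pci.h>
    #include <linux/pci-doe.h>

    /*
     * Hypothetical helper mirroring the DOE hunk below: reserve a DOE
     * capability's config-space window so that subsequent userspace
     * config accesses to it are flagged as collisions.
     */
    static void reserve_doe_window(struct pci_dev *pdev, u16 off)
    {
            if (!pci_request_config_region_exclusive(pdev, off,
                                                     PCI_DOE_CAP_SIZEOF,
                                                     dev_name(&pdev->dev)))
                    pci_err(pdev, "failed to exclude DOE registers\n");
    }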
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index faeb5d9d7a7a..33083a522fd1 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -9,10 +9,13 @@
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
+#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/cxl.h>
/**
* DOC: cxl pci
@@ -276,35 +279,22 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
- void __iomem *addr;
- int bar = map->barno;
struct device *dev = &pdev->dev;
- resource_size_t offset = map->block_offset;
-
- /* Basic sanity check that BAR is big enough */
- if (pci_resource_len(pdev, bar) < offset) {
- dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
- &pdev->resource[bar], &offset);
- return -ENXIO;
- }
- addr = pci_iomap(pdev, bar, 0);
- if (!addr) {
+ map->base = ioremap(map->resource, map->max_size);
+ if (!map->base) {
dev_err(dev, "failed to map registers\n");
return -ENOMEM;
}
- dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
- bar, &offset);
-
- map->base = addr + map->block_offset;
+ dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
return 0;
}
static void cxl_unmap_regblock(struct pci_dev *pdev,
struct cxl_register_map *map)
{
- pci_iounmap(pdev, map->base - map->block_offset);
+ iounmap(map->base);
map->base = NULL;
}
@@ -324,6 +314,9 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return -ENXIO;
}
+ if (!comp_map->ras.valid)
+ dev_dbg(dev, "RAS registers not found\n");
+
dev_dbg(dev, "Set up component registers\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
@@ -347,27 +340,6 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return 0;
}
-static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
-{
- struct device *dev = cxlds->dev;
- struct pci_dev *pdev = to_pci_dev(dev);
-
- switch (map->reg_type) {
- case CXL_REGLOC_RBI_COMPONENT:
- cxl_map_component_regs(pdev, &cxlds->regs.component, map);
- dev_dbg(dev, "Mapping component registers...\n");
- break;
- case CXL_REGLOC_RBI_MEMDEV:
- cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
- dev_dbg(dev, "Probing device registers...\n");
- break;
- default:
- break;
- }
-
- return 0;
-}
-
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map)
{
@@ -418,6 +390,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
continue;
}
+ if (!pci_request_config_region_exclusive(pdev, off,
+ PCI_DOE_CAP_SIZEOF,
+ dev_name(dev)))
+ pci_err(pdev, "Failed to exclude DOE registers\n");
+
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
@@ -428,6 +405,20 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
}
}
+/*
+ * Assume that any RCIEP that emits the CXL memory expander class code
+ * is an RCD
+ */
+static bool is_cxl_restricted(struct pci_dev *pdev)
+{
+ return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
+}
+
+static void disable_aer(void *pdev)
+{
+ pci_disable_pcie_error_reporting(pdev);
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
@@ -449,7 +440,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
return PTR_ERR(cxlds);
+ pci_set_drvdata(pdev, cxlds);
+ cxlds->rcd = is_cxl_restricted(pdev);
cxlds->serial = pci_get_dsn(pdev);
cxlds->cxl_dvsec = pci_find_dvsec_capability(
pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
@@ -461,7 +454,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- rc = cxl_map_regs(cxlds, &map);
+ rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
if (rc)
return rc;
@@ -474,10 +467,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
- cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+ cxlds->component_reg_phys = map.resource;
devm_cxl_pci_create_doe(cxlds);
+ rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
+ &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
+ if (rc)
+ dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
+
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
@@ -498,8 +496,13 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
- rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
+ if (cxlds->regs.ras) {
+ pci_enable_pcie_error_reporting(pdev);
+ rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
+ if (rc)
+ return rc;
+ }
+ pci_save_state(pdev);
return rc;
}
@@ -511,10 +514,151 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+ void __iomem *addr;
+ u32 *log_addr;
+ int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+ addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ log_addr = log;
+
+ for (i = 0; i < log_u32_size; i++) {
+ *log_addr = readl(addr);
+ log_addr++;
+ addr += sizeof(u32);
+ }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 'true' if a reset is needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ u32 hl[CXL_HEADERLOG_SIZE_U32];
+ void __iomem *addr;
+ u32 status;
+ u32 fe;
+
+ if (!cxlds->regs.ras)
+ return false;
+
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+ return false;
+
+ /* If multiple errors, log header points to first error from ctrl reg */
+ if (hweight32(status) > 1) {
+ addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ } else {
+ fe = status;
+ }
+
+ header_log_copy(cxlds, hl);
+ trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+ return true;
+}
+
+static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ bool ue;
+
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_report_and_clear(cxlds);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ if (ue) {
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(&pdev->dev,
+ "%s: frozen state error detected, disable CXL.mem\n",
+ dev_name(dev));
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(&pdev->dev,
+ "failure state error detected, request disconnect\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+
+ dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
+ dev_name(dev));
+ pci_restore_state(pdev);
+ if (device_attach(dev) <= 0)
+ return PCI_ERS_RESULT_DISCONNECT;
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void cxl_error_resume(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+
+ dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
+ dev->driver ? "successful" : "failed");
+}
+
+static void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ void __iomem *addr;
+ u32 status;
+
+ if (!cxlds->regs.ras)
+ return;
+
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+ trace_cxl_aer_correctable_error(dev, status);
+ }
+}
+
+static const struct pci_error_handlers cxl_error_handlers = {
+ .error_detected = cxl_error_detected,
+ .slot_reset = cxl_slot_reset,
+ .resume = cxl_error_resume,
+ .cor_error_detected = cxl_cor_error_detected,
+};
+
static struct pci_driver cxl_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = cxl_mem_pci_tbl,
.probe = cxl_pci_probe,
+ .err_handler = &cxl_error_handlers,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},