summaryrefslogtreecommitdiff
path: root/drivers/pci
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 16:33:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 16:33:05 -0800
commite812928be2ee1c2744adf20ed04e0ce1e2fc5c13 (patch)
treed2685be8adaca1d097adf407b333d913d74c2582 /drivers/pci
parentcebcffe666cc82e68842e27852a019ca54072cb7 (diff)
parent63fbf275fa9f18f7020fb8acf54fa107e51d0f23 (diff)
downloadlinux-e812928be2ee1c2744adf20ed04e0ce1e2fc5c13.tar.gz
linux-e812928be2ee1c2744adf20ed04e0ce1e2fc5c13.tar.bz2
linux-e812928be2ee1c2744adf20ed04e0ce1e2fc5c13.zip
Merge tag 'cxl-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull CXL updates from Dave Jiang: - Introduce cxl_memdev_attach and pave way for soft reserved handling, type2 accelerator enabling, and LSA 2.0 enabling. All these series require the endpoint driver to settle before continuing the memdev driver probe. - Address CXL port error protocol handling and reporting. The large patch series was split into three parts. The first two parts are included here with the final part coming later. The first part consists of a series of code refactoring to PCI AER sub-system that addresses CXL and also CXL RAS code to prepare for port error handling. The second part refactors the CXL code to move management of component registers to cxl_port objects to allow all CXL AER errors to be handled through the cxl_port hierarchy. - Provide AMD Zen5 platform address translation for CXL using ACPI PRMT. This includes a conventions document to explain why this is needed and how it's implemented. - Misc CXL patches of fixes, cleanups, and updates. Including CXL address translation for unaligned MOD3 regions. [ TLA service: CXL is "Compute Express Link" ] * tag 'cxl-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (59 commits) cxl: Disable HPA/SPA translation handlers for Normalized Addressing cxl/region: Factor out code into cxl_region_setup_poison() cxl/atl: Lock decoders that need address translation cxl: Enable AMD Zen5 address translation using ACPI PRMT cxl/acpi: Prepare use of EFI runtime services cxl: Introduce callback for HPA address ranges translation cxl/region: Use region data to get the root decoder cxl/region: Add @hpa_range argument to function cxl_calc_interleave_pos() cxl/region: Separate region parameter setup and region construction cxl: Simplify cxl_root_ops allocation and handling cxl/region: Store HPA range in struct cxl_region cxl/region: Store root decoder in struct cxl_region cxl/region: Rename misleading variable name @hpa to @hpa_range Documentation/driver-api/cxl: ACPI PRM Address Translation Support and AMD Zen5 enablement cxl, doc: Moving conventions in separate files cxl, doc: Remove isonum.txt inclusion cxl/port: Unify endpoint and switch port lookup cxl/port: Move endpoint component register management to cxl_port cxl/port: Map Port RAS registers cxl/port: Move dport RAS setup to dport add time ...
Diffstat (limited to 'drivers/pci')
-rw-r--r--drivers/pci/pci.h39
-rw-r--r--drivers/pci/pcie/Kconfig9
-rw-r--r--drivers/pci/pcie/Makefile1
-rw-r--r--drivers/pci/pcie/aer.c135
-rw-r--r--drivers/pci/pcie/aer_cxl_rch.c104
-rw-r--r--drivers/pci/pcie/portdrv.h12
-rw-r--r--drivers/pci/probe.c31
7 files changed, 201 insertions, 130 deletions
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c8a0522e2e1f..13d998fbacce 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -816,31 +816,56 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev)
#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
+/**
+ * struct aer_err_info - AER Error Information
+ * @dev: Devices reporting error
+ * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log
+ * @__pad1: Padding for alignment
+ * @error_dev_num: Number of devices reporting an error
+ * @level: printk level to use in logging
+ * @id: Value from register PCI_ERR_ROOT_ERR_SRC
+ * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR
+ * @root_ratelimit_print: Flag to log or not log the root's error. 0=NotLog/1=Log
+ * @multi_error_valid: If multiple errors are reported
+ * @first_error: First reported error
+ * @__pad2: Padding for alignment
+ * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error
+ * @tlp_header_valid: Indicates if TLP field contains error information
+ * @status: COR/UNCOR error status
+ * @mask: COR/UNCOR mask
+ * @tlp: Transaction packet information
+ */
struct aer_err_info {
struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES];
int error_dev_num;
- const char *level; /* printk level */
+ const char *level;
unsigned int id:16;
- unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */
- unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */
+ unsigned int severity:2;
+ unsigned int root_ratelimit_print:1;
unsigned int __pad1:4;
unsigned int multi_error_valid:1;
unsigned int first_error:5;
- unsigned int __pad2:2;
+ unsigned int __pad2:1;
+ unsigned int is_cxl:1;
unsigned int tlp_header_valid:1;
- unsigned int status; /* COR/UNCOR Error Status */
- unsigned int mask; /* COR/UNCOR Error Mask */
- struct pcie_tlp_log tlp; /* TLP Header */
+ unsigned int status;
+ unsigned int mask;
+ struct pcie_tlp_log tlp;
};
int aer_get_device_error_info(struct aer_err_info *info, int i);
void aer_print_error(struct aer_err_info *info, int i);
+static inline const char *aer_err_bus(struct aer_err_info *info)
+{
+ return info->is_cxl ? "CXL" : "PCIe";
+}
+
int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
unsigned int tlp_len, bool flit,
struct pcie_tlp_log *log);
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 17919b99fa66..207c2deae35f 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -49,15 +49,6 @@ config PCIEAER_INJECT
gotten from:
https://github.com/intel/aer-inject.git
-config PCIEAER_CXL
- bool "PCI Express CXL RAS support"
- default y
- depends on PCIEAER && CXL_PCI
- help
- Enables CXL error handling.
-
- If unsure, say Y.
-
#
# PCI Express ECRC
#
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 173829aa02e6..b0b43a18c304 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o
obj-y += aspm.o
obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o
+obj-$(CONFIG_CXL_RAS) += aer_cxl_rch.o
obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
obj-$(CONFIG_PCIE_PME) += pme.o
obj-$(CONFIG_PCIE_DPC) += dpc.o
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 2b0ac6bfdd76..8dfbb0fe6cf6 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -867,6 +867,7 @@ void aer_print_error(struct aer_err_info *info, int i)
struct pci_dev *dev;
int layer, agent, id;
const char *level = info->level;
+ const char *bus_type = aer_err_bus(info);
if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
return;
@@ -876,22 +877,22 @@ void aer_print_error(struct aer_err_info *info, int i)
pci_dev_aer_stats_incr(dev, info);
trace_aer_event(pci_name(dev), (info->status & ~info->mask),
- info->severity, info->tlp_header_valid, &info->tlp);
+ info->severity, info->tlp_header_valid, &info->tlp, bus_type);
if (!info->ratelimit_print[i])
return;
if (!info->status) {
- pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
- aer_error_severity_string[info->severity]);
+ pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
+ bus_type, aer_error_severity_string[info->severity]);
goto out;
}
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
- aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
- aer_error_severity_string[info->severity],
+ aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
+ bus_type, aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
@@ -925,6 +926,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
+ const char *bus_type;
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
struct aer_err_info info = {
@@ -945,10 +947,13 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
info.status = status;
info.mask = mask;
+ info.is_cxl = pcie_is_cxl(dev);
+
+ bus_type = aer_err_bus(&info);
pci_dev_aer_stats_incr(dev, &info);
- trace_aer_event(pci_name(dev), (status & ~mask),
- aer_severity, tlp_header_valid, &aer->header_log);
+ trace_aer_event(pci_name(dev), (status & ~mask), aer_severity,
+ tlp_header_valid, &aer->header_log, bus_type);
if (!aer_ratelimit(dev, info.severity))
return;
@@ -1117,8 +1122,6 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
-#ifdef CONFIG_PCIEAER_CXL
-
/**
* pci_aer_unmask_internal_errors - unmask internal errors
* @dev: pointer to the pci_dev data structure
@@ -1129,7 +1132,7 @@ static bool find_source_device(struct pci_dev *parent,
* Note: AER must be enabled and supported by the device which must be
* checked in advance, e.g. with pcie_aer_is_native().
*/
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
{
int aer = dev->aer_cap;
u32 mask;
@@ -1143,117 +1146,20 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
}
-static bool is_cxl_mem_dev(struct pci_dev *dev)
-{
- /*
- * The capability, status, and control fields in Device 0,
- * Function 0 DVSEC control the CXL functionality of the
- * entire device (CXL 3.0, 8.1.3).
- */
- if (dev->devfn != PCI_DEVFN(0, 0))
- return false;
-
- /*
- * CXL Memory Devices must have the 502h class code set (CXL
- * 3.0, 8.1.12.1).
- */
- if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
- return false;
-
- return true;
-}
-
-static bool cxl_error_is_native(struct pci_dev *dev)
-{
- struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
-
- return (pcie_ports_native || host->native_aer);
-}
+/*
+ * Internal errors are too device-specific to enable generally, however for CXL
+ * their behavior is standardized for conveying CXL protocol errors.
+ */
+EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core");
-static bool is_internal_error(struct aer_err_info *info)
+#ifdef CONFIG_CXL_RAS
+bool is_aer_internal_error(struct aer_err_info *info)
{
if (info->severity == AER_CORRECTABLE)
return info->status & PCI_ERR_COR_INTERNAL;
return info->status & PCI_ERR_UNC_INTN;
}
-
-static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
-{
- struct aer_err_info *info = (struct aer_err_info *)data;
- const struct pci_error_handlers *err_handler;
-
- if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
- return 0;
-
- /* Protect dev->driver */
- device_lock(&dev->dev);
-
- err_handler = dev->driver ? dev->driver->err_handler : NULL;
- if (!err_handler)
- goto out;
-
- if (info->severity == AER_CORRECTABLE) {
- if (err_handler->cor_error_detected)
- err_handler->cor_error_detected(dev);
- } else if (err_handler->error_detected) {
- if (info->severity == AER_NONFATAL)
- err_handler->error_detected(dev, pci_channel_io_normal);
- else if (info->severity == AER_FATAL)
- err_handler->error_detected(dev, pci_channel_io_frozen);
- }
-out:
- device_unlock(&dev->dev);
- return 0;
-}
-
-static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
-{
- /*
- * Internal errors of an RCEC indicate an AER error in an
- * RCH's downstream port. Check and handle them in the CXL.mem
- * device driver.
- */
- if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
- is_internal_error(info))
- pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
-}
-
-static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
-{
- bool *handles_cxl = data;
-
- if (!*handles_cxl)
- *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
-
- /* Non-zero terminates iteration */
- return *handles_cxl;
-}
-
-static bool handles_cxl_errors(struct pci_dev *rcec)
-{
- bool handles_cxl = false;
-
- if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
- pcie_aer_is_native(rcec))
- pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
-
- return handles_cxl;
-}
-
-static void cxl_rch_enable_rcec(struct pci_dev *rcec)
-{
- if (!handles_cxl_errors(rcec))
- return;
-
- pci_aer_unmask_internal_errors(rcec);
- pci_info(rcec, "CXL: Internal errors unmasked");
-}
-
-#else
-static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
-static inline void cxl_rch_handle_error(struct pci_dev *dev,
- struct aer_err_info *info) { }
#endif
/**
@@ -1402,6 +1308,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i)
/* Must reset in this function */
info->status = 0;
info->tlp_header_valid = 0;
+ info->is_cxl = pcie_is_cxl(dev);
/* The device might not support AER */
if (!aer)
diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c
new file mode 100644
index 000000000000..e471eefec9c4
--- /dev/null
+++ b/drivers/pci/pcie/aer_cxl_rch.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 AMD Corporation. All rights reserved. */
+
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include "../pci.h"
+#include "portdrv.h"
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+ /*
+ * The capability, status, and control fields in Device 0,
+ * Function 0 DVSEC control the CXL functionality of the
+ * entire device (CXL 3.0, 8.1.3).
+ */
+ if (dev->devfn != PCI_DEVFN(0, 0))
+ return false;
+
+ /*
+ * CXL Memory Devices must have the 502h class code set (CXL
+ * 3.0, 8.1.12.1).
+ */
+ if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+ return false;
+
+ return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+ return (pcie_ports_native || host->native_aer);
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+ struct aer_err_info *info = (struct aer_err_info *)data;
+ const struct pci_error_handlers *err_handler;
+
+ if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+ return 0;
+
+ guard(device)(&dev->dev);
+
+ err_handler = dev->driver ? dev->driver->err_handler : NULL;
+ if (!err_handler)
+ return 0;
+
+ if (info->severity == AER_CORRECTABLE) {
+ if (err_handler->cor_error_detected)
+ err_handler->cor_error_detected(dev);
+ } else if (err_handler->error_detected) {
+ if (info->severity == AER_NONFATAL)
+ err_handler->error_detected(dev, pci_channel_io_normal);
+ else if (info->severity == AER_FATAL)
+ err_handler->error_detected(dev, pci_channel_io_frozen);
+ }
+ return 0;
+}
+
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+ /*
+ * Internal errors of an RCEC indicate an AER error in an
+ * RCH's downstream port. Check and handle them in the CXL.mem
+ * device driver.
+ */
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
+ is_aer_internal_error(info))
+ pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+ bool *handles_cxl = data;
+
+ if (!*handles_cxl)
+ *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+
+ /* Non-zero terminates iteration */
+ return *handles_cxl;
+}
+
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+ bool handles_cxl = false;
+
+ if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
+ pcie_aer_is_native(rcec))
+ pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
+
+ return handles_cxl;
+}
+
+void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+ if (!handles_cxl_errors(rcec))
+ return;
+
+ pci_aer_unmask_internal_errors(rcec);
+ pci_info(rcec, "CXL: Internal errors unmasked");
+}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index bd29d1cc7b8b..cc58bf2f2c84 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -123,4 +123,16 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {}
#endif /* !CONFIG_PCIE_PME */
struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
+
+struct aer_err_info;
+
+#ifdef CONFIG_CXL_RAS
+bool is_aer_internal_error(struct aer_err_info *info);
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info);
+void cxl_rch_enable_rcec(struct pci_dev *rcec);
+#else
+static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; }
+static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { }
+static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { }
+#endif /* CONFIG_CXL_RAS */
#endif /* _PORTDRV_H_ */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7711f579fa1d..2975974f35e8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1704,6 +1704,35 @@ static void set_pcie_thunderbolt(struct pci_dev *dev)
dev->is_thunderbolt = 1;
}
+static void set_pcie_cxl(struct pci_dev *dev)
+{
+ struct pci_dev *bridge;
+ u16 dvsec, cap;
+
+ if (!pci_is_pcie(dev))
+ return;
+
+ /*
+ * Update parent's CXL state because alternate protocol training
+ * may have changed
+ */
+ bridge = pci_upstream_bridge(dev);
+ if (bridge)
+ set_pcie_cxl(bridge);
+
+ dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
+ PCI_DVSEC_CXL_FLEXBUS_PORT);
+ if (!dvsec)
+ return;
+
+ pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS,
+ &cap);
+
+ dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) ||
+ FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap);
+
+}
+
static void set_pcie_untrusted(struct pci_dev *dev)
{
struct pci_dev *parent = pci_upstream_bridge(dev);
@@ -2041,6 +2070,8 @@ int pci_setup_device(struct pci_dev *dev)
/* Need to have dev->cfg_size ready */
set_pcie_thunderbolt(dev);
+ set_pcie_cxl(dev);
+
set_pcie_untrusted(dev);
if (pci_is_pcie(dev))