diff options
-rw-r--r-- | drivers/pci/pcie/Kconfig | 9 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 93 |
2 files changed, 100 insertions, 2 deletions
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 228652a59f27..8999fcebde6a 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -49,6 +49,15 @@ config PCIEAER_INJECT
 	  gotten from:
 	     https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
 
+config PCIEAER_CXL
+	bool "PCI Express CXL RAS support"
+	default y
+	depends on PCIEAER && CXL_PCI
+	help
+	  Enables CXL error handling.
+
+	  If unsure, say Y.
+
 #
 # PCI Express ECRC
 #
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 6593fe3fc555..f1e8494f5bb6 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -934,14 +934,97 @@ static bool find_source_device(struct pci_dev *parent,
 	return true;
 }
 
+#ifdef CONFIG_PCIEAER_CXL
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+	/*
+	 * The capability, status, and control fields in Device 0,
+	 * Function 0 DVSEC control the CXL functionality of the
+	 * entire device (CXL 3.0, 8.1.3).
+	 */
+	if (dev->devfn != PCI_DEVFN(0, 0))
+		return false;
+
+	/*
+	 * CXL Memory Devices must have the 502h class code set (CXL
+	 * 3.0, 8.1.12.1).
+	 */
+	if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+		return false;
+
+	return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+	return (pcie_ports_native || host->native_aer);
+}
+
+static bool is_internal_error(struct aer_err_info *info)
+{
+	if (info->severity == AER_CORRECTABLE)
+		return info->status & PCI_ERR_COR_INTERNAL;
+
+	return info->status & PCI_ERR_UNC_INTN;
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+	struct aer_err_info *info = (struct aer_err_info *)data;
+	const struct pci_error_handlers *err_handler;
+
+	if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+		return 0;
+
+	/* Hold the device lock while dev->driver and its err_handler are used */
+	device_lock(&dev->dev);
+
+	err_handler = dev->driver ? dev->driver->err_handler : NULL;
+	if (!err_handler)
+		goto out;
+
+	if (info->severity == AER_CORRECTABLE) {
+		if (err_handler->cor_error_detected)
+			err_handler->cor_error_detected(dev);
+	} else if (err_handler->error_detected) {
+		if (info->severity == AER_NONFATAL)
+			err_handler->error_detected(dev, pci_channel_io_normal);
+		else if (info->severity == AER_FATAL)
+			err_handler->error_detected(dev, pci_channel_io_frozen);
+	}
+out:
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+	/*
+	 * Internal errors of an RCEC indicate an AER error in an
+	 * RCH's downstream port. Check and handle them in the CXL.mem
+	 * device driver.
+	 */
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
+	    is_internal_error(info))
+		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+#else
+static inline void cxl_rch_handle_error(struct pci_dev *dev,
+					struct aer_err_info *info) { }
+#endif
+
 /**
- * handle_error_source - handle logging error into an event log
+ * pci_aer_handle_error - handle logging error into an event log
  * @dev: pointer to pci_dev data structure of error source device
  * @info: comprehensive error information
  *
  * Invoked when an error being detected by Root Port.
  */
-static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int aer = dev->aer_cap;
 
@@ -965,6 +1048,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
 		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
 	else if (info->severity == AER_FATAL)
 		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
+}
+
+static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+{
+	cxl_rch_handle_error(dev, info);
+	pci_aer_handle_error(dev, info);
 	pci_dev_put(dev);
 }
 
|