summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/perf/cxl.rst68
-rw-r--r--Documentation/admin-guide/perf/index.rst1
-rw-r--r--MAINTAINERS7
-rw-r--r--drivers/cxl/Kconfig13
-rw-r--r--drivers/cxl/core/Makefile1
-rw-r--r--drivers/cxl/core/core.h1
-rw-r--r--drivers/cxl/core/pmu.c68
-rw-r--r--drivers/cxl/core/port.c2
-rw-r--r--drivers/cxl/core/regs.c75
-rw-r--r--drivers/cxl/cxl.h16
-rw-r--r--drivers/cxl/cxlpci.h1
-rw-r--r--drivers/cxl/pci.c26
-rw-r--r--drivers/cxl/pmu.h28
-rw-r--r--drivers/perf/Kconfig13
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/cxl_pmu.c990
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--kernel/events/core.c1
-rw-r--r--tools/testing/cxl/Kbuild1
19 files changed, 1307 insertions, 7 deletions
diff --git a/Documentation/admin-guide/perf/cxl.rst b/Documentation/admin-guide/perf/cxl.rst
new file mode 100644
index 000000000000..9233ea0d0b10
--- /dev/null
+++ b/Documentation/admin-guide/perf/cxl.rst
@@ -0,0 +1,68 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+CXL Performance Monitoring Unit (CPMU)
+======================================
+
+The CXL rev 3.0 specification provides a definition of CXL Performance
+Monitoring Unit in section 13.2: Performance Monitoring.
+
+CXL components (e.g. Root Port, Switch Upstream Port, End Point) may have
+any number of CPMU instances. CPMU capabilities are fully discoverable from
+the devices. The specification provides event definitions for all CXL protocol
+message types and a set of additional events for things commonly counted on
+CXL devices (e.g. DRAM events).
+
+CPMU driver
+===========
+
+The CPMU driver registers a perf PMU with the name pmu_mem<X>.<Y> on the CXL bus
+representing the Yth CPMU for memX.
+
+ /sys/bus/cxl/device/pmu_mem<X>.<Y>
+
+The associated PMU is registered as
+
+ /sys/bus/event_sources/devices/cxl_pmu_mem<X>.<Y>
+
+In common with other CXL bus devices, the id has no specific meaning and the
+relationship to specific CXL device should be established via the device parent
+of the device on the CXL bus.
+
+PMU driver provides description of available events and filter options in sysfs.
+
+The "format" directory describes all formats of the config (event vendor id,
+group id and mask) config1 (threshold, filter enables) and config2 (filter
+parameters) fields of the perf_event_attr structure. The "events" directory
+describes all documented events show in perf list.
+
+The events shown in perf list are the most fine grained events with a single
+bit of the event mask set. More general events may be enable by setting
+multiple mask bits in config. For example, all Device to Host Read Requests
+may be captured on a single counter by setting the bits for all of
+
+* d2h_req_rdcurr
+* d2h_req_rdown
+* d2h_req_rdshared
+* d2h_req_rdany
+* d2h_req_rdownnodata
+
+Example of usage::
+
+ $#perf list
+ cxl_pmu_mem0.0/clock_ticks/ [Kernel PMU event]
+ cxl_pmu_mem0.0/d2h_req_rdshared/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpcur/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpdata/ [Kernel PMU event]
+ cxl_pmu_mem0.0/h2d_req_snpinv/ [Kernel PMU event]
+ -----------------------------------------------------------
+
+ $# perf stat -a -e cxl_pmu_mem0.0/clock_ticks/ -e cxl_pmu_mem0.0/d2h_req_rdshared/
+
+Vendor specific events may also be available and if so can be used via
+
+ $# perf stat -a -e cxl_pmu_mem0.0/vid=VID,gid=GID,mask=MASK/
+
+The driver does not support sampling so "perf record" is unsupported.
+It only supports system-wide counting so attaching to a task is
+unsupported.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 9de64a40adab..f60be04e4e33 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -21,3 +21,4 @@ Performance monitor support
alibaba_pmu
nvidia-pmu
meson-ddr-pmu
+ cxl
diff --git a/MAINTAINERS b/MAINTAINERS
index f794002a192e..a0676299d69a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5194,6 +5194,13 @@ S: Maintained
F: drivers/cxl/
F: include/uapi/linux/cxl_mem.h
+COMPUTE EXPRESS LINK PMU (CPMU)
+M: Jonathan Cameron <jonathan.cameron@huawei.com>
+L: linux-cxl@vger.kernel.org
+S: Maintained
+F: Documentation/admin-guide/perf/cxl.rst
+F: drivers/perf/cxl_pmu.c
+
CONEXANT ACCESSRUNNER USB DRIVER
L: accessrunner-general@lists.sourceforge.net
S: Orphan
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 80d8e35fa049..fcbf8295fde3 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -140,4 +140,17 @@ config CXL_REGION_INVALIDATION_TEST
If unsure, or if this kernel is meant for production environments,
say N.
+config CXL_PMU
+ tristate "CXL Performance Monitoring Unit"
+ default CXL_BUS
+ depends on PERF_EVENTS
+ help
+ Support performance monitoring as defined in CXL rev 3.0
+ section 13.2: Performance Monitoring. CXL components may have
+ one or more CXL Performance Monitoring Units (CPMUs).
+
+ Say 'y/m' to enable a driver that will attach to performance
+ monitoring units and provide standard perf based interfaces.
+
+ If unsure say 'm'.
endif
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index ca4ae31d8f57..1f66b5d4d935 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -12,5 +12,6 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
+cxl_core-y += pmu.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 27f0968449de..99d4a967eca6 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -6,6 +6,7 @@
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
+extern const struct device_type cxl_pmu_type;
extern struct attribute_group cxl_base_attribute_group;
diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c
new file mode 100644
index 000000000000..7684c843e5a5
--- /dev/null
+++ b/drivers/cxl/core/pmu.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Huawei. All rights reserved. */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <pmu.h>
+#include <cxl.h>
+#include "core.h"
+
+static void cxl_pmu_release(struct device *dev)
+{
+ struct cxl_pmu *pmu = to_cxl_pmu(dev);
+
+ kfree(pmu);
+}
+
+const struct device_type cxl_pmu_type = {
+ .name = "cxl_pmu",
+ .release = cxl_pmu_release,
+};
+
+static void remove_dev(void *dev)
+{
+ device_del(dev);
+}
+
+int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs,
+ int assoc_id, int index, enum cxl_pmu_type type)
+{
+ struct cxl_pmu *pmu;
+ struct device *dev;
+ int rc;
+
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ pmu->assoc_id = assoc_id;
+ pmu->index = index;
+ pmu->type = type;
+ pmu->base = regs->pmu;
+ dev = &pmu->dev;
+ device_initialize(dev);
+ device_set_pm_not_required(dev);
+ dev->parent = parent;
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_pmu_type;
+ switch (pmu->type) {
+ case CXL_PMU_MEMDEV:
+ rc = dev_set_name(dev, "pmu_mem%d.%d", assoc_id, index);
+ break;
+ }
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ return devm_add_action_or_reset(parent, remove_dev, dev);
+
+err:
+ put_device(&pmu->dev);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_pmu_add, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 56be6410169c..835064a189a1 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -56,6 +56,8 @@ static int cxl_device_id(const struct device *dev)
return CXL_DEVICE_MEMORY_EXPANDER;
if (dev->type == CXL_REGION_TYPE())
return CXL_DEVICE_REGION;
+ if (dev->type == &cxl_pmu_type)
+ return CXL_DEVICE_PMU;
return 0;
}
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 52d1dbeda527..b7d8db922fd9 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <cxlmem.h>
#include <cxlpci.h>
+#include <pmu.h>
#include "core.h"
@@ -286,20 +287,23 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
}
/**
- * cxl_find_regblock() - Locate register blocks by type
+ * cxl_find_regblock_instance() - Locate a register block by type / index
* @pdev: The CXL PCI device to enumerate.
* @type: Register Block Indicator id
* @map: Enumeration output, clobbered on error
+ * @index: Index into which particular instance of a regblock wanted in the
+ * order found in register locator DVSEC.
*
* Return: 0 if register block enumerated, negative error code otherwise
*
* A CXL DVSEC may point to one or more register blocks, search for them
- * by @type.
+ * by @type and @index.
*/
-int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
- struct cxl_register_map *map)
+int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map, int index)
{
u32 regloc_size, regblocks;
+ int instance = 0;
int regloc, i;
map->resource = CXL_RESOURCE_NONE;
@@ -323,15 +327,74 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map))
continue;
- if (map->reg_type == type)
- return 0;
+ if (map->reg_type == type) {
+ if (index == instance)
+ return 0;
+ instance++;
+ }
}
map->resource = CXL_RESOURCE_NONE;
return -ENODEV;
}
+EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, CXL);
+
+/**
+ * cxl_find_regblock() - Locate register blocks by type
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ * @map: Enumeration output, clobbered on error
+ *
+ * Return: 0 if register block enumerated, negative error code otherwise
+ *
+ * A CXL DVSEC may point to one or more register blocks, search for them
+ * by @type.
+ */
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map)
+{
+ return cxl_find_regblock_instance(pdev, type, map, 0);
+}
EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
+/**
+ * cxl_count_regblock() - Count instances of a given regblock type.
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ *
+ * Some regblocks may be repeated. Count how many instances.
+ *
+ * Return: count of matching regblocks.
+ */
+int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type)
+{
+ struct cxl_register_map map;
+ int rc, count = 0;
+
+ while (1) {
+ rc = cxl_find_regblock_instance(pdev, type, &map, count);
+ if (rc)
+ return count;
+ count++;
+ }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL);
+
+int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
+ struct cxl_register_map *map)
+{
+ struct device *dev = &pdev->dev;
+ resource_size_t phys_addr;
+
+ phys_addr = map->resource;
+ regs->pmu = devm_cxl_iomap_block(dev, phys_addr, CXL_PMU_REGMAP_SIZE);
+ if (!regs->pmu)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL);
+
resource_size_t cxl_rcrb_to_component(struct device *dev,
resource_size_t rcrb,
enum cxl_rcrb which)
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d2bae6d68365..bc917a75333f 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -217,6 +217,10 @@ struct cxl_regs {
struct_group_tagged(cxl_device_regs, device_regs,
void __iomem *status, *mbox, *memdev;
);
+
+ struct_group_tagged(cxl_pmu_regs, pmu_regs,
+ void __iomem *pmu;
+ );
};
struct cxl_reg_map {
@@ -237,6 +241,10 @@ struct cxl_device_reg_map {
struct cxl_reg_map memdev;
};
+struct cxl_pmu_reg_map {
+ struct cxl_reg_map pmu;
+};
+
/**
* struct cxl_register_map - DVSEC harvested register block mapping parameters
* @base: virtual base of the register-block-BAR + @block_offset
@@ -245,6 +253,7 @@ struct cxl_device_reg_map {
* @reg_type: see enum cxl_regloc_type
* @component_map: cxl_reg_map for component registers
* @device_map: cxl_reg_maps for device registers
+ * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
*/
struct cxl_register_map {
void __iomem *base;
@@ -254,6 +263,7 @@ struct cxl_register_map {
union {
struct cxl_component_reg_map component_map;
struct cxl_device_reg_map device_map;
+ struct cxl_pmu_reg_map pmu_map;
};
};
@@ -266,8 +276,13 @@ int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs,
unsigned long map_mask);
int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs,
const struct cxl_register_map *map);
+int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
+ struct cxl_register_map *map);
enum cxl_regloc_type;
+int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
+int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map, int index);
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map);
@@ -759,6 +774,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_REGION 6
#define CXL_DEVICE_PMEM_REGION 7
#define CXL_DEVICE_DAX_REGION 8
+#define CXL_DEVICE_PMU 9
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 7c02e55b8042..0fa4799ea316 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -67,6 +67,7 @@ enum cxl_regloc_type {
CXL_REGLOC_RBI_COMPONENT,
CXL_REGLOC_RBI_VIRT,
CXL_REGLOC_RBI_MEMDEV,
+ CXL_REGLOC_RBI_PMU,
CXL_REGLOC_RBI_TYPES
};
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 18cfb7ae17a3..270d63d11e17 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -13,6 +13,7 @@
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
+#include "pmu.h"
/**
* DOC: cxl pci
@@ -825,7 +826,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct cxl_dev_state *cxlds;
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
- int rc;
+ int i, rc, pmu_count;
/*
* Double check the anonymous union trickery in struct cxl_regs
@@ -919,6 +920,29 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
+ for (i = 0; i < pmu_count; i++) {
+ struct cxl_pmu_regs pmu_regs;
+
+ rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
+ if (rc) {
+ dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
+ break;
+ }
+
+ rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map);
+ if (rc) {
+ dev_dbg(&pdev->dev, "Could not map PMU regs\n");
+ break;
+ }
+
+ rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
+ if (rc) {
+ dev_dbg(&pdev->dev, "Could not add PMU instance\n");
+ break;
+ }
+ }
+
rc = cxl_event_config(host_bridge, mds);
if (rc)
return rc;
diff --git a/drivers/cxl/pmu.h b/drivers/cxl/pmu.h
new file mode 100644
index 000000000000..b1e9bcd9f28c
--- /dev/null
+++ b/drivers/cxl/pmu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2023 Huawei
+ * CXL Specification rev 3.0 Setion 8.2.7 (CPMU Register Interface)
+ */
+#ifndef CXL_PMU_H
+#define CXL_PMU_H
+#include <linux/device.h>
+
+enum cxl_pmu_type {
+ CXL_PMU_MEMDEV,
+};
+
+#define CXL_PMU_REGMAP_SIZE 0xe00 /* Table 8-32 CXL 3.0 specification */
+struct cxl_pmu {
+ struct device dev;
+ void __iomem *base;
+ int assoc_id;
+ int index;
+ enum cxl_pmu_type type;
+};
+
+#define to_cxl_pmu(dev) container_of(dev, struct cxl_pmu, dev)
+struct cxl_pmu_regs;
+int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs,
+ int assoc_id, int idx, enum cxl_pmu_type type);
+
+#endif
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 711f82400086..2d2dd400fed2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -213,4 +213,17 @@ source "drivers/perf/arm_cspmu/Kconfig"
source "drivers/perf/amlogic/Kconfig"
+config CXL_PMU
+ tristate "CXL Performance Monitoring Unit"
+ depends on CXL_BUS
+ help
+ Support performance monitoring as defined in CXL rev 3.0
+ section 13.2: Performance Monitoring. CXL components may have
+ one or more CXL Performance Monitoring Units (CPMUs).
+
+ Say 'y/m' to enable a driver that will attach to performance
+ monitoring units and provide standard perf based interfaces.
+
+ If unsure say 'm'.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index dabc859540ce..f1d7ce9da275 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
+obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
new file mode 100644
index 000000000000..0a8f597e695b
--- /dev/null
+++ b/drivers/perf/cxl_pmu.c
@@ -0,0 +1,990 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright(c) 2023 Huawei
+ *
+ * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
+ * called the CXL PMU, or CPMU. In order to allow a high degree of
+ * implementation flexibility the specification provides a wide range of
+ * options all of which are self describing.
+ *
+ * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/perf_event.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/bits.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+
+#include "../cxl/cxlpci.h"
+#include "../cxl/cxl.h"
+#include "../cxl/pmu.h"
+
+#define CXL_PMU_CAP_REG 0x0
+#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(4, 0)
+#define CXL_PMU_CAP_COUNTER_WIDTH_MSK GENMASK_ULL(15, 8)
+#define CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK GENMASK_ULL(24, 20)
+#define CXL_PMU_CAP_FILTERS_SUP_MSK GENMASK_ULL(39, 32)
+#define CXL_PMU_FILTER_HDM BIT(0)
+#define CXL_PMU_FILTER_CHAN_RANK_BANK BIT(1)
+#define CXL_PMU_CAP_MSI_N_MSK GENMASK_ULL(47, 44)
+#define CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN BIT_ULL(48)
+#define CXL_PMU_CAP_FREEZE BIT_ULL(49)
+#define CXL_PMU_CAP_INT BIT_ULL(50)
+#define CXL_PMU_CAP_VERSION_MSK GENMASK_ULL(63, 60)
+
+#define CXL_PMU_OVERFLOW_REG 0x10
+#define CXL_PMU_FREEZE_REG 0x18
+#define CXL_PMU_EVENT_CAP_REG(n) (0x100 + 8 * (n))
+#define CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK GENMASK_ULL(31, 0)
+#define CXL_PMU_EVENT_CAP_GROUP_ID_MSK GENMASK_ULL(47, 32)
+#define CXL_PMU_EVENT_CAP_VENDOR_ID_MSK GENMASK_ULL(63, 48)
+
+#define CXL_PMU_COUNTER_CFG_REG(n) (0x200 + 8 * (n))
+#define CXL_PMU_COUNTER_CFG_TYPE_MSK GENMASK_ULL(1, 0)
+#define CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN 0
+#define CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN 1
+#define CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE 2
+#define CXL_PMU_COUNTER_CFG_ENABLE BIT_ULL(8)
+#define CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW BIT_ULL(9)
+#define CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW BIT_ULL(10)
+#define CXL_PMU_COUNTER_CFG_EDGE BIT_ULL(11)
+#define CXL_PMU_COUNTER_CFG_INVERT BIT_ULL(12)
+#define CXL_PMU_COUNTER_CFG_THRESHOLD_MSK GENMASK_ULL(23, 16)
+#define CXL_PMU_COUNTER_CFG_EVENTS_MSK GENMASK_ULL(55, 24)
+#define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59)
+
+#define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8))
+#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0)
+
+#define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n))
+
+/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
+#define CXL_PMU_GID_CLOCK_TICKS 0x00
+#define CXL_PMU_GID_D2H_REQ 0x0010
+#define CXL_PMU_GID_D2H_RSP 0x0011
+#define CXL_PMU_GID_H2D_REQ 0x0012
+#define CXL_PMU_GID_H2D_RSP 0x0013
+#define CXL_PMU_GID_CACHE_DATA 0x0014
+#define CXL_PMU_GID_M2S_REQ 0x0020
+#define CXL_PMU_GID_M2S_RWD 0x0021
+#define CXL_PMU_GID_M2S_BIRSP 0x0022
+#define CXL_PMU_GID_S2M_BISNP 0x0023
+#define CXL_PMU_GID_S2M_NDR 0x0024
+#define CXL_PMU_GID_S2M_DRS 0x0025
+#define CXL_PMU_GID_DDR 0x8000
+
+static int cxl_pmu_cpuhp_state_num;
+
+struct cxl_pmu_ev_cap {
+ u16 vid;
+ u16 gid;
+ u32 msk;
+ union {
+ int counter_idx; /* fixed counters */
+ int event_idx; /* configurable counters */
+ };
+ struct list_head node;
+};
+
+#define CXL_PMU_MAX_COUNTERS 64
+struct cxl_pmu_info {
+ struct pmu pmu;
+ void __iomem *base;
+ struct perf_event **hw_events;
+ struct list_head event_caps_configurable;
+ struct list_head event_caps_fixed;
+ DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
+ DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
+ u16 counter_width;
+ u8 num_counters;
+ u8 num_event_capabilities;
+ int on_cpu;
+ struct hlist_node node;
+ bool filter_hdm;
+ int irq;
+};
+
+#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
+
+/*
+ * All CPMU counters are discoverable via the Event Capabilities Registers.
+ * Each Event Capability register contains a a VID / GroupID.
+ * A counter may then count any combination (by summing) of events in
+ * that group which are in the Supported Events Bitmask.
+ * However, there are some complexities to the scheme.
+ * - Fixed function counters refer to an Event Capabilities register.
+ * That event capability register is not then used for Configurable
+ * counters.
+ */
+static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
+{
+ unsigned long fixed_counter_event_cap_bm = 0;
+ void __iomem *base = info->base;
+ bool freeze_for_enable;
+ u64 val, eval;
+ int i;
+
+ val = readq(base + CXL_PMU_CAP_REG);
+ freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
+ FIELD_GET(CXL_PMU_CAP_FREEZE, val);
+ if (!freeze_for_enable) {
+ dev_err(dev, "Counters not writable while frozen\n");
+ return -ENODEV;
+ }
+
+ info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
+ info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
+ info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
+
+ info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
+ if (FIELD_GET(CXL_PMU_CAP_INT, val))
+ info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
+ else
+ info->irq = -1;
+
+ /* First handle fixed function counters; note if configurable counters found */
+ for (i = 0; i < info->num_counters; i++) {
+ struct cxl_pmu_ev_cap *pmu_ev;
+ u32 events_msk;
+ u8 group_idx;
+
+ val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
+
+ if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
+ CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
+ set_bit(i, info->conf_counter_bm);
+ }
+
+ if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
+ CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
+ continue;
+
+ /* In this case we know which fields are const */
+ group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
+ events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
+ eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
+ pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+ if (!pmu_ev)
+ return -ENOMEM;
+
+ pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+ pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+ /* For a fixed purpose counter use the events mask from the counter CFG */
+ pmu_ev->msk = events_msk;
+ pmu_ev->counter_idx = i;
+ /* This list add is never unwound as all entries deleted on remove */
+ list_add(&pmu_ev->node, &info->event_caps_fixed);
+ /*
+ * Configurable counters must not use an Event Capability registers that
+ * is in use for a Fixed counter
+ */
+ set_bit(group_idx, &fixed_counter_event_cap_bm);
+ }
+
+ if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
+ struct cxl_pmu_ev_cap *pmu_ev;
+ int j;
+ /* Walk event capabilities unused by fixed counters */
+ for_each_clear_bit(j, &fixed_counter_event_cap_bm,
+ info->num_event_capabilities) {
+ pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+ if (!pmu_ev)
+ return -ENOMEM;
+
+ eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
+ pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+ pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+ pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
+ pmu_ev->event_idx = j;
+ list_add(&pmu_ev->node, &info->event_caps_configurable);
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+ return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+#define CXL_PMU_FORMAT_ATTR(_name, _format)\
+ (&((struct dev_ext_attribute[]) { \
+ { \
+ .attr = __ATTR(_name, 0444, \
+ cxl_pmu_format_sysfs_show, NULL), \
+ .var = (void *)_format \
+ } \
+ })[0].attr.attr)
+
+enum {
+ cxl_pmu_mask_attr,
+ cxl_pmu_gid_attr,
+ cxl_pmu_vid_attr,
+ cxl_pmu_threshold_attr,
+ cxl_pmu_invert_attr,
+ cxl_pmu_edge_attr,
+ cxl_pmu_hdm_filter_en_attr,
+ cxl_pmu_hdm_attr,
+};
+
+static struct attribute *cxl_pmu_format_attr[] = {
+ [cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
+ [cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
+ [cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
+ [cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
+ [cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
+ [cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
+ [cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
+ [cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
+ NULL
+};
+
+#define CXL_PMU_ATTR_CONFIG_MASK_MSK GENMASK_ULL(31, 0)
+#define CXL_PMU_ATTR_CONFIG_GID_MSK GENMASK_ULL(47, 32)
+#define CXL_PMU_ATTR_CONFIG_VID_MSK GENMASK_ULL(63, 48)
+#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK GENMASK_ULL(15, 0)
+#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK BIT(16)
+#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK BIT(17)
+#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK BIT(18)
+#define CXL_PMU_ATTR_CONFIG2_HDM_MSK GENMASK(15, 0)
+
+static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
+ struct attribute *attr, int a)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+ /*
+ * Filter capability at the CPMU level, so hide the attributes if the particular
+ * filter is not supported.
+ */
+ if (!info->filter_hdm &&
+ (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
+ attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group cxl_pmu_format_group = {
+ .name = "format",
+ .attrs = cxl_pmu_format_attr,
+ .is_visible = cxl_pmu_format_is_visible,
+};
+
+static u32 cxl_pmu_config_get_mask(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_gid(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_vid(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
+}
+
+static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_invert(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_edge(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
+}
+
+/*
+ * CPMU specification allows for 8 filters, each with a 16 bit value...
+ * So we need to find 8x16bits to store it in.
+ * As the value used for disable is 0xffff, a separate enable switch
+ * is needed.
+ */
+
+static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
+}
+
+static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
+{
+ return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
+}
+
+static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
+}
+
+#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk) \
+ PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show, \
+ ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))
+
+/* For CXL spec defined events */
+#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \
+ CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
+
+static struct attribute *cxl_pmu_event_attrs[] = {
+ CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
+ /* CXL rev 3.0 Table 3-17 - Device to Host Requests */
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr, CXL_PMU_GID_D2H_REQ, BIT(1)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown, CXL_PMU_GID_D2H_REQ, BIT(2)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared, CXL_PMU_GID_D2H_REQ, BIT(3)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany, CXL_PMU_GID_D2H_REQ, BIT(4)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata, CXL_PMU_GID_D2H_REQ, BIT(5)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr, CXL_PMU_GID_D2H_REQ, BIT(6)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr, CXL_PMU_GID_D2H_REQ, BIT(7)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush, CXL_PMU_GID_D2H_REQ, BIT(8)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict, CXL_PMU_GID_D2H_REQ, BIT(9)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict, CXL_PMU_GID_D2H_REQ, BIT(10)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata, CXL_PMU_GID_D2H_REQ, BIT(11)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv, CXL_PMU_GID_D2H_REQ, BIT(12)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)),
+ CXL_PMU_EVENT_CXL_ATTR(d2h_req_