diff options
| -rw-r--r-- | Documentation/admin-guide/perf/cxl.rst | 68 | ||||
| -rw-r--r-- | Documentation/admin-guide/perf/index.rst | 1 | ||||
| -rw-r--r-- | MAINTAINERS | 7 | ||||
| -rw-r--r-- | drivers/cxl/Kconfig | 13 | ||||
| -rw-r--r-- | drivers/cxl/core/Makefile | 1 | ||||
| -rw-r--r-- | drivers/cxl/core/core.h | 1 | ||||
| -rw-r--r-- | drivers/cxl/core/pmu.c | 68 | ||||
| -rw-r--r-- | drivers/cxl/core/port.c | 2 | ||||
| -rw-r--r-- | drivers/cxl/core/regs.c | 75 | ||||
| -rw-r--r-- | drivers/cxl/cxl.h | 16 | ||||
| -rw-r--r-- | drivers/cxl/cxlpci.h | 1 | ||||
| -rw-r--r-- | drivers/cxl/pci.c | 26 | ||||
| -rw-r--r-- | drivers/cxl/pmu.h | 28 | ||||
| -rw-r--r-- | drivers/perf/Kconfig | 13 | ||||
| -rw-r--r-- | drivers/perf/Makefile | 1 | ||||
| -rw-r--r-- | drivers/perf/cxl_pmu.c | 990 | ||||
| -rw-r--r-- | include/linux/perf_event.h | 1 | ||||
| -rw-r--r-- | kernel/events/core.c | 1 | ||||
| -rw-r--r-- | tools/testing/cxl/Kbuild | 1 |
19 files changed, 1307 insertions, 7 deletions
diff --git a/Documentation/admin-guide/perf/cxl.rst b/Documentation/admin-guide/perf/cxl.rst new file mode 100644 index 000000000000..9233ea0d0b10 --- /dev/null +++ b/Documentation/admin-guide/perf/cxl.rst @@ -0,0 +1,68 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================== +CXL Performance Monitoring Unit (CPMU) +====================================== + +The CXL rev 3.0 specification provides a definition of CXL Performance +Monitoring Unit in section 13.2: Performance Monitoring. + +CXL components (e.g. Root Port, Switch Upstream Port, End Point) may have +any number of CPMU instances. CPMU capabilities are fully discoverable from +the devices. The specification provides event definitions for all CXL protocol +message types and a set of additional events for things commonly counted on +CXL devices (e.g. DRAM events). + +CPMU driver +=========== + +The CPMU driver registers a perf PMU with the name pmu_mem<X>.<Y> on the CXL bus +representing the Yth CPMU for memX. + + /sys/bus/cxl/device/pmu_mem<X>.<Y> + +The associated PMU is registered as + + /sys/bus/event_sources/devices/cxl_pmu_mem<X>.<Y> + +In common with other CXL bus devices, the id has no specific meaning and the +relationship to specific CXL device should be established via the device parent +of the device on the CXL bus. + +PMU driver provides description of available events and filter options in sysfs. + +The "format" directory describes all formats of the config (event vendor id, +group id and mask) config1 (threshold, filter enables) and config2 (filter +parameters) fields of the perf_event_attr structure. The "events" directory +describes all documented events show in perf list. + +The events shown in perf list are the most fine grained events with a single +bit of the event mask set. More general events may be enable by setting +multiple mask bits in config. For example, all Device to Host Read Requests +may be captured on a single counter by setting the bits for all of + +* d2h_req_rdcurr +* d2h_req_rdown +* d2h_req_rdshared +* d2h_req_rdany +* d2h_req_rdownnodata + +Example of usage:: + + $#perf list + cxl_pmu_mem0.0/clock_ticks/ [Kernel PMU event] + cxl_pmu_mem0.0/d2h_req_rdshared/ [Kernel PMU event] + cxl_pmu_mem0.0/h2d_req_snpcur/ [Kernel PMU event] + cxl_pmu_mem0.0/h2d_req_snpdata/ [Kernel PMU event] + cxl_pmu_mem0.0/h2d_req_snpinv/ [Kernel PMU event] + ----------------------------------------------------------- + + $# perf stat -a -e cxl_pmu_mem0.0/clock_ticks/ -e cxl_pmu_mem0.0/d2h_req_rdshared/ + +Vendor specific events may also be available and if so can be used via + + $# perf stat -a -e cxl_pmu_mem0.0/vid=VID,gid=GID,mask=MASK/ + +The driver does not support sampling so "perf record" is unsupported. +It only supports system-wide counting so attaching to a task is +unsupported. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 9de64a40adab..f60be04e4e33 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -21,3 +21,4 @@ Performance monitor support alibaba_pmu nvidia-pmu meson-ddr-pmu + cxl diff --git a/MAINTAINERS b/MAINTAINERS index f794002a192e..a0676299d69a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5194,6 +5194,13 @@ S: Maintained F: drivers/cxl/ F: include/uapi/linux/cxl_mem.h +COMPUTE EXPRESS LINK PMU (CPMU) +M: Jonathan Cameron <jonathan.cameron@huawei.com> +L: linux-cxl@vger.kernel.org +S: Maintained +F: Documentation/admin-guide/perf/cxl.rst +F: drivers/perf/cxl_pmu.c + CONEXANT ACCESSRUNNER USB DRIVER L: accessrunner-general@lists.sourceforge.net S: Orphan diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 80d8e35fa049..fcbf8295fde3 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -140,4 +140,17 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. +config CXL_PMU + tristate "CXL Performance Monitoring Unit" + default CXL_BUS + depends on PERF_EVENTS + help + Support performance monitoring as defined in CXL rev 3.0 + section 13.2: Performance Monitoring. CXL components may have + one or more CXL Performance Monitoring Units (CPMUs). + + Say 'y/m' to enable a driver that will attach to performance + monitoring units and provide standard perf based interfaces. + + If unsure say 'm'. endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index ca4ae31d8f57..1f66b5d4d935 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -12,5 +12,6 @@ cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o +cxl_core-y += pmu.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 27f0968449de..99d4a967eca6 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -6,6 +6,7 @@ extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; +extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c new file mode 100644 index 000000000000..7684c843e5a5 --- /dev/null +++ b/drivers/cxl/core/pmu.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Huawei. All rights reserved. */ + +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <cxlmem.h> +#include <pmu.h> +#include <cxl.h> +#include "core.h" + +static void cxl_pmu_release(struct device *dev) +{ + struct cxl_pmu *pmu = to_cxl_pmu(dev); + + kfree(pmu); +} + +const struct device_type cxl_pmu_type = { + .name = "cxl_pmu", + .release = cxl_pmu_release, +}; + +static void remove_dev(void *dev) +{ + device_del(dev); +} + +int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, + int assoc_id, int index, enum cxl_pmu_type type) +{ + struct cxl_pmu *pmu; + struct device *dev; + int rc; + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + pmu->assoc_id = assoc_id; + pmu->index = index; + pmu->type = type; + pmu->base = regs->pmu; + dev = &pmu->dev; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = parent; + dev->bus = &cxl_bus_type; + dev->type = &cxl_pmu_type; + switch (pmu->type) { + case CXL_PMU_MEMDEV: + rc = dev_set_name(dev, "pmu_mem%d.%d", assoc_id, index); + break; + } + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + return devm_add_action_or_reset(parent, remove_dev, dev); + +err: + put_device(&pmu->dev); + return rc; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_pmu_add, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 56be6410169c..835064a189a1 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -56,6 +56,8 @@ static int cxl_device_id(const struct device *dev) return CXL_DEVICE_MEMORY_EXPANDER; if (dev->type == CXL_REGION_TYPE()) return CXL_DEVICE_REGION; + if (dev->type == &cxl_pmu_type) + return CXL_DEVICE_PMU; return 0; } diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 52d1dbeda527..b7d8db922fd9 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <cxlmem.h> #include <cxlpci.h> +#include <pmu.h> #include "core.h" @@ -286,20 +287,23 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, } /** - * cxl_find_regblock() - Locate register blocks by type + * cxl_find_regblock_instance() - Locate a register block by type / index * @pdev: The CXL PCI device to enumerate. * @type: Register Block Indicator id * @map: Enumeration output, clobbered on error + * @index: Index into which particular instance of a regblock wanted in the + * order found in register locator DVSEC. * * Return: 0 if register block enumerated, negative error code otherwise * * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. + * by @type and @index. */ -int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index) { u32 regloc_size, regblocks; + int instance = 0; int regloc, i; map->resource = CXL_RESOURCE_NONE; @@ -323,15 +327,74 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map)) continue; - if (map->reg_type == type) - return 0; + if (map->reg_type == type) { + if (index == instance) + return 0; + instance++; + } } map->resource = CXL_RESOURCE_NONE; return -ENODEV; } +EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, CXL); + +/** + * cxl_find_regblock() - Locate register blocks by type + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type. + */ +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + return cxl_find_regblock_instance(pdev, type, map, 0); +} EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); +/** + * cxl_count_regblock() - Count instances of a given regblock type. + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * + * Some regblocks may be repeated. Count how many instances. + * + * Return: count of matching regblocks. + */ +int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) +{ + struct cxl_register_map map; + int rc, count = 0; + + while (1) { + rc = cxl_find_regblock_instance(pdev, type, &map, count); + if (rc) + return count; + count++; + } +} +EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL); + +int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, + struct cxl_register_map *map) +{ + struct device *dev = &pdev->dev; + resource_size_t phys_addr; + + phys_addr = map->resource; + regs->pmu = devm_cxl_iomap_block(dev, phys_addr, CXL_PMU_REGMAP_SIZE); + if (!regs->pmu) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL); + resource_size_t cxl_rcrb_to_component(struct device *dev, resource_size_t rcrb, enum cxl_rcrb which) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index d2bae6d68365..bc917a75333f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -217,6 +217,10 @@ struct cxl_regs { struct_group_tagged(cxl_device_regs, device_regs, void __iomem *status, *mbox, *memdev; ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); }; struct cxl_reg_map { @@ -237,6 +241,10 @@ struct cxl_device_reg_map { struct cxl_reg_map memdev; }; +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + /** * struct cxl_register_map - DVSEC harvested register block mapping parameters * @base: virtual base of the register-block-BAR + @block_offset @@ -245,6 +253,7 @@ struct cxl_device_reg_map { * @reg_type: see enum cxl_regloc_type * @component_map: cxl_reg_map for component registers * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units */ struct cxl_register_map { void __iomem *base; @@ -254,6 +263,7 @@ struct cxl_register_map { union { struct cxl_component_reg_map component_map; struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; }; }; @@ -266,8 +276,13 @@ int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs, unsigned long map_mask); int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs, const struct cxl_register_map *map); +int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, + struct cxl_register_map *map); enum cxl_regloc_type; +int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); @@ -759,6 +774,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_REGION 6 #define CXL_DEVICE_PMEM_REGION 7 #define CXL_DEVICE_DAX_REGION 8 +#define CXL_DEVICE_PMU 9 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 7c02e55b8042..0fa4799ea316 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -67,6 +67,7 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_COMPONENT, CXL_REGLOC_RBI_VIRT, CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, CXL_REGLOC_RBI_TYPES }; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 18cfb7ae17a3..270d63d11e17 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -13,6 +13,7 @@ #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" +#include "pmu.h" /** * DOC: cxl pci @@ -825,7 +826,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct cxl_dev_state *cxlds; struct cxl_register_map map; struct cxl_memdev *cxlmd; - int rc; + int i, rc, pmu_count; /* * Double check the anonymous union trickery in struct cxl_regs @@ -919,6 +920,29 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); + for (i = 0; i < pmu_count; i++) { + struct cxl_pmu_regs pmu_regs; + + rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i); + if (rc) { + dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); + break; + } + + rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map); + if (rc) { + dev_dbg(&pdev->dev, "Could not map PMU regs\n"); + break; + } + + rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV); + if (rc) { + dev_dbg(&pdev->dev, "Could not add PMU instance\n"); + break; + } + } + rc = cxl_event_config(host_bridge, mds); if (rc) return rc; diff --git a/drivers/cxl/pmu.h b/drivers/cxl/pmu.h new file mode 100644 index 000000000000..b1e9bcd9f28c --- /dev/null +++ b/drivers/cxl/pmu.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright(c) 2023 Huawei + * CXL Specification rev 3.0 Setion 8.2.7 (CPMU Register Interface) + */ +#ifndef CXL_PMU_H +#define CXL_PMU_H +#include <linux/device.h> + +enum cxl_pmu_type { + CXL_PMU_MEMDEV, +}; + +#define CXL_PMU_REGMAP_SIZE 0xe00 /* Table 8-32 CXL 3.0 specification */ +struct cxl_pmu { + struct device dev; + void __iomem *base; + int assoc_id; + int index; + enum cxl_pmu_type type; +}; + +#define to_cxl_pmu(dev) container_of(dev, struct cxl_pmu, dev) +struct cxl_pmu_regs; +int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, + int assoc_id, int idx, enum cxl_pmu_type type); + +#endif diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 711f82400086..2d2dd400fed2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -213,4 +213,17 @@ source "drivers/perf/arm_cspmu/Kconfig" source "drivers/perf/amlogic/Kconfig" +config CXL_PMU + tristate "CXL Performance Monitoring Unit" + depends on CXL_BUS + help + Support performance monitoring as defined in CXL rev 3.0 + section 13.2: Performance Monitoring. CXL components may have + one or more CXL Performance Monitoring Units (CPMUs). + + Say 'y/m' to enable a driver that will attach to performance + monitoring units and provide standard perf based interfaces. + + If unsure say 'm'. + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index dabc859540ce..f1d7ce9da275 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ obj-$(CONFIG_MESON_DDR_PMU) += amlogic/ +obj-$(CONFIG_CXL_PMU) += cxl_pmu.o diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c new file mode 100644 index 000000000000..0a8f597e695b --- /dev/null +++ b/drivers/perf/cxl_pmu.c @@ -0,0 +1,990 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright(c) 2023 Huawei + * + * The CXL 3.0 specification includes a standard Performance Monitoring Unit, + * called the CXL PMU, or CPMU. In order to allow a high degree of + * implementation flexibility the specification provides a wide range of + * options all of which are self describing. + * + * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface + */ + +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/perf_event.h> +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/bits.h> +#include <linux/list.h> +#include <linux/bug.h> +#include <linux/pci.h> + +#include "../cxl/cxlpci.h" +#include "../cxl/cxl.h" +#include "../cxl/pmu.h" + +#define CXL_PMU_CAP_REG 0x0 +#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(4, 0) +#define CXL_PMU_CAP_COUNTER_WIDTH_MSK GENMASK_ULL(15, 8) +#define CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK GENMASK_ULL(24, 20) +#define CXL_PMU_CAP_FILTERS_SUP_MSK GENMASK_ULL(39, 32) +#define CXL_PMU_FILTER_HDM BIT(0) +#define CXL_PMU_FILTER_CHAN_RANK_BANK BIT(1) +#define CXL_PMU_CAP_MSI_N_MSK GENMASK_ULL(47, 44) +#define CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN BIT_ULL(48) +#define CXL_PMU_CAP_FREEZE BIT_ULL(49) +#define CXL_PMU_CAP_INT BIT_ULL(50) +#define CXL_PMU_CAP_VERSION_MSK GENMASK_ULL(63, 60) + +#define CXL_PMU_OVERFLOW_REG 0x10 +#define CXL_PMU_FREEZE_REG 0x18 +#define CXL_PMU_EVENT_CAP_REG(n) (0x100 + 8 * (n)) +#define CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK GENMASK_ULL(31, 0) +#define CXL_PMU_EVENT_CAP_GROUP_ID_MSK GENMASK_ULL(47, 32) +#define CXL_PMU_EVENT_CAP_VENDOR_ID_MSK GENMASK_ULL(63, 48) + +#define CXL_PMU_COUNTER_CFG_REG(n) (0x200 + 8 * (n)) +#define CXL_PMU_COUNTER_CFG_TYPE_MSK GENMASK_ULL(1, 0) +#define CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN 0 +#define CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN 1 +#define CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE 2 +#define CXL_PMU_COUNTER_CFG_ENABLE BIT_ULL(8) +#define CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW BIT_ULL(9) +#define CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW BIT_ULL(10) +#define CXL_PMU_COUNTER_CFG_EDGE BIT_ULL(11) +#define CXL_PMU_COUNTER_CFG_INVERT BIT_ULL(12) +#define CXL_PMU_COUNTER_CFG_THRESHOLD_MSK GENMASK_ULL(23, 16) +#define CXL_PMU_COUNTER_CFG_EVENTS_MSK GENMASK_ULL(55, 24) +#define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59) + +#define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8)) +#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0) + +#define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n)) + +/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */ +#define CXL_PMU_GID_CLOCK_TICKS 0x00 +#define CXL_PMU_GID_D2H_REQ 0x0010 +#define CXL_PMU_GID_D2H_RSP 0x0011 +#define CXL_PMU_GID_H2D_REQ 0x0012 +#define CXL_PMU_GID_H2D_RSP 0x0013 +#define CXL_PMU_GID_CACHE_DATA 0x0014 +#define CXL_PMU_GID_M2S_REQ 0x0020 +#define CXL_PMU_GID_M2S_RWD 0x0021 +#define CXL_PMU_GID_M2S_BIRSP 0x0022 +#define CXL_PMU_GID_S2M_BISNP 0x0023 +#define CXL_PMU_GID_S2M_NDR 0x0024 +#define CXL_PMU_GID_S2M_DRS 0x0025 +#define CXL_PMU_GID_DDR 0x8000 + +static int cxl_pmu_cpuhp_state_num; + +struct cxl_pmu_ev_cap { + u16 vid; + u16 gid; + u32 msk; + union { + int counter_idx; /* fixed counters */ + int event_idx; /* configurable counters */ + }; + struct list_head node; +}; + +#define CXL_PMU_MAX_COUNTERS 64 +struct cxl_pmu_info { + struct pmu pmu; + void __iomem *base; + struct perf_event **hw_events; + struct list_head event_caps_configurable; + struct list_head event_caps_fixed; + DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS); + DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS); + u16 counter_width; + u8 num_counters; + u8 num_event_capabilities; + int on_cpu; + struct hlist_node node; + bool filter_hdm; + int irq; +}; + +#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu) + +/* + * All CPMU counters are discoverable via the Event Capabilities Registers. + * Each Event Capability register contains a a VID / GroupID. + * A counter may then count any combination (by summing) of events in + * that group which are in the Supported Events Bitmask. + * However, there are some complexities to the scheme. + * - Fixed function counters refer to an Event Capabilities register. + * That event capability register is not then used for Configurable + * counters. + */ +static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info) +{ + unsigned long fixed_counter_event_cap_bm = 0; + void __iomem *base = info->base; + bool freeze_for_enable; + u64 val, eval; + int i; + + val = readq(base + CXL_PMU_CAP_REG); + freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) && + FIELD_GET(CXL_PMU_CAP_FREEZE, val); + if (!freeze_for_enable) { + dev_err(dev, "Counters not writable while frozen\n"); + return -ENODEV; + } + + info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1; + info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val); + info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1; + + info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM; + if (FIELD_GET(CXL_PMU_CAP_INT, val)) + info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val); + else + info->irq = -1; + + /* First handle fixed function counters; note if configurable counters found */ + for (i = 0; i < info->num_counters; i++) { + struct cxl_pmu_ev_cap *pmu_ev; + u32 events_msk; + u8 group_idx; + + val = readq(base + CXL_PMU_COUNTER_CFG_REG(i)); + + if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) == + CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) { + set_bit(i, info->conf_counter_bm); + } + + if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) != + CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN) + continue; + + /* In this case we know which fields are const */ + group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val); + events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val); + eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx)); + pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL); + if (!pmu_ev) + return -ENOMEM; + + pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval); + pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval); + /* For a fixed purpose counter use the events mask from the counter CFG */ + pmu_ev->msk = events_msk; + pmu_ev->counter_idx = i; + /* This list add is never unwound as all entries deleted on remove */ + list_add(&pmu_ev->node, &info->event_caps_fixed); + /* + * Configurable counters must not use an Event Capability registers that + * is in use for a Fixed counter + */ + set_bit(group_idx, &fixed_counter_event_cap_bm); + } + + if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) { + struct cxl_pmu_ev_cap *pmu_ev; + int j; + /* Walk event capabilities unused by fixed counters */ + for_each_clear_bit(j, &fixed_counter_event_cap_bm, + info->num_event_capabilities) { + pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL); + if (!pmu_ev) + return -ENOMEM; + + eval = readq(base + CXL_PMU_EVENT_CAP_REG(j)); + pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval); + pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval); + pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval); + pmu_ev->event_idx = j; + list_add(&pmu_ev->node, &info->event_caps_configurable); + } + } + + return 0; +} + +static ssize_t cxl_pmu_format_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +#define CXL_PMU_FORMAT_ATTR(_name, _format)\ + (&((struct dev_ext_attribute[]) { \ + { \ + .attr = __ATTR(_name, 0444, \ + cxl_pmu_format_sysfs_show, NULL), \ + .var = (void *)_format \ + } \ + })[0].attr.attr) + +enum { + cxl_pmu_mask_attr, + cxl_pmu_gid_attr, + cxl_pmu_vid_attr, + cxl_pmu_threshold_attr, + cxl_pmu_invert_attr, + cxl_pmu_edge_attr, + cxl_pmu_hdm_filter_en_attr, + cxl_pmu_hdm_attr, +}; + +static struct attribute *cxl_pmu_format_attr[] = { + [cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"), + [cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"), + [cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"), + [cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"), + [cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"), + [cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"), + [cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"), + [cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"), + NULL +}; + +#define CXL_PMU_ATTR_CONFIG_MASK_MSK GENMASK_ULL(31, 0) +#define CXL_PMU_ATTR_CONFIG_GID_MSK GENMASK_ULL(47, 32) +#define CXL_PMU_ATTR_CONFIG_VID_MSK GENMASK_ULL(63, 48) +#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK GENMASK_ULL(15, 0) +#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK BIT(16) +#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK BIT(17) +#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK BIT(18) +#define CXL_PMU_ATTR_CONFIG2_HDM_MSK GENMASK(15, 0) + +static umode_t cxl_pmu_format_is_visible(struct kobject *kobj, + struct attribute *attr, int a) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_pmu_info *info = dev_get_drvdata(dev); + + /* + * Filter capability at the CPMU level, so hide the attributes if the particular + * filter is not supported. + */ + if (!info->filter_hdm && + (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] || + attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr])) + return 0; + + return attr->mode; +} + +static const struct attribute_group cxl_pmu_format_group = { + .name = "format", + .attrs = cxl_pmu_format_attr, + .is_visible = cxl_pmu_format_is_visible, +}; + +static u32 cxl_pmu_config_get_mask(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config); +} + +static u16 cxl_pmu_config_get_gid(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config); +} + +static u16 cxl_pmu_config_get_vid(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config); +} + +static u8 cxl_pmu_config1_get_threshold(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1); +} + +static bool cxl_pmu_config1_get_invert(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1); +} + +static bool cxl_pmu_config1_get_edge(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1); +} + +/* + * CPMU specification allows for 8 filters, each with a 16 bit value... + * So we need to find 8x16bits to store it in. + * As the value used for disable is 0xffff, a separate enable switch + * is needed. + */ + +static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1); +} + +static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2); +} + +static ssize_t cxl_pmu_event_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id); +} + +#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk) \ + PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show, \ + ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk)) + +/* For CXL spec defined events */ +#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \ + CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk) + +static struct attribute *cxl_pmu_event_attrs[] = { + CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)), + /* CXL rev 3.0 Table 3-17 - Device to Host Requests */ + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr, CXL_PMU_GID_D2H_REQ, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown, CXL_PMU_GID_D2H_REQ, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared, CXL_PMU_GID_D2H_REQ, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany, CXL_PMU_GID_D2H_REQ, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata, CXL_PMU_GID_D2H_REQ, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr, CXL_PMU_GID_D2H_REQ, BIT(6)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr, CXL_PMU_GID_D2H_REQ, BIT(7)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush, CXL_PMU_GID_D2H_REQ, BIT(8)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict, CXL_PMU_GID_D2H_REQ, BIT(9)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict, CXL_PMU_GID_D2H_REQ, BIT(10)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata, CXL_PMU_GID_D2H_REQ, BIT(11)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv, CXL_PMU_GID_D2H_REQ, BIT(12)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_ |
