diff options
| -rw-r--r-- | arch/x86/events/core.c | 63 | ||||
| -rw-r--r-- | arch/x86/events/intel/bts.c | 3 | ||||
| -rw-r--r-- | arch/x86/events/intel/cstate.c | 142 | ||||
| -rw-r--r-- | arch/x86/events/intel/pt.c | 29 | ||||
| -rw-r--r-- | arch/x86/events/intel/uncore.c | 9 | ||||
| -rw-r--r-- | arch/x86/events/intel/uncore.h | 2 | ||||
| -rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 185 | ||||
| -rw-r--r-- | drivers/dma/idxd/idxd.h | 7 | ||||
| -rw-r--r-- | drivers/dma/idxd/init.c | 3 | ||||
| -rw-r--r-- | drivers/dma/idxd/perfmon.c | 98 | ||||
| -rw-r--r-- | drivers/iommu/intel/iommu.h | 2 | ||||
| -rw-r--r-- | drivers/iommu/intel/perfmon.c | 111 | ||||
| -rw-r--r-- | include/linux/cpuhotplug.h | 2 | ||||
| -rw-r--r-- | include/linux/perf_event.h | 32 | ||||
| -rw-r--r-- | include/linux/rbtree.h | 67 | ||||
| -rw-r--r-- | include/linux/uprobes.h | 48 | ||||
| -rw-r--r-- | kernel/events/core.c | 584 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 499 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 38 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 44 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 27 |
21 files changed, 1142 insertions, 853 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index be01823b1bb4..65ab6460aed4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -41,6 +41,8 @@ #include <asm/desc.h> #include <asm/ldt.h> #include <asm/unwind.h> +#include <asm/uprobes.h> +#include <asm/ibt.h> #include "perf_event.h" @@ -2816,6 +2818,46 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } +#ifdef CONFIG_UPROBES +/* + * Heuristic-based check if uprobe is installed at the function entry. + * + * Under assumption of user code being compiled with frame pointers, + * `push %rbp/%ebp` is a good indicator that we indeed are. + * + * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. + * If we get this wrong, captured stack trace might have one extra bogus + * entry, but the rest of stack trace will still be meaningful. + */ +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + struct arch_uprobe *auprobe; + + if (!current->utask) + return false; + + auprobe = current->utask->auprobe; + if (!auprobe) + return false; + + /* push %rbp/%ebp */ + if (auprobe->insn[0] == 0x55) + return true; + + /* endbr64 (64-bit only) */ + if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn)) + return true; + + return false; +} + +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -2827,6 +2869,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent unsigned long ss_base, cs_base; struct stack_frame_ia32 frame; const struct stack_frame_ia32 __user *fp; + u32 ret_addr; if (user_64bit_mode(regs)) return 0; @@ -2836,6 +2879,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); + + /* see perf_callchain_user() below for why we do this */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const u32 __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; @@ -2864,6 +2913,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { struct stack_frame frame; const struct stack_frame __user *fp; + unsigned long ret_addr; if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ @@ -2887,6 +2937,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs return; pagefault_disable(); + + /* + * If we are called from uprobe handler, and we are indeed at the very + * entry to user function (which is normally a `push %rbp` instruction, + * under assumption of application being compiled with frame pointers), + * we should read return address from *regs->sp before proceeding + * to follow frame pointers, otherwise we'll skip immediate caller + * as %rbp is not yet setup. + */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const unsigned long __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 974e917e65b2..8f78b0c900ef 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -557,9 +557,6 @@ static int bts_event_init(struct perf_event *event) * disabled, so disallow intel_bts driver for unprivileged * users on paranoid systems since it provides trace data * to the user in a zero-copy fashion. - * - * Note that the default paranoia setting permits unprivileged - * users to profile the kernel. */ if (event->attr.exclude_kernel) { ret = perf_allow_kernel(&event->attr); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9f116dfc4728..ae4ec16156bb 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -128,10 +128,6 @@ static ssize_t __cstate_##_var##_show(struct device *dev, \ static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf); - /* Model -> events mapping */ struct cstate_model { unsigned long core_events; @@ -206,22 +202,9 @@ static struct attribute_group cstate_format_attr_group = { .attrs = cstate_format_attrs, }; -static cpumask_t cstate_core_cpu_mask; -static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); - -static struct attribute *cstate_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group cpumask_attr_group = { - .attrs = cstate_cpumask_attrs, -}; - static const struct attribute_group *cstate_attr_groups[] = { &cstate_events_attr_group, &cstate_format_attr_group, - &cpumask_attr_group, NULL, }; @@ -269,8 +252,6 @@ static struct perf_msr pkg_msr[] = { [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; -static cpumask_t cstate_pkg_cpu_mask; - /* cstate_module PMU */ static struct pmu cstate_module_pmu; static bool has_cstate_module; @@ -291,28 +272,9 @@ static struct perf_msr module_msr[] = { [PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr }, }; -static cpumask_t cstate_module_cpu_mask; - -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu == &cstate_core_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); - else if (pmu == &cstate_pkg_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); - else if (pmu == &cstate_module_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask); - else - return 0; -} - static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; - int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -331,20 +293,13 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; - - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - event->hw.event_base = pkg_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(event->cpu)); } else if (event->pmu == &cstate_module_pmu) { if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX) return -EINVAL; @@ -352,16 +307,10 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(module_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = module_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(event->cpu)); } else { return -ENOENT; } - if (cpu >= nr_cpu_ids) - return -ENODEV; - - event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; return 0; @@ -412,84 +361,6 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) return 0; } -/* - * Check if exiting cpu is the designated reader. If so migrate the - * events when there is a valid target available - */ -static int cstate_cpu_exit(unsigned int cpu) -{ - unsigned int target; - - if (has_cstate_core && - cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { - - target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_core_cpu_mask); - perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); - } - } - - if (has_cstate_pkg && - cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { - - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_pkg_cpu_mask); - perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); - } - } - - if (has_cstate_module && - cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) { - - target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_module_cpu_mask); - perf_pmu_migrate_context(&cstate_module_pmu, cpu, target); - } - } - return 0; -} - -static int cstate_cpu_init(unsigned int cpu) -{ - unsigned int target; - - /* - * If this is the first online thread of that core, set it in - * the core cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(cpu)); - - if (has_cstate_core && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_core_cpu_mask); - - /* - * If this is the first online thread of that package, set it - * in the package cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(cpu)); - if (has_cstate_pkg && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); - - /* - * If this is the first online thread of that cluster, set it - * in the cluster cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(cpu)); - if (has_cstate_module && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_module_cpu_mask); - - return 0; -} - static const struct attribute_group *core_attr_update[] = { &group_cstate_core_c1, &group_cstate_core_c3, @@ -526,6 +397,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CORE, .module = THIS_MODULE, }; @@ -541,6 +413,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_PKG, .module = THIS_MODULE, }; @@ -556,6 +429,7 @@ static struct pmu cstate_module_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CLUSTER, .module = THIS_MODULE, }; @@ -810,9 +684,6 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); - if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -827,11 +698,6 @@ static int __init cstate_init(void) { int err; - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "perf/x86/cstate:starting", cstate_cpu_init, NULL); - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "perf/x86/cstate:online", NULL, cstate_cpu_exit); - if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { @@ -844,6 +710,8 @@ static int __init cstate_init(void) if (has_cstate_pkg) { if (topology_max_dies_per_package() > 1) { + /* CLX-AP is multi-die and the cstate is die-scope */ + cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE; err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index b4aa8daa4773..fd4670a6694e 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -416,7 +416,7 @@ static bool pt_event_valid(struct perf_event *event) static void pt_config_start(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = event->hw.config; + u64 ctl = event->hw.aux_config; ctl |= RTIT_CTL_TRACEEN; if (READ_ONCE(pt->vmx_on)) @@ -424,7 +424,7 @@ static void pt_config_start(struct perf_event *event) else wrmsrl(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); } /* Address ranges and their corresponding msr configuration registers */ @@ -503,7 +503,7 @@ static void pt_config(struct perf_event *event) u64 reg; /* First round: clear STATUS, in particular the PSB byte counter. */ - if (!event->hw.config) { + if (!event->hw.aux_config) { perf_event_itrace_started(event); wrmsrl(MSR_IA32_RTIT_STATUS, 0); } @@ -533,14 +533,14 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); - event->hw.config = reg; + event->hw.aux_config = reg; pt_config_start(event); } static void pt_config_stop(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = READ_ONCE(event->hw.config); + u64 ctl = READ_ONCE(event->hw.aux_config); /* may be already stopped by a PMI */ if (!(ctl & RTIT_CTL_TRACEEN)) @@ -550,7 +550,7 @@ static void pt_config_stop(struct perf_event *event) if (!READ_ONCE(pt->vmx_on)) wrmsrl(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); /* * A wrmsr that disables trace generation serializes other PT @@ -1557,7 +1557,7 @@ void intel_pt_handle_vmx(int on) /* Turn PTs back on */ if (!on && event) - wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config); + wrmsrl(MSR_IA32_RTIT_CTL, event->hw.aux_config); local_irq_restore(flags); } @@ -1606,6 +1606,7 @@ static void pt_event_stop(struct perf_event *event, int mode) * see comment in intel_pt_interrupt(). */ WRITE_ONCE(pt->handle_nmi, 0); + barrier(); pt_config_stop(event); @@ -1657,11 +1658,10 @@ static long pt_event_snapshot_aux(struct perf_event *event, return 0; /* - * Here, handle_nmi tells us if the tracing is on + * There is no PT interrupt in this mode, so stop the trace and it will + * remain stopped while the buffer is copied. */ - if (READ_ONCE(pt->handle_nmi)) - pt_config_stop(event); - + pt_config_stop(event); pt_read_offset(buf); pt_update_head(pt); @@ -1673,11 +1673,10 @@ static long pt_event_snapshot_aux(struct perf_event *event, ret = perf_output_copy_aux(&pt->handle, handle, from, to); /* - * If the tracing was on when we turned up, restart it. - * Compiler barrier not needed as we couldn't have been - * preempted by anything that touches pt->handle_nmi. + * Here, handle_nmi tells us if the tracing was on. + * If the tracing was on, restart it. */ - if (pt->handle_nmi) + if (READ_ONCE(pt->handle_nmi)) pt_config_start(event); return ret; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 64ca8625eb58..d98fac567684 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1816,6 +1816,11 @@ static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { .mmio_init = adl_uncore_mmio_init, }; +static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { + .cpu_init = lnl_uncore_cpu_init, + .mmio_init = lnl_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun icx_uncore_init __initconst = { .cpu_init = icx_uncore_cpu_init, .pci_init = icx_uncore_pci_init, @@ -1893,6 +1898,10 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 027ef292c602..79ff32e13dcc 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -611,10 +611,12 @@ void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); void adl_uncore_cpu_init(void); +void lnl_uncore_cpu_init(void); void mtl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); void adl_uncore_mmio_init(void); +void lnl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 9462fd9f3b7a..3934e1e4e3b1 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -252,6 +252,7 @@ DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29"); +DEFINE_UNCORE_FORMAT_ATTR(threshold2, threshold, "config:24-31"); /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -746,6 +747,34 @@ void mtl_uncore_cpu_init(void) uncore_msr_uncores = mtl_msr_uncores; } +static struct intel_uncore_type *lnl_msr_uncores[] = { + &mtl_uncore_cbox, + &mtl_uncore_arb, + NULL +}; + +#define LNL_UNC_MSR_GLOBAL_CTL 0x240e + +static void lnl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static struct intel_uncore_ops lnl_uncore_msr_ops = { + .init_box = lnl_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void lnl_uncore_cpu_init(void) +{ + mtl_uncore_cbox.num_boxes = 4; + mtl_uncore_cbox.ops = &lnl_uncore_msr_ops; + uncore_msr_uncores = lnl_msr_uncores; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -1475,39 +1504,45 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void) ids++; } + /* Just try to grab 00:00.0 device */ + if (!mc_dev) + mc_dev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); + return mc_dev; } #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 #define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 -static void __uncore_imc_init_box(struct intel_uncore_box *box, - unsigned int base_offset) +static void +uncore_get_box_mmio_addr(struct intel_uncore_box *box, + unsigned int base_offset, + int bar_offset, int step) { struct pci_dev *pdev = tgl_uncore_get_mc_dev(); struct intel_uncore_pmu *pmu = box->pmu; struct intel_uncore_type *type = pmu->type; resource_size_t addr; - u32 mch_bar; + u32 bar; if (!pdev) { pr_warn("perf uncore: Cannot find matched IMC device.\n"); return; } - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); - /* MCHBAR is disabled */ - if (!(mch_bar & BIT(0))) { - pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + pci_read_config_dword(pdev, bar_offset, &bar); + if (!(bar & BIT(0))) { + pr_warn("perf uncore: BAR 0x%x is disabled. Failed to map %s counters.\n", + bar_offset, type->name); pci_dev_put(pdev); return; } - mch_bar &= ~BIT(0); - addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + bar &= ~BIT(0); + addr = (resource_size_t)(bar + step * pmu->pmu_idx); #ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); - addr |= ((resource_size_t)mch_bar << 32); + pci_read_config_dword(pdev, bar_offset + 4, &bar); + addr |= ((resource_size_t)bar << 32); #endif addr += base_offset; @@ -1518,6 +1553,14 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box, pci_dev_put(pdev); } +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) +{ + uncore_get_box_mmio_addr(box, base_offset, + SNB_UNCORE_PCI_IMC_BAR_OFFSET, + TGL_UNCORE_MMIO_IMC_MEM_OFFSET); +} + static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) { __uncore_imc_init_box(box, 0); @@ -1612,14 +1655,17 @@ static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) writel(0, box->io_addr + uncore_mmio_box_ctl(box)); } +#define MMIO_UNCORE_COMMON_OPS() \ + .exit_box = uncore_mmio_exit_box, \ + .disable_box = adl_uncore_mmio_disable_box, \ + .enable_box = adl_uncore_mmio_enable_box, \ + .disable_event = intel_generic_uncore_mmio_disable_event, \ + .enable_event = intel_generic_uncore_mmio_enable_event, \ + .read_counter = uncore_mmio_read_counter, + static struct intel_uncore_ops adl_uncore_mmio_ops = { .init_box = adl_uncore_imc_init_box, - .exit_box = uncore_mmio_exit_box, - .disable_box = adl_uncore_mmio_disable_box, - .enable_box = adl_uncore_mmio_enable_box, - .disable_event = intel_generic_uncore_mmio_disable_event, - .enable_event = intel_generic_uncore_mmio_enable_event, - .read_counter = uncore_mmio_read_counter, + MMIO_UNCORE_COMMON_OPS() }; #define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 @@ -1703,3 +1749,108 @@ void adl_uncore_mmio_init(void) } /* end of Alder Lake MMIO uncore support */ + +/* Lunar Lake MMIO uncore support */ +#define LNL_UNCORE_PCI_SAFBAR_OFFSET 0x68 +#define LNL_UNCORE_MAP_SIZE 0x1000 +#define LNL_UNCORE_SNCU_BASE 0xE4B000 +#define LNL_UNCORE_SNCU_CTR 0x390 +#define LNL_UNCORE_SNCU_CTRL 0x398 +#define LNL_UNCORE_SNCU_BOX_CTL 0x380 +#define LNL_UNCORE_GLOBAL_CTL 0x700 +#define LNL_UNCORE_HBO_BASE 0xE54000 +#define LNL_UNCORE_HBO_OFFSET -4096 +#define LNL_UNCORE_HBO_CTR 0x570 +#define LNL_UNCORE_HBO_CTRL 0x550 +#define LNL_UNCORE_HBO_BOX_CTL 0x548 + +#define LNL_UNC_CTL_THRESHOLD 0xff000000 +#define LNL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + LNL_UNC_CTL_THRESHOLD) + +static struct attribute *lnl_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_threshold2.attr, + NULL +}; + +static const struct attribute_group lnl_uncore_format_group = { + .name = "format", + .attrs = lnl_uncore_formats_attr, +}; + +static void lnl_uncore_hbo_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_HBO_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + LNL_UNCORE_HBO_OFFSET); +} + +static struct intel_uncore_ops lnl_uncore_hbo_ops = { + .init_box = lnl_uncore_hbo_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_hbo = { + .name = "hbo", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_HBO_CTR, + .event_ctl = LNL_UNCORE_HBO_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_HBO_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_hbo_ops, + .format_group = &lnl_uncore_format_group, +}; + +static void lnl_uncore_sncu_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_SNCU_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + 0); + + if (box->io_addr) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + LNL_UNCORE_GLOBAL_CTL); +} + +static struct intel_uncore_ops lnl_uncore_sncu_ops = { + .init_box = lnl_uncore_sncu_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_sncu = { + .name = "sncu", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_SNCU_CTR, + .event_ctl = LNL_UNCORE_SNCU_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_SNCU_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_sncu_ops, + .format_group = &lnl_uncore_format_group, +}; + +static struct intel_uncore_type *lnl_mmio_uncores[] = { + &adl_uncore_imc, + &lnl_uncore_hbo, + &lnl_uncore_sncu, + &adl_uncore_imc_free_running, + NULL +}; + +void lnl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = lnl_mmio_uncores; +} + +/* end of Lunar Lake MMIO uncore support */ diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 868b724a3b75..d84e21daa991 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -124,7 +124,6 @@ struct idxd_pmu { struct pmu pmu; char name[IDXD_NAME_SIZE]; - int cpu; int n_counters; int counter_width; @@ -135,8 +134,6 @@ struct idxd_pmu { unsigned long supported_filters; int n_filters; - - struct hlist_node cpuhp_node; }; #define IDXD_MAX_PRIORITY 0xf @@ -803,14 +800,10 @@ void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); int perfmon_pmu_init(struct idxd_device *idxd); void perfmon_pmu_remove(struct idxd_device *idxd); void perfmon_counter_overflow(struct idxd_device *idxd); -void perfmon_init(void); -void perfmon_exit(void); #else static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; } static inline void perfmon_pmu_remove(struct idxd_device *idxd) {} static inline void perfmon_counter_overflow(struct idxd_device *idxd) {} -static inline void perfmon_init(void) {} -static inline void perfmon_exit(void) {} #endif /* debugfs */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 21f6905b554d..5725ea82c409 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -878,8 +878,6 @@ static int __init idxd_init_module(void) else support_enqcmd = true; - perfmon_init(); - err = idxd_driver_register(&idxd_drv); if (err < 0) goto err_idxd_driver_register; @@ -928,7 +926,6 @@ static void __exit idxd_exit_module(void) idxd_driver_unregister(&idxd_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); - perfmon_exit(); idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index 5e94247e1ea7..f511cf15845b 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -6,29 +6,6 @@ #include "idxd.h" #include "perfmon.h" -static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, - char *buf); - -static cpumask_t perfmon_dsa_cpu_mask; -static bool cpuhp_set_up; -static enum cpuhp_state cpuhp_slot; - -/* - * perf userspace reads this attribute to determine which cpus to open - * counters on. It's connected to perfmon_dsa_cpu_mask, which is - * maintained by the cpu hotplug handlers. - */ -static DEVICE_ATTR_RO(cpumask); - -static struct attribute *perfmon_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group cpumask_attr_group = { - .attrs = perfmon_cpumask_attrs, -}; - /* * These attributes specify the bits in the config word that the perf * syscall uses to pass the event ids and categories to perfmon. @@ -67,16 +44,9 @@ static struct attribute_group perfmon_format_attr_group = { static const struct attribute_group *perfmon_attr_groups[] = { &perfmon_format_attr_group, - &cpumask_attr_group, NULL, }; -static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); -} - static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) { return &idxd_pmu->pmu == event->pmu; @@ -217,7 +187,6 @@ static int perfmon_pmu_event_init(struct perf_event *event) return -EINVAL; event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); - event->cpu = idxd->idxd_pmu->cpu; event->hw.config = event->attr.config; if (event->group_leader != event) @@ -488,6 +457,7 @@ static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) idxd_pmu->pmu.stop = perfmon_pmu_event_stop; idxd_pmu->pmu.read = perfmon_pmu_event_update; idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE; idxd_pmu->pmu.module = THIS_MODULE; } @@ -496,47 +466,11 @@ void perfmon_pmu_remove(struct idxd_device *idxd) if (!idxd->idxd_pmu) return; - cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); perf_pmu_unregister(&idxd->idxd_pmu->pmu); |
