From 03c4c7f88709fac0e20b6a48357c73d6fc50e544 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 24 Nov 2022 23:05:50 -0500 Subject: perf/x86/lbr: Simplify the exposure check for the LBR_INFO registers The x86_pmu.lbr_info is 0 unless explicitly initialized, so there's no point checking x86_pmu.intel_cap.lbr_format. Signed-off-by: Like Xu Signed-off-by: Yang Weijiang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/20221125040604.5051-2-weijiang.yang@intel.com --- arch/x86/events/intel/lbr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 1f21f576ca77..c3b0d15a9841 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1606,12 +1606,10 @@ clear_arch_lbr: */ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { - int lbr_fmt = x86_pmu.intel_cap.lbr_format; - lbr->nr = x86_pmu.lbr_nr; lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; - lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; + lbr->info = x86_pmu.lbr_info; } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); -- cgit v1.2.3 From 38aaf921e92dc5cf87e4a6c5a4b24dd99155cace Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:42 -0800 Subject: perf/x86: Add Meteor Lake support From PMU's perspective, Meteor Lake is similar to Alder Lake. Both are hybrid platforms, with e-core and p-core. The key differences include: - The e-core supports 2 PDIST GP counters (GP0 & GP1) - New MSRs for the Module Snoop Response Events on the e-core. - New Data Source fields are introduced for the e-core. - There are 8 GP counters for the e-core. - The load latency AUX event is not required for the p-core anymore. - Retire Latency (Support in a separate patch) for both cores. Since most of the code in the intel_pmu_init() should be the same as Alder Lake, to avoid code duplication, share the path with Alder Lake. Add new specific functions of extra_regs, and get_event_constraints to support the OCR events, Module Snoop Response Events and 2 PDIST GP counters on e-core. Add new MTL specific mem_attrs which drops the load latency AUX event. The Data Source field is extended to 4:0, which can contains max 32 sources. The Retire Latency is implemented with a separate patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 141 ++++++++++++++++++++++++++++++++++++++----- arch/x86/events/intel/ds.c | 70 +++++++++++++++++---- arch/x86/events/perf_event.h | 21 ++++--- 3 files changed, 200 insertions(+), 32 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dfd2c124cdf8..d2030be04e4a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0), + INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0), + INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1), + EVENT_EXTRA_END +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -4182,6 +4192,12 @@ static int hsw_hw_config(struct perf_event *event) static struct event_constraint counter0_constraint = INTEL_ALL_EVENT_CONSTRAINT(0, 0x1); +static struct event_constraint counter1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x2); + +static struct event_constraint counter0_1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x3); + static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); @@ -4191,6 +4207,9 @@ static struct event_constraint fixed0_constraint = static struct event_constraint fixed0_counter0_constraint = INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); +static struct event_constraint fixed0_counter0_1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -4322,6 +4341,54 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return &emptyconstraint; } +static struct event_constraint * +cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_get_event_constraints(cpuc, idx, event); + + /* + * The :ppp indicates the Precise Distribution (PDist) facility, which + * is only supported on the GP counter 0 & 1 and Fixed counter 0. + * If a :ppp event which is not available on the above eligible counters, + * error out. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_1_constraint; + + switch (c->idxmsk64 & 0x3ull) { + case 0x1: + return &counter0_constraint; + case 0x2: + return &counter1_constraint; + case 0x3: + return &counter0_1_constraint; + } + return &emptyconstraint; + } + + return c; +} + +static struct event_constraint * +mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return spr_get_event_constraints(cpuc, idx, event); + if (pmu->cpu_type == hybrid_small) + return cmt_get_event_constraints(cpuc, idx, event); + + WARN_ON(1); + return &emptyconstraint; +} + static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); @@ -5463,6 +5530,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = { NULL, }; +static struct attribute *mtl_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_adl), + EVENT_PTR(mem_st_adl), + NULL +}; + EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); @@ -5490,20 +5563,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); FORMAT_ATTR_HYBRID(frontend, hybrid_big); +#define ADL_HYBRID_RTM_FORMAT_ATTR \ + FORMAT_HYBRID_PTR(in_tx), \ + FORMAT_HYBRID_PTR(in_tx_cp) + +#define ADL_HYBRID_FORMAT_ATTR \ + FORMAT_HYBRID_PTR(offcore_rsp), \ + FORMAT_HYBRID_PTR(ldlat), \ + FORMAT_HYBRID_PTR(frontend) + static struct attribute *adl_hybrid_extra_attr_rtm[] = { - FORMAT_HYBRID_PTR(in_tx), - FORMAT_HYBRID_PTR(in_tx_cp), - FORMAT_HYBRID_PTR(offcore_rsp), - FORMAT_HYBRID_PTR(ldlat), - FORMAT_HYBRID_PTR(frontend), - NULL, + ADL_HYBRID_RTM_FORMAT_ATTR, + ADL_HYBRID_FORMAT_ATTR, + NULL }; static struct attribute *adl_hybrid_extra_attr[] = { - FORMAT_HYBRID_PTR(offcore_rsp), - FORMAT_HYBRID_PTR(ldlat), - FORMAT_HYBRID_PTR(frontend), - NULL, + ADL_HYBRID_FORMAT_ATTR, + NULL +}; + +PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63"); +FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); + +static struct attribute *mtl_hybrid_extra_attr_rtm[] = { + ADL_HYBRID_RTM_FORMAT_ATTR, + ADL_HYBRID_FORMAT_ATTR, + FORMAT_HYBRID_PTR(snoop_rsp), + NULL +}; + +static struct attribute *mtl_hybrid_extra_attr[] = { + ADL_HYBRID_FORMAT_ATTR, + FORMAT_HYBRID_PTR(snoop_rsp), + NULL }; static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) @@ -5725,6 +5818,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } +static __always_inline bool is_mtl(u8 x86_model) +{ + return (x86_model == INTEL_FAM6_METEORLAKE) || + (x86_model == INTEL_FAM6_METEORLAKE_L); +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6381,6 +6480,8 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * @@ -6400,9 +6501,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_adl(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.num_topdown_events = 8; static_call_update(intel_pmu_update_topdown_event, @@ -6489,8 +6588,22 @@ __init int intel_pmu_init(void) pmu->event_constraints = intel_slm_event_constraints; pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; + if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.pebs_latency_data = mtl_latency_data_small; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + mem_attr = mtl_hybrid_mem_attrs; + intel_pmu_pebs_data_source_mtl(); + x86_pmu.get_event_constraints = mtl_get_event_constraints; + pmu->extra_regs = intel_cmt_extra_regs; + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; + } else { + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + } break; default: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 88e58b6ee73c..e991c54916d1 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -53,6 +53,13 @@ union intel_x86_pebs_dse { unsigned int st_lat_locked:1; unsigned int ld_reserved3:26; }; + struct { + unsigned int mtl_dse:5; + unsigned int mtl_locked:1; + unsigned int mtl_stlb_miss:1; + unsigned int mtl_fwd_blk:1; + unsigned int ld_reserved4:24; + }; }; @@ -135,6 +142,29 @@ void __init intel_pmu_pebs_data_source_adl(void) __intel_pmu_pebs_data_source_grt(data_source); } +static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source) +{ + data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_mtl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + intel_pmu_pebs_data_source_cmt(data_source); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -219,24 +249,19 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) } /* Retrieve the latency data for e-core of ADL */ -u64 adl_latency_data_small(struct perf_event *event, u64 status) +static u64 __adl_latency_data_small(struct perf_event *event, u64 status, + u8 dse, bool tlb, bool lock, bool blk) { - union intel_x86_pebs_dse dse; u64 val; WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); - dse.val = status; - - val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + dse &= PERF_PEBS_DATA_SOURCE_MASK; + val = hybrid_var(event->pmu, pebs_data_source)[dse]; - /* - * For the atom core on ADL, - * bit 4: lock, bit 5: TLB access. - */ - pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + pebs_set_tlb_lock(&val, tlb, lock); - if (dse.ld_data_blk) + if (blk) val |= P(BLK, DATA); else val |= P(BLK, NA); @@ -244,6 +269,29 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status) return val; } +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + + dse.val = status; + + return __adl_latency_data_small(event, status, dse.ld_dse, + dse.ld_locked, dse.ld_stlb_miss, + dse.ld_data_blk); +} + +/* Retrieve the latency data for e-core of MTL */ +u64 mtl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + + dse.val = status; + + return __adl_latency_data_small(event, status, dse.mtl_dse, + dse.mtl_stlb_miss, dse.mtl_locked, + dse.mtl_fwd_blk); +} + static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0e849f28a5c1..1ac9d9e3c55c 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -35,15 +35,17 @@ * per-core reg tables. */ enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ + EXTRA_REG_NONE = -1, /* not used */ - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - EXTRA_REG_LBR = 2, /* lbr_select */ - EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ - EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */ + EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */ - EXTRA_REG_MAX /* number of entries needed */ + EXTRA_REG_MAX /* number of entries needed */ }; struct event_constraint { @@ -647,6 +649,7 @@ enum { }; #define PERF_PEBS_DATA_SOURCE_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) struct x86_hybrid_pmu { struct pmu pmu; @@ -1486,6 +1489,8 @@ int intel_pmu_drain_bts_buffer(void); u64 adl_latency_data_small(struct perf_event *event, u64 status); +u64 mtl_latency_data_small(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -1597,6 +1602,8 @@ void intel_pmu_pebs_data_source_adl(void); void intel_pmu_pebs_data_source_grt(void); +void intel_pmu_pebs_data_source_mtl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); -- cgit v1.2.3 From c87a31093c707eb0b8c48aab89922c1d0bf4bd90 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:43 -0800 Subject: perf/x86: Support Retire Latency Retire Latency reports the number of elapsed core clocks between the retirement of the instruction indicated by the Instruction Pointer field of the PEBS record and the retirement of the prior instruction. It's enumerated by the IA32_PERF_CAPABILITIES.PEBS_TIMING_INFO[17]. Add flag PMU_FL_RETIRE_LATENCY to indicate the availability of the feature. The Retire Latency is not supported by the fixed counter 0 on p-core of MTL. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-3-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 32 +++++++++++++++++++++++++++++++- arch/x86/events/intel/ds.c | 4 ++++ arch/x86/events/perf_event.h | 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d2030be04e4a..a5678ab6d3e3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4210,6 +4210,9 @@ static struct event_constraint fixed0_counter0_constraint = static struct event_constraint fixed0_counter0_1_constraint = INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL); +static struct event_constraint counters_1_7_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -4374,6 +4377,30 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = spr_get_event_constraints(cpuc, idx, event); + + /* The Retire Latency is not supported by the fixed counter 0. */ + if (event->attr.precise_ip && + (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) && + constraint_match(&fixed0_constraint, event->hw.config)) { + /* + * The Instruction PDIR is only available + * on the fixed counter 0. Error out for this case. + */ + if (event->attr.precise_ip == 3) + return &emptyconstraint; + return &counters_1_7_constraint; + } + + return c; +} + static struct event_constraint * mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -4381,7 +4408,7 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return rwc_get_event_constraints(cpuc, idx, event); if (pmu->cpu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); @@ -6718,6 +6745,9 @@ __init int intel_pmu_init(void) if (is_hybrid()) intel_pmu_check_hybrid_pmus((u64)fixed_mask); + if (x86_pmu.intel_cap.pebs_timing_info) + x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; + intel_aux_output_init(); return 0; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e991c54916d1..6ec326b47e2e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1753,6 +1753,7 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #define PEBS_LATENCY_MASK 0xffff #define PEBS_CACHE_LATENCY_OFFSET 32 +#define PEBS_RETIRE_LATENCY_OFFSET 32 /* * With adaptive PEBS the layout depends on what fields are configured. @@ -1804,6 +1805,9 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, set_linear_ip(regs, basic->ip); regs->flags = PERF_EFLAGS_EXACT; + if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) + data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; + /* * The record for MEMINFO is in front of GP * But PERF_SAMPLE_TRANSACTION needs gprs->ax. diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 1ac9d9e3c55c..d6de4487348c 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -608,6 +608,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 pebs_timing_info:1; u64 anythread_deprecated:1; }; u64 capabilities; @@ -1003,6 +1004,7 @@ do { \ #define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ +#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -- cgit v1.2.3 From eb467aaac21e133a3d01c48c0a6bf43756b06e78 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:45 -0800 Subject: perf/x86/intel: Support Architectural PerfMon Extension leaf The new CPUID leaf 0x23 reports the "true view" of PMU resources. The sub-leaf 1 reports the available general-purpose counters and fixed counters. Update the number of counters and fixed counters when the sub-leaf is detected. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-5-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5678ab6d3e3..29d2d0411caf 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4588,6 +4588,25 @@ static void flip_smm_bit(void *data) } } +static void intel_pmu_check_num_counters(int *num_counters, + int *num_counters_fixed, + u64 *intel_ctrl, u64 fixed_mask); + +static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +{ + unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); + unsigned int eax, ebx, ecx, edx; + + if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, + &eax, &ebx, &ecx, &edx); + pmu->num_counters = fls(eax); + pmu->num_counters_fixed = fls(ebx); + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, ebx); + } +} + static bool init_hybrid_pmu(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); @@ -4613,6 +4632,9 @@ static bool init_hybrid_pmu(int cpu) if (!cpumask_empty(&pmu->supported_cpus)) goto end; + if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; -- cgit v1.2.3 From eaef048c281bf7eaecdfde96d9b305b8644c9f66 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:46 -0800 Subject: perf/x86/cstate: Add Meteor Lake support Meteor Lake is Intel's successor to Raptor lake. From the perspective of Intel cstate residency counters, there is nothing changed compared with Raptor lake. Share adl_cstates with Raptor lake. Update the comments for Meteor Lake. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-6-kan.liang@linux.intel.com --- arch/x86/events/intel/cstate.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index a2834bc93149..3019fb1926e3 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,6 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,50 +52,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL + * ICL,TGL,RKL,ADL,RPL,MTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR + * RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL + * TNT,RKL,ADL,RPL,MTL * Scope: Package (physical package) * */ @@ -686,6 +687,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -- cgit v1.2.3 From b0bd3336d87f3403094fbadc7803c1d5bf3df4f7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:47 -0800 Subject: perf/x86/msr: Add Meteor Lake support Meteor Lake is Intel's successor to Raptor lake. PPERF and SMI_COUNT MSRs are also supported. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230104201349.1451191-7-kan.liang@linux.intel.com --- arch/x86/events/msr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/events') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ecced3a52668..074150d28fa8 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -107,6 +107,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- cgit v1.2.3 From 31046500c1864b8ab25d1b9846ad10aa3f7b1821 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:53 -0800 Subject: perf/core: Add perf_sample_save_callchain() helper When we save the callchain to the perf sample data, we need to update the sample flags and the dynamic size. To ensure this is done consistently, add the perf_sample_save_callchain() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-3-namhyung@kernel.org --- arch/x86/events/amd/ibs.c | 6 ++---- arch/x86/events/intel/ds.c | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index da3f5ebac4e1..417c80bd3274 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1122,10 +1122,8 @@ fail: * recorded as part of interrupt regs. Thus we need to use rip from * interrupt regs while unwinding call stack. */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { - data.callchain = perf_callchain(event, iregs); - data.sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); out: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6ec326b47e2e..158cf845fc80 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1617,10 +1617,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - data->callchain = perf_callchain(event, iregs); - data->sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(data, event, iregs); /* * We use the interrupt regs as a base because the PEBS record does not @@ -1795,10 +1793,8 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - data->callchain = perf_callchain(event, iregs); - data->sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(data, event, iregs); *regs = *iregs; /* The ip in basic is EventingIP */ -- cgit v1.2.3 From 0a9081cf0a11770f6b0affd377db8caa3ec4c793 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:54 -0800 Subject: perf/core: Add perf_sample_save_raw_data() helper When we save the raw_data to the perf sample data, we need to update the sample flags and the dynamic size. To make sure this is done consistently, add the perf_sample_save_raw_data() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-4-namhyung@kernel.org --- arch/x86/events/amd/ibs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 417c80bd3274..64582954b5f6 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1110,8 +1110,7 @@ fail: .data = ibs_data.data, }, }; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } if (perf_ibs == &perf_ibs_op) -- cgit v1.2.3 From eb55b455ef9c7123bdfa7e8a7f1ebeaa8034eb83 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:55 -0800 Subject: perf/core: Add perf_sample_save_brstack() helper When we saves the branch stack to the perf sample data, we needs to update the sample flags and the dynamic size. To make sure this is done consistently, add the perf_sample_save_brstack() helper and convert all call sites. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Athira Rajeev Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-5-namhyung@kernel.org --- arch/x86/events/amd/core.c | 6 ++---- arch/x86/events/intel/core.c | 6 ++---- arch/x86/events/intel/ds.c | 9 +++------ 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 4386b10682ce..8c45b198b62f 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (has_branch_stack(event)) { - data.br_stack = &cpuc->lbr_stack; - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 29d2d0411caf..14f0a746257d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3036,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) { - data.br_stack = &cpuc->lbr_stack; - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 158cf845fc80..07c8a2cdc3ee 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1720,10 +1720,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, data->sample_flags |= PERF_SAMPLE_TIME; } - if (has_branch_stack(event)) { - data->br_stack = &cpuc->lbr_stack; - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(data, event, &cpuc->lbr_stack); } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1883,8 +1881,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (has_branch_stack(event)) { intel_pmu_store_pebs_lbrs(lbr); - data->br_stack = &cpuc->lbr_stack; - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + perf_sample_save_brstack(data, event, &cpuc->lbr_stack); } } -- cgit v1.2.3 From f6e707156e1d5d150f288823987bee1ba0104c4c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Jan 2023 22:05:58 -0800 Subject: perf/core: Introduce perf_prepare_header() Factor out perf_prepare_header() so that it can call perf_prepare_sample() without a header if not needed. Also it checks the filtered_sample_type to avoid duplicate work when perf_prepare_sample() is called twice (or more). Suggested-by: Peter Zijlstr Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Jiri Olsa Acked-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230118060559.615653-8-namhyung@kernel.org --- arch/x86/events/intel/ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 07c8a2cdc3ee..183efa914b99 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -807,7 +807,8 @@ int intel_pmu_drain_bts_buffer(void) * the sample. */ rcu_read_lock(); - perf_prepare_sample(&header, &data, event, ®s); + perf_prepare_sample(&data, event, ®s); + perf_prepare_header(&header, &data, event, ®s); if (perf_output_begin(&handle, &data, event, header.size * (top - base - skip))) -- cgit v1.2.3 From dbf061b26221fa1a99e6489dd61f5b4ee97a24e8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jan 2023 12:01:01 -0800 Subject: perf/x86/uncore: Factor out uncore_device_to_die() The same code is used to retrieve the logical die ID with a given PCI device in both the discovery code and the code that supports a system with > 8 nodes. Factor out uncore_device_to_die() to replace the duplicate code. No functional change. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Petlan Link: https://lore.kernel.org/r/20230112200105.733466-2-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 15 +++++++++++++++ arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_discovery.c | 16 ++-------------- arch/x86/events/intel/uncore_snbep.c | 12 +----------- 4 files changed, 19 insertions(+), 25 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 459b1aafd4d4..eeaa92f06b44 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -65,6 +65,21 @@ int uncore_die_to_segment(int die) return bus ? pci_domain_nr(bus) : -EINVAL; } +int uncore_device_to_die(struct pci_dev *dev) +{ + int node = pcibus_to_node(dev->bus); + int cpu; + + for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) + return c->logical_die_id; + } + + return -1; +} + static void uncore_free_pcibus_map(void) { struct pci2phy_map *map, *tmp; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e278e2e7c051..8d493bea9eb6 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -208,6 +208,7 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_dieid(struct pci_bus *bus); int uncore_die_to_segment(int die); +int uncore_device_to_die(struct pci_dev *dev); ssize_t uncore_event_show(struct device *dev, struct device_attribute *attr, char *buf); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 5fd72d4b8bbb..08af92af2be2 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -33,7 +33,7 @@ static int logical_die_id; static int get_device_die_id(struct pci_dev *dev) { - int cpu, node = pcibus_to_node(dev->bus); + int node = pcibus_to_node(dev->bus); /* * If the NUMA info is not available, assume that the logical die id is @@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev) if (node < 0) return logical_die_id++; - for_each_cpu(cpu, cpumask_of_node(node)) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->initialized && cpu_to_node(cpu) == node) - return c->logical_die_id; - } - - /* - * All CPUs of a node may be offlined. For this case, - * the PCI and MMIO type of uncore blocks which are - * enumerated by the device will be unavailable. - */ - return -1; + return uncore_device_to_die(dev); } #define __node_2_type(cur) \ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 44c2f879f708..31acc96ccb69 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1453,9 +1453,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool } raw_spin_unlock(&pci2phy_map_lock); } else { - int node = pcibus_to_node(ubox_dev->bus); - int cpu; - segment = pci_domain_nr(ubox_dev->bus); raw_spin_lock(&pci2phy_map_lock); map = __find_pci2phy_map(segment); @@ -1465,15 +1462,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool break; } - die_id = -1; - for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) { - struct cpuinfo_x86 *c = &cpu_data(cpu); + map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev); - if (c->initialized && cpu_to_node(cpu) == node) { - map->pbus_to_dieid[bus] = die_id = c->logical_die_id; - break; - } - } raw_spin_unlock(&pci2phy_map_lock); if (WARN_ON_ONCE(die_id == -1)) { -- cgit v1.2.3 From 3af548f2361077cd53762c88d62343d4e8ea1efb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jan 2023 12:01:02 -0800 Subject: perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name The current code assumes that the discovery table provides valid box_ids for the normal units. It's not the case anymore since some units in the discovery table are broken on some SPR variants. Factor out uncore_get_box_id(). Check the existence of the type->box_ids before using it. If it's not available, use pmu_idx. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Petlan Link: https://lore.kernel.org/r/20230112200105.733466-3-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index eeaa92f06b44..271c01621303 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -857,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; +static inline int uncore_get_box_id(struct intel_uncore_type *type, + struct intel_uncore_pmu *pmu) +{ + return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; +} + void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) { struct intel_uncore_type *type = pmu->type; @@ -865,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) sprintf(pmu_name, "uncore_type_%u", type->type_id); else { sprintf(pmu_name, "uncore_type_%u_%d", - type->type_id, type->box_ids[pmu->pmu_idx]); + type->type_id, uncore_get_box_id(type, pmu)); } } @@ -892,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) * Use the box ID from the discovery table if applicable. */ sprintf(pmu->name, "uncore_%s_%d", type->name, - type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + uncore_get_box_id(type, pmu)); } } -- cgit v1.2.3 From bd9514a4d5ec25b29728720ca8b3a9ac4e3137d7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jan 2023 12:01:03 -0800 Subject: perf/x86/uncore: Ignore broken units in discovery table Some units in a discovery table may be broken, e.g., UPI of SPR MCC. A generic method is required to ignore the broken units. Add uncore_units_ignore in the struct intel_uncore_init_fun, which indicates the type ID of broken units. It will be assigned by the platform-specific code later when the platform has a broken discovery table. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Petlan Link: https://lore.kernel.org/r/20230112200105.733466-4-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 8 ++++++-- arch/x86/events/intel/uncore.h | 2 ++ arch/x86/events/intel/uncore_discovery.c | 26 +++++++++++++++++++++++--- arch/x86/events/intel/uncore_discovery.h | 2 +- 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 271c01621303..d9a2ef5eb182 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1695,7 +1695,10 @@ struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); void (*mmio_init)(void); + /* Discovery table is required */ bool use_discovery; + /* The units in the discovery table should be ignored. */ + int *uncore_units_ignore; }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1874,7 +1877,7 @@ static int __init intel_uncore_init(void) id = x86_match_cpu(intel_uncore_match); if (!id) { - if (!uncore_no_discover && intel_uncore_has_discovery_tables()) + if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; else return -ENODEV; @@ -1882,7 +1885,8 @@ static int __init intel_uncore_init(void) uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_no_discover && uncore_init->use_discovery) return -ENODEV; - if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + if (uncore_init->use_discovery && + !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) return -ENODEV; } diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 8d493bea9eb6..bbaa57cd868d 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -34,6 +34,8 @@ #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +#define UNCORE_IGNORE_END -1 + struct pci_extra_dev { struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; }; diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 08af92af2be2..abb51191f5af 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -190,8 +190,25 @@ free_box_offset: } +static bool +uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore) +{ + int i; + + if (!ignore) + return false; + + for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) { + if (unit->box_type == ignore[i]) + return true; + } + + return false; +} + static int parse_discovery_table(struct pci_dev *dev, int die, - u32 bar_offset, bool *parsed) + u32 bar_offset, bool *parsed, + int *ignore) { struct uncore_global_discovery global; struct uncore_unit_discovery unit; @@ -246,6 +263,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die, if (unit.access_type >= UNCORE_ACCESS_MAX) continue; + if (uncore_ignore_unit(&unit, ignore)) + continue; + uncore_insert_box_info(&unit, die, *parsed); } @@ -254,7 +274,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die, return 0; } -bool intel_uncore_has_discovery_tables(void) +bool intel_uncore_has_discovery_tables(int *ignore) { u32 device, val, entry_id, bar_offset; int die, dvsec = 0, ret = true; @@ -290,7 +310,7 @@ bool intel_uncore_has_discovery_tables(void) if (die < 0) continue; - parse_discovery_table(dev, die, bar_offset, &parsed); + parse_discovery_table(dev, die, bar_offset, &parsed, ignore); } } diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index f4439357779a..41637022b5d1 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -122,7 +122,7 @@ struct intel_uncore_discovery_type { unsigned int *box_offset; /* Box offset */ }; -bool intel_uncore_has_discovery_tables(void); +bool intel_uncore_has_discovery_tables(int *ignore); void intel_uncore_clear_discovery_tables(void); void intel_uncore_generic_uncore_cpu_init(void); int intel_uncore_generic_uncore_pci_init(void); -- cgit v1.2.3 From 65248a9a9ee1ea2e6aad8f35253ffd9096ab81b7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jan 2023 12:01:04 -0800 Subject: perf/x86/uncore: Add a quirk for UPI on SPR The discovery table of UPI on some SPR variants, e.g., MCC, is broken. The third UPI table may includes a wrong address which points to a non-exists device. The bug impacts both UPI and M3UPI uncore PMON. Use a pre-defined UPI and M3UPI table to replace the broken table. Different BIOS may populate a device into a different domain or a different BUS. The accurate location can only be retrieved at load time. Add spr_update_device_location() to update the location of the UPI and M3UPI in the pre-defined table. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Petlan Link: https://lore.kernel.org/r/20230112200105.733466-5-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore.c | 1 + arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_discovery.h | 12 ++- arch/x86/events/intel/uncore_snbep.c | 146 ++++++++++++++++++++++++++----- 4 files changed, 136 insertions(+), 24 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d9a2ef5eb182..4030b0b2ef97 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1806,6 +1806,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = { .pci_init = spr_uncore_pci_init, .mmio_init = spr_uncore_mmio_init, .use_discovery = true, + .uncore_units_ignore = spr_uncore_units_ignore, }; static const struct intel_uncore_init_fun generic_uncore_init __initconst = { diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index bbaa57cd868d..e76b945a079e 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -592,6 +592,7 @@ extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; +extern int spr_uncore_units_ignore[]; /* uncore_snb.c */ int snb_uncore_pci_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 41637022b5d1..6ee80ad3423e 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -21,9 +21,15 @@ /* Global discovery table size */ #define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 -#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7) -#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff) -#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff) +#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET 28 +#define UNCORE_DISCOVERY_PCI_DOMAIN(data) \ + ((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7) +#define UNCORE_DISCOVERY_PCI_BUS_OFFSET 20 +#define UNCORE_DISCOVERY_PCI_BUS(data) \ + ((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff) +#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET 12 +#define UNCORE_DISCOVERY_PCI_DEVFN(data) \ + ((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff) #define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 31acc96ccb69..7d1199554fe3 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6132,24 +6132,6 @@ static int spr_upi_get_topology(struct intel_uncore_type *type) return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0); } -static struct intel_uncore_type spr_uncore_upi = { - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, - .format_group = &spr_uncore_raw_format_group, - .ops = &spr_uncore_pci_ops, - .name = "upi", - .attr_update = spr_upi_attr_update, - .get_topology = spr_upi_get_topology, - .set_mapping = spr_upi_set_mapping, - .cleanup_mapping = spr_upi_cleanup_mapping, -}; - -static struct intel_uncore_type spr_uncore_m3upi = { - SPR_UNCORE_PCI_COMMON_FORMAT(), - .name = "m3upi", - .constraints = icx_uncore_m3upi_constraints, -}; - static struct intel_uncore_type spr_uncore_mdf = { SPR_UNCORE_COMMON_FORMAT(), .name = "mdf", @@ -6158,7 +6140,13 @@ static struct intel_uncore_type spr_uncore_mdf = { #define UNCORE_SPR_NUM_UNCORE_TYPES 12 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 +#define UNCORE_SPR_UPI 8 +#define UNCORE_SPR_M3UPI 9 +/* + * The uncore units, which are supported by the discovery table, + * are defined here. + */ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { &spr_uncore_chabox, &spr_uncore_iio, @@ -6168,12 +6156,56 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { NULL, &spr_uncore_imc, &spr_uncore_m2m, - &spr_uncore_upi, - &spr_uncore_m3upi, + NULL, + NULL, NULL, &spr_uncore_mdf, }; +/* + * The uncore units, which are not supported by the discovery table, + * are implemented from here. + */ +#define SPR_UNCORE_UPI_NUM_BOXES 4 + +static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { + 0, 0x8000, 0x10000, 0x18000 +}; + +static struct intel_uncore_type spr_uncore_upi = { + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .format_group = &spr_uncore_raw_format_group, + .ops = &spr_uncore_pci_ops, + .name = "upi", + .attr_update = spr_upi_attr_update, + .get_topology = spr_upi_get_topology, + .set_mapping = spr_upi_set_mapping, + .cleanup_mapping = spr_upi_cleanup_mapping, + .type_id = UNCORE_SPR_UPI, + .num_counters = 4, + .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, + .perf_ctr_bits = 48, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0, + .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, + .pci_offsets = spr_upi_pci_offsets, +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", + .type_id = UNCORE_SPR_M3UPI, + .num_counters = 4, + .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, + .perf_ctr_bits = 48, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, + .pci_offsets = spr_upi_pci_offsets, + .constraints = icx_uncore_m3upi_constraints, +}; + enum perf_uncore_spr_iio_freerunning_type_id { SPR_IIO_MSR_IOCLK, SPR_IIO_MSR_BW_IN, @@ -6304,6 +6336,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = { #define UNCORE_SPR_MSR_EXTRA_UNCORES 1 #define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 +#define UNCORE_SPR_PCI_EXTRA_UNCORES 2 static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { &spr_uncore_iio_free_running, @@ -6313,6 +6346,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] &spr_uncore_imc_free_running, }; +static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = { + &spr_uncore_upi, + &spr_uncore_m3upi +}; + +int spr_uncore_units_ignore[] = { + UNCORE_SPR_UPI, + UNCORE_SPR_M3UPI, + UNCORE_IGNORE_END +}; + static void uncore_type_customized_copy(struct intel_uncore_type *to_type, struct intel_uncore_type *from_type) { @@ -6413,9 +6457,69 @@ void spr_uncore_cpu_init(void) spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); } +#define SPR_UNCORE_UPI_PCIID 0x3241 +#define SPR_UNCORE_UPI0_DEVFN 0x9 +#define SPR_UNCORE_M3UPI_PCIID 0x3246 +#define SPR_UNCORE_M3UPI0_DEVFN 0x29 + +static void spr_update_device_location(int type_id) +{ + struct intel_uncore_type *type; + struct pci_dev *dev = NULL; + u32 device, devfn; + u64 *ctls; + int die; + + if (type_id == UNCORE_SPR_UPI) { + type = &spr_uncore_upi; + device = SPR_UNCORE_UPI_PCIID; + devfn = SPR_UNCORE_UPI0_DEVFN; + } else if (type_id == UNCORE_SPR_M3UPI) { + type = &spr_uncore_m3upi; + device = SPR_UNCORE_M3UPI_PCIID; + devfn = SPR_UNCORE_M3UPI0_DEVFN; + } else + return; + + ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); + if (!ctls) { + type->num_boxes = 0; + return; + } + + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { + if (devfn != dev->devfn) + continue; + + die = uncore_device_to_die(dev); + if (die < 0) + continue; + + ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | + dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | + devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | + type->box_ctl; + } + + type->box_ctls = ctls; +} + int spr_uncore_pci_init(void) { - uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + /* + * The discovery table of UPI on some SPR variant is broken, + * which impacts the detection of both UPI and M3UPI uncore PMON. + * Use the pre-defined UPI and M3UPI table to replace. + * + * The accurate location, e.g., domain and BUS number, + * can only be retrieved at load time. + * Update the location of UPI and M3UPI. + */ + spr_update_device_location(UNCORE_SPR_UPI); + spr_update_device_location(UNCORE_SPR_M3UPI); + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, + UNCORE_SPR_PCI_EXTRA_UNCORES, + spr_pci_uncores); return 0; } -- cgit v1.2.3 From 5d515ee40cb57ea5331998f27df7946a69f14dc3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jan 2023 12:01:05 -0800 Subject: perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table The kernel warning message is triggered, when SPR MCC is used. [ 17.945331] ------------[ cut here ]------------ [ 17.946305] WARNING: CPU: 65 PID: 1 at arch/x86/events/intel/uncore_discovery.c:184 intel_uncore_has_discovery_tables+0x4c0/0x65c [ 17.946305] Modules linked in: [ 17.946305] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.4.17-2136.313.1-X10-2c+ #4 It's caused by the broken discovery table of UPI. The discovery tables are from hardware. Except for dropping the broken information, there is nothing Linux can do. Using WARN_ON_ONCE() is overkilled. Use the pr_info() to replace WARN_ON_ONCE(), and specify what uncore unit is dropped and the reason. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Petlan Link: https://lore.kernel.org/r/20230112200105.733466-6-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_discovery.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index abb51191f5af..cb488e41807c 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -128,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, unsigned int *box_offset, *ids; int i; - if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset)) + if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { + pr_info("Invalid address is detected for uncore type %d box %d, " + "Disable the uncore unit.\n", + unit->box_type, unit->box_id); return; + } if (parsed) { type = search_uncore_discovery_type(unit->box_type); - if (WARN_ON_ONCE(!type)) + if (!type) { + pr_info("A spurious uncore type %d is detected, " + "Disable the uncore type.\n", + unit->box_type); return; + } /* Store the first box of each die */ if (!type->box_ctrl_die[die]) type->box_ctrl_die[die] = unit->ctl; @@ -169,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, ids[i] = type->ids[i]; box_offset[i] = type->box_offset[i]; - if (WARN_ON_ONCE(unit->box_id == ids[i])) + if (unit->box_id == ids[i]) { + pr_info("Duplicate uncore type %d box ID %d is detected, " + "Drop the duplicate uncore unit.\n", + unit->box_type, unit->box_id); goto free_ids; + } } ids[i] = unit->box_id; box_offset[i] = unit->ctl - type->box_ctrl; -- cgit v1.2.3 From 89e97eb8cec0f1af5ebf2380308913256ca7915a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Jan 2023 12:49:25 -0800 Subject: perf/x86/intel/ds: Fix the conversion from TSC to perf time The time order is incorrect when the TSC in a PEBS record is used. $perf record -e cycles:upp dd if=/dev/zero of=/dev/null count=10000 $ perf script --show-task-events perf-exec 0 0.000000: PERF_RECORD_COMM: perf-exec:915/915 dd 915 106.479872: PERF_RECORD_COMM exec: dd:915/915 dd 915 106.483270: PERF_RECORD_EXIT(915:915):(914:914) dd 915 106.512429: 1 cycles:upp: ffffffff96c011b7 [unknown] ([unknown]) ... ... The perf time is from sched_clock_cpu(). The current PEBS code unconditionally convert the TSC to native_sched_clock(). There is a shift between the two clocks. If the TSC is stable, the shift is consistent, __sched_clock_offset. If the TSC is unstable, the shift has to be calculated at runtime. This patch doesn't support the conversion when the TSC is unstable. The TSC unstable case is a corner case and very unlikely to happen. If it happens, the TSC in a PEBS record will be dropped and fall back to perf_event_clock(). Fixes: 47a3aeb39e8d ("perf/x86/intel/pebs: Fix PEBS timestamps overwritten") Reported-by: Namhyung Kim Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/CAM9d7cgWDVAq8-11RbJ2uGfwkKD6fA-OMwOKDrNUrU_=8MgEjg@mail.gmail.com/ --- arch/x86/events/intel/ds.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 183efa914b99..b0354dc869d2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2,12 +2,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include "../perf_event.h" @@ -1568,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } +static void setup_pebs_time(struct perf_event *event, + struct perf_sample_data *data, + u64 tsc) +{ + /* Converting to a user-defined clock is not supported yet. */ + if (event->attr.use_clockid != 0) + return; + + /* + * Doesn't support the conversion when the TSC is unstable. + * The TSC unstable case is a corner case and very unlikely to + * happen. If it happens, the TSC in a PEBS record will be + * dropped and fall back to perf_event_clock(). + */ + if (!using_native_sched_clock() || !sched_clock_stable()) + return; + + data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset; + data->sample_flags |= PERF_SAMPLE_TIME; +} + #define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_DATA_PAGE_SIZE) @@ -1715,11 +1738,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * * We can only do this for the default trace clock. */ - if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) { - data->time = native_sched_clock_from_tsc(pebs->tsc); - data->sample_flags |= PERF_SAMPLE_TIME; - } + if (x86_pmu.intel_cap.pebs_format >= 3) + setup_pebs_time(event, data, pebs->tsc); if (has_branch_stack(event)) perf_sample_save_brstack(data, event, &cpuc->lbr_stack); @@ -1781,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) { - data->time = native_sched_clock_from_tsc(basic->tsc); - data->sample_flags |= PERF_SAMPLE_TIME; - } + setup_pebs_time(event, data, basic->tsc); /* * We must however always use iregs for the unwinder to stay sane; the -- cgit v1.2.3 From fd636b6a9bc6034f2e5bb869658898a2b472c037 Mon Sep 17 00:00:00 2001 From: silviazhao Date: Wed, 8 Feb 2023 16:27:22 +0800 Subject: x86/perf/zhaoxin: Add stepping check for ZXC Some of Nano series processors will lead GP when accessing PMC fixed counter. Meanwhile, their hardware support for PMC has not announced externally. So exclude Nano CPUs from ZXC by checking stepping information. This is an unambiguous way to differentiate between Z