// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include "../perf_event.h"
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;
/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
static u64 amd_pmu_global_cntr_mask __read_mostly;
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};
static __initconst const u64 amd_hw_cache_event_ids_f17h
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
[C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
[C(RESULT_MISS)] = 0,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
[C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
[C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
[C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
[C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0,
[C(RESULT_MISS)] = 0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
};
/*
* AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
[PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
};
/*
* AMD Performance Monitor Family 17h and later:
*/
static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
static u64 amd_pmu_event_map(int hw_event)
{
if (boot_cpu_data.x86 >= 0x17)
return amd_f17h_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
/*
* Previously calculated offsets
*/
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
/*
* Legacy CPUs:
* 4 counters starting at 0xc0010000 each offset by 1
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
*/
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
int offset;
if (!index)
return index;
if (eventsel)
offset = event_offsets[index];
else
offset = count_offsets[index];
if (offset)
return offset;
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
offset = index;
else
offset = index << 1;
if (eventsel)
event_offsets[index] = offset;
else
count_offsets[index] = offset;
return offset;
}
/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}
static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
{
if (!(x86_pmu.flags & PMU_FL_PA
|