diff options
| author | Russell King <rmk+kernel@arm.linux.org.uk> | 2010-05-17 17:21:23 +0100 |
|---|---|---|
| committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2010-05-17 17:21:23 +0100 |
| commit | fda0e18c8a7a3e02747c2b045b4fcd2c920410b9 (patch) | |
| tree | 6dac80c846592901cbfdcff029525d7994465282 | |
| parent | 98830bc9967b18d6f9a614a1f354f5580196ef85 (diff) | |
| parent | d1e86d64bc48dedd0d68d182d0ce6951d8b4fd0d (diff) | |
| download | linux-fda0e18c8a7a3e02747c2b045b4fcd2c920410b9.tar.gz linux-fda0e18c8a7a3e02747c2b045b4fcd2c920410b9.tar.bz2 linux-fda0e18c8a7a3e02747c2b045b4fcd2c920410b9.zip | |
Merge branch 'devel-pmu' into devel
| -rw-r--r-- | arch/arm/Kconfig | 26 | ||||
| -rw-r--r-- | arch/arm/include/asm/perf_event.h | 17 | ||||
| -rw-r--r-- | arch/arm/include/asm/pmu.h | 27 | ||||
| -rw-r--r-- | arch/arm/kernel/perf_event.c | 928 | ||||
| -rw-r--r-- | arch/arm/kernel/pmu.c | 127 | ||||
| -rw-r--r-- | arch/arm/oprofile/Makefile | 7 | ||||
| -rw-r--r-- | arch/arm/oprofile/backtrace.c | 83 | ||||
| -rw-r--r-- | arch/arm/oprofile/common.c | 375 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_arm_model.h | 35 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_counter.h | 27 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_arm11_core.c | 162 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_arm11_core.h | 45 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_mpcore.c | 306 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_mpcore.h | 61 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_v6.c | 78 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_v7.c | 415 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_v7.h | 103 | ||||
| -rw-r--r-- | arch/arm/oprofile/op_model_xscale.c | 444 |
18 files changed, 1316 insertions, 1950 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9e938a79a3b2..2b3157b5089a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -13,7 +13,7 @@ config ARM select RTC_LIB select SYS_SUPPORTS_APM_EMULATION select GENERIC_ATOMIC64 if (!CPU_32v6K) - select HAVE_OPROFILE + select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_ARCH_KGDB select HAVE_KPROBES if (!XIP_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) @@ -181,28 +181,6 @@ config ARM_L1_CACHE_SHIFT_6 help Setting ARM L1 cache line size to 64 Bytes. -if OPROFILE - -config OPROFILE_ARMV6 - def_bool y - depends on CPU_V6 && !SMP - select OPROFILE_ARM11_CORE - -config OPROFILE_MPCORE - def_bool y - depends on CPU_V6 && SMP - select OPROFILE_ARM11_CORE - -config OPROFILE_ARM11_CORE - bool - -config OPROFILE_ARMV7 - def_bool y - depends on CPU_V7 && !SMP - bool - -endif - config VECTORS_BASE hex default 0xffff0000 if MMU || CPU_HIGH_VECTOR @@ -1314,7 +1292,7 @@ config HIGHPTE config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS && CPU_HAS_PMU && (CPU_V6 || CPU_V7) + depends on PERF_EVENTS && CPU_HAS_PMU default y help Enable hardware performance counter support for perf events. If diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 49e3049aba32..48837e6d8887 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -28,4 +28,21 @@ set_perf_event_pending(void) * same indexes here for consistency. */ #define PERF_EVENT_INDEX_OFFSET 1 +/* ARM perf PMU IDs for use by internal perf clients. */ +enum arm_perf_pmu_ids { + ARM_PERF_PMU_ID_XSCALE1 = 0, + ARM_PERF_PMU_ID_XSCALE2, + ARM_PERF_PMU_ID_V6, + ARM_PERF_PMU_ID_V6MP, + ARM_PERF_PMU_ID_CA8, + ARM_PERF_PMU_ID_CA9, + ARM_NUM_PMU_IDS, +}; + +extern enum arm_perf_pmu_ids +armpmu_get_pmu_id(void); + +extern int +armpmu_get_max_events(void); + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 44bec1f02cb0..8ccea012722c 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -19,31 +19,26 @@ enum arm_pmu_type { #ifdef CONFIG_CPU_HAS_PMU -struct pmu_irqs { - const int *irqs; - int num_irqs; -}; - /** * reserve_pmu() - reserve the hardware performance counters * * Reserve the hardware performance counters in the system for exclusive use. - * The 'struct pmu_irqs' for the system is returned on success, ERR_PTR() + * The platform_device for the system is returned on success, ERR_PTR() * encoded error on failure. */ -extern const struct pmu_irqs * -reserve_pmu(void); +extern struct platform_device * +reserve_pmu(enum arm_pmu_type device); /** * release_pmu() - Relinquish control of the performance counters * * Release the performance counters and allow someone else to use them. * Callers must have disabled the counters and released IRQs before calling - * this. The 'struct pmu_irqs' returned from reserve_pmu() must be passed as + * this. The platform_device returned from reserve_pmu() must be passed as * a cookie. */ extern int -release_pmu(const struct pmu_irqs *irqs); +release_pmu(struct platform_device *pdev); /** * init_pmu() - Initialise the PMU. @@ -53,24 +48,26 @@ release_pmu(const struct pmu_irqs *irqs); * the actual hardware initialisation. */ extern int -init_pmu(void); +init_pmu(enum arm_pmu_type device); #else /* CONFIG_CPU_HAS_PMU */ -static inline const struct pmu_irqs * -reserve_pmu(void) +#include <linux/err.h> + +static inline struct platform_device * +reserve_pmu(enum arm_pmu_type device) { return ERR_PTR(-ENODEV); } static inline int -release_pmu(const struct pmu_irqs *irqs) +release_pmu(struct platform_device *pdev) { return -ENODEV; } static inline int -init_pmu(void) +init_pmu(enum arm_pmu_type device) { return -ENODEV; } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 9e70f2053f9a..c45768614c8a 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -16,7 +16,9 @@ #include <linux/interrupt.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/perf_event.h> +#include <linux/platform_device.h> #include <linux/spinlock.h> #include <linux/uaccess.h> @@ -26,7 +28,7 @@ #include <asm/pmu.h> #include <asm/stacktrace.h> -static const struct pmu_irqs *pmu_irqs; +static struct platform_device *pmu_device; /* * Hardware lock to serialize accesses to PMU registers. Needed for the @@ -67,8 +69,18 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +/* PMU names. */ +static const char *arm_pmu_names[] = { + [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", + [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", + [ARM_PERF_PMU_ID_V6] = "v6", + [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", + [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", + [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", +}; + struct arm_pmu { - char *name; + enum arm_perf_pmu_ids id; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); @@ -87,6 +99,30 @@ struct arm_pmu { /* Set at runtime when we know what CPU type we are. */ static const struct arm_pmu *armpmu; +enum arm_perf_pmu_ids +armpmu_get_pmu_id(void) +{ + int id = -ENODEV; + + if (armpmu != NULL) + id = armpmu->id; + + return id; +} +EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); + +int +armpmu_get_max_events(void) +{ + int max_events = 0; + + if (armpmu != NULL) + max_events = armpmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(armpmu_get_max_events); + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) \ @@ -314,38 +350,44 @@ validate_group(struct perf_event *event) static int armpmu_reserve_hardware(void) { - int i; - int err; + int i, err = -ENODEV, irq; - pmu_irqs = reserve_pmu(); - if (IS_ERR(pmu_irqs)) { + pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU); + if (IS_ERR(pmu_device)) { pr_warning("unable to reserve pmu\n"); - return PTR_ERR(pmu_irqs); + return PTR_ERR(pmu_device); } - init_pmu(); + init_pmu(ARM_PMU_DEVICE_CPU); - if (pmu_irqs->num_irqs < 1) { + if (pmu_device->num_resources < 1) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < pmu_irqs->num_irqs; ++i) { - err = request_irq(pmu_irqs->irqs[i], armpmu->handle_irq, + for (i = 0; i < pmu_device->num_resources; ++i) { + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + err = request_irq(irq, armpmu->handle_irq, IRQF_DISABLED | IRQF_NOBALANCING, "armpmu", NULL); if (err) { - pr_warning("unable to request IRQ%d for ARM " - "perf counters\n", pmu_irqs->irqs[i]); + pr_warning("unable to request IRQ%d for ARM perf " + "counters\n", irq); break; } } if (err) { - for (i = i - 1; i >= 0; --i) - free_irq(pmu_irqs->irqs[i], NULL); - release_pmu(pmu_irqs); - pmu_irqs = NULL; + for (i = i - 1; i >= 0; --i) { + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, NULL); + } + release_pmu(pmu_device); + pmu_device = NULL; } return err; @@ -354,14 +396,17 @@ armpmu_reserve_hardware(void) static void armpmu_release_hardware(void) { - int i; + int i, irq; - for (i = pmu_irqs->num_irqs - 1; i >= 0; --i) - free_irq(pmu_irqs->irqs[i], NULL); + for (i = pmu_device->num_resources - 1; i >= 0; --i) { + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, NULL); + } armpmu->stop(); - release_pmu(pmu_irqs); - pmu_irqs = NULL; + release_pmu(pmu_device); + pmu_device = NULL; } static atomic_t active_events = ATOMIC_INIT(0); @@ -1144,7 +1189,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, } static const struct arm_pmu armv6pmu = { - .name = "v6", + .id = ARM_PERF_PMU_ID_V6, .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, @@ -1167,7 +1212,7 @@ static const struct arm_pmu armv6pmu = { * reset the period and enable the interrupt reporting. */ static const struct arm_pmu armv6mpcore_pmu = { - .name = "v6mpcore", + .id = ARM_PERF_PMU_ID_V6MP, .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, @@ -1197,10 +1242,6 @@ static const struct arm_pmu armv6mpcore_pmu = { * counter and all 4 performance counters together can be reset separately. */ -#define ARMV7_PMU_CORTEX_A8_NAME "ARMv7 Cortex-A8" - -#define ARMV7_PMU_CORTEX_A9_NAME "ARMv7 Cortex-A9" - /* Common ARMv7 event types */ enum armv7_perf_types { ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, @@ -2079,6 +2120,803 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Based on xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +static inline int +xscalepmu_event_map(int config) +{ + int mapping = xscale_perf_map[config]; + if (HW_OP_UNSUPPORTED == mapping) + mapping = -EOPNOTSUPP; + return mapping; +} + +static u64 +xscalepmu_raw_event(u64 config) +{ + return config & 0xff; +} + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + perf_event_do_pending(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { + return XSCALE_COUNTER1; + } + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { + return XSCALE_COUNTER0; + } + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .event_map = xscalepmu_event_map, + .raw_event = xscalepmu_raw_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + perf_event_do_pending(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED |
