diff options
Diffstat (limited to 'arch/x86')
| -rw-r--r-- | arch/x86/events/intel/core.c | 67 | ||||
| -rw-r--r-- | arch/x86/events/intel/ds.c | 2 | ||||
| -rw-r--r-- | arch/x86/events/intel/lbr.c | 20 | ||||
| -rw-r--r-- | arch/x86/events/perf_event.h | 19 | ||||
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 159 |
5 files changed, 166 insertions, 101 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index aa3f5b527dc0..603964408d2d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2145,19 +2145,19 @@ static __initconst const u64 knl_hw_cache_extra_regs * However, there are some cases which may change PEBS status, e.g. PMI * throttle. The PEBS_ENABLE should be updated where the status changes. */ -static void __intel_pmu_disable_all(void) +static __always_inline void __intel_pmu_disable_all(bool bts) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); } -static void intel_pmu_disable_all(void) +static __always_inline void intel_pmu_disable_all(void) { - __intel_pmu_disable_all(); + __intel_pmu_disable_all(true); intel_pmu_pebs_disable_all(); intel_pmu_lbr_disable_all(); } @@ -2188,6 +2188,49 @@ static void intel_pmu_enable_all(int added) __intel_pmu_enable_all(added, false); } +static noinline int +__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, + unsigned int cnt, unsigned long flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + intel_pmu_lbr_read(); + cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr); + + memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); + intel_pmu_enable_all(0); + local_irq_restore(flags); + return cnt; +} + +static int +intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_pebs_disable_all(); + __intel_pmu_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + +static int +intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_pebs_disable_all(); + __intel_pmu_arch_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + /* * Workaround for: * Intel Errata AAK100 (model 26) @@ -2937,7 +2980,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; - __intel_pmu_disable_all(); + __intel_pmu_disable_all(true); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); status = intel_pmu_get_status(); @@ -6299,9 +6342,21 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - if (x86_pmu.lbr_nr) + if (x86_pmu.lbr_nr) { pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + /* only support branch_stack snapshot for perfmon >= v2 */ + if (x86_pmu.disable_all == intel_pmu_disable_all) { + if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_arch_branch_stack); + } else { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_branch_stack); + } + } + } + intel_pmu_check_extra_regs(x86_pmu.extra_regs); /* Support full width counters using alternative MSR range */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 4dbb55a43dad..2e215369df4a 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1302,7 +1302,7 @@ void intel_pmu_pebs_disable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); + __intel_pmu_pebs_disable_all(); } static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e6d6eaeb4cb..6b72e9b55c69 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -228,20 +228,6 @@ static void __intel_pmu_lbr_enable(bool pmi) wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); } -static void __intel_pmu_lbr_disable(void) -{ - u64 debugctl; - - if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { - wrmsrl(MSR_ARCH_LBR_CTL, 0); - return; - } - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); -} - void intel_pmu_lbr_reset_32(void) { int i; @@ -779,8 +765,12 @@ void intel_pmu_lbr_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (cpuc->lbr_users && !vlbr_exclude_host()) + if (cpuc->lbr_users && !vlbr_exclude_host()) { + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return __intel_pmu_arch_lbr_disable(); + __intel_pmu_lbr_disable(); + } } void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 7b5167db3e78..5480db242083 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1242,6 +1242,25 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) return intel_pmu_has_bts_period(event, hwc->sample_period); } +static __always_inline void __intel_pmu_pebs_disable_all(void) +{ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); +} + +static __always_inline void __intel_pmu_arch_lbr_disable(void) +{ + wrmsrl(MSR_ARCH_LBR_CTL, 0); +} + +static __always_inline void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + int intel_pmu_save_and_restart(struct perf_event *event); struct event_constraint * diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8796559f62a4..726700fabca6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -697,6 +697,20 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) *pprog = prog; } +/* + * Similar version of maybe_emit_mod() for a single register + */ +static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64) +{ + u8 *prog = *pprog; + + if (is64) + EMIT1(add_1mod(0x48, reg)); + else if (is_ereg(reg)) + EMIT1(add_1mod(0x40, reg)); + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -925,10 +939,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; @@ -942,10 +954,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); /* * b3 holds 'normal' opcode, b2 short form only valid @@ -1002,19 +1012,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: - case BPF_ALU64 | BPF_DIV | BPF_K: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ + case BPF_ALU64 | BPF_DIV | BPF_K: { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; - if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, src_reg */ - EMIT_mov(AUX_REG, src_reg); - else + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) { + if (src_reg == BPF_REG_0 || + src_reg == BPF_REG_3) { + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); + src_reg = AUX_REG; + } + } else { /* mov r11, imm32 */ EMIT3_off32(0x49, 0xC7, 0xC3, imm32); + src_reg = AUX_REG; + } - /* mov rax, dst_reg */ - EMIT_mov(BPF_REG_0, dst_reg); + if (dst_reg != BPF_REG_0) + /* mov rax, dst_reg */ + emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); /* * xor edx, edx @@ -1022,63 +1043,51 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (BPF_CLASS(insn->code) == BPF_ALU64) - /* div r11 */ - EMIT3(0x49, 0xF7, 0xF3); - else - /* div r11d */ - EMIT3(0x41, 0xF7, 0xF3); - - if (BPF_OP(insn->code) == BPF_MOD) - /* mov r11, rdx */ - EMIT3(0x49, 0x89, 0xD3); - else - /* mov r11, rax */ - EMIT3(0x49, 0x89, 0xC3); + /* div src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF0, src_reg)); - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ + if (BPF_OP(insn->code) == BPF_MOD && + dst_reg != BPF_REG_3) + /* mov dst_reg, rdx */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); + else if (BPF_OP(insn->code) == BPF_DIV && + dst_reg != BPF_REG_0) + /* mov dst_reg, rax */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) + EMIT1(0x58); /* pop rax */ break; + } case BPF_ALU | BPF_MUL | BPF_K: - case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: - case BPF_ALU64 | BPF_MUL | BPF_X: - { - bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; - - if (dst_reg != BPF_REG_0) - EMIT1(0x50); /* push rax */ - if (dst_reg != BPF_REG_3) - EMIT1(0x52); /* push rdx */ - - /* mov r11, dst_reg */ - EMIT_mov(AUX_REG, dst_reg); + maybe_emit_mod(&prog, dst_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); - if (BPF_SRC(insn->code) == BPF_X) - emit_mov_reg(&prog, is64, BPF_REG_0, src_reg); + if (is_imm8(imm32)) + /* imul dst_reg, dst_reg, imm8 */ + EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg), + imm32); else - emit_mov_imm32(&prog, is64, BPF_REG_0, imm32); + /* imul dst_reg, dst_reg, imm32 */ + EMIT2_off32(0x69, + add_2reg(0xC0, dst_reg, dst_reg), + imm32); + break; - if (is64) - EMIT1(add_1mod(0x48, AUX_REG)); - else if (is_ereg(AUX_REG)) - EMIT1(add_1mod(0x40, AUX_REG)); - /* mul(q) r11 */ - EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + maybe_emit_mod(&prog, src_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); - if (dst_reg != BPF_REG_3) - EMIT1(0x5A); /* pop rdx */ - if (dst_reg != BPF_REG_0) { - /* mov dst_reg, rax */ - EMIT_mov(dst_reg, BPF_REG_0); - EMIT1(0x58); /* pop rax */ - } + /* imul dst_reg, src_reg */ + EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); break; - } + /* Shifts */ case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: @@ -1086,10 +1095,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) @@ -1120,10 +1127,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); @@ -1430,10 +1435,8 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: /* test dst_reg, imm32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; @@ -1466,10 +1469,8 @@ st: if (is_imm8(insn->off)) } /* cmp dst_reg, imm8/32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); if (is_imm8(imm32)) EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); |
