Diffstat (limited to 'arch/x86/kernel')
38 files changed, 4925 insertions, 423 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c887cd944f0c..9bcd0b56ca17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 803b684676ff..dbe76a14c3c9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
 {
 	return _acpi_map_lsapic(handle, physid, pcpu);
 }
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index d9d0bd2faaf4..ab3219b3fbda 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -171,8 +171,8 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		for_each_online_cpu(cpu) {
 			if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
 				continue;
-			__cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
-			__cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
+			cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+			cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
 		}
 		free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
 		free_cpumask_var(per_cpu(ipi_mask, this_cpu));
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 80091ae54c2b..9bff68798836 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,8 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
 
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd470ebf924e..e4cf63301ff4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+	/* AMD CPUs don't reset SS attributes on SYSRET */
+	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3f70538012e2..a62cf04dac8a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_capability[10] = eax;
 	}
 
+	/* Additional Intel-defined flags: level 0x0000000F */
+	if (c->cpuid_level >= 0x0000000F) {
+		u32 eax, ebx, ecx, edx;
+
+		/* QoS sub-leaf, EAX=0Fh, ECX=0 */
+		cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
+		c->x86_capability[11] = edx;
+		if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
+			/* will be overridden if occupancy monitoring exists */
+			c->x86_cache_max_rmid = ebx;
+
+			/* QoS sub-leaf, EAX=0Fh, ECX=1 */
+			cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
+			c->x86_capability[12] = edx;
+			if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+				c->x86_cache_max_rmid = ecx;
+				c->x86_cache_occ_scale = ebx;
+			}
+		} else {
+			c->x86_cache_max_rmid = -1;
+			c->x86_cache_occ_scale = -1;
+		}
+	}
+
 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
 	c->extended_cpuid_level = xlvl;
@@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
 	detect_nopl(c);
 }
 
+static void x86_init_cache_qos(struct cpuinfo_x86 *c)
+{
+	/*
+	 * The heavy lifting of max_rmid and cache_occ_scale are handled
+	 * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
+	 * in case CQM bits really aren't there in this CPU.
+	 */
+	if (c != &boot_cpu_data) {
+		boot_cpu_data.x86_cache_max_rmid =
+			min(boot_cpu_data.x86_cache_max_rmid,
+			    c->x86_cache_max_rmid);
+	}
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 
 	init_hypervisor(c);
 	x86_init_rdrand(c);
+	x86_init_cache_qos(c);
 
 	/*
 	 * Clear/Set all flags overriden by options, need do it
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 36ce402a3fa5..d820d8eae96b 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -27,8 +27,8 @@
 
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN_PVHVM
-	&x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+	&x86_hyper_xen,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
new file mode 100644
index 000000000000..1c338b0eba05
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt.h
@@ -0,0 +1,131 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+/*
+ * Table of Physical Addresses bits
+ */
+enum topa_sz {
+	TOPA_4K = 0,
+	TOPA_8K,
+	TOPA_16K,
+	TOPA_32K,
+	TOPA_64K,
+	TOPA_128K,
+	TOPA_256K,
+	TOPA_512K,
+	TOPA_1MB,
+	TOPA_2MB,
+	TOPA_4MB,
+	TOPA_8MB,
+	TOPA_16MB,
+	TOPA_32MB,
+	TOPA_64MB,
+	TOPA_128MB,
+	TOPA_SZ_END,
+};
+
+static inline unsigned int sizes(enum topa_sz tsz)
+{
+	return 1 << (tsz + 12);
+};
+
+struct topa_entry {
+	u64 end : 1;
+	u64 rsvd0 : 1;
+	u64 intr : 1;
+	u64 rsvd1 : 1;
+	u64 stop : 1;
+	u64 rsvd2 : 1;
+	u64 size : 4;
+	u64 rsvd3 : 2;
+	u64 base : 36;
+	u64 rsvd4 : 16;
+};
+
+#define TOPA_SHIFT 12
+#define PT_CPUID_LEAVES 2
+
+enum pt_capabilities {
+	PT_CAP_max_subleaf = 0,
+	PT_CAP_cr3_filtering,
+	PT_CAP_topa_output,
+	PT_CAP_topa_multiple_entries,
+	PT_CAP_payloads_lip,
+};
+
+struct pt_pmu {
+	struct pmu pmu;
+	u32 caps[4 * PT_CPUID_LEAVES];
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ *		cpu, depending on perf event configuration
+ * @cpu:	cpu for per-cpu allocation
+ * @tables:	list of ToPA tables in this buffer
+ * @first:	shorthand for first topa table
+ * @last:	shorthand for last topa table
+ * @cur:	current topa table
+ * @nr_pages:	buffer size in pages
+ * @cur_idx:	current output region's index within @cur table
+ * @output_off:	offset within the current output region
+ * @data_size:	running total of the amount of data in this buffer
+ * @lost:	if data was lost/truncated
+ * @head:	logical write offset inside the buffer
+ * @snapshot:	if this is for a snapshot/overwrite counter
+ * @stop_pos:	STOP topa entry in the buffer
+ * @intr_pos:	INT topa entry in the buffer
+ * @data_pages:	array of pages from perf
+ * @topa_index:	table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+	int cpu;
+	struct list_head tables;
+	struct topa *first, *last, *cur;
+	unsigned int cur_idx;
+	size_t output_off;
+	unsigned long nr_pages;
+	local_t data_size;
+	local_t lost;
+	local64_t head;
+	bool snapshot;
+	unsigned long stop_pos, intr_pos;
+	void **data_pages;
+	struct topa_entry *topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle:	perf output handle
+ * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ */
+struct pt {
+	struct perf_output_handle handle;
+	int handle_nmi;
+};
+
+#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index a041e094b8b9..d76f13d6d8d6 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -404,11 +404,10 @@ static const struct file_operations mtrr_fops = {
 static int mtrr_seq_show(struct seq_file *seq, void *offset)
 {
 	char factor;
-	int i, max, len;
+	int i, max;
 	mtrr_type type;
 	unsigned long base, size;
 
-	len = 0;
 	max = num_var_ranges;
 	for (i = 0; i < max; i++) {
 		mtrr_if->get(i, &base, &size, &type);
@@ -425,11 +424,10 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
 			size >>= 20 - PAGE_SHIFT;
 		}
 		/* Base can be > 32bit */
-		len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
-			"(%5luMB), size=%5lu%cB, count=%d: %s\n",
-			i, base, base >> (20 - PAGE_SHIFT), size,
-			factor, mtrr_usage_table[i],
-			mtrr_attrib_to_str(type));
+		seq_printf(seq, "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
+			   i, base, base >> (20 - PAGE_SHIFT),
+			   size, factor,
+			   mtrr_usage_table[i], mtrr_attrib_to_str(type));
 	}
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e2888a3ad1e3..87848ebe2bb7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -263,6 +263,14 @@ static void hw_perf_event_destroy(struct perf_event *event)
 	}
 }
 
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+	hw_perf_event_destroy(event);
+
+	/* undo the lbr/bts event accounting */
+	x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
 static inline int x86_pmu_initialized(void)
 {
 	return x86_pmu.handle_irq != NULL;
@@ -302,6 +310,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }
 
+/*
+ * Check if we can create event of a certain type (that no conflicting events
+ * are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+	int ret = -EBUSY, i;
+
+	if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what]))
+		return 0;
+
+	mutex_lock(&pmc_reserve_mutex);
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+			goto out;
+
+	atomic_inc(&x86_pmu.lbr_exclusive[what]);
+	ret = 0;
+
+out:
+	mutex_unlock(&pmc_reserve_mutex);
+	return ret;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+}
+
 int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -346,6 +383,12 @@ int x86_setup_perfctr(struct perf_event *event)
 		/* BTS is currently only allowed for user-mode. */
 		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
+
+		/* disallow bts if conflicting events are present */
+		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+			return -EBUSY;
+
+		event->destroy = hw_perf_lbr_event_destroy;
 	}
 
 	hwc->config |= config;
@@ -399,39 +442,41 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
-		/*
-		 * check that PEBS LBR correction does not conflict with
-		 * whatever the user is asking with attr->branch_sample_type
-		 */
-		if (event->attr.precise_ip > 1 &&
-		    x86_pmu.intel_cap.pebs_format < 2) {
-			u64 *br_type = &event->attr.branch_sample_type;
-
-			if (has_branch_stack(event)) {
-				if (!precise_br_compat(event))
-					return -EOPNOTSUPP;
-
-				/* branch_sample_type is compatible */
-
-			} else {
-				/*
-				 * user did not specify branch_sample_type
-				 *
-				 * For PEBS fixups, we capture all
-				 * the branches at the priv level of the
-				 * event.
-				 */
-				*br_type = PERF_SAMPLE_BRANCH_ANY;
-
-				if (!event->attr.exclude_user)
-					*br_type |= PERF_SAMPLE_BRANCH_USER;
-
-				if (!event->attr.exclude_kernel)
-					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-			}
+	}
+	/*
+	 * check that PEBS LBR correction does not conflict with
+	 * whatever the user is asking with attr->branch_sample_type
+	 */
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+		u64 *br_type = &event->attr.branch_sample_type;
+
+		if (has_branch_stack(event)) {
+			if (!precise_br_compat(event))
+				return -EOPNOTSUPP;
+
+			/* branch_sample_type is compatible */
+
+		} else {
+			/*
+			 * user did not specify branch_sample_type
+			 *
+			 * For PEBS fixups, we capture all
+			 * the branches at the priv level of the
+			 * event.
+			 */
+			*br_type = PERF_SAMPLE_BRANCH_ANY;
+
+			if (!event->attr.exclude_user)
+				*br_type |= PERF_SAMPLE_BRANCH_USER;
+
+			if (!event->attr.exclude_kernel)
+				*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
 		}
 	}
 
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+		event->attach_state |= PERF_ATTACH_TASK_DATA;
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -449,6 +494,12 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
+	if (event->attr.sample_period && x86_pmu.limit_period) {
+		if (x86_pmu.limit_period(event, event->attr.sample_period) >
+				event->attr.sample_period)
+			return -EINVAL;
+	}
+
 	return x86_setup_perfctr(event);
 }
@@ -728,14 +779,17 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct perf_event *e;
-	int i, wmin, wmax, num = 0;
+	int i, wmin, wmax, unsched = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
+	if (x86_pmu.start_scheduling)
+		x86_pmu.start_scheduling(cpuc);
+
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
 		hwc->constraint = c;
 
 		wmin = min(wmin, c->weight);
@@ -768,24 +822,30 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(cpuc->event_list, n, wmin,
-					 wmax, assign);
+		unsched = perf_assign_events(cpuc->event_list, n, wmin,
+					     wmax, assign);
 
 	/*
-	 * Mark the event as committed, so we do not put_constraint()
-	 * in case new events are added and fail scheduling.
+	 * In case of success (unsched = 0), mark events as committed,
+	 * so we do not put_constraint() in case new events are added
+	 * and fail to be scheduled
+	 *
+	 * We invoke the lower level commit callback to lock the resource
+	 *
+	 * We do not need to do all of this in case we are called to
+	 * validate an event group (assign == NULL)
 	 */
-	if (!num && assign) {
+	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+			if (x86_pmu.commit_scheduling)
+				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
 		}
 	}
-	/*
-	 * scheduling failed or is just a simulation,
-	 * free resources if necessary
-	 */
-	if (!assign || num) {
+
+	if (!assign || unsched) {
+
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
@@ -795,11 +855,18 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 				continue;
 
+			/*
+			 * release events that failed scheduling
+			 */
 			if (x86_pmu.put_event_constraints)
 				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
-	return num ? -EINVAL : 0;
+
+	if (x86_pmu.stop_scheduling)
+		x86_pmu.stop_scheduling(cpuc);
+
+	return unsched ? -EINVAL : 0;
 }
 
 /*
@@ -986,6 +1053,9 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
+	if (x86_pmu.limit_period)
+		left = x86_pmu.limit_period(event, left);
+
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
@@ -1033,7 +1103,6 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	hwc = &event->hw;
 
-	perf_pmu_disable(event->pmu);
 	n0 = cpuc->n_events;
 	ret = n = collect_events(cpuc, event, false);
 	if (ret < 0)
@@ -1071,7 +1140,6 @@ done_collect:
 	ret = 0;
 out:
-	perf_pmu_enable(event->pmu);
 	return ret;
 }
@@ -1103,7 +1171,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-	u64 pebs;
+	u64 pebs, debugctl;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1121,14 +1189,20 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status: %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
-		pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
+		if (x86_pmu.pebs_constraints) {
+			rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+			pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
+		}
+		if (x86_pmu.lbr_nr) {
+			rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+			pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
+		}
 	}
 	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
@@ -1321,11 +1395,12 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int ret = NOTIFY_OK;
+	int i, ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-		cpuc->kfree_on_online = NULL;
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+			cpuc->kfree_on_online[i] = NULL;
 		if (x86_pmu.cpu_prepare)
 			ret = x86_pmu.cpu_prepare(cpu);
 		break;
@@ -1336,7 +1411,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 
 	case CPU_ONLINE:
-		kfree(cpuc->kfree_on_online);
+		for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+			kfree(cpuc->kfree_on_online[i]);
+			cpuc->kfree_on_online[i] = NULL;
+		}
 		break;
 
 	case CPU_DYING:
@@ -1712,7 +1790,7 @@ static int validate_event(struct perf_event *event)
 	if (IS_ERR(fake_cpuc))
 		return PTR_ERR(fake_cpuc);
 
-	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+	c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
 
 	if (!c || !c->weight)
 		ret = -EINVAL;
@@ -1914,10 +1992,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
-static void x86_pmu_flush_branch_stack(void)
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.flush_branch_stack)
-		x86_pmu.flush_branch_stack();
+	if (x86_pmu.sched_task)
+		x86_pmu.sched_task(ctx, sched_in);
 }
 
 void perf_check_microcode(void)
@@ -1949,7 +2027,8 @@ static struct pmu pmu = {
 
 	.commit_txn = x86_pmu_commit_txn,
 
 	.event_idx = x86_pmu_event_idx,
-	.flush_branch_stack = x86_pmu_flush_branch_stack,
+	.sched_task = x86_pmu_sched_task,
+	.task_ctx_size = sizeof(struct x86_perf_task_context),
 };
@@ -1968,13 +2047,23 @@ void arch_perf_update_userpage(struct perf_event *event,
 
 	data = cyc2ns_read_begin();
 
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always in the local_clock domain.
+	 */
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
 	userpg->time_offset = data->cyc2ns_offset - now;
 
-	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = data->cyc2ns_offset;
+	/*
+	 * cap_user_time_zero doesn't make sense when we're using a different
+	 * time base for the records.
+	 */
+	if (event->clock == &local_clock) {
+		userpg->cap_user_time_zero = 1;
+		userpg->time_zero = data->cyc2ns_offset;
+	}
 
 	cyc2ns_read_end(data);
 }
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index df525d2be1e8..6ac5cb7a9e14 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,13 +65,15 @@ struct event_constraint {
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
 
 
 struct amd_nb {
@@ -123,8 +125,37 @@ struct intel_shared_regs {
 	unsigned core_id;	/* per-core: core id */
 };
 
+enum intel_excl_state_type {
+	INTEL_EXCL_UNUSED = 0, /* counter is unused */
+	INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */
+	INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
