summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:21:10 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:21:10 -0800
commitb0f4b285d7ed174804658539129a834270f4829a (patch)
treebe7f8dca58075aba2c6a137fcfd4d44c5c333efc /arch/x86/kernel
parentbe9c5ae4eeec2e85527e95647348b8ea4eb25128 (diff)
parent5250d329e38cdf7580faeb9c53c17d3588d7d19c (diff)
downloadlinux-b0f4b285d7ed174804658539129a834270f4829a.tar.gz
linux-b0f4b285d7ed174804658539129a834270f4829a.tar.bz2
linux-b0f4b285d7ed174804658539129a834270f4829a.zip
Merge branch 'tracing-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (241 commits) sched, trace: update trace_sched_wakeup() tracing/ftrace: don't trace on early stage of a secondary cpu boot, v3 Revert "x86: disable X86_PTRACE_BTS" ring-buffer: prevent false positive warning ring-buffer: fix dangling commit race ftrace: enable format arguments checking x86, bts: memory accounting x86, bts: add fork and exit handling ftrace: introduce tracing_reset_online_cpus() helper tracing: fix warnings in kernel/trace/trace_sched_switch.c tracing: fix warning in kernel/trace/trace.c tracing/ring-buffer: remove unused ring_buffer size trace: fix task state printout ftrace: add not to regex on filtering functions trace: better use of stack_trace_enabled for boot up code trace: add a way to enable or disable the stack tracer x86: entry_64 - introduce FTRACE_ frame macro v2 tracing/ftrace: add the printk-msg-only option tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp() x86, bts: correctly report invalid bts records ... Fixed up trivial conflict in scripts/recordmcount.pl due to SH bits being already partly merged by the SH merge.
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/ds.c1138
-rw-r--r--arch/x86/kernel/dumpstack.c34
-rw-r--r--arch/x86/kernel/dumpstack.h2
-rw-r--r--arch/x86/kernel/dumpstack_32.c5
-rw-r--r--arch/x86/kernel/dumpstack_64.c7
-rw-r--r--arch/x86/kernel/entry_32.S51
-rw-r--r--arch/x86/kernel/entry_64.S98
-rw-r--r--arch/x86/kernel/ftrace.c390
-rw-r--r--arch/x86/kernel/irq_64.c3
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/process_32.c67
-rw-r--r--arch/x86/kernel/process_64.c58
-rw-r--r--arch/x86/kernel/ptrace.c431
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S1
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S1
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
23 files changed, 1465 insertions, 923 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1f208aaee780..88dd768eab6d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7397911f8478..b5229affb953 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/dmar.h>
+#include <linux/ftrace.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -790,7 +791,7 @@ static void local_apic_timer_interrupt(void)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-void smp_apic_timer_interrupt(struct pt_regs *regs)
+void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a5c04e88777e..82db7f45e2de 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -2,6 +2,11 @@
# Makefile for x86-compatible CPU details and quirks
#
+# Don't trace early stages of a secondary CPU boot
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_common.o = -pg
+endif
+
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
+#include <linux/ftrace.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
+ struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
+ trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ccfd2047630c..8ea6929e974c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,7 +11,6 @@
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
-#include <asm/ptrace.h>
#include <asm/ds.h>
#include <asm/bugs.h>
@@ -326,9 +325,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P3);
#endif
- if (cpu_has_bts)
- ptrace_bts_init_intel(c);
-
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d6938d9351cf..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,14 +6,13 @@
* precise-event based sampling (PEBS).
*
* It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - DS and BTS hardware configuration
+ * - buffer overflow handling (to be done)
* - buffer access
*
- * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * It does not do:
+ * - security checking (is the caller allowed to trace the task)
+ * - buffer allocation (memory accounting)
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@@ -28,22 +27,69 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/kernel.h>
/*
* The configuration for a particular DS hardware implementation.
*/
struct ds_configuration {
- /* the size of the DS structure in bytes */
- unsigned char sizeof_ds;
- /* the size of one pointer-typed field in the DS structure in bytes;
- this covers the first 8 fields related to buffer management. */
+ /* the name of the configuration */
+ const char *name;
+ /* the size of one pointer-typed field in the DS structure and
+ in the BTS and PEBS buffers in bytes;
+ this covers the first 8 DS fields related to buffer management. */
unsigned char sizeof_field;
/* the size of a BTS/PEBS record in bytes */
unsigned char sizeof_rec[2];
+ /* a series of bit-masks to control various features indexed
+ * by enum ds_feature */
+ unsigned long ctl[dsf_ctl_max];
};
-static struct ds_configuration ds_cfg;
+static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+
+#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
+#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
+#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
+
+#define BTS_CONTROL \
+ (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
+ ds_cfg.ctl[dsf_bts_overflow])
+
+
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+ /* the DS context (partially) owned by this tracer */
+ struct ds_context *context;
+ /* the buffer provided on ds_request() and its size in bytes */
+ void *buffer;
+ size_t size;
+};
+
+struct bts_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* the trace including the DS configuration */
+ struct bts_trace trace;
+ /* buffer overflow notification function */
+ bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* the trace including the DS configuration */
+ struct pebs_trace trace;
+ /* buffer overflow notification function */
+ pebs_ovfl_callback_t ovfl;
+};
/*
* Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
+ * Locking is done only for allocating BTS or PEBS resources.
*/
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
-
-/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
- */
-static inline int ds_validate_access(struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return -EPERM;
-
- if (context->owner[qual] == current)
- return 0;
-
- return -EPERM;
-}
+static DEFINE_SPINLOCK(ds_lock);
/*
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context,
* >0 number of per-thread tracers
* <0 number of per-cpu tracers
*
- * The below functions to get and put tracers and to check the
- * allocation type require the ds_lock to be held by the caller.
- *
* Tracers essentially gives the number of ds contexts for a certain
* type of allocation.
*/
-static long tracers;
+static atomic_t tracers = ATOMIC_INIT(0);
static inline void get_tracer(struct task_struct *task)
{
- tracers += (task ? 1 : -1);
+ if (task)
+ atomic_inc(&tracers);
+ else
+ atomic_dec(&tracers);
}
static inline void put_tracer(struct task_struct *task)
{
- tracers -= (task ? 1 : -1);
+ if (task)
+ atomic_dec(&tracers);
+ else
+ atomic_inc(&tracers);
}
static inline int check_tracer(struct task_struct *task)
{
- return (task ? (tracers >= 0) : (tracers <= 0));
+ return task ?
+ (atomic_read(&tracers) >= 0) :
+ (atomic_read(&tracers) <= 0);
}
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task)
*
* Contexts are use-counted. They are allocated on first access and
* deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- * requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- * non-existing context indicates an error. It acquires and releases
- * the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
*/
-static DEFINE_PER_CPU(struct ds_context *, system_context);
-
-#define this_system_context per_cpu(system_context, smp_processor_id())
-
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
-static inline struct ds_context *ds_get_context(struct task_struct *task)
-{
- struct ds_context *context;
- unsigned long irq;
+struct ds_context {
+ /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+ unsigned char ds[MAX_SIZEOF_DS];
+ /* the owner of the BTS and PEBS configuration, respectively */
+ struct bts_tracer *bts_master;
+ struct pebs_tracer *pebs_master;
+ /* use count */
+ unsigned long count;
+ /* a pointer to the context location inside the thread_struct
+ * or the per_cpu context array */
+ struct ds_context **this;
+ /* a pointer to the task owning this context, or NULL, if the
+ * context is owned by a cpu */
+ struct task_struct *task;
+};
- spin_lock_irqsave(&ds_lock, irq);
+static DEFINE_PER_CPU(struct ds_context *, system_context_array);
- context = (task ? task->thread.ds_ctx : this_system_context);
- if (context)
- context->count++;
+#define system_context per_cpu(system_context_array, smp_processor_id())
- spin_unlock_irqrestore(&ds_lock, irq);
-
- return context;
-}
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
+static inline struct ds_context *ds_get_context(struct task_struct *task)
{
struct ds_context **p_context =
- (task ? &task->thread.ds_ctx : &this_system_context);
- struct ds_context *context = *p_context;
+ (task ? &task->thread.ds_ctx : &system_context);
+ struct ds_context *context = NULL;
+ struct ds_context *new_context = NULL;
unsigned long irq;
- if (!context) {
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return NULL;
-
- context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
- if (!context->ds) {
- kfree(context);
- return NULL;
- }
+ /* Chances are small that we already have a context. */
+ new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
+ if (!new_context)
+ return NULL;
- spin_lock_irqsave(&ds_lock, irq);
+ spin_lock_irqsave(&ds_lock, irq);
- if (*p_context) {
- kfree(context->ds);
- kfree(context);
+ context = *p_context;
+ if (!context) {
+ context = new_context;
- context = *p_context;
- } else {
- *p_context = context;
+ context->this = p_context;
+ context->task = task;
+ context->count = 0;
- context->this = p_context;
- context->task = task;
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
- if (!task || (task == current))
- wrmsrl(MSR_IA32_DS_AREA,
- (unsigned long)context->ds);
- }
- spin_unlock_irqrestore(&ds_lock, irq);
+ *p_context = context;
}
context->count++;
+ spin_unlock_irqrestore(&ds_lock, irq);
+
+ if (context != new_context)
+ kfree(new_context);
+
return context;
}
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
static inline void ds_put_context(struct ds_context *context)
{
unsigned long irq;
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context)
spin_lock_irqsave(&ds_lock, irq);
- if (--context->count)
- goto out;
+ if (--context->count) {
+ spin_unlock_irqrestore(&ds_lock, irq);
+ return;
+ }
*(context->this) = NULL;
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context)
if (!context->task || (context->task == current))
wrmsrl(MSR_IA32_DS_AREA, 0);
- put_tracer(context->task);
+ spin_unlock_irqrestore(&ds_lock, irq);
- /* free any leftover buffers from tracers that did not
- * deallocate them properly. */
- kfree(context->buffer[ds_bts]);
- kfree(context->buffer[ds_pebs]);
- kfree(context->ds);
kfree(context);
- out:
- spin_unlock_irqrestore(&ds_lock, irq);
}
/*
- * Handle a buffer overflow
+ * Call the tracer's callback on a buffer overflow.
*
- * task: the task whose buffers are overflowing;
- * NULL for a buffer overflow on the current cpu
* context: the ds context
* qual: the buffer type
*/
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
- enum ds_qualifier qual)
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
- if (!context)
- return;
-
- if (context->callback[qual])
- (*context->callback[qual])(task);
-
- /* todo: do some more overflow handling */
+ switch (qual) {
+ case ds_bts:
+ if (context->bts_master &&
+ context->bts_master->ovfl)
+ context->bts_master->ovfl(context->bts_master);
+ break;
+ case ds_pebs:
+ if (context->pebs_master &&
+ context->pebs_master->ovfl)
+ context->pebs_master->ovfl(context->pebs_master);
+ break;
+ }
}
/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
+ * Write raw data into the BTS or PEBS buffer.
*
- * Returns the buffer, if successful;
- * NULL, if out of memory or rlimit exceeded.
+ * The remainder of any partially written record is zeroed out.
*
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
+ * context: the DS context
+ * qual: the buffer type
+ * record: the data to write
+ * size: the size of the data
*/
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+ const void *record, size_t size)
{
- unsigned long rlim, vm, pgsz;
- void *buffer;
+ int bytes_written = 0;
- pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (!record)
+ return -EINVAL;
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ while (size) {
+ unsigned long base, index, end, write_end, int_th;
+ unsigned long write_size, adj_write_size;
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ /*
+ * write as much as possible without producing an
+ * overflow interrupt.
+ *
+ * interrupt_threshold must either be
+ * - bigger than absolute_maximum or
+ * - point to a record between buffer_base and absolute_maximum
+ *
+ * index points to a valid record.
+ */
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ index = ds_get(context->ds, qual, ds_index);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
+ int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
- buffer = kzalloc(size, GFP_KERNEL);
- if (!buffer)
- return NULL;
+ write_end = min(end, int_th);
- current->mm->total_vm += pgsz;
- current->mm->locked_vm += pgsz;
+ /* if we are already beyond the interrupt threshold,
+ * we fill the entire buffer */
+ if (write_end <= index)
+ write_end = end;
- if (pages)
- *pages = pgsz;
+ if (write_end <= index)
+ break;
+
+ write_size = min((unsigned long) size, write_end - index);
+ memcpy((void *)index, record, write_size);
+
+ record = (const char *)record + write_size;
+ size -= write_size;
+ bytes_written += write_size;
+
+ adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+ adj_write_size *= ds_cfg.sizeof_rec[qual];
- return buffer;
+ /* zero out trailing bytes */
+ memset((char *)index + write_size, 0,
+ adj_write_size - write_size);
+ index += adj_write_size;
+
+ if (index >= end)
+ index = base;
+ ds_set(context->ds, qual, ds_index, index);
+
+ if (index >= int_th)
+ ds_overflow(context, qual);
+ }
+
+ return bytes_written;
}
-static int ds_request(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ *
+ * We use two levels of abstraction:
+ * - the raw data level defined here
+ * - an arch-independent level defined in ds.h
+ */
+
+enum bts_field {
+ bts_from,
+ bts_to,
+ bts_flags,
+
+ bts_qual = bts_from,
+ bts_jiffies = bts_to,
+ bts_pid = bts_flags,
+
+ bts_qual_mask = (bts_qual_max - 1),
+ bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
{
- struct ds_context *context;
- unsigned long buffer, adj;
- const unsigned long alignment = (1 << 3);
- unsigned long irq;
- int error = 0;
+ base += (ds_cfg.sizeof_field * field);
+ return *(unsigned long *)base;
+}
- if (!ds_cfg.sizeof_ds)
- return -EOPNOTSUPP;
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+ base += (ds_cfg.sizeof_field * field);;
+ (*(unsigned long *)base) = val;
+}
- /* we require some space to do alignment adjustments below */
- if (size < (alignment + ds_cfg.sizeof_rec[qual]))
+
+/*
+ * The raw BTS data is architecture dependent.
+ *
+ * For higher-level users, we give an arch-independent view.
+ * - ds.h defines struct bts_struct
+ * - bts_read translates one raw bts record into a bts_struct
+ * - bts_write translates one bts_struct into the raw format and
+ * writes it into the top of the parameter tracer's buffer.
+ *
+ * return: bytes read/written on success; -Eerrno, otherwise
+ */
+static int bts_read(struct bts_tracer *tracer, const void *at,
+ struct bts_struct *out)
+{
+ if (!tracer)
return -EINVAL;
- /* buffer overflow notification is not yet implemented */
- if (ovfl)
- return -EOPNOTSUPP;
+ if (at < tracer->trace.ds.begin)
+ return -EINVAL;
+ if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
+ return -EINVAL;
- context = ds_alloc_context(task);
- if (!context)
- return -ENOMEM;
+ memset(out, 0, sizeof(*out));
+ if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
+ out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
+ out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
+ out->variant.timestamp.pid = bts_get(at, bts_pid);
+ } else {
+ out->qualifier = bts_branch;
+ out->variant.lbr.from = bts_get(at, bts_from);
+ out->variant.lbr.to = bts_get(at, bts_to);
+
+ if (!out->variant.lbr.from && !out->variant.lbr.to)
+ out->qualifier = bts_invalid;
+ }
- spin_lock_irqsave(&ds_lock, irq);
+ return ds_cfg.sizeof_rec[ds_bts];
+}
- error = -EPERM;
- if (!check_tracer(task))
- goto out_unlock;
+static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
+{
+ unsigned char raw[MAX_SIZEOF_BTS];
- get_tracer(task);
+ if (!tracer)
+ return -EINVAL;
- error = -EALREADY;
- if (context->owner[qual] == current)
- goto out_put_tracer;
- error = -EPERM;
- if (context->owner[qual] != NULL)
- goto out_put_tracer;
- context->owner[qual] = current;
+ if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
+ return -EOVERFLOW;
- spin_unlock_irqrestore(&ds_lock, irq);
+ switch (in->qualifier) {
+ case bts_invalid:
+ bts_set(raw, bts_from, 0);
+ bts_set(raw, bts_to, 0);
+ bts_set(raw, bts_flags, 0);
+ break;
+ case bts_branch:
+ bts_set(raw, bts_from, in->variant.lbr.from);
+ bts_set(raw, bts_to, in->variant.lbr.to);
+ bts_set(raw, bts_flags, 0);
+ break;
+ case bts_task_arrives:
+ case bts_task_departs:
+ bts_set(raw, bts_qual, (bts_escape | in->qualifier));
+ bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
+ bts_set(raw, bts_pid, in->variant.timestamp.pid);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ds_write(tracer->ds.context, ds_bts, raw,
+ ds_cfg.sizeof_rec[ds_bts]);
+}
- error = -ENOMEM;
- if (!base) {
- base = ds_allocate_buffer(size, &context->pages[qual]);
- if (!base)
- goto out_release;
- context->buffer[qual] = base;
- }
- error = 0;
+static void ds_write_config(struct ds_context *context,
+ struct ds_trace *cfg, enum ds_qualifier qual)
+{
+ unsigned char *ds = context->ds;
- context->callback[qual] = ovfl;
+ ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
+ ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
+ ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
+ ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
+}
+
+static void ds_read_config(struct ds_context *context,
+ struct ds_trace *cfg, enum ds_qualifier qual)
+{
+ unsigned char *ds = context->ds;
+
+ cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
+ cfg->top = (void *)ds_get(ds, qual, ds_index);
+ cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
+ cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
+}
+
+static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
+ void *base, size_t size, size_t ith,
+ unsigned int flags) {
+ unsigned long buffer, adj;
/* adjust the buffer address and size to meet alignment
* constraints:
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
*/
buffer = (unsigned long)base;
- adj = ALIGN(buffer, alignment) - buffer;
+ adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
buffer += adj;
size -= adj;
- size /= ds_cfg.sizeof_rec[qual];
- size *= ds_cfg.sizeof_rec[qual];
-
- ds_set(context->ds, qual, ds_buffer_base, buffer);
- ds_set(context->ds, qual, ds_index, buffer);
- ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
- if (ovfl) {
- /* todo: select a suitable interrupt threshold */
- } else
- ds_set(context->ds, qual,
- ds_interrupt_threshold, buffer + size + 1);
+ trace->n = size / ds_cfg.sizeof_rec[qual];
+ trace->size = ds_cfg.sizeof_rec[qual];
- /* we keep the context until ds_release */
- return error;
-
- out_release:
- context->owner[qual] = NULL;
- ds_put_context(context);
- put_tracer(task);
- return error;
+ size = (trace->n * trace->size);
- out_put_tracer:
- spin_unlock_irqrestore(&ds_lock, irq);
- ds_put_context(context);
- put_tracer(task);
- return error;
+ trace->begin = (void *)buffer;
+ trace->top = trace->begin;
+ trace->end = (void *)(buffer + size);
+ /* The value for 'no threshold' is -1, which will set the
+ * threshold outside of the buffer, just like we want it.
+ */
+ trace->ith = (void *)(buffer + size - ith);
- out_unlock:
- spin_unlock_irqrestore(&ds_lock, irq);
- ds_put_context(context);
- return error;
+ trace->flags = flags;
}
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
-{
- return ds_request(task, base, size, ovfl, ds_bts);
-}
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
-{
- return ds_request(task, base, size, ovfl, ds_pebs);
-}
-
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
+ enum ds_qualifier qual, struct task_struct *task,
+ void *base, size_t size, size_t th, unsigned int flags)
{
struct ds_context *context;
int error;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
+ error = -EINVAL;
+ if (!base)
goto out;
- kfree(context->buffer[qual]);
- context->buffer[qual] = NULL;
-
- current->mm->total_vm -= context->pages[qual];
- current->mm->locked_vm -= context->pages[qual];
- context->pages[qual] = 0;
- context->owner[qual] = NULL;
-
- /*
- * we put the context twice:
- * once for the ds_get_context
- * once for the corresponding ds_request
- */
- ds_put_context(context);
- out:
- d