From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/ds.h | 258 +++++++++++++++++++++++++++++++++++-------- include/asm-x86/processor.h | 12 +- include/asm-x86/ptrace-abi.h | 14 ++- include/asm-x86/ptrace.h | 38 ++++++- 4 files changed, 265 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 7881368142fa..72c5a190bf48 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h @@ -2,71 +2,237 @@ * Debug Store (DS) support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. 
+ * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H +#ifdef CONFIG_X86_DS + #include #include -struct cpuinfo_x86; +struct task_struct; -/* a branch trace record entry +/* + * Request BTS or PEBS + * + * Due to alignement constraints, the actual buffer may be slightly + * smaller than the requested or provided buffer. * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to request recording for; + * NULL for per-cpu recording on the current cpu + * base: the base pointer for the (non-pageable) buffer; + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer + * ovfl: pointer to a function to be called on buffer overflow; + * NULL if cyclic buffer requested */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); + +/* + * Release BTS or PEBS resources + * + * Frees buffers allocated on ds_request. 
+ * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to release resources for; + * NULL to release resources for the current cpu + */ +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); + +/* + * Return the (array) index of the write pointer. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); + +/* + * Return the (array) index one record beyond the end of the array. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); + +/* + * Provide a pointer to the BTS/PEBS record at parameter index. + * (assuming an array of BTS/PEBS records) + * + * The pointer points directly into the buffer. The user is + * responsible for copying the record. + * + * Returns the size of a single record on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * index: the index of the requested record + * record (out): pointer to the requested record + */ +extern int ds_access_bts(struct task_struct *task, + size_t index, const void **record); +extern int ds_access_pebs(struct task_struct *task, + size_t index, const void **record); + +/* + * Write one or more BTS/PEBS records at the write pointer index and + * advance the write pointer. + * + * If size is not a multiple of the record size, trailing bytes are + * zeroed out. + * + * May result in one or more overflow notifications. 
+ * + * If called during overflow handling, that is, with index >= + * interrupt threshold, the write will wrap around. + * + * An overflow notification is given if and when the interrupt + * threshold is reached during or after the write. + * + * Returns the number of bytes written or -Eerrno. + * + * task: the task to access; + * NULL to access the current cpu + * buffer: the buffer to write + * size: the size of the buffer + */ +extern int ds_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Reset the write pointer of the BTS/PEBS buffer. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); + +/* + * Clear the BTS/PEBS buffer and reset the write pointer. + * The entire buffer will be zeroed out. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); + +/* + * Provide the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value (out): the counter reset value + */ +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); + +/* + * Set the PEBS counter reset value. 
+ * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value: the new counter reset value + */ +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); + +/* + * Initialization + */ +struct cpuinfo_x86; +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); + + -struct bts_struct { - u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - u64 from_ip; - u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - u64 jiffies; - } variant; +/* + * The DS context - part of struct thread_struct. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char *ds; + /* the owner of the BTS and PEBS configuration, respectively */ + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; }; -/* Overflow handling mechanisms */ -#define DS_O_SIGNAL 1 /* send overflow signal */ -#define DS_O_WRAP 2 /* wrap around */ - -extern int ds_allocate(void **, size_t); -extern int ds_free(void **); -extern int ds_get_bts_size(void *); -extern int ds_get_bts_end(void *); -extern int ds_get_bts_index(void *); -extern int ds_set_overflow(void *, int); -extern int ds_get_overflow(void *); -extern int ds_clear(void *); -extern int ds_read_bts(void *, int, struct bts_struct *); -extern int ds_write_bts(void *, const struct bts_struct *); -extern unsigned long ds_debugctl_mask(void); -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); +/* 
called by exit_thread() to free leftover contexts */ +extern void ds_free(struct ds_context *context); + +#else /* CONFIG_X86_DS */ + +#define ds_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 559105220a47..beaccb71628f 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -20,6 +20,7 @@ struct mm_struct; #include #include #include +#include #include #include @@ -415,9 +416,14 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -/* Debug Store - if not 0 points to a DS Save Area configuration; - * goes into MSR_IA32_DS_AREA */ - unsigned long ds_area_msr; +#ifdef CONFIG_X86_DS +/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + struct ds_context *ds_ctx; +#endif /* CONFIG_X86_DS */ +#ifdef CONFIG_X86_PTRACE_BTS +/* the signal to send on a bts buffer overflow */ + unsigned int bts_ovfl_signal; +#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index f224eb3c3157..9bcaa75cbcaf 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -80,8 +80,9 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_PTRACE_BTS +#ifndef __ASSEMBLY__ #include /* configuration/status structure used in PTRACE_BTS_CONFIG and @@ -97,20 +98,20 @@ struct ptrace_bts_config { /* actual size of bts_struct in bytes */ __u32 bts_size; }; -#endif +#endif /* __ASSEMBLY__ */ #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG on buffer overflow instead of wrapping around */ -#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested 
size to max available - instead of failing */ +#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ #define PTRACE_BTS_CONFIG 40 /* Configure branch trace recording. ADDR points to a struct ptrace_bts_config. DATA gives the size of that buffer. - A new buffer is allocated, iff the size changes. + A new buffer is allocated, if requested in the flags. + An overflow signal may only be requested for new buffers. Returns the number of bytes read. */ #define PTRACE_BTS_STATUS 41 @@ -119,7 +120,7 @@ struct ptrace_bts_config { Returns the number of bytes written. */ #define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records. +/* Return the number of available BTS records for draining. DATA and ADDR are ignored. */ #define PTRACE_BTS_GET 43 @@ -139,5 +140,6 @@ struct ptrace_bts_config { BTS records are read from oldest to newest. Returns number of BTS records drained. */ +#endif /* CONFIG_X86_PTRACE_BTS */ #endif diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0b95d6..6303701d18e3 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -125,14 +125,48 @@ struct pt_regs { #endif /* __KERNEL__ */ #endif /* !__i386__ */ + +#ifdef CONFIG_X86_PTRACE_BTS +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. 
+ */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from_ip; + __u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + __u64 jiffies; + } variant; +}; +#endif /* CONFIG_X86_PTRACE_BTS */ + #ifdef __KERNEL__ -/* the DS BTS struct is used for ptrace as well */ -#include +#include +struct cpuinfo_x86; struct task_struct; +#ifdef CONFIG_X86_PTRACE_BTS +extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); +#else +#define ptrace_bts_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_PTRACE_BTS */ extern unsigned long profile_pc(struct pt_regs *regs); -- cgit v1.2.3 From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignment, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very beginning of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 That's 8064 bytes saved for each CPU. 
Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/percpu.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778e916c..69e5c1182fde 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -348,6 +348,7 @@ . = ALIGN(align); \ __per_cpu_start = .; \ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ } \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393e71e1..2edacc8e6b8b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,12 +23,19 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.page_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -- cgit v1.2.3 From 70ef6d595b6e51618a0cbe44b848d8c9db11a010 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 29 May 2008 18:41:04 +0800 Subject: x86: get irq for hpet timer HPET timer's IRQ is 0 by default. So we have to select which irq will be used by these timers. We wait to set the timer's irq until we really open it in order to reduce the chance of conflicting with other device. 
Signed-off-by: Kevin Hao Signed-off-by: Ingo Molnar --- include/linux/hpet.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 2dc29ce6c8e4..6d2626b63a9a 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -37,6 +37,7 @@ struct hpet { #define hpet_compare _u1._hpet_compare #define HPET_MAX_TIMERS (32) +#define HPET_MAX_IRQ (32) /* * HPET general capabilities register @@ -64,7 +65,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v1.2.3 From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: MM: virtual address debug Add some (configurable) expensive sanity checking to catch wrong address translations on x86. - create linux/mmdebug.h file to be able to include this file in asm headers to not get unsolvable loops in header files - __phys_addr on x86_32 became a function in ioremap.c since PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constants are undefined if declared in page_32.h - add __phys_addr_const for initializing doublefault_tss.__cr3 Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2. Contains Andi's enable numa virtual address debug patch. 
Signed-off-by: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar --- include/asm-x86/mmzone_64.h | 2 +- include/asm-x86/page_32.h | 3 ++- include/linux/mm.h | 7 +------ include/linux/mmdebug.h | 18 ++++++++++++++++++ 4 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 include/linux/mmdebug.h (limited to 'include') diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index 594bd0dc1d08..facde3e5314f 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -7,7 +7,7 @@ #ifdef CONFIG_NUMA -#define VIRTUAL_BUG_ON(x) +#include #include diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 424e82f8ae27..9159bfb9dcf9 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -64,7 +64,8 @@ typedef struct page *pgtable_t; #endif #ifndef __ASSEMBLY__ -#define __phys_addr(x) ((x) - PAGE_OFFSET) +#define __phys_addr_const(x) ((x) - PAGE_OFFSET) +extern unsigned long __phys_addr(unsigned long); #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab01..3414a8813e97 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -210,12 +211,6 @@ struct inode; */ #include -#ifdef CONFIG_DEBUG_VM -#define VM_BUG_ON(cond) BUG_ON(cond) -#else -#define VM_BUG_ON(condition) do { } while(0) -#endif - /* * Methods to modify the page usage count. 
* diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h new file mode 100644 index 000000000000..860ed1a71bbe --- /dev/null +++ b/include/linux/mmdebug.h @@ -0,0 +1,18 @@ +#ifndef LINUX_MM_DEBUG_H +#define LINUX_MM_DEBUG_H 1 + +#include + +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(cond) do { } while(0) +#endif + +#ifdef CONFIG_DEBUG_VIRTUAL +#define VIRTUAL_BUG_ON(cond) BUG_ON(cond) +#else +#define VIRTUAL_BUG_ON(cond) do { } while(0) +#endif + +#endif -- cgit v1.2.3 From a1bf9631be7332ce0641e299ddafad2d8223100f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: x86, MM: virtual address debug, v2 I've removed the test from phys_to_nid and made a function from __phys_addr only when the debugging is enabled (on x86_32). Signed-off-by: Jiri Slaby Cc: tglx@linutronix.de Cc: hpa@zytor.com Cc: Mike Travis Cc: Nick Piggin Cc: Cc: linux-mm@kvack.org Cc: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar --- include/asm-x86/mmzone_64.h | 1 - include/asm-x86/page_32.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index facde3e5314f..5e3a6cbddb49 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -29,7 +29,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { unsigned nid; VIRTUAL_BUG_ON(!memnodemap); - VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize); nid = memnodemap[addr >> memnode_shift]; VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); return nid; diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 9159bfb9dcf9..71a2e424e584 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -65,7 +65,11 @@ typedef struct page *pgtable_t; #ifndef __ASSEMBLY__ #define __phys_addr_const(x) ((x) - PAGE_OFFSET) +#ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned 
long); +#else +#define __phys_addr(x) ((x) - PAGE_OFFSET) +#endif #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM -- cgit v1.2.3 From 7aa413def76146f7b3784228556d9e4bc562eab3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 19 Jun 2008 13:28:11 +0200 Subject: x86, MM: virtual address debug, cleanups Signed-off-by: Ingo Molnar --- include/linux/mmdebug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 860ed1a71bbe..8a5509877192 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -6,13 +6,13 @@ #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) #else -#define VM_BUG_ON(cond) do { } while(0) +#define VM_BUG_ON(cond) do { } while (0) #endif #ifdef CONFIG_DEBUG_VIRTUAL #define VIRTUAL_BUG_ON(cond) BUG_ON(cond) #else -#define VIRTUAL_BUG_ON(cond) do { } while(0) +#define VIRTUAL_BUG_ON(cond) do { } while (0) #endif #endif -- cgit v1.2.3 From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b847551..3ab07e425583 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device 
scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e425583..c4e96eb29617 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; -- cgit v1.2.3 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 120 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb29617..8a0238dd2c11 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, 
+ sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static 
inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c11..324bbca85a26 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3 From 72b1e22dfcad1daca6906148fd956ffe404bb0bc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:45 -0700 Subject: x64, x2apic/intr-remap: generic irq migration support from process context Generic infrastructure for migrating the irq from the process context in the presence of CONFIG_GENERIC_PENDING_IRQ. This will be used later for migrating irq in the presence of interrupt-remapping. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..c211984b55e5 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) -- cgit v1.2.3 From d94d93ca5cc36cd78c532def62772c98fe8ba5d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:46 -0700 Subject: x64, x2apic/intr-remap: 8259 specific mask/unmask routines 8259 specific mask/unmask routines which will be used later while enabling interrupt-remapping.
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/i8259.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 2f98df91f1f2..31112b6c595b 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; +extern void mask_8259A(void); +extern void unmask_8259A(void); + #endif /* __ASM_I8259_H__ */ -- cgit v1.2.3 From 4dc2f96cacd1e74c688f94348a3bfd0a980817d5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:47 -0700 Subject: x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Generic ioapic specific routines which will be used later while enabling interrupt-remapping.
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/io_apic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bbcb5fd..1c4a99d882f5 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -183,6 +183,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); +#ifdef CONFIG_X86_64 +extern int save_mask_IO_APIC_setup(void); +extern void restore_IO_APIC_setup(void); +extern void reinit_intr_remapped_IO_APIC(int); +#endif + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -- cgit v1.2.3 From 0c81c746f9bdbfaafe64322d540c8b7b59c27314 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:48 -0700 Subject: x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Move the read_apic_id() to genapic routines. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/genapic_64.h | 1 + include/asm-x86/mach-default/mach_apic.h | 1 + include/asm-x86/mach-default/mach_apicdef.h | 3 ++- include/asm-x86/smp.h | 4 +--- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 647e4e5c2580..d567abc347a9 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -27,6 +27,7 @@ struct genapic { /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); + unsigned int (*read_apic_id)(void); }; extern struct genapic *genapic; diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 0b2cde5e1b74..d172c554ab9f 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -30,6 +30,7 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) +#define read_apic_id (genapic->read_apic_id) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index e4b29ba37de6..453b58a67e29 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -5,8 +5,9 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24)&0xFFu) +#define GET_APIC_ID(x) (x) #define SET_APIC_ID(x) (((x)<<24)) +#define GET_XAPIC_ID(x) (((x) >> 24) & 0xFFu) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff 
--git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 2e221f1ce0b2..9848715fbd9e 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -169,12 +169,10 @@ static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); } -#else -extern unsigned int read_apic_id(void); #endif -# ifdef APIC_DEFINITION +# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else # include -- cgit v1.2.3 From 1b374e4d6f8b3eb2fcd034fcc24ea8ba1dfde7aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:49 -0700 Subject: x64, x2apic/intr-remap: basic apic ops support Introduce basic apic operations which handle the apic programming. This will be used later to introduce other specific operations for x2apic. For the performance critical accesses like IPI's, EOI etc, we use the native operations as they are already referenced by different indirections like genapic, irq_chip etc. 64bit Paravirt ops can also define their apic operations accordingly.
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/apic.h | 43 +++++++++++++++++++++++++++++++++++++------ include/asm-x86/ipi.h | 16 +++++++++++----- include/asm-x86/paravirt.h | 2 ++ include/asm-x86/smp.h | 2 +- 4 files changed, 51 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 4e2c1e517f06..6fda195337c5 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -47,32 +47,59 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else -#define apic_write native_apic_write -#define apic_write_atomic native_apic_write_atomic -#define apic_read native_apic_read +#ifndef CONFIG_X86_64 +#define apic_write native_apic_mem_write +#define apic_write_atomic native_apic_mem_write_atomic +#define apic_read native_apic_mem_read +#endif #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif extern int is_vsmp_box(void); -static inline void native_apic_write(unsigned long reg, u32 v) +static inline void native_apic_mem_write(u32 reg, u32 v) { *((volatile u32 *)(APIC_BASE + reg)) = v; } -static inline void native_apic_write_atomic(unsigned long reg, u32 v) +static inline void native_apic_mem_write_atomic(u32 reg, u32 v) { (void)xchg((u32 *)(APIC_BASE + reg), v); } -static inline u32 native_apic_read(unsigned long reg) +static inline u32 native_apic_mem_read(u32 reg) { return *((volatile u32 *)(APIC_BASE + reg)); } +#ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); +extern void apic_icr_write(u32 low, u32 id); +#else + +struct apic_ops { + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + void (*write_atomic)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void 
(*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); +}; + +extern struct apic_ops *apic_ops; + +#define apic_read (apic_ops->read) +#define apic_write (apic_ops->write) +#define apic_write_atomic (apic_ops->write_atomic) +#define apic_icr_read (apic_ops->icr_read) +#define apic_icr_write (apic_ops->icr_write) +#define apic_wait_icr_idle (apic_ops->wait_icr_idle) +#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +#endif + extern int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC @@ -95,7 +122,11 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ +#ifdef CONFIG_X86_32 apic_write_around(APIC_EOI, 0); +#else + native_apic_mem_write(APIC_EOI, 0); +#endif } extern int lapic_get_maxlvt(void); diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index 196d63c28aa4..3d8d6a6c1f8e 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { @@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Wait for idle. */ - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * No need to touch the target chip field @@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. 
*/ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } /* @@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, if (unlikely(vector == NMI_VECTOR)) safe_apic_wait_icr_idle(); else - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); + native_apic_mem_write(APIC_ICR2, cfg); /* * program the ICR @@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index ef5e8ec6a6ab..10adac02e6db 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -891,6 +891,7 @@ static inline void slow_down_io(void) /* * Basic functions accessing APICs. 
*/ +#ifndef CONFIG_X86_64 static inline void apic_write(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); @@ -905,6 +906,7 @@ static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } +#endif static inline void setup_boot_clock(void) { diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 9848715fbd9e..d9d007d22785 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -158,13 +158,13 @@ extern int safe_smp_processor_id(void); #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 static inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#ifndef CONFIG_X86_64 static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); -- cgit v1.2.3 From 32e1d0a0651004f5fe47f85a2a5c725ad579a90c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:50 -0700 Subject: x64, x2apic/intr-remap: cpuid bits for x2apic feature cpuid feature for x2apic. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 75ef959db329..5be9510ee012 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -90,6 +90,7 @@ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -188,6 +189,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v1.2.3 From 13c88fb58d0112d47f7839f24a755715c6218822 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:52 -0700 Subject: x64, x2apic/intr-remap: x2apic ops for x2apic mode support x2apic ops for x2apic mode support. This uses MSR interface and differs slightly from the xapic register layout. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/apic.h | 22 ++++++++++++++++++++++ include/asm-x86/apicdef.h | 3 +++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 6fda195337c5..bb54928373ca 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -73,6 +75,26 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u32 low, high; + + if (reg == APIC_DFR) + return -1; + + rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); + return low; +} + #ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 6b9008c78731..bcae297b30b2 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -105,6 +105,7 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 #define APIC_TDR_DIV_TMBASE (1 << 2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 @@ -128,6 +129,8 @@ #define APIC_EILVT3 0x530 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 -- cgit v1.2.3 From cff73a6ffaed726780b001937d2a42efde553922 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:53 -0700 Subject: x64, x2apic/intr-remap: introcude self IPI to 
genapic routines Introduce self IPI op for genapic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/genapic_64.h | 2 ++ include/asm-x86/hw_irq.h | 2 ++ include/asm-x86/mach-default/mach_apic.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index d567abc347a9..6777d71aabc9 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -24,6 +24,7 @@ struct genapic { void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); @@ -36,6 +37,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; extern int acpi_madt_oem_check(char *, char *); +extern void apic_send_IPI_self(int vector); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 18f067c310f7..2ae47e7c1063 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -72,7 +72,9 @@ extern void enable_IO_APIC(void); #endif /* IPI functions */ +#ifdef CONFIG_X86_32 extern void send_IPI_self(int vector); +#endif extern void send_IPI(int dest, int vector); /* Statistics */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index d172c554ab9f..e06d23975d6a 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -31,6 +31,7 @@ static inline cpumask_t target_cpus(void) #define phys_pkg_id (genapic->phys_pkg_id) #define 
vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id (genapic->read_apic_id) +#define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio -- cgit v1.2.3 From 12a67cf6851871ca8df42025c94f140c303d0f7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:54 -0700 Subject: x64, x2apic/intr-remap: x2apic cluster mode support x2apic cluster mode support. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/genapic_64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 6777d71aabc9..232460305877 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -35,6 +35,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -- cgit v1.2.3 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). 
For edge triggered, irq migration is a simple atomic update (of vector and cpu destination) of the IRTE and a flush of the hardware cache. For level triggered, we need to modify the io-apic RTE as well with the updated vector information, along with modifying the IRTE with vector and cpu destination. So irq migration for level triggered is a little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, the only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to the IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/apic.h | 9 +++++++++ include/asm-x86/io_apic.h | 14 ++++++++++++++ include/asm-x86/irq_remapping.h | 8 ++++++++ include/linux/dmar.h | 1 + 4 files changed, 32 insertions(+) create mode 100644 include/asm-x86/irq_remapping.h (limited to 'include') diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index bb54928373ca..aa746704a5c9 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -134,6 +134,15 @@ extern int get_physical_broadcast(void); # define apic_write_around(x, y) apic_write_atomic((x), (y)) #endif +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + static inline void ack_APIC_irq(void) { /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index
1c4a99d882f5..8dc2622714c8 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -107,6 +107,20 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + #ifdef CONFIG_X86_IO_APIC /* diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h new file mode 100644 index 000000000000..78242c6ffa58 --- /dev/null +++ b/include/asm-x86/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IRQ_REMAPPING_H +#define _ASM_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a26..bf41ffa74705 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v1.2.3 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. 
This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/msidef.h | 4 ++++ include/linux/dmar.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29ce426d..57fd85935e5a 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -48,4 +48,8 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_MSIDEF_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa74705..c360c558e59e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v1.2.3 From 6e1cb38a2aef7680975e71f23de187859ee8b158 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:58 -0700 Subject: x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping x2apic support. 
Interrupt-remapping must be enabled before enabling x2apic, this is needed to ensure that IO interrupts continue to work properly after the cpu mode is changed to x2apic(which uses 32bit extended physical/cluster apic id). On systems where apicid's are > 255, BIOS can handover the control to OS in x2apic mode. Or if the OS handover was in legacy xapic mode, check if it is capable of x2apic mode. And if we succeed in enabling Interrupt-remapping, then we can enable x2apic mode in the CPU. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/apic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index aa746704a5c9..129752dd2525 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -100,6 +100,11 @@ extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); extern void apic_icr_write(u32 low, u32 id); #else +extern int x2apic, x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void enable_IR_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); struct apic_ops { u32 (*read)(u32 reg); -- cgit v1.2.3 From 2d9579a124d746a3e0e0ba45e57d80800ee80807 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:59 -0700 Subject: x64, x2apic/intr-remap: support for x2apic physical mode support x2apic Physical mode support. By default we will use x2apic cluster mode. x2apic physical mode can be selected using "x2apic_phys" boot parameter. 
Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/asm-x86/genapic_64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 232460305877..122b9242a40f 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -36,6 +36,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -- cgit v1.2.3 From ad66dd340f561bdde2285992314d9e4fd9b6191e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:56 -0700 Subject: x2apic: xen64 paravirt basic apic ops Define the Xen specific basic apic ops, in additon to paravirt apic ops, with some misc warning fixes. Signed-off-by: Suresh Siddha Cc: Jeremy Fitzhardinge Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- include/asm-x86/paravirt.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 10adac02e6db..5e34d26aa3b5 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -200,13 +200,15 @@ stru