summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2013-08-06 16:04:13 -0400
committerChris Metcalf <cmetcalf@tilera.com>2013-08-13 16:04:10 -0400
commit2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9 (patch)
treeee33ba7e452e8614130a811211eb2383a3133194
parentf10da5472c6907a3fbd6886224b36d21925ce47b (diff)
downloadlinux-2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9.tar.gz
linux-2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9.tar.bz2
linux-2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9.zip
tile: fast-path unaligned memory access for tilegx
This change enables unaligned userspace memory access via a kernel fast path on tilegx. The kernel tracks user PC/instruction pairs per-thread using a direct-mapped cache in userspace. The cache maps those PC/instruction pairs to JIT'ed instruction sequences that load or store using byte-wide load store intructions and then synthesize 2-, 4- or 8-byte load or store results. Once an instruction has been seen to generate an unaligned access once, subsequent hits on that instruction typically require overhead of only around 50 cycles if cache and TLB is hot. We support the prctl() PR_GET_UNALIGN / PR_SET_UNALIGN sys call to enable or disable unaligned fixups on a per-process basis. To do this we pull some of the tilepro unaligned support out of the single_step.c file; tilepro uses instruction disassembly for both single-step and unaligned access support. Since tilegx actually has hardware singlestep support, though, it's cleaner to keep the tilegx unaligned access code in a separate file. While we're at it, properly rename the tilepro-specific types, etc., to have tilepro suffixes instead of generic tile suffixes. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
-rw-r--r--arch/tile/include/asm/processor.h7
-rw-r--r--arch/tile/include/asm/ptrace.h3
-rw-r--r--arch/tile/include/asm/sections.h4
-rw-r--r--arch/tile/include/asm/thread_info.h6
-rw-r--r--arch/tile/include/asm/traps.h11
-rw-r--r--arch/tile/kernel/Makefile3
-rw-r--r--arch/tile/kernel/asm-offsets.c6
-rw-r--r--arch/tile/kernel/intvec_32.S1
-rw-r--r--arch/tile/kernel/intvec_64.S231
-rw-r--r--arch/tile/kernel/proc.c2
-rw-r--r--arch/tile/kernel/process.c21
-rw-r--r--arch/tile/kernel/ptrace.c4
-rw-r--r--arch/tile/kernel/single_step.c116
-rw-r--r--arch/tile/kernel/unaligned.c1609
-rw-r--r--arch/tile/mm/fault.c41
15 files changed, 1996 insertions, 69 deletions
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index b3f104953da2..cda27243fb09 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -247,6 +247,13 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(task) task_pc(task)
#define KSTK_ESP(task) task_sp(task)
+/* Fine-grained unaligned JIT support */
+#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
+#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
+
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
/* Standard format for printing registers and other word-size data. */
#ifdef __tilegx__
# define REGFMT "0x%016lx"
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index fd412260aff7..73b681b566f7 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -79,8 +79,7 @@ extern void single_step_execve(void);
struct task_struct;
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code);
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
#ifdef __tilegx__
/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index 7d8a935a9238..cc95276ef9c9 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -28,7 +28,9 @@ extern char __w1data_begin[], __w1data_end[];
/* Not exactly sections, but PC comparison points in the code. */
extern char __rt_sigreturn[], __rt_sigreturn_end[];
-#ifndef __tilegx__
+#ifdef __tilegx__
+extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
+#else
extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index d1733dee98a2..b8aa6df3e102 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -39,6 +39,11 @@ struct thread_info {
struct restart_block restart_block;
struct single_step_state *step_state; /* single step state
(if non-zero) */
+ int align_ctl; /* controls unaligned access */
+#ifdef __tilegx__
+ unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
+ void __user *unalign_jit_base; /* unalign fixup JIT base */
+#endif
};
/*
@@ -56,6 +61,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
.step_state = NULL, \
+ .align_ctl = 0, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index e28c3df4176a..5f172b2403a6 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -15,6 +15,7 @@
#ifndef _ASM_TILE_TRAPS_H
#define _ASM_TILE_TRAPS_H
+#ifndef __ASSEMBLY__
#include <arch/chip.h>
/* mm/fault.c */
@@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num);
/* kernel/intvec_64.S */
void fill_ra_stack(void);
+
+/* Handle unalign data fixup. */
+extern void do_unaligned(struct pt_regs *regs, int vecnum);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef __tilegx__
+/* 128 byte JIT per unalign fixup. */
+#define UNALIGN_JIT_SHIFT 7
#endif
#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 5334be8e2538..6846c4ef5bf1 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -5,7 +5,8 @@
extra-y := vmlinux.lds head_$(BITS).o
obj-y := backtrace.o entry.o irq.o messaging.o \
pci-dma.o proc.o process.o ptrace.o reboot.o \
- setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
+ setup.o signal.o single_step.o stack.o sys.o \
+ sysfs.o time.o traps.o unaligned.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
obj-$(CONFIG_HARDWALL) += hardwall.o
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
index 8fff4757fffe..8652b0be4685 100644
--- a/arch/tile/kernel/asm-offsets.c
+++ b/arch/tile/kernel/asm-offsets.c
@@ -60,6 +60,12 @@ void foo(void)
offsetof(struct thread_info, homecache_cpu));
DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
offsetof(struct thread_info, step_state));
+#ifdef __tilegx__
+ DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET,
+ offsetof(struct thread_info, unalign_jit_base));
+ DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET,
+ offsetof(struct thread_info, unalign_jit_tmp));
+#endif
DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
offsetof(struct task_struct, thread.ksp));
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 25966af74a28..388061319c4c 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1420,7 +1420,6 @@ handle_ill:
{
lw r0, r0 /* indirect thru thread_info to get task_info*/
addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
- move r2, zero /* load error code into r2 */
}
jal send_sigtrap /* issue a SIGTRAP */
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 85d483957027..884af9ea5bed 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -17,11 +17,13 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/unistd.h>
+#include <linux/init.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/asm-offsets.h>
#include <asm/types.h>
+#include <asm/traps.h>
#include <asm/signal.h>
#include <hv/hypervisor.h>
#include <arch/abi.h>
@@ -98,6 +100,189 @@
}
.endm
+ /*
+ * Unalign data exception fast handling: In order to handle
+ * unaligned data access, a fast JIT version is generated and stored
+ * in a specific area in user space. We first need to do a quick poke
+ * to see if the JIT is available. We use certain bits in the fault
+ * PC (3 to 9 is used for 16KB page size) as index to address the JIT
+ * code area. The first 64bit word is the fault PC, and the 2nd one is
+ * the fault bundle itself. If these 2 words both match, then we
+ * directly "iret" to JIT code. If not, a slow path is invoked to
+ * generate new JIT code. Note: the current JIT code WILL be
+ * overwritten if it existed. So, ideally we can handle 128 unalign
+ * fixups via JIT. For lookup efficiency and to effectively support
+ * tight loops with multiple unaligned reference, a simple
+ * direct-mapped cache is used.
+ *
+ * SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
+ * SPR_EX_CONTEXT_K_1 has ICS set.
+ * SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
+ * SPR_EX_CONTEXT_0_1 = 0.
+ */
+ .macro int_hand_unalign_fast vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ /* Put r3 in SPR_SYSTEM_SAVE_K_1. */
+ mtspr SPR_SYSTEM_SAVE_K_1, r3
+
+ mfspr r3, SPR_EX_CONTEXT_K_1
+ /*
+ * Examine if exception comes from user without ICS set.
+ * If not, just go directly to the slow path.
+ */
+ bnez r3, hand_unalign_slow_nonuser
+
+ mfspr r3, SPR_SYSTEM_SAVE_K_0
+
+ /* Get &thread_info->unalign_jit_tmp[0] in r3. */
+ mm r3, zero, LOG2_THREAD_SIZE, 63
+#if THREAD_SIZE < 65536
+ addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
+#else
+ addli r3, r3, -(PAGE_SIZE/2)
+ addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
+#endif
+
+ /*
+ * Save r0, r1, r2 into thread_info array r3 points to
+ * from low to high memory in order.
+ */
+ st_add r3, r0, 8
+ st_add r3, r1, 8
+ {
+ st_add r3, r2, 8
+ andi r2, sp, 7
+ }
+
+ /* Save stored r3 value so we can revert it on a page fault. */
+ mfspr r1, SPR_SYSTEM_SAVE_K_1
+ st r3, r1
+
+ {
+ /* Generate a SIGBUS if sp is not 8-byte aligned. */
+ bnez r2, hand_unalign_slow_badsp
+ }
+
+ /*
+ * Get the thread_info in r0; load r1 with pc. Set the low bit of sp
+ * as an indicator to the page fault code in case we fault.
+ */
+ {
+ ori sp, sp, 1
+ mfspr r1, SPR_EX_CONTEXT_K_0
+ }
+
+ /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
+ {
+ addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
+ (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
+ bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
+ }
+
+ /* Load the jit_info; multiply r2 by 128. */
+ {
+ ld r0, r0
+ shli r2, r2, UNALIGN_JIT_SHIFT
+ }
+
+ /*
+ * If r0 is NULL, the JIT page is not mapped, so go to slow path;
+ * add offset r2 to r0 at the same time.
+ */
+ {
+ beqz r0, hand_unalign_slow
+ add r2, r0, r2
+ }
+
+ /*
+ * We are loading from userspace (both the JIT info PC and
+ * instruction word, and the instruction word we executed)
+ * and since either could fault while holding the interrupt
+ * critical section, we must tag this region and check it in
+ * do_page_fault() to handle it properly.
+ */
+ENTRY(__start_unalign_asm_code)
+
+ /* Load first word of JIT in r0 and increment r2 by 8. */
+ ld_add r0, r2, 8
+
+ /*
+ * Compare the PC with the 1st word in JIT; load the fault bundle
+ * into r1.
+ */
+ {
+ cmpeq r0, r0, r1
+ ld r1, r1
+ }
+
+ /* Go to slow path if PC doesn't match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * Load the 2nd word of JIT, which is supposed to be the fault
+ * bundle for a cache hit. Increment r2; after this bundle r2 will
+ * point to the potential start of the JIT code we want to run.
+ */
+ ld_add r0, r2, 8
+
+ /* No further accesses to userspace are done after this point. */
+ENTRY(__end_unalign_asm_code)
+
+ /* Compare the real bundle with what is saved in the JIT area. */
+ {
+ cmpeq r0, r1, r0
+ mtspr SPR_EX_CONTEXT_0_1, zero
+ }
+
+ /* Go to slow path if the fault bundle does not match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * A cache hit is found.
+ * r2 points to start of JIT code (3rd word).
+ * r0 is the fault pc.
+ * r1 is the fault bundle.
+ * Reset the low bit of sp.
+ */
+ {
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ andi sp, sp, ~1
+ }
+
+ /* Write r2 into EX_CONTEXT_K_0 and increment PC. */
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r2
+ addi r0, r0, 8
+ }
+
+ /*
+ * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
+ * user with ICS set. This way, if the JIT fixup causes another
+ * unalign exception (which shouldn't be possible) the user
+ * process will be terminated with SIGBUS. Also, our fixup will
+ * run without interleaving with external interrupts.
+ * Each fixup is at most 14 bundles, so it won't hold ICS for long.
+ */
+ {
+ movei r1, PL_ICS_EX1(USER_PL, 1)
+ mtspr SPR_EX_CONTEXT_0_0, r0
+ }
+
+ {
+ mtspr SPR_EX_CONTEXT_K_1, r1
+ addi r3, r3, -(3 * 8)
+ }
+
+ /* Restore r0..r3. */
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld_add r2, r3, 8
+ ld r3, r3
+
+ iret
+ ENDPROC(intvec_\vecname)
+ .endm
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
@@ -118,15 +303,21 @@ intvec_feedback:
* The "processing" argument specifies the code for processing
* the interrupt. Defaults to "handle_interrupt".
*/
- .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
- .org (\vecnum << 8)
+ .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
intvec_\vecname:
/* Temporarily save a register so we have somewhere to work. */
mtspr SPR_SYSTEM_SAVE_K_1, r0
mfspr r0, SPR_EX_CONTEXT_K_1
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ /*
+ * The unalign data fastpath code sets the low bit in sp to
+ * force us to reset it here on fault.
+ */
+ {
+ blbs sp, 2f
+ andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ }
.ifc \vecnum, INT_DOUBLE_FAULT
/*
@@ -176,7 +367,7 @@ intvec_\vecname:
}
.endif
-
+2:
/*
* SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
* the current stack top in the higher bits. So we recover
@@ -1223,10 +1414,31 @@ STD_ENTRY(_sys_clone)
j sys_clone
STD_ENDPROC(_sys_clone)
-/* The single-step support may need to read all the registers. */
+ /*
+ * Recover r3, r2, r1 and r0 here saved by unalign fast vector.
+ * The vector area limit is 32 bundles, so we handle the reload here.
+ * r0, r1, r2 are in thread_info from low to high memory in order.
+ * r3 points to location the original r3 was saved.
+ * We put this code in the __HEAD section so it can be reached
+ * via a conditional branch from the fast path.
+ */
+ __HEAD
+hand_unalign_slow:
+ andi sp, sp, ~1
+hand_unalign_slow_badsp:
+ addi r3, r3, -(3 * 8)
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld r2, r3
+hand_unalign_slow_nonuser:
+ mfspr r3, SPR_SYSTEM_SAVE_K_1
+ __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
+
+/* The unaligned data support needs to read all the registers. */
int_unalign:
push_extra_callee_saves r0
- j do_trap
+ j do_unaligned
+ENDPROC(hand_unalign_slow)
/* Fill the return address stack with nonzero entries. */
STD_ENTRY(fill_ra_stack)
@@ -1240,6 +1452,11 @@ STD_ENTRY(fill_ra_stack)
4: jrp r0
STD_ENDPROC(fill_ra_stack)
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+ __int_hand \vecnum, \vecname, \c_routine, \processing
+ .endm
+
/* Include .intrpt1 array of interrupt vectors */
.section ".intrpt1", "ax"
@@ -1272,7 +1489,7 @@ STD_ENTRY(fill_ra_stack)
int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
int_hand INT_SWINT_0, SWINT_0, do_trap
int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
- int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index dafc447b5125..681100c59fda 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -113,7 +113,6 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory
*/
-#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
static ctl_table unaligned_subtable[] = {
{
.procname = "enabled",
@@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void)
}
arch_initcall(proc_sys_tile_init);
-#endif
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 8d6c51d55762..25678b83b747 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -33,6 +33,7 @@
#include <asm/syscalls.h>
#include <asm/traps.h>
#include <asm/setup.h>
+#include <asm/uaccess.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
@@ -147,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
task_thread_info(p)->step_state = NULL;
+#ifdef __tilegx__
+ /*
+ * Do not clone unalign jit fixup from the parent; each thread
+ * must allocate its own on demand.
+ */
+ task_thread_info(p)->unalign_jit_base = NULL;
+#endif
+
/*
* Copy the registers onto the kernel stack so the
* return-from-interrupt code will reload it into registers.
@@ -205,6 +214,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
return 0;
}
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+ task_thread_info(tsk)->align_ctl = val;
+ return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+ return put_user(task_thread_info(tsk)->align_ctl,
+ (unsigned int __user *)adr);
+}
+
/*
* Return "current" if it looks plausible, or else a pointer to a dummy.
* This can be helpful if we are just trying to emit a clean panic.
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index 0f83ed4602b2..bac187498d61 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -272,7 +272,7 @@ void do_syscall_trace_exit(struct pt_regs *regs)
trace_sys_exit(regs, regs->regs[0]);
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
{
struct siginfo info;
@@ -288,5 +288,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* Handle synthetic interrupt delivered only by the simulator. */
void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
{
- send_sigtrap(current, regs, fault_num);
+ send_sigtrap(current, regs);
}
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 27742e87e255..5ef2e9eae5c5 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -12,41 +12,30 @@
* more details.
*
* A code-rewriter that enables instruction single-stepping.
- * Derived from iLib's single-stepping code.
*/
-#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-
-/* These functions are only used on the TILE platform */
+#include <linux/smp.h>
+#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/prctl.h>
#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <arch/abi.h>
+#include <arch/spr_def.h>
#include <arch/opcode.h>
-#define signExtend17(val) sign_extend((val), 17)
-#define TILE_X1_MASK (0xffffffffULL << 31)
-
-int unaligned_printk;
-static int __init setup_unaligned_printk(char *str)
-{
- long val;
- if (strict_strtol(str, 0, &val) != 0)
- return 0;
- unaligned_printk = val;
- pr_info("Printk for each unaligned data accesses is %s\n",
- unaligned_printk ? "enabled" : "disabled");
- return 1;
-}
-__setup("unaligned_printk=", setup_unaligned_printk);
+#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-unsigned int unaligned_fixup_count;
+#define signExtend17(val) sign_extend((val), 17)
+#define TILE_X1_MASK (0xffffffffULL << 31)
enum mem_op {
MEMOP_NONE,
@@ -56,12 +45,13 @@ enum mem_op {
MEMOP_STORE_POSTINCR
};
-static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
+static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n,
+ s32 offset)
{
- tile_bundle_bits result;
+ tilepro_bundle_bits result;
/* mask out the old offset */
- tile_bundle_bits mask = create_BrOff_X1(-1);
+ tilepro_bundle_bits mask = create_BrOff_X1(-1);
result = n & (~mask);
/* or in the new offset */
@@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
return result;
}
-static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest,
+ int src)
{
- tile_bundle_bits result;
- tile_bundle_bits op;
+ tilepro_bundle_bits result;
+ tilepro_bundle_bits op;
result = n & (~TILE_X1_MASK);
@@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
return result;
}
-static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n)
{
return move_X1(n, TREG_ZERO, TREG_ZERO);
}
-static inline tile_bundle_bits addi_X1(
- tile_bundle_bits n, int dest, int src, int imm)
+static inline tilepro_bundle_bits addi_X1(
+ tilepro_bundle_bits n, int dest, int src, int imm)
{
n &= ~TILE_X1_MASK;
@@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1(
return n;
}
-static tile_bundle_bits rewrite_load_store_unaligned(
+static tilepro_bundle_bits rewrite_load_store_unaligned(
struct single_step_state *state,
- tile_bundle_bits bundle,
+ tilepro_bundle_bits bundle,
struct pt_regs *regs,
enum mem_op mem_op,
int size, int sign_ext)
{
unsigned char __user *addr;
int val_reg, addr_reg, err, val;
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
/* Get address and value registers */
if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
@@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned(
* tilepro hardware would be doing, if it could provide us with the
* actual bad address in an SPR, which it doesn't.
*/
- if (unaligned_fixup == 0) {
+ if (align_ctl == 0) {
siginfo_t info = {
.si_signo = SIGBUS,
.si_code = BUS_ADRALN,
@@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned(
if (err) {
siginfo_t info = {
- .si_signo = SIGSEGV,
- .si_code = SEGV_MAPERR,
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
.si_addr = addr
};
- trace_unhandled_signal("segfault", regs,
- (unsigned long)addr, SIGSEGV);
+ trace_unhandled_signal("bad address for unaligned fixup", regs,
+ (unsigned long)addr, SIGBUS);
force_sig_info(info.si_signo, &info, current);
- return (tile_bundle_bits) 0;
+ return (tilepro_bundle_bits) 0;
}
if (unaligned_printk || unaligned_fixup_count == 0) {
@@ -285,7 +287,7 @@ void single_step_execve(void)
ti->step_state = NULL;
}
-/**
+/*
* single_step_once() - entry point when single stepping has been triggered.
* @regs: The machine register state
*
@@ -304,20 +306,31 @@ void single_step_execve(void)
*/
void single_step_once(struct pt_regs *regs)
{
- extern tile_bundle_bits __single_step_ill_insn;
- extern tile_bundle_bits __single_step_j_insn;
- extern tile_bundle_bits __single_step_addli_insn;
- extern tile_bundle_bits __single_step_auli_insn;
+ extern tilepro_bundle_bits __single_step_ill_insn;
+ extern tilepro_bundle_bits __single_step_j_insn;
+ extern tilepro_bundle_bits __single_step_addli_insn;
+ extern tilepro_bundle_bits __single_step_auli_insn;
struct thread_info *info = (void *)current_thread_info();
struct single_step_state *state = info->step_state;
int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
- tile_bundle_bits __user *buffer, *pc;
- tile_bundle_bits bundle;
+ tilepro_bundle_bits __user *buffer, *pc;
+ tilepro_bundle_bits bundle;
int temp_reg;
int target_reg = TREG_LR;
int err;
enum mem_op mem_op = MEMOP_NONE;
int size = 0, sign_ext = 0; /* happy compiler */
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
asm(
" .pushsection .rodata.single_step\n"
@@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs)
if (regs->faultnum == INT_SWINT_1)
regs->pc -= 8;
- pc = (tile_bundle_bits __user *)(regs->pc);
+ pc = (tilepro_bundle_bits __user *)(regs->pc);
if (get_user(bundle, pc) != 0) {
pr_err("Couldn't read instruction at %p trying to step\n", pc);
return;
@@ -627,9 +640,9 @@ void single_step_once(struct pt_regs *regs)
/*
* Check if we need to rewrite an unaligned load/store.
- * Returning zero is a special value meaning we need to SIGSEGV.
+ * Returning zero is a special value meaning we generated a signal.
*/
- if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+ if (mem_op != MEMOP_NONE && align_ctl >= 0) {
bundle = rewrite_load_store_unaligned(state, bundle, regs,
mem_op, size, sign_ext);
if (bundle == 0)
@@ -668,9 +681,9 @@ void single_step_once(struct pt_regs *regs)
}
/* End with a jump back to the next instruction */
- delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+ delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) -
(unsigned long)buffer) >>
- TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+ TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
bundle = __single_step_j_insn;
bundle |= create_JOffLong_X1(delta);
err |= __put_user(bundle, buffer++);
@@ -698,9 +711,6 @@ void single_step_once(struct pt_regs *regs)
}
#else
-#include <linux/smp.h>
-#include <linux/ptrace.h>
-#include <arch/spr_def.h>
static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
@@ -743,10 +753,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
} else if ((*ss_pc != regs->pc) ||
(!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) {
- ptrace_notify(SIGTRAP);
control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
+ send_sigtrap(current, regs);
}
}
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
new file mode 100644
index 000000000000..b425fb6a480d
--- /dev/null
+++ b/arch/tile/kernel/unaligned.c
@@ -0,0 +1,1609 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * A code-rewriter that handles unaligned exception.
+ */
+
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/prctl.h>
+#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <arch/abi.h>
+#include <arch/spr_def.h>
+#include <arch/opcode.h>
+
+
+/*
+ * This file handles unaligned exception for tile-Gx. The tilepro's unaligned
+ * exception is supported out of single_step.c
+ */
+
+int unaligned_printk;
+
+static int __init setup_unaligned_printk(char *str)
+{
+ long val;
+ if (kstrtol(str, 0, &val) != 0)
+ return 0;
+ unaligned_printk = val;
+ pr_info("Printk for each unaligned data accesses is %s\n",
+ unaligned_printk ? "enabled" : "disabled");
+ return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+
+unsigned int unaligned_fixup_count;
+
+#ifdef __tilegx__
+
+/*
+ * Unalign data jit fixup code fragement. Reserved space is 128 bytes.
+ * The 1st 64-bit word saves fault PC address, 2nd word is the fault
+ * instruction bundle followed by 14 JIT bundles.
+ */
+
+struct unaligned_jit_fragment {
+ unsigned long pc;
+ tilegx_bundle_bits bundle;
+ tilegx_bundle_bits insn[14];
+};
+
+/*
+ * Check if a nop or fnop at bundle's pipeline X0.
+ */
+
+static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_X0(bundle) ==
+ NOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)) ||
+ ((get_UnaryOpcodeExtension_X0(bundle) ==
+ FNOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)));
+}
+
+/*
+ * Check if nop or fnop at bundle's pipeline X1.