Diffstat:
-rw-r--r--  arch/x86/crypto/Makefile                        |   2
-rw-r--r--  arch/x86/crypto/aesni-intel_avx-x86_64.S        |  28
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx2-asm_64.S    |   5
-rw-r--r--  arch/x86/crypto/crc32c-pcl-intel-asm_64.S       |   7
-rw-r--r--  arch/x86/crypto/sha1_avx2_x86_64_asm.S          |   8
-rw-r--r--  arch/x86/crypto/sha1_ni_asm.S                   |   8
-rw-r--r--  arch/x86/crypto/sha256-avx2-asm.S               |  13
-rw-r--r--  arch/x86/crypto/sha512-avx-asm.S                |  41
-rw-r--r--  arch/x86/crypto/sha512-avx2-asm.S               |  42
-rw-r--r--  arch/x86/crypto/sha512-ssse3-asm.S              |  41
-rw-r--r--  tools/objtool/arch/x86/decode.c                 | 282
-rw-r--r--  tools/objtool/arch/x86/include/arch/cfi_regs.h  |  12
-rw-r--r--  tools/objtool/builtin-check.c                   |  43
-rw-r--r--  tools/objtool/builtin-orc.c                     |   5
-rw-r--r--  tools/objtool/check.c                           |  53
-rw-r--r--  tools/objtool/include/objtool/arch.h            |   1
-rw-r--r--  tools/objtool/include/objtool/builtin.h         |   5
-rw-r--r--  tools/objtool/objtool.c                         |  64
18 files changed, 384 insertions, 276 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b28e36b7c96b..d0959e7b809f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
#
# x86 crypto algorithms
-OBJECT_FILES_NON_STANDARD := y
-
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 2cf8e94d986a..98e3552b6e03 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -212,10 +212,6 @@ HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsu
#define arg4 %rcx
#define arg5 %r8
#define arg6 %r9
-#define arg7 STACK_OFFSET+8*1(%r14)
-#define arg8 STACK_OFFSET+8*2(%r14)
-#define arg9 STACK_OFFSET+8*3(%r14)
-#define arg10 STACK_OFFSET+8*4(%r14)
#define keysize 2*15*16(arg1)
i = 0
@@ -237,9 +233,6 @@ define_reg j %j
.noaltmacro
.endm
-# need to push 4 registers into stack to maintain
-STACK_OFFSET = 8*4
-
TMP1 = 16*0 # Temporary storage for AAD
TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
TMP3 = 16*2 # Temporary storage for AES State 3
@@ -256,25 +249,22 @@ VARIABLE_OFFSET = 16*8
################################
.macro FUNC_SAVE
- #the number of pushes must equal STACK_OFFSET
push %r12
push %r13
- push %r14
push %r15
- mov %rsp, %r14
-
-
+ push %rbp
+ mov %rsp, %rbp
sub $VARIABLE_OFFSET, %rsp
and $~63, %rsp # align rsp to 64 bytes
.endm
.macro FUNC_RESTORE
- mov %r14, %rsp
+ mov %rbp, %rsp
+ pop %rbp
pop %r15
- pop %r14
pop %r13
pop %r12
.endm
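
Note on the FUNC_SAVE/FUNC_RESTORE change above: the old code stashed the incoming stack pointer in %r14, a copy objtool cannot track; the conventional push %rbp / mov %rsp, %rbp frame makes the 64-byte realignment analyzable and unwindable. A minimal user-space C sketch of the same save/align/restore dance (plain pointer arithmetic standing in for the register moves; the 16*8 frame size is illustrative, not the real VARIABLE_OFFSET):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned char stack[256];
        uintptr_t sp = (uintptr_t)(stack + sizeof(stack)); /* stand-in for %rsp */
        uintptr_t bp = sp;            /* mov %rsp, %rbp */

        sp -= 16 * 8;                 /* sub $VARIABLE_OFFSET, %rsp */
        sp &= ~(uintptr_t)63;         /* and $~63, %rsp: round down to 64 bytes */
        printf("aligned frame at 0x%lx\n", (unsigned long)sp);

        sp = bp;                      /* mov %rbp, %rsp in FUNC_RESTORE */
        (void)sp;
        return 0;
    }
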
@@ -294,7 +284,7 @@ VARIABLE_OFFSET = 16*8
# combined for GCM encrypt and decrypt functions
# clobbering all xmm registers
-# clobbering r10, r11, r12, r13, r14, r15
+# clobbering r10, r11, r12, r13, r15, rax
.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
vmovdqu AadHash(arg2), %xmm8
vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey
@@ -996,7 +986,7 @@ _partial_block_done_\@:
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg3, arg4, r14 are used as a pointer only, not modified
+## arg1, arg2, arg3, arg4 are used as pointers only, not modified
.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
i = (8-\num_initial_blocks)
@@ -1231,7 +1221,7 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg3, arg4 are used as pointers only, not modified
+# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
@@ -1944,7 +1934,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg3, arg4, r14 are used as a pointer only, not modified
+## arg1, arg2, arg3, arg4 are used as pointers only, not modified
.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
i = (8-\num_initial_blocks)
@@ -2186,7 +2176,7 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg3, arg4 are used as pointers only, not modified
+# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 782e9712a1ec..706f70829a07 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -990,6 +990,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
* %rdx: src (32 blocks)
*/
FRAME_BEGIN
+ subq $(16 * 32), %rsp;
vzeroupper;
@@ -1002,7 +1003,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
%ymm15, %rdx, (key_table)(CTX, %r8, 8));
- movq %rsp, %r10;
cmpq %rsi, %rdx;
je .Lcbc_dec_use_stack;
@@ -1015,7 +1015,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
* dst still in-use (because dst == src), so use stack for temporary
* storage.
*/
- subq $(16 * 32), %rsp;
movq %rsp, %rax;
.Lcbc_dec_continue:
@@ -1025,7 +1024,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
vpxor %ymm7, %ymm7, %ymm7;
vinserti128 $1, (%rdx), %ymm7, %ymm7;
vpxor (%rax), %ymm7, %ymm7;
- movq %r10, %rsp;
vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
@@ -1047,6 +1045,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
vzeroupper;
+ addq $(16 * 32), %rsp;
FRAME_END
ret;
SYM_FUNC_END(camellia_cbc_dec_32way)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 884dc767b051..ac1f303eed0f 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -53,7 +53,7 @@
.endm
.macro JMPTBL_ENTRY i
-.word crc_\i - crc_array
+.quad crc_\i
.endm
.macro JNC_LESS_THAN j
@@ -168,10 +168,7 @@ continue_block:
xor crc2, crc2
## branch into array
- lea jump_table(%rip), %bufp
- movzwq (%bufp, %rax, 2), len
- lea crc_array(%rip), %bufp
- lea (%bufp, len, 1), %bufp
+ mov jump_table(,%rax,8), %bufp
JMP_NOSPEC bufp
################################################################
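
Note: switching JMPTBL_ENTRY from .word (a 16-bit offset relative to crc_array) to .quad (a full 64-bit pointer) is what lets the old four-instruction dispatch collapse into a single indexed load, which objtool can then follow. A C analogue of the new dispatch, with made-up handler names standing in for the generated crc_\i labels:

    #include <stdio.h>

    static void crc_0(void) { puts("crc_0"); }
    static void crc_1(void) { puts("crc_1"); }
    static void crc_2(void) { puts("crc_2"); }

    /* .quad entries: one full pointer per handler, indexed at scale 8,
     * the shape "mov jump_table(,%rax,8), %bufp" expects. */
    static void (*const jump_table[])(void) = { crc_0, crc_1, crc_2 };

    int main(void)
    {
        unsigned long rax = 1;   /* remaining-length bucket */
        jump_table[rax]();       /* JMP_NOSPEC bufp, as a plain call here */
        return 0;
    }
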
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1e594d60afa5..5eed620f4676 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -645,9 +645,9 @@ _loop3:
RESERVE_STACK = (W_SIZE*4 + 8+24)
/* Align stack */
- mov %rsp, %rbx
+ push %rbp
+ mov %rsp, %rbp
and $~(0x20-1), %rsp
- push %rbx
sub $RESERVE_STACK, %rsp
avx2_zeroupper
@@ -665,8 +665,8 @@ _loop3:
avx2_zeroupper
- add $RESERVE_STACK, %rsp
- pop %rsp
+ mov %rbp, %rsp
+ pop %rbp
pop %r15
pop %r14
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 11efe3a45a1f..5d8415f482bd 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -59,8 +59,6 @@
#define DATA_PTR %rsi /* 2nd arg */
#define NUM_BLKS %rdx /* 3rd arg */
-#define RSPSAVE %rax
-
/* gcc conversion */
#define FRAME_SIZE 32 /* space for 2x16 bytes */
@@ -96,7 +94,8 @@
.text
.align 32
SYM_FUNC_START(sha1_ni_transform)
- mov %rsp, RSPSAVE
+ push %rbp
+ mov %rsp, %rbp
sub $FRAME_SIZE, %rsp
and $~0xF, %rsp
@@ -288,7 +287,8 @@ SYM_FUNC_START(sha1_ni_transform)
pextrd $3, E0, 1*16(DIGEST_PTR)
.Ldone_hash:
- mov RSPSAVE, %rsp
+ mov %rbp, %rsp
+ pop %rbp
ret
SYM_FUNC_END(sha1_ni_transform)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 11ff60c29c8b..4087f7432a7e 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -117,15 +117,13 @@ _XMM_SAVE_SIZE = 0
_INP_END_SIZE = 8
_INP_SIZE = 8
_CTX_SIZE = 8
-_RSP_SIZE = 8
_XFER = 0
_XMM_SAVE = _XFER + _XFER_SIZE
_INP_END = _XMM_SAVE + _XMM_SAVE_SIZE
_INP = _INP_END + _INP_END_SIZE
_CTX = _INP + _INP_SIZE
-_RSP = _CTX + _CTX_SIZE
-STACK_SIZE = _RSP + _RSP_SIZE
+STACK_SIZE = _CTX + _CTX_SIZE
# rotate_Xs
# Rotate values of symbols X0...X3
@@ -533,11 +531,11 @@ SYM_FUNC_START(sha256_transform_rorx)
pushq %r14
pushq %r15
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
+
subq $STACK_SIZE, %rsp
and $-32, %rsp # align rsp to 32 byte boundary
- mov %rax, _RSP(%rsp)
-
shl $6, NUM_BLKS # convert to bytes
jz done_hash
@@ -704,7 +702,8 @@ only_one_block:
done_hash:
- mov _RSP(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
popq %r15
popq %r14
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 684d58c8bc4f..3d8f0fd4eea8 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -76,14 +76,10 @@ tmp0 = %rax
W_SIZE = 80*8
# W[t] + K[t] | W[t+1] + K[t+1]
WK_SIZE = 2*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_W = 0
frame_WK = frame_W + W_SIZE
-frame_RSPSAVE = frame_WK + WK_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
@@ -281,18 +277,18 @@ SYM_FUNC_START(sha512_transform_avx)
test msglen, msglen
je nowork
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, frame_GPRSAVE(%rsp)
- mov %r12, frame_GPRSAVE +8*1(%rsp)
- mov %r13, frame_GPRSAVE +8*2(%rsp)
- mov %r14, frame_GPRSAVE +8*3(%rsp)
- mov %r15, frame_GPRSAVE +8*4(%rsp)
updateblock:
@@ -353,15 +349,16 @@ updateblock:
dec msglen
jnz updateblock
- # Restore GPRs
- mov frame_GPRSAVE(%rsp), %rbx
- mov frame_GPRSAVE +8*1(%rsp), %r12
- mov frame_GPRSAVE +8*2(%rsp), %r13
- mov frame_GPRSAVE +8*3(%rsp), %r14
- mov frame_GPRSAVE +8*4(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
nowork:
ret
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 3a44bdcfd583..072cb0f0deae 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -102,17 +102,13 @@ SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
CTX_SIZE = 1*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
frame_CTX = frame_INPEND + INPEND_SIZE
-frame_RSPSAVE = frame_CTX + CTX_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_CTX + CTX_SIZE
## assume buffers not aligned
#define VMOVDQ vmovdqu
@@ -570,18 +566,18 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# "blocks" is the message length in SHA512 blocks
########################################################################
SYM_FUNC_START(sha512_transform_rorx)
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, 8*0+frame_GPRSAVE(%rsp)
- mov %r12, 8*1+frame_GPRSAVE(%rsp)
- mov %r13, 8*2+frame_GPRSAVE(%rsp)
- mov %r14, 8*3+frame_GPRSAVE(%rsp)
- mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@@ -672,15 +668,17 @@ loop2:
done_hash:
-# Restore GPRs
- mov 8*0+frame_GPRSAVE(%rsp), %rbx
- mov 8*1+frame_GPRSAVE(%rsp), %r12
- mov 8*2+frame_GPRSAVE(%rsp), %r13
- mov 8*3+frame_GPRSAVE(%rsp), %r14
- mov 8*4+frame_GPRSAVE(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+
ret
SYM_FUNC_END(sha512_transform_rorx)
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 50812af0b083..bd51c9070bed 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -74,14 +74,10 @@ tmp0 = %rax
W_SIZE = 80*8
WK_SIZE = 2*8
-RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 5*8
frame_W = 0
frame_WK = frame_W + W_SIZE
-frame_RSPSAVE = frame_WK + WK_SIZE
-frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
-frame_size = frame_GPRSAVE + GPRSAVE_SIZE
+frame_size = frame_WK + WK_SIZE
# Useful QWORD "arrays" for simpler memory references
# MSG, DIGEST, K_t, W_t are arrays
@@ -283,18 +279,18 @@ SYM_FUNC_START(sha512_transform_ssse3)
test msglen, msglen
je nowork
+ # Save GPRs
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
# Allocate Stack Space
- mov %rsp, %rax
+ push %rbp
+ mov %rsp, %rbp
sub $frame_size, %rsp
and $~(0x20 - 1), %rsp
- mov %rax, frame_RSPSAVE(%rsp)
-
- # Save GPRs
- mov %rbx, frame_GPRSAVE(%rsp)
- mov %r12, frame_GPRSAVE +8*1(%rsp)
- mov %r13, frame_GPRSAVE +8*2(%rsp)
- mov %r14, frame_GPRSAVE +8*3(%rsp)
- mov %r15, frame_GPRSAVE +8*4(%rsp)
updateblock:
@@ -355,15 +351,16 @@ updateblock:
dec msglen
jnz updateblock
- # Restore GPRs
- mov frame_GPRSAVE(%rsp), %rbx
- mov frame_GPRSAVE +8*1(%rsp), %r12
- mov frame_GPRSAVE +8*2(%rsp), %r13
- mov frame_GPRSAVE +8*3(%rsp), %r14
- mov frame_GPRSAVE +8*4(%rsp), %r15
-
# Restore Stack Pointer
- mov frame_RSPSAVE(%rsp), %rsp
+ mov %rbp, %rsp
+ pop %rbp
+
+ # Restore GPRs
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
nowork:
ret
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 7e8b5bedd946..cedf3ede7545 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -21,17 +21,6 @@
#include <objtool/warn.h>
#include <arch/elf.h>
-static unsigned char op_to_cfi_reg[][2] = {
- {CFI_AX, CFI_R8},
- {CFI_CX, CFI_R9},
- {CFI_DX, CFI_R10},
- {CFI_BX, CFI_R11},
- {CFI_SP, CFI_R12},
- {CFI_BP, CFI_R13},
- {CFI_SI, CFI_R14},
- {CFI_DI, CFI_R15},
-};
-
static int is_x86_64(const struct elf *elf)
{
switch (elf->ehdr.e_machine) {
@@ -87,6 +76,31 @@ unsigned long arch_jump_destination(struct instruction *insn)
return -1; \
else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+/*
+ * Helpers to decode ModRM/SIB:
+ *
+ * r/m| AX CX DX BX | SP | BP | SI DI |
+ * | R8 R9 R10 R11 | R12 | R13 | R14 R15 |
+ * Mod+----------------+-----+-----+---------+
+ * 00 | [r/m] |[SIB]|[IP+]| [r/m] |
+ * 01 | [r/m + d8] |[S+d]| [r/m + d8] |
+ * 10 | [r/m + d32] |[S+D]| [r/m + d32] |
+ * 11 | r/ m |
+ */
+
+#define mod_is_mem() (modrm_mod != 3)
+#define mod_is_reg() (modrm_mod == 3)
+
+#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem())
+
+#define rm_is(reg) (have_SIB() ? \
+ sib_base == (reg) && sib_index == CFI_SP : \
+ modrm_rm == (reg))
+
+#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg))
+#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg))
+
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
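
Note on the helpers above: with the CFI_* register numbers renumbered to match the hardware encoding (see the cfi_regs.h hunk below), a REX extension bit simply adds 8 to the 3-bit ModRM field, which is why the op_to_cfi_reg[][] lookup table could be deleted. A self-contained C sketch of the field extraction (bit masks written out here rather than using the kernel's X86_MODRM_*() helpers):

    #include <stdio.h>

    #define MODRM_MOD(b)  (((b) >> 6) & 3)
    #define MODRM_REG(b)  (((b) >> 3) & 7)
    #define MODRM_RM(b)   ((b) & 7)

    int main(void)
    {
        unsigned char modrm = 0xe4;  /* "and imm, %rsp": mod=3, reg=4, rm=4 */
        int rex_r = 0, rex_b = 0;    /* REX.R / REX.B extension bits */

        int mod = MODRM_MOD(modrm);
        int reg = MODRM_REG(modrm) + 8 * rex_r;  /* 4 == CFI_SP */
        int rm  = MODRM_RM(modrm)  + 8 * rex_b;

        printf("mod=%d reg=%d rm=%d (mod == 3: register direct)\n", mod, reg, rm);
        return 0;
    }
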
@@ -94,12 +108,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
struct list_head *ops_list)
{
struct insn insn;
- int x86_64, sign, ret;
- unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
- rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
- modrm_reg = 0, sib = 0;
+ int x86_64, ret;
+ unsigned char op1, op2,
+ rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
+ modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+ sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
+ u64 imm;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -132,23 +148,27 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
if (insn.modrm.nbytes) {
modrm = insn.modrm.bytes[0];
modrm_mod = X86_MODRM_MOD(modrm);
- modrm_reg = X86_MODRM_REG(modrm);
- modrm_rm = X86_MODRM_RM(modrm);
+ modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+ modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b;
}
- if (insn.sib.nbytes)
+ if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
+ /* sib_scale = X86_SIB_SCALE(sib); */
+ sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+ sib_base = X86_SIB_BASE(sib) + 8*rex_b;
+ }
switch (op1) {
case 0x1:
case 0x29:
- if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+ if (rex_w && rm_is_reg(CFI_SP)) {
/* add/sub reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_ADD;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -160,7 +180,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* push reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->src.reg = (op1 & 0x7) + 8*rex_b;
op->dest.type = OP_DEST_PUSH;
}
@@ -172,7 +192,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
ADD_OP(op) {
op->src.type = OP_SRC_POP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.reg = (op1 & 0x7) + 8*rex_b;
}
break;
@@ -190,12 +210,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
*type = INSN_JUMP_CONDITIONAL;
break;
- case 0x81:
- case 0x83:
- if (rex != 0x48)
+ case 0x80 ... 0x83:
+ /*
+ * 1000 00sw : mod OP r/m : immediate
+ *
+ * s - sign extend immediate
+ * w - imm8 / imm32
+ *
+ * OP: 000 ADD 100 AND
+ * 001 OR 101 SUB
+ * 010 ADC 110 XOR
+ * 011 SBB 111 CMP
+ */
+
+ /* 64bit only */
+ if (!rex_w)
+ break;
+
+ /* %rsp target only */
+ if (!rm_is_reg(CFI_SP))
break;
- if (modrm == 0xe4) {
+ imm = insn.immediate.value;
+ if (op1 & 2) { /* sign extend */
+ if (op1 & 1) { /* imm32 */
+ imm <<= 32;
+ imm = (s64)imm >> 32;
+ } else { /* imm8 */
+ imm <<= 56;
+ imm = (s64)imm >> 56;
+ }
+ }
+
+ switch (modrm_reg & 7) {
+ case 5:
+ imm = -imm;
+ /* fallthrough */
+ case 0:
+ /* add/sub imm, %rsp */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = imm;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+
+ case 4:
/* and imm, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_AND;
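
Note: the shift pair in the 0x80..0x83 handler above is the portable idiom for sign-extending an n-bit immediate held in a u64. A standalone C check of the same technique:

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend the low 'bits' of imm, as the imm8/imm32 cases above do. */
    static uint64_t sext(uint64_t imm, int bits)
    {
        imm <<= 64 - bits;
        return (uint64_t)((int64_t)imm >> (64 - bits));
    }

    int main(void)
    {
        printf("%016llx\n", (unsigned long long)sext(0x80, 8));         /* ffffffffffffff80 */
        printf("%016llx\n", (unsigned long long)sext(0x80000000, 32));  /* ffffffff80000000 */
        return 0;
    }
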
@@ -205,53 +267,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_SP;
}
break;
- }
- if (modrm == 0xc4)
- sign = 1;
- else if (modrm == 0xec)
- sign = -1;
- else
+ default:
+ /* WARN ? */
break;
-
- /* add/sub imm, %rsp */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value * sign;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
}
+
break;
case 0x89:
- if (rex_w && !rex_r && modrm_reg == 4) {
+ if (!rex_w)
+ break;
+
+ if (modrm_reg == CFI_SP) {
- if (modrm_mod == 3) {
+ if (mod_is_reg()) {
/* mov %rsp, reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.reg = modrm_rm;
}
break;
} else {
- /* skip nontrivial SIB */
- if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
- break;
-
/* skip RIP relative displacement */
- if (modrm_rm == 5 && modrm_mod == 0)
+ if (is_RIP())
break;
+ /* skip nontrivial SIB */
+ if (have_SIB()) {
+ modrm_rm = sib_base;
+ if (sib_index != CFI_SP)
+ break;
+ }
+
/* mov %rsp, disp(%reg) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.reg = modrm_rm;
op->dest.offset = insn.displacement.value;
}
break;
@@ -260,12 +317,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
- if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+ if (rm_is_reg(CFI_SP)) {
/* mov reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -274,13 +331,15 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* fallthrough */
case 0x88:
- if (!rex_b &&
- (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+ if (!rex_w)
+ break;
+
+ if (rm_is_mem(CFI_BP)) {
/* mov reg, disp(%rbp) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG_INDIRECT;
op->dest.reg = CFI_BP;
op->dest.offset = insn.displacement.value;
@@ -288,12 +347,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
- if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+ if (rm_is_mem(CFI_SP)) {
/* mov reg, disp(%rsp) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG_INDIRECT;
op->dest.reg = CFI_SP;
op->dest.offset = insn.displacement.value;
@@ -304,7 +363,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
case 0x8b:
- if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+ if (!rex_w)
+ break;
+
+ if (rm_is_mem(CFI_BP)) {
/* mov disp(%rbp), reg */
ADD_OP(op) {
@@ -312,11 +374,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->src.reg = CFI_BP;
op->src.offset = insn.displacement.value;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.reg = modrm_reg;
}
+ break;
+ }
- } else if (rex_w && !rex_b && sib == 0x24 &&
- modrm_mod != 3 && modrm_rm == 4) {
+ if (rm_is_mem(CFI_SP)) {
/* mov disp(%rsp), reg */
ADD_OP(op) {
@@ -324,75 +387,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->src.reg = CFI_SP;
op->src.offset = insn.displacement.value;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.reg = modrm_reg;
}
+ break;
}
break;
case 0x8d:
- if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
-
- ADD_OP(op) {
- if (!insn.displacement.value) {
- /* lea (%rsp), reg */
- op->src.type = OP_SRC_REG;
- } else {
- /* lea disp(%rsp), reg */
- op->src.type = OP_SRC_ADD;
- op->src.offset = insn.displacement.value;
- }
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
- }
-
- } else if (rex == 0x48 && modrm == 0x65) {
-
- /* lea disp(%rbp), %rsp */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
- }
+ if (mod_is_reg()) {
+ WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
+ break;
+ }
- } else if (rex == 0x49 && modrm == 0x62 &&
- insn.displacement.value == -8) {
+ /* skip non 64bit ops */
+ if (!rex_w)
+ break;
- /*
- * lea -0x8(%r10), %rsp
- *
- * Restoring rsp back to its original value after a
- * stack realignment.
- */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R10;
- op->src.offset = -8;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
- }
+ /* skip RIP relative displacement */
+ if (is_RIP())
+ break;
- } else if (rex == 0x49 && modrm == 0x65 &&
- insn.displacement.value == -16) {
+ /* skip nontrivial SIB */
+ if (have_SIB()) {
+ modrm_rm = sib_base;
+ if (sib_index != CFI_SP)
+ break;
+ }
- /*
- * lea -0x10(%r13), %rsp
- *
- * Restoring rsp back to its original value after a
- * stack realignment.
- */
- ADD_OP(op) {
+ /* lea disp(%src), %dst */
+ ADD_OP(op) {
+ op->src.offset = insn.displacement.value;
+ if (!op->src.offset) {
+ /* lea (%src), %dst */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%src), %dst */
op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R13;
- op->src.offset = -16;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
}
+ op->src.reg = modrm_rm;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = modrm_reg;
}
-
break;
case 0x8f:
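
Note: the rewritten 0x8d (LEA) case works because lea is pure address arithmetic, so every form the old code special-cased (including the -0x8(%r10) and -0x10(%r13) realignment restores) reduces to dst = src + disp. Illustrative C with made-up values:

    #include <stdio.h>

    int main(void)
    {
        unsigned long r10 = 0x7ffd2000;  /* made-up register content */
        long disp = -8;
        unsigned long rsp = r10 + disp;  /* "lea -0x8(%r10), %rsp" */
        printf("rsp = 0x%lx\n", rsp);
        return 0;
    }
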
@@ -479,9 +515,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
* mov bp, sp
* pop bp
*/
- ADD_OP(op)
- op->dest.type = OP_DEST_LEAVE;
-
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_BP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_BP;
+ }
break;
case 0xe3:
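
Note: dropping OP_DEST_LEAVE means the checker no longer needs a special case; leave decodes into the same two primitive ops as the explicit epilogue it abbreviates. A rough C model of what the decoder now emits (the struct is a simplified stand-in for objtool's struct stack_op):

    #include <stdio.h>

    enum src_type { SRC_REG, SRC_POP };
    struct op { enum src_type src; const char *src_reg, *dst_reg; };

    int main(void)
    {
        /* leave == mov %rbp,%rsp ; pop %rbp */
        struct op ops[] = {
            { SRC_REG, "bp", "sp" },  /* src.reg = CFI_BP, dest.reg = CFI_SP */
            { SRC_POP, NULL, "bp" },  /* src = POP,        dest.reg = CFI_BP */
        };

        for (unsigned i = 0; i < 2; i++)
            printf("op%u: %s -> %%%s\n", i,
                   ops[i].src == SRC_POP ? "pop" : ops[i].src_reg,
                   ops[i].dst_reg);
        return 0;
    }
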
diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517efba8..0579d22c433c 100644
--- a/tools/objtool/arch/x86/include/arch/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
@@ -4,13 +4,13 @@
#define _OBJTOOL_CFI_REGS_H
#define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
+#define CFI_CX 1