summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-01 13:24:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-01 13:24:43 -0700
commit43aa0a195f06101bcb5d8d711bba0dd24b33a1a0 (patch)
tree0236661db875f519cc80e11fde210fdfc9b2be76
parent595b28fb0c8949463d8ec1e485f36d17c870ddb2 (diff)
parent87c87ecd00c54ecd677798cb49ef27329e0fab41 (diff)
downloadlinux-43aa0a195f06101bcb5d8d711bba0dd24b33a1a0.tar.gz
linux-43aa0a195f06101bcb5d8d711bba0dd24b33a1a0.tar.bz2
linux-43aa0a195f06101bcb5d8d711bba0dd24b33a1a0.zip
Merge tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull objtool updates from Thomas Gleixner: - Improve retpoline code patching by separating it from alternatives which reduces memory footprint and allows to do better optimizations in the actual runtime patching. - Add proper retpoline support for x86/BPF - Address noinstr warnings in x86/kvm, lockdep and paravirtualization code - Add support to handle pv_opsindirect calls in the noinstr analysis - Classify symbols upfront and cache the result to avoid redundant str*cmp() invocations. - Add a CFI hash to reduce memory consumption which also reduces runtime on a allyesconfig by ~50% - Adjust XEN code to make objtool handling more robust and as a side effect to prevent text fragmentation due to placement of the hypercall page. * tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits) bpf,x86: Respect X86_FEATURE_RETPOLINE* bpf,x86: Simplify computing label offsets x86,bugs: Unconditionally allow spectre_v2=retpoline,amd x86/alternative: Add debug prints to apply_retpolines() x86/alternative: Try inline spectre_v2=retpoline,amd x86/alternative: Handle Jcc __x86_indirect_thunk_\reg x86/alternative: Implement .retpoline_sites support x86/retpoline: Create a retpoline thunk array x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h x86/asm: Fixup odd GEN-for-each-reg.h usage x86/asm: Fix register order x86/retpoline: Remove unused replacement symbols objtool,x86: Replace alternatives with .retpoline_sites objtool: Shrink struct instruction objtool: Explicitly avoid self modifying code in .altinstr_replacement objtool: Classify symbols objtool: Support pv_opsindirect calls for noinstr x86/xen: Rework the xen_{cpu,irq,mmu}_opsarrays x86/xen: Mark xen_force_evtchn_callback() noinstr x86/xen: Make irq_disable() noinstr ...
-rw-r--r--arch/um/kernel/um_arch.c4
-rw-r--r--arch/x86/include/asm/GEN-for-each-reg.h14
-rw-r--r--arch/x86/include/asm/alternative.h1
-rw-r--r--arch/x86/include/asm/asm-prototypes.h18
-rw-r--r--arch/x86/include/asm/nospec-branch.h72
-rw-r--r--arch/x86/include/asm/paravirt.h31
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h6
-rw-r--r--arch/x86/kernel/alternative.c191
-rw-r--r--arch/x86/kernel/cpu/bugs.c7
-rw-r--r--arch/x86/kernel/irqflags.S2
-rw-r--r--arch/x86/kernel/module.c9
-rw-r--r--arch/x86/kernel/paravirt.c45
-rw-r--r--arch/x86/kernel/sev-shared.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S14
-rw-r--r--arch/x86/kvm/svm/svm.h8
-rw-r--r--arch/x86/kvm/svm/svm_ops.h4
-rw-r--r--arch/x86/kvm/vmx/evmcs.h4
-rw-r--r--arch/x86/lib/retpoline.S56
-rw-r--r--arch/x86/net/bpf_jit_comp.c160
-rw-r--r--arch/x86/net/bpf_jit_comp32.c22
-rw-r--r--arch/x86/xen/enlighten_pv.c70
-rw-r--r--arch/x86/xen/irq.c31
-rw-r--r--arch/x86/xen/mmu_pv.c93
-rw-r--r--arch/x86/xen/xen-asm.S79
-rw-r--r--arch/x86/xen/xen-head.S34
-rw-r--r--include/linux/context_tracking.h2
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--tools/objtool/arch/x86/decode.c180
-rw-r--r--tools/objtool/check.c646
-rw-r--r--tools/objtool/elf.c84
-rw-r--r--tools/objtool/include/objtool/arch.h5
-rw-r--r--tools/objtool/include/objtool/cfi.h2
-rw-r--r--tools/objtool/include/objtool/check.h3
-rw-r--r--tools/objtool/include/objtool/elf.h9
-rw-r--r--tools/objtool/include/objtool/objtool.h9
-rw-r--r--tools/objtool/objtool.c22
-rw-r--r--tools/objtool/orc_gen.c15
-rw-r--r--tools/objtool/special.c8
40 files changed, 1157 insertions, 811 deletions
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index a149a5e9a16a..54447690de11 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -421,6 +421,10 @@ void __init check_bugs(void)
os_check_bugs();
}
+void apply_retpolines(s32 *start, s32 *end)
+{
+}
+
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
index 1b07fb102c4e..07949102a08d 100644
--- a/arch/x86/include/asm/GEN-for-each-reg.h
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -1,11 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * These are in machine order; things rely on that.
+ */
#ifdef CONFIG_64BIT
GEN(rax)
-GEN(rbx)
GEN(rcx)
GEN(rdx)
+GEN(rbx)
+GEN(rsp)
+GEN(rbp)
GEN(rsi)
GEN(rdi)
-GEN(rbp)
GEN(r8)
GEN(r9)
GEN(r10)
@@ -16,10 +21,11 @@ GEN(r14)
GEN(r15)
#else
GEN(eax)
-GEN(ebx)
GEN(ecx)
GEN(edx)
+GEN(ebx)
+GEN(esp)
+GEN(ebp)
GEN(esi)
GEN(edi)
-GEN(ebp)
#endif
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index a3c2315aca12..58eee6402832 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,6 +75,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern void apply_retpolines(s32 *start, s32 *end);
struct module;
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4cb726c71ed8..8f80de627c60 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,21 +17,3 @@
extern void cmpxchg8b_emu(void);
#endif
-#ifdef CONFIG_RETPOLINE
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ec2d5c8c6694..cc74dc584836 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -5,12 +5,15 @@
#include <linux/static_key.h>
#include <linux/objtool.h>
+#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
+#define RETPOLINE_THUNK_SIZE 32
+
/*
* Fill the CPU return stack buffer.
*
@@ -118,6 +121,16 @@
".popsection\n\t"
#ifdef CONFIG_RETPOLINE
+
+typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
#ifdef CONFIG_X86_64
/*
@@ -303,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
#endif /* __ASSEMBLY__ */
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- * callq do_rop
- * spec_trap:
- * pause
- * lfence
- * jmp spec_trap
- * do_rop:
- * mov %rcx,(%rsp) for x86_64
- * mov %edx,(%esp) for x86_32
- * retq
- *
- * Without retpolines configured:
- *
- * jmp *%rcx for x86_64
- * jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
-# define RETPOLINE_RCX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
- EMIT1(0xC3); /* retq */ \
-} while (0)
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* call do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
- EMIT1(0xC3); /* ret */ \
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
-# define RETPOLINE_RCX_BPF_JIT() \
- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
- EMIT2(0xFF, 0xE2) /* jmp *%edx */
-# endif
-#endif
-
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index da3a1ac82be5..cebec95a7124 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -52,11 +52,11 @@ void __init paravirt_set_cap(void);
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
#ifdef REALLY_SLOW_IO
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
#endif
}
@@ -113,12 +113,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
/*
* These special macros can be used to get or set a debugging register
*/
-static inline unsigned long paravirt_get_debugreg(int reg)
+static __always_inline unsigned long paravirt_get_debugreg(int reg)
{
return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
-static inline void set_debugreg(unsigned long val, int reg)
+static __always_inline void set_debugreg(unsigned long val, int reg)
{
PVOP_VCALL2(cpu.set_debugreg, reg, val);
}
@@ -133,14 +133,14 @@ static inline void write_cr0(unsigned long x)
PVOP_VCALL1(cpu.write_cr0, x);
}
-static inline unsigned long read_cr2(void)
+static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
"mov %%cr2, %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
-static inline void write_cr2(unsigned long x)
+static __always_inline void write_cr2(unsigned long x)
{
PVOP_VCALL1(mmu.write_cr2, x);
}
@@ -653,10 +653,10 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
* functions.
*/
#define PV_THUNK_NAME(func) "__raw_callee_save_" #func
-#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \
extern typeof(func) __raw_callee_save_##func; \
\
- asm(".pushsection .text;" \
+ asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
PV_THUNK_NAME(func) ":" \
@@ -669,6 +669,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
+#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+ __PV_CALLEE_SAVE_REGS_THUNK(func, ".text")
+
/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { __raw_callee_save_##func })
@@ -678,23 +681,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
((struct paravirt_callee_save) { func })
#ifdef CONFIG_PARAVIRT_XXL
-static inline notrace unsigned long arch_local_save_flags(void)
+static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace void arch_local_irq_disable(void)
+static __always_inline void arch_local_irq_disable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace void arch_local_irq_enable(void)
+static __always_inline void arch_local_irq_enable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long arch_local_irq_save(void)
{
unsigned long f;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index b94f615600d5..703663175a5a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -181,7 +181,7 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
-static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
{
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 454b20815f35..4a7ff8b0db20 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -308,13 +308,13 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
return _hypercall1(int, platform_op, op);
}
-static inline int
+static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
-static inline unsigned long
+static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
@@ -358,7 +358,7 @@ HYPERVISOR_event_channel_op(int cmd, void *arg)
return _hypercall2(int, event_channel_op, cmd, arg);
}
-static inline int
+static __always_inline int
HYPERVISOR_xen_version(int cmd, void *arg)
{
return _hypercall2(int, xen_version, cmd, arg);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e9da3dc71254..23fb4d51a5da 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -29,6 +29,7 @@
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
+#include <asm/asm-prototypes.h>
int __read_mostly alternatives_patched;
@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
}
}
+extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
struct insn insn;
int i = 0;
@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
* optimized.
*/
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
- i += optimize_nops_range(instr, a->instrlen, i);
+ i += optimize_nops_range(instr, len, i);
else
i += insn.length;
- if (i >= a->instrlen)
+ if (i >= len)
return;
}
}
@@ -331,10 +333,185 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz);
next:
- optimize_nops(a, instr);
+ optimize_nops(instr, a->instrlen);
}
}
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
+
+/*
+ * CALL/JMP *%\reg
+ */
+static int emit_indirect(int op, int reg, u8 *bytes)
+{
+ int i = 0;
+ u8 modrm;
+
+ switch (op) {
+ case CALL_INSN_OPCODE:
+ modrm = 0x10; /* Reg = 2; CALL r/m */
+ break;
+
+ case JMP32_INSN_OPCODE:
+ modrm = 0x20; /* Reg = 4; JMP r/m */
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+
+ modrm |= 0xc0; /* Mod = 3 */
+ modrm += reg;
+
+ bytes[i++] = 0xff; /* opcode */
+ bytes[i++] = modrm;
+
+ return i;
+}
+
+/*
+ * Rewrite the compiler generated retpoline thunk calls.
+ *
+ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
+ * indirect instructions, avoiding the extra indirection.
+ *
+ * For example, convert:
+ *
+ * CALL __x86_indirect_thunk_\reg
+ *
+ * into:
+ *
+ * CALL *%\reg
+ *
+ * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ */
+static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+{
+ retpoline_thunk_t *target;
+ int reg, ret, i = 0;
+ u8 op, cc;
+
+ target = addr + insn->length + insn->immediate.value;
+ reg = target - __x86_indirect_thunk_array;
+
+ if (WARN_ON_ONCE(reg & ~0xf))
+ return -1;
+
+ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
+ BUG_ON(reg == 4);
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+ return -1;
+
+ op = insn->opcode.bytes[0];
+
+ /*
+ * Convert:
+ *
+ * Jcc.d32 __x86_indirect_thunk_\reg
+ *
+ * into:
+ *
+ * Jncc.d8 1f
+ * [ LFENCE ]
+ * JMP *%\reg
+ * [ NOP ]
+ * 1:
+ */
+ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
+ cc = insn->opcode.bytes[1] & 0xf;
+ cc ^= 1; /* invert condition */
+
+ bytes[i++] = 0x70 + cc; /* Jcc.d8 */
+ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
+
+ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
+ op = JMP32_INSN_OPCODE;
+ }
+
+ /*
+ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ bytes[i++] = 0x0f;
+ bytes[i++] = 0xae;
+ bytes[i++] = 0xe8; /* LFENCE */
+ }
+
+ ret = emit_indirect(op, reg, bytes + i);
+ if (ret < 0)
+ return ret;
+ i += ret;
+
+ for (; i < insn->length;)
+ bytes[i++] = BYTES_NOP1;
+
+ return i;
+}
+
+/*
+ * Generated by 'objtool --retpoline'.
+ */
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+ u8 op1, op2;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op1 = insn.opcode.bytes[0];
+ op2 = insn.opcode.bytes[1];
+
+ switch (op1) {
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ break;
+
+ case 0x0f: /* escape */
+ if (op2 >= 0x80 && op2 <= 0x8f)
+ break;
+ fallthrough;
+ default:
+ WARN_ON_ONCE(1);
+ continue;
+ }
+
+ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
+ addr, addr, insn.length,
+ addr + insn.length + insn.immediate.value);
+
+ len = patch_retpoline(addr, &insn, bytes);
+ if (len == insn.length) {
+ optimize_nops(bytes, len);
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
+}
+
+#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+
+#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
@@ -643,6 +820,12 @@ void __init alternative_instructions(void)
apply_paravirt(__parainstructions, __parainstructions_end);
/*
+ * Rewrite the retpolines, must be done before alternatives since
+ * those can rewrite the retpoline thunks.
+ */
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+
+ /*
* Then patch alternatives, such that those paravirt calls that are in
* alternatives can be overwritten by their immediate fragments.
*/
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ecfca3bbcd96..ba43597f1027 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -882,13 +882,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
- if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
- boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
- return SPECTRE_V2_CMD_AUTO;
- }
-
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index 8ef35063964b..760e1f293093 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -7,9 +7,11 @@
/*
* unsigned long native_save_fl(void)
*/
+.pushsection .noinstr.text, "ax"
SYM_FUNC_START(native_save_fl)
pushf
pop %_ASM_AX
ret
SYM_FUNC_END(native_save_fl)
+.popsection
EXPORT_SYMBOL(native_save_fl)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 5e9a34b5bd74..169fb6f4cd2e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
- *para = NULL, *orc = NULL, *orc_ip = NULL;
+ *para = NULL, *orc = NULL, *orc_ip = NULL,
+ *retpolines = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
orc_ip = s;
+ if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
+ retpolines = s;
}
+ if (retpolines) {
+ void *rseg = (void *)retpolines->sh_addr;
+ apply_retpolines(rseg, rseg + retpolines->sh_size);
+ }
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 04cafc057bed..ebc45360ffd4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -218,6 +218,36 @@ void paravirt_end_context_switch(struct task_struct *next)
if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
arch_enter_lazy_mmu_mode();
}
+
+static noinstr unsigned long pv_native_read_cr2(void)
+{
+ return native_read_cr2();
+}
+
+static noinstr void pv_native_write_cr2(unsigned long val)
+{
+ native_write_cr2(val);
+}
+
+static noinstr unsigned long pv_native_get_debugreg(int regno)
+{
+ return native_get_debugreg(regno);
+}
+
+static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
+{
+ native_set_debugreg(regno, val);
+}
+
+static noinstr void pv_native_irq_enable(void)
+{
+ native_irq_enable();
+}
+
+static noinstr void pv_native_irq_disable(void)
+{
+ native_irq_disable();
+}
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
@@ -244,8 +274,8 @@ struct paravirt_patch_template pv_ops = {
#ifdef CONFIG_PARAVIRT_XXL
.cpu.cpuid = native_cpuid,
- .cpu.get_debugreg = native_get_debugreg,
- .cpu.set_debugreg = native_set_debugreg,
+ .cpu.get_debugreg = pv_native_get_debugreg,
+ .cpu.set_debugreg = pv_native_set_debugreg,
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
@@ -281,8 +311,8 @@ struct paravirt_patch_template pv_ops = {
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
- .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
- .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
+ .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
+ .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
.irq.safe_halt = native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
@@ -298,8 +328,8 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
- .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
- .mmu.write_cr2 = native_write_cr2,
+ .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
+ .mmu.write_cr2 = pv_native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
@@ -371,9 +401,6 @@ struct paravirt_patch_template pv_ops = {
};
#ifdef CONFIG_PARAVIRT_XXL
-/* At this point, native_get/set_debugreg has real function entries */
-NOKPROBE_SYMBOL(native_get_debugreg);
-NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
void (*paravirt_iret)(void) = native_iret;
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index bf1033a62e48..ff1e82ff52d9 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/