| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:03:00 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:03:00 -0800 |
| commit | 94a855111ed9106971ca2617c5d075269e6aefde (patch) | |
| tree | 330c762a403cf70c2cdbf12e7394e5e7c2a69a79 /arch | |
| parent | 93761c93e9da28d8a020777cee2a84133082b477 (diff) | |
| parent | f1a033cc6b9eb6d80322008422df3c87aa5d47a0 (diff) | |
Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov:
- Add the call depth tracking mitigation for Retbleed, which has been
  long in the making. It is a lighter-weight, software-only fix for
  Skylake-based cores, where enabling IBRS is a big hammer and causes a
  significant performance impact.
  What it basically does is: it aligns all kernel functions to a 16-byte
  boundary and adds 16 bytes of padding before each function. objtool
  collects all function locations and, when the mitigation is applied,
  patches in a call accounting thunk which is used to track the call
  depth of the stack at any time (a rough illustrative sketch follows
  this list).
  When that call depth reaches a magical, microarchitecture-specific
  value for the Return Stack Buffer, the code stuffs that RSB and
  avoids its underflow, which could otherwise lead to the Intel variant
  of Retbleed.
  This software-only solution brings a lot of the lost performance
  back, as benchmarks suggest:

      https://lore.kernel.org/all/20220915111039.092790446@infradead.org/

  That page also contains a much more detailed explanation of the
  whole mechanism.
- Implement a new control flow integrity scheme called FineIBT, which
  is based on the software kCFI implementation and uses hardware IBT
  support, where present, to annotate and track indirect branches,
  using a hash to validate them (an illustrative sketch of the idea
  also follows this list).
- Other misc fixes and cleanups
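
To make the call depth tracking description above a bit more concrete, here
is a minimal C sketch of the accounting idea, assuming a per-CPU depth
counter and a fixed RSB capacity. The real mitigation lives in run-time
generated x86 thunks and alternatives patching; the names below
(account_call, account_return, call_depth, RSB_DEPTH) are invented for this
illustration and are not kernel symbols.

    /* Toy model of call depth tracking -- illustration only.  The kernel
     * implements this in patched assembly thunks, not in C. */
    #include <stdio.h>

    #define RSB_DEPTH 16        /* assumed Return Stack Buffer capacity */

    static int call_depth;      /* stands in for the per-CPU depth counter */

    static void account_call(void)
    {
            call_depth++;       /* a CALL pushes one RSB entry */
    }

    static void account_return(void)
    {
            if (--call_depth <= 0) {
                    /*
                     * The RSB is believed to be empty: refill it with dummy
                     * call/return pairs ("stuffing") so return prediction
                     * never falls back to the indirect branch predictor.
                     */
                    call_depth = RSB_DEPTH;
            }
    }

    int main(void)
    {
            account_call();     /* a kernel call: depth goes to 1 */
            account_return();   /* matching return: depth 0 -> stuff and reset */
            printf("depth estimate after stuffing: %d\n", call_depth);
            return 0;
    }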
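In the same spirit, here is a minimal sketch of the FineIBT idea, assuming
the caller announces a hash of the prototype it intends to call and the
callee verifies that hash on entry. In the kernel this is a handful of
patched instructions (endbr64 plus a compare against the kCFI type hash)
placed in the function padding; the names and the hash constant below are
made up for the example.

    /* Toy model of a FineIBT/kCFI-style hash check -- illustration only. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef void (*handler_t)(uint32_t expected_hash, int arg);

    #define HANDLER_PROTO_HASH 0x5aa5c3c3u  /* stand-in for a kCFI type hash */

    static void my_handler(uint32_t expected_hash, int arg)
    {
            /* Entry check: a mismatching hash means a forged indirect branch. */
            if (expected_hash != HANDLER_PROTO_HASH)
                    abort();    /* the kernel raises a CFI failure instead */
            printf("handler(%d) reached via a validated indirect call\n", arg);
    }

    int main(void)
    {
            handler_t fp = my_handler;

            /* Caller side: pass the hash of the prototype it expects to call. */
            fp(HANDLER_PROTO_HASH, 42);
            return 0;
    }
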
* tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits)
x86/paravirt: Use common macro for creating simple asm paravirt functions
x86/paravirt: Remove clobber bitmask from .parainstructions
x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al
x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit
x86/Kconfig: Enable kernel IBT by default
x86,pm: Force out-of-line memcpy()
objtool: Fix weak hole vs prefix symbol
objtool: Optimize elf_dirty_reloc_sym()
x86/cfi: Add boot time hash randomization
x86/cfi: Boot time selection of CFI scheme
x86/ibt: Implement FineIBT
objtool: Add --cfi to generate the .cfi_sites section
x86: Add prefix symbols for function padding
objtool: Add option to generate prefix symbols
objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
objtool: Slice up elf_create_section_symbol()
kallsyms: Revert "Take callthunks into account"
x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces
x86/retpoline: Fix crash printing warning
x86/paravirt: Fix a !PARAVIRT build warning
...
Diffstat (limited to 'arch')
79 files changed, 1902 insertions, 399 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 2d0e7099eb3f..a3c47c2a79cd 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
 
+config FUNCTION_ALIGNMENT_4B
+	bool
+
+config FUNCTION_ALIGNMENT_8B
+	bool
+
+config FUNCTION_ALIGNMENT_16B
+	bool
+
+config FUNCTION_ALIGNMENT_32B
+	bool
+
+config FUNCTION_ALIGNMENT_64B
+	bool
+
+config FUNCTION_ALIGNMENT
+	int
+	default 64 if FUNCTION_ALIGNMENT_64B
+	default 32 if FUNCTION_ALIGNMENT_32B
+	default 16 if FUNCTION_ALIGNMENT_16B
+	default 8 if FUNCTION_ALIGNMENT_8B
+	default 4 if FUNCTION_ALIGNMENT_4B
+	default 0
+
 endmenu
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c6e06cdc738f..d7e4a24e8644 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -63,6 +63,7 @@ config IA64
 	select NUMA if !FLATMEM
 	select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
 	select ZONE_DMA32
+	select FUNCTION_ALIGNMENT_32B
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 56c4bb276b6e..d553ab7022fe 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
 EXTRA :=
 
 cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-	-falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+	-frename-registers -fno-optimize-sibling-calls
 KBUILD_CFLAGS_KERNEL := -mconstant-gp
 
 GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8adf8e89b255..786b44dc20c9 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
 {
 }
 
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+}
+
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f8c03b347f9..61b4c7bcd4d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -292,6 +292,8 @@ config X86
 	select X86_FEATURE_NAMES if PROC_FS
 	select PROC_PID_ARCH_STATUS if PROC_FS
 	select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
+	select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
+	select FUNCTION_ALIGNMENT_4B
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
 	select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
@@ -1855,7 +1857,7 @@ config CC_HAS_IBT
 config X86_KERNEL_IBT
 	prompt "Indirect Branch Tracking"
-	bool
+	def_bool y
 	depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
 	# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
 	depends on !LD_IS_LLD || LLD_VERSION >= 140000
@@ -2492,6 +2494,46 @@ config CC_HAS_SLS
 config CC_HAS_RETURN_THUNK
 	def_bool $(cc-option,-mfunction-return=thunk-extern)
 
+config CC_HAS_ENTRY_PADDING
+	def_bool $(cc-option,-fpatchable-function-entry=16,16)
+
+config FUNCTION_PADDING_CFI
+	int
+	default 59 if FUNCTION_ALIGNMENT_64B
+	default 27 if FUNCTION_ALIGNMENT_32B
+	default 11 if FUNCTION_ALIGNMENT_16B
+	default 3 if FUNCTION_ALIGNMENT_8B
+	default 0
+
+# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
+# except Kconfig can't do arithmetic :/
+config FUNCTION_PADDING_BYTES
+	int
+	default FUNCTION_PADDING_CFI if CFI_CLANG
+	default FUNCTION_ALIGNMENT
+
+config CALL_PADDING
+	def_bool n
+	depends on CC_HAS_ENTRY_PADDING && OBJTOOL
+	select FUNCTION_ALIGNMENT_16B
+
+config FINEIBT
+	def_bool y
+	depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
+	select CALL_PADDING
+
+config HAVE_CALL_THUNKS
+	def_bool y
+	depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
+
+config CALL_THUNKS
+	def_bool n
+	select CALL_PADDING
+
+config PREFIX_SYMBOLS
+	def_bool y
+	depends on CALL_PADDING && !CFI_CLANG
+
 menuconfig SPECULATION_MITIGATIONS
 	bool "Mitigations for speculative execution vulnerabilities"
 	default y
@@ -2543,6 +2585,37 @@ config CPU_UNRET_ENTRY
 	help
 	  Compile the kernel with support for the retbleed=unret mitigation.
 
+config CALL_DEPTH_TRACKING
+	bool "Mitigate RSB underflow with call depth tracking"
+	depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
+	select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
+	select CALL_THUNKS
+	default y
+	help
+	  Compile the kernel with call depth tracking to mitigate the Intel
+	  SKL Return-Speculation-Buffer (RSB) underflow issue. The
+	  mitigation is off by default and needs to be enabled on the
+	  kernel command line via the retbleed=stuff option. For
+	  non-affected systems the overhead of this option is marginal as
+	  the call depth tracking is using run-time generated call thunks
+	  in a compiler generated padding area and call patching. This
+	  increases text size by ~5%. For non affected systems this space
+	  is unused. On affected SKL systems this results in a significant
+	  performance gain over the IBRS mitigation.
+
+config CALL_THUNKS_DEBUG
+	bool "Enable call thunks and call depth tracking debugging"
+	depends on CALL_DEPTH_TRACKING
+	select FUNCTION_ALIGNMENT_32B
+	default n
+	help
+	  Enable call/ret counters for imbalance detection and build in
+	  a noisy dmesg about callthunks generation and call patching for
+	  trouble shooting. The debug prints need to be enabled on the
+	  kernel command line with 'debug-callthunks'.
+	  Only enable this, when you are debugging call thunks as this
+	  creates a noticable runtime overhead. If unsure say N.
+
 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
 	depends on CPU_SUP_AMD && X86_64
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 415a5d138de4..a3a07df8a609 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,6 +208,12 @@ ifdef CONFIG_SLS
 KBUILD_CFLAGS += -mharden-sls=all
 endif
 
+ifdef CONFIG_CALL_PADDING
+PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
+KBUILD_CFLAGS += $(PADDING_CFLAGS)
+export PADDING_CFLAGS
+endif
+
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
 ifdef CONFIG_LTO_CLANG
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d4c4281db635..a75712991df3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -38,6 +38,14 @@
 #include "pgtable.h"
 
 /*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN .balign 16, 0x90
+
+/*
  * Locally defined symbols should be marked hidden:
  */
 .hidden _bss
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2e1658ddbe1a..4a30618281ec 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 .text
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
 	jmp .Lenc_done;
 SYM_FUNC_END(__camellia_enc_blk16)
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 0e4e9abbf4de..deaf62aa73a6 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -221,7 +221,6 @@
  * Size optimization... with inlined roundsm32 binary would be over 5 times
  * larger and would only marginally faster.
  */
-.align 8
 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 	roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
 		  %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
@@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
 	RET;
 SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
-.align 8
 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
 		  %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 .text
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
 	jmp .Lenc_done;
 SYM_FUNC_END(__camellia_enc_blk32)
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b258af420c92..0326a01503c3 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -208,7 +208,6 @@
 
 .text
 
-.align 16
 SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	/* input:
 	 *	%rdi: ctx
@@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	RET;
 SYM_FUNC_END(__cast5_enc_blk16)
 
-.align 16
 SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
 	/* input:
 	 *	%rdi: ctx
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index 721474abfb71..5286db5b8165 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -94,7 +94,6 @@
 #
 # Assumes len >= 16.
 #
-.align 16
 SYM_FUNC_START(crc_t10dif_pcl)
 
 	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 2077ce7a5647..b9abcd79c1f4 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -108,7 +108,6 @@ if (!$kernel) {
 sub declare_function() {
 	my ($name, $align, $nargs) = @_;
 	if($kernel) {
-		$code .= ".align $align\n";
 		$code .= "SYM_FUNC_START($name)\n";
 		$code .= ".L$name:\n";
 	} else {
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 82f2313f512b..97e283621851 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -550,7 +550,6 @@
 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
 	RET;
 SYM_FUNC_END(__serpent_enc_blk8_avx)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 8ea34c9b9316..6d60c50593a9 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -550,7 +550,6 @@
 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
 	RET;
 SYM_FUNC_END(__serpent_enc_blk16)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 3cae5a1bb3d6..cade913d4882 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/
