diff options
62 files changed, 906 insertions, 584 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 93404c802d29..b34946d90e4b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -60,9 +60,9 @@ config GENERIC_ENTRY config KPROBES bool "Kprobes" - depends on MODULES depends on HAVE_KPROBES select KALLSYMS + select EXECMEM select NEED_TASKS_RCU help Kprobes allows you to trap at almost any kernel address and @@ -977,6 +977,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC For architectures like powerpc/32 which have constraints on module allocation and need to allocate module data outside of module area. +config ARCH_WANTS_EXECMEM_LATE + bool + help + For architectures that do not allocate executable memory early on + boot, but rather require its initialization late when there is + enough entropy for module space randomization, for instance + arm64. + config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e74d84f58b77..677f218f7e84 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -12,48 +12,14 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/elf.h> -#include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/string.h> -#include <linux/gfp.h> #include <asm/sections.h> #include <asm/smp_plat.h> #include <asm/unwind.h> #include <asm/opcodes.h> -#ifdef CONFIG_XIP_KERNEL -/* - * The XIP kernel text is mapped in the module area for modules and - * some other stuff to work without any indirect relocations. - * MODULES_VADDR is redefined here and not in asm/memory.h to avoid - * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. - */ -#undef MODULES_VADDR -#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK) -#endif - -#ifdef CONFIG_MMU -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) - return p; - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - bool module_init_section(const char *name) { return strstarts(name, ".init") || diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index e8c6f4be0ce1..5345d218899a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -22,6 +22,7 @@ #include <linux/sizes.h> #include <linux/stop_machine.h> #include <linux/swiotlb.h> +#include <linux/execmem.h> #include <asm/cp15.h> #include <asm/mach-types.h> @@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long end) free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif + +#ifdef CONFIG_EXECMEM + +#ifdef CONFIG_XIP_KERNEL +/* + * The XIP kernel text is mapped in the module area for modules and + * some other stuff to work without any indirect relocations. + * MODULES_VADDR is redefined here and not in asm/memory.h to avoid + * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. + */ +#undef MODULES_VADDR +#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK) +#endif + +#ifdef CONFIG_MMU +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long fallback_start = 0, fallback_end = 0; + + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) { + fallback_start = VMALLOC_START; + fallback_end = VMALLOC_END; + } + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_EXEC, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_MMU */ + +#endif /* CONFIG_EXECMEM */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a04059c31aba..2df0818c3ca9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,6 +105,7 @@ config ARM64 select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_EXECMEM_LATE if EXECMEM select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 47e0be610bb6..36b25af56324 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -12,144 +12,18 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> -#include <linux/gfp.h> #include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/random.h> #include <linux/scs.h> -#include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> -static u64 module_direct_base __ro_after_init = 0; -static u64 module_plt_base __ro_after_init = 0; - -/* - * Choose a random page-aligned base address for a window of 'size' bytes which - * entirely contains the interval [start, end - 1]. - */ -static u64 __init random_bounding_box(u64 size, u64 start, u64 end) -{ - u64 max_pgoff, pgoff; - - if ((end - start) >= size) - return 0; - - max_pgoff = (size - (end - start)) / PAGE_SIZE; - pgoff = get_random_u32_inclusive(0, max_pgoff); - - return start - pgoff * PAGE_SIZE; -} - -/* - * Modules may directly reference data and text anywhere within the kernel - * image and other modules. References using PREL32 relocations have a +/-2G - * range, and so we need to ensure that the entire kernel image and all modules - * fall within a 2G window such that these are always within range. - * - * Modules may directly branch to functions and code within the kernel text, - * and to functions and code within other modules. These branches will use - * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure - * that the entire kernel text and all module text falls within a 128M window - * such that these are always within range. With PLTs, we can expand this to a - * 2G window. - * - * We chose the 128M region to surround the entire kernel image (rather than - * just the text) as using the same bounds for the 128M and 2G regions ensures - * by construction that we never select a 128M region that is not a subset of - * the 2G region. For very large and unusual kernel configurations this means - * we may fall back to PLTs where they could have been avoided, but this keeps - * the logic significantly simpler. - */ -static int __init module_init_limits(void) -{ - u64 kernel_end = (u64)_end; - u64 kernel_start = (u64)_text; - u64 kernel_size = kernel_end - kernel_start; - - /* - * The default modules region is placed immediately below the kernel - * image, and is large enough to use the full 2G relocation range. - */ - BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); - BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); - - if (!kaslr_enabled()) { - if (kernel_size < SZ_128M) - module_direct_base = kernel_end - SZ_128M; - if (kernel_size < SZ_2G) - module_plt_base = kernel_end - SZ_2G; - } else { - u64 min = kernel_start; - u64 max = kernel_end; - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); - } else { - module_direct_base = random_bounding_box(SZ_128M, min, max); - if (module_direct_base) { - min = module_direct_base; - max = module_direct_base + SZ_128M; - } - } - - module_plt_base = random_bounding_box(SZ_2G, min, max); - } - - pr_info("%llu pages in range for non-PLT usage", - module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); - pr_info("%llu pages in range for PLT usage", - module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); - - return 0; -} -subsys_initcall(module_init_limits); - -void *module_alloc(unsigned long size) -{ - void *p = NULL; - - /* - * Where possible, prefer to allocate within direct branch range of the - * kernel such that no PLTs are necessary. - */ - if (module_direct_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_direct_base, - module_direct_base + SZ_128M, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p && module_plt_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_plt_base, - module_plt_base + SZ_2G, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p) { - pr_warn_ratelimited("%s: unable to allocate memory\n", - __func__); - } - - if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { - vfree(p); - return NULL; - } - - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(p); -} - enum aarch64_reloc_op { RELOC_OP_NONE, RELOC_OP_ABS, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 327855a11df2..4268678d0e86 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 03efd86dce0a..9b5ab6818f7f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -32,6 +32,7 @@ #include <linux/hugetlb.h> #include <linux/acpi_iort.h> #include <linux/kmemleak.h> +#include <linux/execmem.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -432,3 +433,142 @@ void dump_mem_limit(void) pr_emerg("Memory Limit: none\n"); } } + +#ifdef CONFIG_EXECMEM +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; + +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) +{ + u64 max_pgoff, pgoff; + + if ((end - start) >= size) + return 0; + + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); + + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; + } else { + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); + } + + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); + + return 0; +} + +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start = 0, end = 0; + + module_init_limits(); + + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary. + */ + if (module_direct_base) { + start = module_direct_base; + end = module_direct_base + SZ_128M; + + if (module_plt_base) { + fallback_start = module_plt_base; + fallback_end = module_plt_base + SZ_2G; + } + } else if (module_plt_base) { + start = module_plt_base; + end = module_plt_base + SZ_2G; + } + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = PAGE_KERNEL, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL_ROX, + .alignment = 1, + }, + [EXECMEM_BPF] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d88711467bb..720336d28856 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1897,17 +1897,6 @@ u64 bpf_jit_alloc_exec_limit(void) return VMALLOC_END - VMALLOC_START; } -void *bpf_jit_alloc_exec(unsigned long size) -{ - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(vmalloc(size)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} - /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ bool bpf_jit_supports_subprog_tailcalls(void) { diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index c7d0338d12c1..36d6d9eeb7c7 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -490,12 +490,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); -} - static void module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 4dd53427f657..bf789d114c2d 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -24,6 +24,7 @@ #include <linux/gfp.h> #include <linux/hugetlb.h> #include <linux/mmzone.h> +#include <linux/execmem.h> #include <asm/asm-offsets.h> #include <asm/bootinfo.h> @@ -248,3 +249,23 @@ EXPORT_SYMBOL(invalid_pmd_table); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; EXPORT_SYMBOL(invalid_pte_table); + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 20ca48c1b606..c0109aff223b 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -147,8 +147,8 @@ #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \ VMALLOC_START != CKSSEG /* Load modules into 32bit-compatible segment. */ -#define MODULE_START CKSSEG -#define MODULE_END (FIXADDR_START-2*PAGE_SIZE) +#define MODULES_VADDR CKSSEG +#define MODULES_END (FIXADDR_START-2*PAGE_SIZE) #endif #define pte_ERROR(e) \ diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 7b2fbaa9cac5..ba0f62d8eff5 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -13,7 +13,6 @@ #include <linux/elf.h> #include <linux/mm.h> #include <linux/numa.h> -#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/string.h> @@ -31,15 +30,6 @@ struct mips_hi16 { static LIST_HEAD(dbe_list); static DEFINE_SPINLOCK(dbe_lock); -#ifdef MODULE_START -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - static void apply_r_mips_32(u32 *location, u32 base, Elf_Addr v) { *location = base + v; diff --git a/arch/mips/mm/fault.c b/arch/mip |
