From d83d42d071b6c58e71e54b8778ca5b279be98f7d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Oct 2021 14:57:41 -0600 Subject: module: fix validate_section_offset() overflow bug on 64-bit validate_section_offset() uses unsigned long local variable to add/store shdr->sh_offset and shdr->sh_size on all platforms. unsigned long is too short when sh_offset is Elf64_Off which would be the case on 64bit ELF headers. Without this fix applied we were shorting the design of modules to have section headers placed within the 32-bit boundary (4 GiB) instead of 64-bits when on 64-bit architectures (which allows for up to 16,777,216 TiB). In practice this just meant we were limiting modules sections to below 4 GiB even on 64-bit systems. This then should not really affect any real-world use case as modules these days obviously should likely never exceed 1 GiB in size overall. A specially crafted invalid module might succeed to skip validation in validate_section_offset() due to this mistake, but in such case no impact is observed through code inspection given the correct data types are used for the copy of the module when needed on move_module() when the section type is not SHT_NOBITS (which indicates no the section occupies no space on the file). Fix the overflow problem using the right size local variable when CONFIG_64BIT is defined. Signed-off-by: Shuah Khan [mcgrof: expand commit log with possible impact if not applied] Signed-off-by: Luis Chamberlain --- kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 5c26a76e800b..c7ad73807446 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2942,7 +2942,11 @@ static int module_sig_check(struct load_info *info, int flags) static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) { +#if defined(CONFIG_64BIT) + unsigned long long secend; +#else unsigned long secend; +#endif /* * Check for both overflow and offset/size being -- cgit v1.2.3 From 7fd982f394c42f25a73fe9dfbf1e6b11fa26b40a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Oct 2021 14:57:40 -0600 Subject: module: change to print useful messages from elf_validity_check() elf_validity_check() checks ELF headers for errors and ELF Spec. compliance and if any of them fail it returns -ENOEXEC from all of these error paths. Almost all of them don't print any messages. When elf_validity_check() returns an error, load_module() prints an error message without error code. It is hard to determine why the module ELF structure is invalid, even if load_module() prints the error code which is -ENOEXEC in all of these cases. Change to print useful error messages from elf_validity_check() to clearly say what went wrong and why the ELF validity checks failed. Remove the load_module() error message which is no longer needed. This patch includes changes to fix build warns on 32-bit platforms: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Off' {aka 'unsigned int'} Reported-by: kernel test robot Signed-off-by: Shuah Khan Signed-off-by: Luis Chamberlain --- kernel/module.c | 75 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 21 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c7ad73807446..84a9141a5e15 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2971,14 +2971,29 @@ static int elf_validity_check(struct load_info *info) Elf_Shdr *shdr, *strhdr; int err; - if (info->len < sizeof(*(info->hdr))) - return -ENOEXEC; + if (info->len < sizeof(*(info->hdr))) { + pr_err("Invalid ELF header len %lu\n", info->len); + goto no_exec; + } - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 - || info->hdr->e_type != ET_REL - || !elf_check_arch(info->hdr) - || info->hdr->e_shentsize != sizeof(Elf_Shdr)) - return -ENOEXEC; + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { + pr_err("Invalid ELF header magic: != %s\n", ELFMAG); + goto no_exec; + } + if (info->hdr->e_type != ET_REL) { + pr_err("Invalid ELF header type: %u != %u\n", + info->hdr->e_type, ET_REL); + goto no_exec; + } + if (!elf_check_arch(info->hdr)) { + pr_err("Invalid architecture in ELF header: %u\n", + info->hdr->e_machine); + goto no_exec; + } + if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { + pr_err("Invalid ELF section header size\n"); + goto no_exec; + } /* * e_shnum is 16 bits, and sizeof(Elf_Shdr) is @@ -2987,8 +3002,10 @@ static int elf_validity_check(struct load_info *info) */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > - info->len - info->hdr->e_shoff)) - return -ENOEXEC; + info->len - info->hdr->e_shoff)) { + pr_err("Invalid ELF section header overflow\n"); + goto no_exec; + } info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; @@ -2996,13 +3013,19 @@ static int elf_validity_check(struct load_info *info) * Verify if the section name table index is valid. */ if (info->hdr->e_shstrndx == SHN_UNDEF - || info->hdr->e_shstrndx >= info->hdr->e_shnum) - return -ENOEXEC; + || info->hdr->e_shstrndx >= info->hdr->e_shnum) { + pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", + info->hdr->e_shstrndx, info->hdr->e_shstrndx, + info->hdr->e_shnum); + goto no_exec; + } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; err = validate_section_offset(info, strhdr); - if (err < 0) + if (err < 0) { + pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); return err; + } /* * The section name table must be NUL-terminated, as required @@ -3010,8 +3033,10 @@ static int elf_validity_check(struct load_info *info) * strings in the section safe. */ info->secstrings = (void *)info->hdr + strhdr->sh_offset; - if (info->secstrings[strhdr->sh_size - 1] != '\0') - return -ENOEXEC; + if (info->secstrings[strhdr->sh_size - 1] != '\0') { + pr_err("ELF Spec violation: section name table isn't null terminated\n"); + goto no_exec; + } /* * The code assumes that section 0 has a length of zero and @@ -3019,8 +3044,11 @@ static int elf_validity_check(struct load_info *info) */ if (info->sechdrs[0].sh_type != SHT_NULL || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) - return -ENOEXEC; + || info->sechdrs[0].sh_addr != 0) { + pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", + info->sechdrs[0].sh_type); + goto no_exec; + } for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; @@ -3030,8 +3058,12 @@ static int elf_validity_check(struct load_info *info) continue; case SHT_SYMTAB: if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) - return -ENOEXEC; + || shdr->sh_link >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + shdr->sh_link, shdr->sh_link, + info->hdr->e_shnum); + goto no_exec; + } fallthrough; default: err = validate_section_offset(info, shdr); @@ -3053,6 +3085,9 @@ static int elf_validity_check(struct load_info *info) } return 0; + +no_exec: + return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) @@ -3944,10 +3979,8 @@ static int load_module(struct load_info *info, const char __user *uargs, * sections. */ err = elf_validity_check(info); - if (err) { - pr_err("Module has invalid ELF structures\n"); + if (err) goto free_copy; - } /* * Everything checks out, so set up the section info -- cgit v1.2.3 From ca3574bd653aba234a4b31955f2778947403be16 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Dec 2021 11:00:19 -0600 Subject: exit: Rename module_put_and_exit to module_put_and_kthread_exit Update module_put_and_exit to call kthread_exit instead of do_exit. Change the name to reflect this change in functionality. All of the users of module_put_and_exit are causing the current kthread to exit so this change makes it clear what is happening. There is no functional change. Signed-off-by: "Eric W. Biederman" --- kernel/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 84a9141a5e15..a3aa00bf270d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -337,12 +337,12 @@ static inline void add_taint_module(struct module *mod, unsigned flag, * A thread that wants to hold a reference to a module only while it * is running can call this to safely exit. nfsd and lockd use this. */ -void __noreturn __module_put_and_exit(struct module *mod, long code) +void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) { module_put(mod); - do_exit(code); + kthread_exit(code); } -EXPORT_SYMBOL(__module_put_and_exit); +EXPORT_SYMBOL(__module_put_and_kthread_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) -- cgit v1.2.3 From f5bdb34bf0c9314548f2d8e2360b703ff3610303 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 29 Dec 2021 13:56:47 -0800 Subject: livepatch: Avoid CPU hogging with cond_resched When initializing a 'struct klp_object' in klp_init_object_loaded(), and performing relocations in klp_resolve_symbols(), klp_find_object_symbol() is invoked to look up the address of a symbol in an already-loaded module (or vmlinux). This, in turn, calls kallsyms_on_each_symbol() or module_kallsyms_on_each_symbol() to find the address of the symbol that is being patched. It turns out that symbol lookups often take up the most CPU time when enabling and disabling a patch, and may hog the CPU and cause other tasks on that CPU's runqueue to starve -- even in paths where interrupts are enabled. For example, under certain workloads, enabling a KLP patch with many objects or functions may cause ksoftirqd to be starved, and thus for interrupts to be backlogged and delayed. This may end up causing TCP retransmits on the host where the KLP patch is being applied, and in general, may cause any interrupts serviced by softirqd to be delayed while the patch is being applied. So as to ensure that kallsyms_on_each_symbol() does not end up hogging the CPU, this patch adds a call to cond_resched() in kallsyms_on_each_symbol() and module_kallsyms_on_each_symbol(), which are invoked when doing a symbol lookup in vmlinux and a module respectively. Without this patch, if a live-patch is applied on a 36-core Intel host with heavy TCP traffic, a ~10x spike is observed in TCP retransmits while the patch is being applied. Additionally, collecting sched events with perf indicates that ksoftirqd is awakened ~1.3 seconds before it's eventually scheduled. With the patch, no increase in TCP retransmit events is observed, and ksoftirqd is scheduled shortly after it's awakened. Signed-off-by: David Vernet Acked-by: Miroslav Benes Acked-by: Song Liu Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20211229215646.830451-1-void@manifault.com --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 40ec9a030eec..c96160f7f3f5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4462,6 +4462,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, mod, kallsyms_symbol_value(sym)); if (ret != 0) goto out; + + cond_resched(); } } out: -- cgit v1.2.3 From 9dc3c3f691bca10d3aa94887eee33bf629840b23 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Mon, 22 Nov 2021 06:26:48 -0800 Subject: module: Remove outdated comment Since commit e513cc1c07e2 ("module: Remove stop_machine from module unloading") this comment is no longer correct. Remove it. Signed-off-by: Yu Chen Signed-off-by: Luis Chamberlain --- kernel/module.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 84a9141a5e15..320ec908045f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -958,7 +958,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, } } - /* Stop the machine so refcounts can't move and disable module. */ ret = try_stop_module(mod, flags, &forced); if (ret != 0) goto out; -- cgit v1.2.3 From b1ae6dc41eaaa98bb75671e0f3665bfda248c3e7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 5 Jan 2022 13:55:12 -0800 Subject: module: add in-kernel support for decompressing Current scheme of having userspace decompress kernel modules before loading them into the kernel runs afoul of LoadPin security policy, as it loses link between the source of kernel module on the disk and binary blob that is being loaded into the kernel. To solve this issue let's implement decompression in kernel, so that we can pass a file descriptor of compressed module file into finit_module() which will keep LoadPin happy. To let userspace know what compression/decompression scheme kernel supports it will create /sys/module/compression attribute. kmod can read this attribute and decide if it can pass compressed file to finit_module(). New MODULE_INIT_COMPRESSED_DATA flag indicates that the kernel should attempt to decompress the data read from file descriptor prior to trying load the module. To simplify things kernel will only implement single decompression method matching compression method selected when generating modules. This patch implements gzip and xz; more can be added later, Signed-off-by: Dmitry Torokhov Signed-off-by: Luis Chamberlain --- kernel/module.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 320ec908045f..34fe2824eb56 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3173,9 +3173,12 @@ out: return err; } -static void free_copy(struct load_info *info) +static void free_copy(struct load_info *info, int flags) { - vfree(info->hdr); + if (flags & MODULE_INIT_COMPRESSED_FILE) + module_decompress_cleanup(info); + else + vfree(info->hdr); } static int rewrite_section_headers(struct load_info *info, int flags) @@ -4124,7 +4127,7 @@ static int load_module(struct load_info *info, const char __user *uargs, } /* Get rid of temporary copy. */ - free_copy(info); + free_copy(info, flags); /* Done! */ trace_module_load(mod); @@ -4173,7 +4176,7 @@ static int load_module(struct load_info *info, const char __user *uargs, module_deallocate(mod, info); free_copy: - free_copy(info); + free_copy(info, flags); return err; } @@ -4200,7 +4203,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { struct load_info info = { }; - void *hdr = NULL; + void *buf = NULL; + int len; int err; err = may_init_module(); @@ -4210,15 +4214,24 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS - |MODULE_INIT_IGNORE_VERMAGIC)) + |MODULE_INIT_IGNORE_VERMAGIC + |MODULE_INIT_COMPRESSED_FILE)) return -EINVAL; - err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL, + len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL, READING_MODULE); - if (err < 0) - return err; - info.hdr = hdr; - info.len = err; + if (len < 0) + return len; + + if (flags & MODULE_INIT_COMPRESSED_FILE) { + err = module_decompress(&info, buf, len); + vfree(buf); /* compressed data is no longer needed */ + if (err) + return err; + } else { + info.hdr = buf; + info.len = len; + } return load_module(&info, uargs, flags); } -- cgit v1.2.3 From a97ac8cb24a3c3ad74794adb83717ef1605d1b47 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 13 Jan 2022 16:51:52 -0800 Subject: module: fix signature check failures when using in-kernel decompression The new flag MODULE_INIT_COMPRESSED_FILE unintentionally trips check in module_sig_check(). The check was supposed to catch case when version info or magic was removed from a signed module, making signature invalid, but it was coded too broadly and was catching this new flag as well. Change the check to only test the 2 particular flags affecting signature validity. Fixes: b1ae6dc41eaa ("module: add in-kernel support for decompressing") Signed-off-by: Dmitry Torokhov Reviewed-by: Douglas Anderson Signed-off-by: Luis Chamberlain --- kernel/module.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 34fe2824eb56..387ee77bdbd6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2883,12 +2883,13 @@ static int module_sig_check(struct load_info *info, int flags) const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const char *reason; const void *mod = info->hdr; - + bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | + MODULE_INIT_IGNORE_VERMAGIC); /* - * Require flags == 0, as a module with version information - * removed is no longer the module that was signed + * Do not allow mangled modules as a module with version information + * removed is no longer the module that was signed. */ - if (flags == 0 && + if (!mangled_module && info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ -- cgit v1.2.3 From 67d6212afda218d564890d1674bab28e8612170f Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jan 2022 15:39:53 -0800 Subject: Revert "module, async: async_synchronize_full() on module init iff async is used" This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- kernel/module.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 24dab046e16c..46a5c2ed1928 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + -- cgit v1.2.3