Diffstat (limited to 'kernel/module')
-rw-r--r--  kernel/module/Kconfig         100
-rw-r--r--  kernel/module/Makefile          6
-rw-r--r--  kernel/module/decompress.c      6
-rw-r--r--  kernel/module/dups.c          246
-rw-r--r--  kernel/module/internal.h      140
-rw-r--r--  kernel/module/kallsyms.c       78
-rw-r--r--  kernel/module/kdb.c            17
-rw-r--r--  kernel/module/kmod.c          180
-rw-r--r--  kernel/module/main.c         1091
-rw-r--r--  kernel/module/procfs.c         16
-rw-r--r--  kernel/module/stats.c         430
-rw-r--r--  kernel/module/strict_rwx.c     99
-rw-r--r--  kernel/module/tracking.c        7
-rw-r--r--  kernel/module/tree_lookup.c    39
14 files changed, 1786 insertions, 669 deletions
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 424b3bc58f3f..33a2e991f608 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -22,6 +22,104 @@ menuconfig MODULES
if MODULES
+config MODULE_DEBUGFS
+ bool
+
+config MODULE_DEBUG
+ bool "Module debugging"
+ depends on DEBUG_FS
+ help
+ Allows you to enable / disable features which can help you debug
+ modules. You don't need these options on production systems.
+
+if MODULE_DEBUG
+
+config MODULE_STATS
+ bool "Module statistics"
+ depends on DEBUG_FS
+ select MODULE_DEBUGFS
+ help
+ This option allows you to maintain a record of module statistics.
+ For example, the size of all modules, average size, text size, and
+ a list of failed modules along with the size of each. For failed
+ modules we track whether the failure was because an existing module
+ with the same name was taking too long to load or because it was
+ already loaded.
+
+ You should enable this if you are debugging production loads
+ and want to see if userspace or the kernel is doing stupid things
+ with loading modules when it shouldn't, or if you want to help
+ optimize userspace / kernel space module autoloading schemes.
+ You might want to do this because failed modules tend to use
+ up a significant amount of memory, so you'd be doing everyone a
+ favor by avoiding these failures proactively.
+
+ This functionality is also useful for those experimenting with
+ module .text ELF section optimization.
+
+ If unsure, say N.
+
+config MODULE_DEBUG_AUTOLOAD_DUPS
+ bool "Debug duplicate modules with auto-loading"
+ help
+ Module autoloading allows in-kernel code to request modules through
+ the *request_module*() API calls. This in turn just calls userspace
+ modprobe. Although modprobe checks to see if a module is already
+ loaded before trying to load it, there is a small time window in
+ which multiple duplicate requests can end up in userspace, and
+ multiple modprobe calls can race calling finit_module() around the
+ same time for duplicate modules. In the worst case the finit_module()
+ system call can consume more than twice the respective module size
+ in virtual memory for each duplicate module request. Although
+ duplicate module requests are non-fatal, virtual memory is a limited
+ resource and each duplicate request just strains it unnecessarily.
+
+ This debugging facility will create pr_warn() splats for duplicate
+ module requests to help identify whether module auto-loading may be
+ the culprit behind your early boot virtual memory pressure. Since
+ virtual memory abuse caused by duplicate module requests could
+ render a system unusable, this functionality will also converge
+ races in requests for the same module to a single request. You can
+ boot with the module.enable_dups_trace=1 kernel parameter to use
+ WARN_ON() instead of the pr_warn().
+
+ If the first module request used request_module_nowait() we cannot
+ use that as the anchor to wait for duplicate module requests, since
+ users of request_module() do want a proper return value. If a call
+ for the same module happened earlier with request_module() though,
+ then a duplicate request_module_nowait() would be detected. A plain
+ request_module() call is synchronous and waits until modprobe
+ completes. Subsequent auto-loading requests for the same module do
+ not trigger new finit_module() calls and do not strain virtual
+ memory, and so as soon as modprobe successfully completes we remove
+ duplicate tracking for that module.
+
+ Enable this functionality to try to debug virtual memory abuse
+ during boot on systems which are failing to boot, or if you suspect
+ you may be straining virtual memory during boot and want to identify
+ whether the abuse was due to module auto-loading. These issues are
+ currently only known to occur on systems with many CPUs (over 400)
+ and are likely the result of udev issuing duplicate module requests
+ for each CPU, in which case module auto-loading itself is not the
+ culprit. There may very well still be many duplicate module
+ auto-loading requests which could be optimized for, and this
+ debugging facility can be used to help identify them.
+
+ Only enable this for debugging; never have it enabled on
+ production systems.
+
+config MODULE_DEBUG_AUTOLOAD_DUPS_TRACE
+ bool "Force full stack trace when duplicates are found"
+ depends on MODULE_DEBUG_AUTOLOAD_DUPS
+ help
+ Enabling this will force a full stack trace for duplicate module
+ auto-loading requests using WARN_ON() instead of pr_warn(). You
+ should keep this disabled at all times unless you are a developer
+ doing a manual inspection and want to debug exactly why these
+ duplicates occur.
+
+endif # MODULE_DEBUG
+
config MODULE_FORCE_LOAD
bool "Forced module loading"
default n
@@ -51,7 +149,7 @@ config MODULE_FORCE_UNLOAD
config MODULE_UNLOAD_TAINT_TRACKING
bool "Tainted module unload tracking"
depends on MODULE_UNLOAD
- default n
+ select MODULE_DEBUGFS
help
This option allows you to maintain a record of each unloaded
module that tainted the kernel. In addition to displaying a
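
To make the race described in the MODULE_DEBUG_AUTOLOAD_DUPS help text above concrete, here is a minimal sketch, not taken from this patch; "my-driver" is a hypothetical modalias and cpu_probe_work() a made-up caller:

	/*
	 * Two workers racing to auto-load the same module. Each
	 * request_module() forks userspace modprobe, and both modprobes
	 * can reach finit_module() before either load completes.
	 */
	static void cpu_probe_work(struct work_struct *work)
	{
		int ret;

		ret = request_module("my-driver");	/* synchronous */
		if (ret)
			pr_warn("my-driver auto-load failed: %d\n", ret);
	}

With the option enabled, the second racing request is flagged with pr_warn() (or WARN() under module.enable_dups_trace=1) and parked until the first modprobe completes, then adopts its return value.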
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 948efea81e85..a10b2b9a6fdf 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -7,7 +7,10 @@
# and produce insane amounts of uninteresting coverage.
KCOV_INSTRUMENT_module.o := n
-obj-y += main.o strict_rwx.o
+obj-y += main.o
+obj-y += strict_rwx.o
+obj-y += kmod.o
+obj-$(CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS) += dups.o
obj-$(CONFIG_MODULE_DECOMPRESS) += decompress.o
obj-$(CONFIG_MODULE_SIG) += signing.o
obj-$(CONFIG_LIVEPATCH) += livepatch.o
@@ -19,3 +22,4 @@ obj-$(CONFIG_SYSFS) += sysfs.o
obj-$(CONFIG_KGDB_KDB) += kdb.o
obj-$(CONFIG_MODVERSIONS) += version.o
obj-$(CONFIG_MODULE_UNLOAD_TAINT_TRACKING) += tracking.o
+obj-$(CONFIG_MODULE_STATS) += stats.o
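
As a reminder of how these conditionals resolve: with CONFIG_MODULE_STATS=y the last line expands to obj-y += stats.o, and when the option is unset it expands to obj- += stats.o, which kbuild simply discards, so dups.o and stats.o stay out of non-debug builds.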
diff --git a/kernel/module/decompress.c b/kernel/module/decompress.c
index bb79ac1a6d8f..e97232b125eb 100644
--- a/kernel/module/decompress.c
+++ b/kernel/module/decompress.c
@@ -267,7 +267,7 @@ static ssize_t module_zstd_decompress(struct load_info *info,
zstd_dec.size = PAGE_SIZE;
ret = zstd_decompress_stream(dstream, &zstd_dec, &zstd_buf);
- kunmap(page);
+ kunmap_local(zstd_dec.dst);
retval = zstd_get_error_code(ret);
if (retval)
break;
@@ -297,6 +297,10 @@ int module_decompress(struct load_info *info, const void *buf, size_t size)
ssize_t data_size;
int error;
+#if defined(CONFIG_MODULE_STATS)
+ info->compressed_len = size;
+#endif
+
/*
* Start with number of pages twice as big as needed for
* compressed data.
diff --git a/kernel/module/dups.c b/kernel/module/dups.c
new file mode 100644
index 000000000000..aa8e1361fdb5
--- /dev/null
+++ b/kernel/module/dups.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * kmod dups - the kernel module autoloader duplicate suppressor
+ *
+ * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
+ */
+
+#define pr_fmt(fmt) "module: " fmt
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+#include <linux/rwsem.h>
+#include <linux/ptrace.h>
+#include <linux/async.h>
+#include <linux/uaccess.h>
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "module."
+static bool enable_dups_trace = IS_ENABLED(CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS_TRACE);
+module_param(enable_dups_trace, bool_enable_only, 0644);
+
+/*
+ * Protects dup_kmod_reqs list, adds / removals with RCU.
+ */
+static DEFINE_MUTEX(kmod_dup_mutex);
+static LIST_HEAD(dup_kmod_reqs);
+
+struct kmod_dup_req {
+ struct list_head list;
+ char name[MODULE_NAME_LEN];
+ struct completion first_req_done;
+ struct work_struct complete_work;
+ struct delayed_work delete_work;
+ int dup_ret;
+};
+
+static struct kmod_dup_req *kmod_dup_request_lookup(char *module_name)
+{
+ struct kmod_dup_req *kmod_req;
+
+ list_for_each_entry_rcu(kmod_req, &dup_kmod_reqs, list,
+ lockdep_is_held(&kmod_dup_mutex)) {
+ if (strlen(kmod_req->name) == strlen(module_name) &&
+ !memcmp(kmod_req->name, module_name, strlen(module_name))) {
+ return kmod_req;
+ }
+ }
+
+ return NULL;
+}
+
+static void kmod_dup_request_delete(struct work_struct *work)
+{
+ struct kmod_dup_req *kmod_req;
+ kmod_req = container_of(to_delayed_work(work), struct kmod_dup_req, delete_work);
+
+ /*
+ * The typical situation is that a module was loaded successfully. In
+ * that situation the module will already be visible to userspace. If
+ * new requests come in after that, userspace will already know the
+ * module is loaded, so it will just return 0 right away. There is
+ * still a small window right after we delete this entry in which new
+ * request_module() calls may come in; that is fine. These heuristics
+ * are there to protect against finit_module() abuse from auto-loading:
+ * if callers keep trying to auto-load a module that is already
+ * loaded, that's on them, and those inefficiencies should not be
+ * fixed by kmod. The inefficiency there is just a call to modprobe
+ * that returns 0.
+ */
+ mutex_lock(&kmod_dup_mutex);
+ list_del_rcu(&kmod_req->list);
+ synchronize_rcu();
+ mutex_unlock(&kmod_dup_mutex);
+ kfree(kmod_req);
+}
+
+static void kmod_dup_request_complete(struct work_struct *work)
+{
+ struct kmod_dup_req *kmod_req;
+
+ kmod_req = container_of(work, struct kmod_dup_req, complete_work);
+
+ /*
+ * This will ensure that the kernel lets all the waiters know it's
+ * time to check the return value. It's time to
+ * go home.
+ */
+ complete_all(&kmod_req->first_req_done);
+
+ /*
+ * Now that we have allowed prior request_module() calls to go on
+ * with life, let's schedule deleting this entry. We don't have
+ * to do it right away, but we *eventually* want to do it so as not
+ * to let this linger forever, as this is just a boot optimization for
+ * possible abuses of vmalloc() incurred by finit_module() thrashing.
+ */
+ queue_delayed_work(system_wq, &kmod_req->delete_work, 60 * HZ);
+}
+
+bool kmod_dup_request_exists_wait(char *module_name, bool wait, int *dup_ret)
+{
+ struct kmod_dup_req *kmod_req, *new_kmod_req;
+ int ret;
+
+ /*
+ * Pre-allocate the entry in case we have to use it later
+ * to avoid contention with the mutex.
+ */
+ new_kmod_req = kzalloc(sizeof(*new_kmod_req), GFP_KERNEL);
+ if (!new_kmod_req)
+ return false;
+
+ memcpy(new_kmod_req->name, module_name, strlen(module_name));
+ INIT_WORK(&new_kmod_req->complete_work, kmod_dup_request_complete);
+ INIT_DELAYED_WORK(&new_kmod_req->delete_work, kmod_dup_request_delete);
+ init_completion(&new_kmod_req->first_req_done);
+
+ mutex_lock(&kmod_dup_mutex);
+
+ kmod_req = kmod_dup_request_lookup(module_name);
+ if (!kmod_req) {
+ /*
+ * If the first request that came through for a module
+ * was with request_module_nowait() we cannot wait for it
+ * and share its return value with other users which may
+ * have used request_module() and need a proper return value,
+ * so just skip using it as an anchor.
+ *
+ * If a prior request to this one came through with
+ * request_module() though, then a request_module_nowait()
+ * would benefit from duplicate detection.
+ */
+ if (!wait) {
+ kfree(new_kmod_req);
+ pr_debug("New request_module_nowait() for %s -- cannot track duplicates for this request\n", module_name);
+ mutex_unlock(&kmod_dup_mutex);
+ return false;
+ }
+
+ /*
+ * There was no duplicate, just add the request so we can
+ * keep tabs on duplicates later.
+ */
+ pr_debug("New request_module() for %s\n", module_name);
+ list_add_rcu(&new_kmod_req->list, &dup_kmod_reqs);
+ mutex_unlock(&kmod_dup_mutex);
+ return false;
+ }
+ mutex_unlock(&kmod_dup_mutex);
+
+ /* We are dealing with a duplicate request now */
+ kfree(new_kmod_req);
+
+ /*
+ * To fix these, try using try_then_request_module() instead, as that
+ * will check whether the component you are looking for is present.
+ * You could also just queue a single request to load the module once,
+ * instead of having everything that needs the module issue its own
+ * request.
+ *
+ * Duplicate request_module() calls can cause quite a bit of wasted
+ * vmalloc() space when racing with userspace.
+ */
+ if (enable_dups_trace)
+ WARN(1, "module-autoload: duplicate request for module %s\n", module_name);
+ else
+ pr_warn("module-autoload: duplicate request for module %s\n", module_name);
+
+ if (!wait) {
+ /*
+ * If request_module_nowait() was used then the user just
+ * wanted to issue the request, and if another module request
+ * with the same name was already on its way, we don't care about
+ * the return value either. Let duplicate request_module_nowait()
+ * calls bail out right away.
+ */
+ *dup_ret = 0;
+ return true;
+ }
+
+ /*
+ * If a duplicate request_module() was used they *may* care for
+ * the return value, so we have no other option but to wait for
+ * the first caller to complete. If the first caller used
+ * the request_module_nowait() call, subsequent callers will
+ * deal with the compromise of getting a successful call with this
+ * optimization enabled ...
+ */
+ ret = wait_for_completion_state(&kmod_req->first_req_done,
+ TASK_UNINTERRUPTIBLE | TASK_KILLABLE);
+ if (ret) {
+ *dup_ret = ret;
+ return true;
+ }
+
+ /* Now the duplicate request has the same exact return value as the first request */
+ *dup_ret = kmod_req->dup_ret;
+
+ return true;
+}
+
+void kmod_dup_request_announce(char *module_name, int ret)
+{
+ struct kmod_dup_req *kmod_req;
+
+ mutex_lock(&kmod_dup_mutex);
+
+ kmod_req = kmod_dup_request_lookup(module_name);
+ if (!kmod_req)
+ goto out;
+
+ kmod_req->dup_ret = ret;
+
+ /*
+ * If we complete() here we may allow duplicate threads
+ * to continue before the first one that submitted the
+ * request. We're also in no rush: given that each and
+ * every bounce back to userspace is slow, we avoid that
+ * with a slight delay here. So queue up the completion
+ * and let duplicates suffer, just waiting a tad bit longer.
+ * There is no rush, but we also don't want to hold the
+ * caller up forever or introduce any boot delays.
+ */
+ queue_work(system_wq, &kmod_req->complete_work);
+
+out:
+ mutex_unlock(&kmod_dup_mutex);
+}
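
For context on how the two helpers exported above are meant to be used, here is a rough sketch of the pairing in __request_module() (the new kmod.c, whose diff follows below); call_modprobe() stands in for kmod's internal helper that execs modprobe, and error handling is elided:

	int ret, dup_ret;

	/*
	 * If an earlier request for this name is already in flight,
	 * adopt its return value instead of forking modprobe again.
	 */
	if (kmod_dup_request_exists_wait(module_name, wait, &dup_ret)) {
		ret = dup_ret;
		goto out;
	}

	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);

	/* Wake any duplicates parked in kmod_dup_request_exists_wait(). */
	kmod_dup_request_announce(module_name, ret);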
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 1c877561a7d2..dc7b0160c480 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -3,6 +3,7 @@
*
* Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
*/
#include <linux/elf.h>
@@ -17,27 +18,19 @@
#define ARCH_SHF_SMALL 0
#endif
-/* If this is set, the section belongs in the init part of the module */
-#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG - 1))
-/* Maximum number of characters written by module_flags() */
-#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
-
-#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
-#define data_layout core_layout
-#endif
-
/*
- * Modules' sections will be aligned on page boundaries
- * to ensure complete separation of code and data, but
- * only when CONFIG_STRICT_MODULE_RWX=y
+ * Use highest 4 bits of sh_entsize to store the mod_mem_type of this
+ * section. This leaves 28 bits for offset on 32-bit systems, which is
+ * about 256 MiB (WARN_ON_ONCE if we exceed that).
*/
-static inline unsigned int strict_align(unsigned int size)
-{
- if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
- return PAGE_ALIGN(size);
- else
- return size;
-}
+
+#define SH_ENTSIZE_TYPE_BITS 4
+#define SH_ENTSIZE_TYPE_SHIFT (BITS_PER_LONG - SH_ENTSIZE_TYPE_BITS)
+#define SH_ENTSIZE_TYPE_MASK ((1UL << SH_ENTSIZE_TYPE_BITS) - 1)
+#define SH_ENTSIZE_OFFSET_MASK ((1UL << (BITS_PER_LONG - SH_ENTSIZE_TYPE_BITS)) - 1)
+
+/* Maximum number of characters written by module_flags() */
+#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
extern struct mutex module_mutex;
extern struct list_head modules;
@@ -53,7 +46,6 @@ extern const struct kernel_symbol __stop___ksymtab_gpl[];
extern const s32 __start___kcrctab[];
extern const s32 __start___kcrctab_gpl[];
-#include <linux/dynamic_debug.h>
struct load_info {
const char *name;
/* pointer to module in temporary copy, freed at end of load_module() */
@@ -63,12 +55,14 @@ struct load_info {
Elf_Shdr *sechdrs;
char *secstrings, *strtab;
unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs;
- struct _ddebug_info dyndbg;
bool sig_ok;
#ifdef CONFIG_KALLSYMS
unsigned long mod_kallsyms_init_off;
#endif
#ifdef CONFIG_MODULE_DECOMPRESS
+#ifdef CONFIG_MODULE_STATS
+ unsigned long compressed_len;
+#endif
struct page **pages;
unsigned int max_pages;
unsigned int used_pages;
@@ -101,11 +95,16 @@ int try_to_force_load(struct module *mod, const char *reason);
bool find_symbol(struct find_symbol_arg *fsa);
struct module *find_module_all(const char *name, size_t len, bool even_unformed);
int cmp_name(const void *name, const void *sym);
-long module_get_offset(struct module *mod, unsigned int *size, Elf_Shdr *sechdr,
- unsigned int section);
+long module_get_offset_and_type(struct module *mod, enum mod_mem_type type,
+ Elf_Shdr *sechdr, unsigned int section);
char *module_flags(struct module *mod, char *buf, bool show_state);
size_t module_flags_taint(unsigned long taints, char *buf);
+char *module_next_tag_pair(char *string, unsigned long *secsize);
+
+#define for_each_modinfo_entry(entry, info, name) \
+ for (entry = get_modinfo(info, name); entry; entry = get_next_modinfo(info, name, entry))
+
static inline void module_assert_mutex_or_preempt(void)
{
#ifdef CONFIG_LOCKDEP
@@ -148,6 +147,95 @@ static inline bool set_livepatch_module(struct module *mod)
#endif
}
+/**
+ * enum fail_dup_mod_reason - state at which a duplicate module was detected
+ *
+ * @FAIL_DUP_MOD_BECOMING: the module is read properly, passes all checks but
+ * we've determined that another module with the same name is already loaded
+ * or being processed on our &modules list. This happens on early_mod_check()
+ * right before layout_and_allocate(). The kernel would have already
+ * vmalloc()'d space for the entire module through finit_module(). If
+ * decompression was used two vmap() spaces were used. These failures can
+ * happen when userspace has not yet seen the module present in the kernel
+ * and tries to load the module multiple times at the same time.
+ * @FAIL_DUP_MOD_LOAD: the module has been read properly, passes all validation
+ * checks and the kernel determined that the module was unique, and because
+ * of this allocated yet another private kernel copy of the module space in
+ * layout_and_allocate(); but after this it determined in add_unformed_module()
+ * that another module with the same name is already loaded or being processed.
+ * These failures should be mitigated as much as possible and are indicative
+ * of really fast races in loading modules. Without module decompression
+ * they waste twice as much vmap space. With module decompression three
+ * times the module's size vmap space is wasted.
+ */
+enum fail_dup_mod_reason {
+ FAIL_DUP_MOD_BECOMING = 0,
+ FAIL_DUP_MOD_LOAD,
+};
+
+#ifdef CONFIG_MODULE_DEBUGFS
+extern struct dentry *mod_debugfs_root;
+#endif
+
+#ifdef CONFIG_MODULE_STATS
+
+#define mod_stat_add_long(count, var) atomic_long_add(count, var)
+#define mod_stat_inc(name) atomic_inc(name)
+
+extern atomic_long_t total_mod_size;
+extern atomic_long_t total_text_size;
+extern atomic_long_t invalid_kread_bytes;
+extern atomic_long_t invalid_decompress_bytes;
+
+extern atomic_t modcount;
+extern atomic_t failed_kreads;
+extern atomic_t failed_decompress;
+struct mod_fail_load {
+ struct list_head list;
+ char name[MODULE_NAME_LEN];
+ atomic_long_t count;
+ unsigned long dup_fail_mask;
+};
+
+int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason);
+void mod_stat_bump_invalid(struct load_info *info, int flags);
+void mod_stat_bump_becoming(struct load_info *info, int flags);
+
+#else
+
+#define mod_stat_add_long(name, var)
+#define mod_stat_inc(name)
+
+static inline int try_add_failed_module(const char *name,
+ enum fail_dup_mod_reason reason)
+{
+ return 0;
+}
+
+static inline void mod_stat_bump_invalid(struct load_info *info, int flags)
+{
+}
+
+static inline void mod_stat_bump_becoming(struct load_info *info, int flags)
+{
+}
+
+#endif /* CONFIG_MODULE_STATS */
+
+#ifdef CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS
+bool kmod_dup_request_exists_wait(char *module_name, bool wait, int *dup_ret);
+void kmod_dup_request_announce(char *module_name, int ret);
+#else
+static inline bool kmod_dup_request_exists_wait(char *module_name, bool wait, int *dup_ret)
+{
+ return false;
+}
+
+static inline void kmod_dup_request_announce(char *module_name, int ret)
+{
+}
+#endif
+
#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
struct mod_unload_taint {
struct list_head list;
@@ -190,10 +278,13 @@ struct mod_tree_root {
#endif
unsigned long addr_min;
unsigned long addr_max;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ unsigned long data_addr_min;
+ unsigned long data_addr_max;
+#endif
};
extern struct mod_tree_root mod_tree;
-extern struct mod_tree_root mod_data_tree;
#ifdef CONFIG_MODULES_TREE_LOOKUP
void mod_tree_insert(struct module *mod);
@@ -224,7 +315,6 @@ void module_enable_nx(const struct module *mod);
void module_enable_x(const struct module *mod);
int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod);
-bool module_check_misalignment(const struct module *mod);
#ifdef CONFIG_MODULE_SIG
int module_sig_check(struct load_info *info, int flags);
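
The sh_entsize encoding introduced above works as in this sketch: module_get_offset_and_type() packs the mod_mem_type into the top SH_ENTSIZE_TYPE_BITS of sh_entsize and the section offset into the remaining bits, and consumers shift the type back out (as kallsyms.c does below); the variable names here are illustrative:

	/* Pack: type in the top 4 bits, offset below (WARN if it overflows). */
	WARN_ON_ONCE(offset & ~SH_ENTSIZE_OFFSET_MASK);
	sechdr->sh_entsize = ((unsigned long)type << SH_ENTSIZE_TYPE_SHIFT) | offset;

	/* Unpack: recover both halves later. */
	type   = (sechdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT) & SH_ENTSIZE_TYPE_MASK;
	offset = sechdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;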
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index bdc911dbcde5..c550d7d45f2f 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
+#include <linux/module_symbol.h>
#include <linux/kallsyms.h>
#include <linux/buildid.h>
#include <linux/bsearch.h>
@@ -78,6 +79,7 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
unsigned int shnum, unsigned int pcpundx)
{
const Elf_Shdr *sec;
+ enum mod_mem_type type;
if (src->st_shndx == SHN_UNDEF ||
src->st_shndx >= shnum ||
@@ -90,11 +92,12 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
#endif
sec = sechdrs + src->st_shndx;
+ type = sec->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT;
if (!(sec->sh_flags & SHF_ALLOC)
#ifndef CONFIG_KALLSYMS_ALL
|| !(sec->sh_flags & SHF_EXECINSTR)
#endif
- || (sec->sh_entsize & INIT_OFFSET_MASK))
+ || mod_mem_type_is_init(type))
return false;
return true;
@@ -113,11 +116,13 @@ void layout_symtab(struct module *mod, struct load_info *info)
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst, strtab_size = 0;
+ struct module_memory *mod_mem_data = &mod->mem[MOD_DATA];
+ struct module_memory *mod_mem_init_data = &mod->mem[MOD_INIT_DATA];
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
- symsect->sh_entsize = module_get_offset(mod, &mod->init_layout.size, symsect,
- info->index.sym) | INIT_OFFSET_MASK;
+ symsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA,
+ symsect, info->index.sym);
pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
src = (void *)info->hdr + symsect->sh_offset;
@@ -134,28 +139,27 @@ void layout_symtab(struct module *mod, struct load_info *info)
}
/* Append room for core symbols at end of core part. */
- info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
- info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
- mod->data_layout.size += strtab_size;
+ info->symoffs = ALIGN(mod_mem_data->size, symsect->sh_addralign ?: 1);
+ info->stroffs = mod_mem_data->size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod_mem_data->size += strtab_size;
/* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */
- info->core_typeoffs = mod->data_layout.size;
- mod->data_layout.size += ndst * sizeof(char);
- mod->data_layout.size = strict_align(mod->data_layout.size);
+ info->core_typeoffs = mod_mem_data->size;
+ mod_mem_data->size += ndst * sizeof(char);
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
- strsect->sh_entsize = module_get_offset(mod, &mod->init_layout.size, strsect,
- info->index.str) | INIT_OFFSET_MASK;
+ strsect->sh_entsize = module_get_offset_and_type(mod, MOD_INIT_DATA,
+ strsect, info->index.str);
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
/* We'll tack temporary mod_kallsyms on the end. */
- mod->init_layout.size = ALIGN(mod->init_layout.size,
- __alignof__(struct mod_kallsyms));
- info->mod_kallsyms_init_off = mod->init_layout.size;
- mod->init_layout.size += sizeof(struct mod_kallsyms);
- info->init_typeoffs = mod->init_layout.size;
- mod->init_layout.size += nsrc * sizeof(char);
- mod->init_layout.size = strict_align(mod->init_layout.size);
+ mod_mem_init_data->size = ALIGN(mod_mem_init_data->size,
+ __alignof__(struct mod_kallsyms));
+ info->mod_kallsyms_init_off = mod_mem_init_data->size;
+
+ mod_mem_init_data->size += sizeof(struct mod_kallsyms);
+ info->init_typeoffs = mod_mem_init_data->size;
+ mod_mem_init_data->size += nsrc * sizeof(char);
}
/*
@@ -171,9 +175,11 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
char *s;
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
unsigned long strtab_size;
+ void *data_base = mod->mem[MOD_DATA].base;
+ void *init_data_base = mod->mem[MOD_INIT_DATA].base;
/* Set up to point into init section. */
- mod->kallsyms = (void __rcu *)mod->init_layout.base +
+ mod->kallsyms = (void __rcu *)init_data_base +
info->mod_kallsyms_init_off;
rcu_read_lock();
@@ -183,15 +189,15 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
/* Make sure we get permanent strtab: don't use info->strtab. */
rcu_dereference(mod->kallsyms)->strtab =
(void *)info->sechdrs[info->index.str].sh_addr;
- rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
+ rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs;
/*
* Now populate the cut down core kallsyms for after init
* and set types up while we still have access to sections.
*/
- mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
- mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
- mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
+ mod->core_kallsyms.symtab = dst = data_base + info->symoffs;
+ mod->core_kallsyms.strtab = s = data_base + info->stroffs;
+ mod->core_kallsyms.typetab = data_base + info->core_typeoffs;
strtab_size = info->core_typeoffs - info->stroffs;
src = rcu_dereference(mod->kallsyms)->symtab;
for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) {
@@ -238,18 +244,6 @@ void init_build_id(struct module *mod, const struct load_info *info)
}
#endif
-/*
- * This ignores the intensely annoying "mapping symbols" found
- * in ARM ELF files: $a, $t and $d.
- */
-static inline int is_arm_mapping_symbol(const char *str)
-{
- if (str[0] == '.' && str[1] == 'L')
- return true;
- return str[0] == '$' && strchr("axtd", str[1]) &&
- (str[2] == '\0' || str[2] == '.');
-}
-
static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, unsigned int symnum)
{
return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
@@ -267,12 +261,15 @@ static const char *find_kallsyms_symbol(struct module *mod,
unsigned int i, best = 0;
unsigned long nextval, bestval;
struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+ struct module_memory *mod_mem;
/* At worse, next value is at end of module */
if (within_module_init(addr, mod))
- nextval = (unsigned long)mod->init_layout.base + mod->init_layout.text_size;
+ mod_mem = &mod->mem[MOD_INIT_TEXT];
else
- nextval = (unsigned long)mod->core_layout.base + mod->core_layout.text_size;
+ mod_mem = &mod->mem[MOD_TEXT];
+
+ nextval = (unsigned long)mod_mem->base + mod_mem->size;
bestval = kallsyms_symbol_value(&kallsyms->symtab[best]);
@@ -292,7 +289,7 @@ static const char *find_kallsyms_symbol(struct module *mod,
* and inserted at a whim.
*/
if (*kallsyms_symbol_name(kallsyms, i) == '\0' ||
- is_arm_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
+ is_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
continue;
if (thisval <= addr && thisval > bestval) {
@@ -505,8 +502,7 @@ unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
}
int module_kallsyms_on_each_symbol(const char *modname,
- int (*fn)(void *, const char *,
- struct module *, unsigned long),
+ int (*fn)(void *, const char *, unsigned long),
void *data)
{
struct module *mod;
@@ -535,7 +531,7 @@ int module_kallsyms_on_each_symbol(const char *modname,
continue;
ret = fn(data, kallsyms_symbol_name(kallsyms, i),
- mod, kallsyms_symbol_value(sym));
+ kallsyms_symbol_value(sym));
if (ret != 0)
goto out;
}
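
One note on the module_kallsyms_on_each_symbol() change in the hunk above: callbacks lose the struct module * argument. A caller sketch under the new signature, with a hypothetical sym_query cookie type:

	static int find_sym_cb(void *data, const char *name, unsigned long addr)
	{
		struct sym_query *q = data;	/* hypothetical cookie */

		if (!strcmp(name, q->wanted)) {
			q->addr = addr;
			return 1;	/* non-zero stops the walk */
		}
		return 0;
	}

	/* Walk only the named module's symbols: */
	ret = module_kallsyms_on_each_symbol("my_module", find_sym_cb, &q);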
diff --git a/kernel/module/kdb.c b/kernel/module/kdb.c
index f4317f92e189..995c32d3698f 100644
--- a/kernel/module/kdb.c
+++ b/kernel/module/kdb.c
@@ -26,10 +26,11 @@ int kdb_lsmod(int argc, const char **argv)
if (mod->state == MODULE_STATE_UNFORMED)
continue;
- kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
-#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
- kdb_printf("/%8u", mod->data_layout.size);
-#endif
+ kdb_printf("%-20s%8u", mod->name, mod->mem[MOD_TEXT].size);
+ kdb_printf("/%8u", mod->mem[MOD_RODATA].size);
+ kdb_printf("/%8u", mod->mem[MOD_RO_AFTER_INIT].size);
+ kdb_printf("/%8u", mod->mem[MOD_DATA].size);
+
kdb_printf(" 0x%px ", (void *)mod);
#ifdef CONFIG_MODULE_UNLOAD
kdb_printf("%4d ", module_refcount(mod));
@@ -40,10 +41,10 @@ int kdb_lsmod(int argc, const char **argv)
kdb_printf(" (Loading)");
else
kdb_printf(" (Live)");
- kdb_printf(" 0x%px", mod->core_layout.base);
-#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
- kdb_printf("/0x%px", mod->data_layout.base);
-#endif
+ kdb_printf(" 0x%px", mod->mem[MOD_TEXT].base);
+ kdb_printf("/0x%px", mod->mem[MOD_RODATA].base);
+ kdb_printf("/0x%px", mod->mem[MOD_RO_AFTER_INIT].base);
+ kdb_printf("/0x%px", mod->mem[MOD_DATA].base);
#ifdef CONFIG_MODULE_UNLOAD
{
diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c
new file mode 100644
index 000000000000..0800d9891692
--- /dev/null
+++ b/kernel/module/kmod.c
@@ -0,0 +1,180 @@
+/*
+ * kmod - the kernel module loader
+ *
+ * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+#include <linux/rwsem.h>
+#include <linux/ptrace.h>
+#include <linux/async.h>
+#include <linux/uaccess.h>
+
+#include <trace/events/module.h>
+#include "internal.h"
+
+/*