diff options
34 files changed, 311 insertions, 303 deletions
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index cb30ca3df27c..a748e7eb4429 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -146,9 +146,9 @@ System kernel config options CONFIG_SYSFS=y Note that "sysfs file system support" might not appear in the "Pseudo - filesystems" menu if "Configure standard kernel features (for small - systems)" is not enabled in "General Setup." In this case, check the - .config file itself to ensure that sysfs is turned on, as follows:: + filesystems" menu if "Configure standard kernel features (expert users)" + is not enabled in "General Setup." In this case, check the .config file + itself to ensure that sysfs is turned on, as follows:: grep 'CONFIG_SYSFS' .config @@ -533,6 +533,10 @@ the following command:: cp /proc/vmcore <dump-file> +or use scp to write out the dump file between hosts on a network, e.g:: + + scp /proc/vmcore remote_username@remote_ip:<dump-file> + You can also use makedumpfile utility to write out the dump file with specified options to filter out unwanted contents, e.g:: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9927564db88e..b7ccaa2ea867 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3792,6 +3792,11 @@ bit 3: print locks info if CONFIG_LOCKDEP is on bit 4: print ftrace buffer bit 5: print all printk messages in buffer + bit 6: print all CPUs backtrace (if available in the arch) + *Be aware* that this option may print a _lot_ of lines, + so there are risks of losing older messages in the log. + Use this option carefully, maybe worth to setup a + bigger log buffer with "log_buf_len" along with this. panic_on_taint= Bitmask for conditionally calling panic() in add_taint() Format: <hex>[,nousertaint] diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 803c60bf21d3..1144ea3229a3 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -763,6 +763,8 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer +bit 5 print all printk messages in buffer +bit 6 print all CPUs backtrace (if available in the arch) ===== ============================================ So for example to print tasks and memory info on panic, user can:: diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst index 02102be7ff49..dc791c8d84d1 100644 --- a/Documentation/dev-tools/sparse.rst +++ b/Documentation/dev-tools/sparse.rst @@ -100,3 +100,5 @@ have already built it. The optional make variable CF can be used to pass arguments to sparse. The build system passes -Wbitwise to sparse automatically. + +Note that sparse defines the __CHECKER__ preprocessor symbol. diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9e26ec80d317..8ac25f19084e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -90,7 +90,6 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; #endif -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -104,6 +103,9 @@ static void __init reserve_crashkernel(void) unsigned long long crash_max = arm64_dma_phys_limit; int ret; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); /* no crashkernel= or invalid value specified */ @@ -136,11 +138,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#else -static void __init reserve_crashkernel(void) -{ -} -#endif /* CONFIG_KEXEC_CORE */ /* * Return the maximum physical address for a zone accessible by the given bits diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0d588032d6e6..f6275b317129 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -957,7 +957,6 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -974,6 +973,8 @@ static void __init reserve_crashkernel(void) int ret = 0; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; /* * Don't reserve a region for a crash kernel on a crash kernel * since it doesn't make much sense and we have limited memory @@ -1023,7 +1024,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#endif /* CONFIG_KEXEC_CORE */ void __init paging_init(void) { @@ -1037,9 +1037,7 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); -#ifdef CONFIG_KEXEC_CORE reserve_crashkernel(); -#endif memblock_dump_all(); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d7e1788c91..c95b9ac5a457 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -411,8 +411,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) * --------- Crashkernel reservation ------------------------------ */ -#ifdef CONFIG_KEXEC_CORE - /* 16M alignment for crash kernel regions */ #define CRASH_ALIGN SZ_16M @@ -490,6 +488,9 @@ static void __init reserve_crashkernel(void) bool high = false; int ret; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + total_mem = memblock_phys_mem_size(); /* crashkernel=XM */ @@ -555,11 +556,6 @@ static void __init reserve_crashkernel(void) crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); } -#else -static void __init reserve_crashkernel(void) -{ -} -#endif static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, diff --git a/fs/fat/dir.c b/fs/fat/dir.c index c4a274285858..249825017da7 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -722,7 +722,7 @@ static int func(struct dir_context *ctx, const char *name, int name_len, \ if (name_len >= sizeof(d1->d_name)) \ name_len = sizeof(d1->d_name) - 1; \ \ - if (put_user(0, d2->d_name) || \ + if (put_user(0, &d2->d_name[0]) || \ put_user(0, &d2->d_reclen) || \ copy_to_user(d1->d_name, name, name_len) || \ put_user(0, d1->d_name + name_len) || \ diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 700228c7f38b..f1a6610e4ee6 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -448,7 +448,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { diff --git a/fs/pipe.c b/fs/pipe.c index 2667db9506e2..9648ac15164a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -607,7 +607,7 @@ out: static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; - int count, head, tail, mask; + unsigned int count, head, tail, mask; switch (cmd) { case FIONREAD: @@ -804,7 +804,7 @@ struct pipe_inode_info *alloc_pipe_info(void) if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) goto out_revert_acct; - pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), + pipe->bufs = kvcalloc(pipe_bufs, sizeof(struct pipe_buffer), GFP_KERNEL_ACCOUNT); if (pipe->bufs) { @@ -829,7 +829,7 @@ out_free_uid: void free_pipe_info(struct pipe_inode_info *pipe) { - int i; + unsigned int i; #ifdef CONFIG_WATCH_QUEUE if (pipe->watch_queue) @@ -849,7 +849,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) #endif if (pipe->tmp_page) __free_page(pipe->tmp_page); - kfree(pipe->bufs); + kvfree(pipe->bufs); kfree(pipe); } @@ -1264,8 +1264,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) if (nr_slots < n) return -EBUSY; - bufs = kcalloc(nr_slots, sizeof(*bufs), - GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + bufs = kvcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT); if (unlikely(!bufs)) return -ENOMEM; @@ -1292,7 +1291,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) head = n; tail = 0; - kfree(pipe->bufs); + kvfree(pipe->bufs); pipe->bufs = bufs; pipe->ring_size = nr_slots; if (pipe->max_usage > nr_slots) diff --git a/fs/proc/base.c b/fs/proc/base.c index d654ce7150fd..76bf1aa3cfe8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1764,25 +1764,25 @@ out: static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - char *tmp = (char *)__get_free_page(GFP_KERNEL); + char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); char *pathname; int len; if (!tmp) return -ENOMEM; - pathname = d_path(path, tmp, PAGE_SIZE); + pathname = d_path(path, tmp, PATH_MAX); len = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - pathname; + len = tmp + PATH_MAX - 1 - pathname; if (len > buflen) len = buflen; if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: - free_page((unsigned long)tmp); + kfree(tmp); return len; } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 702754dd1daf..6f1b8ddc6f7a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -62,7 +62,8 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0); /* Device Dump Size */ static size_t vmcoredd_orig_sz; -static DECLARE_RWSEM(vmcore_cb_rwsem); +static DEFINE_SPINLOCK(vmcore_cb_lock); +DEFINE_STATIC_SRCU(vmcore_cb_srcu); /* List of registered vmcore callbacks. */ static LIST_HEAD(vmcore_cb_list); /* Whether the vmcore has been opened once. */ @@ -70,8 +71,8 @@ static bool vmcore_opened; void register_vmcore_cb(struct vmcore_cb *cb) { - down_write(&vmcore_cb_rwsem); INIT_LIST_HEAD(&cb->next); + spin_lock(&vmcore_cb_lock); list_add_tail(&cb->next, &vmcore_cb_list); /* * Registering a vmcore callback after the vmcore was opened is @@ -79,14 +80,14 @@ void register_vmcore_cb(struct vmcore_cb *cb) */ if (vmcore_opened) pr_warn_once("Unexpected vmcore callback registration\n"); - up_write(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); } EXPORT_SYMBOL_GPL(register_vmcore_cb); void unregister_vmcore_cb(struct vmcore_cb *cb) { - down_write(&vmcore_cb_rwsem); - list_del(&cb->next); + spin_lock(&vmcore_cb_lock); + list_del_rcu(&cb->next); /* * Unregistering a vmcore callback after the vmcore was opened is * very unusual (e.g., forced driver removal), but we cannot stop @@ -94,7 +95,9 @@ void unregister_vmcore_cb(struct vmcore_cb *cb) */ if (vmcore_opened) pr_warn_once("Unexpected vmcore callback unregistration\n"); - up_write(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); + + synchronize_srcu(&vmcore_cb_srcu); } EXPORT_SYMBOL_GPL(unregister_vmcore_cb); @@ -103,9 +106,8 @@ static bool pfn_is_ram(unsigned long pfn) struct vmcore_cb *cb; bool ret = true; - lockdep_assert_held_read(&vmcore_cb_rwsem); - - list_for_each_entry(cb, &vmcore_cb_list, next) { + list_for_each_entry_srcu(cb, &vmcore_cb_list, next, + srcu_read_lock_held(&vmcore_cb_srcu)) { if (unlikely(!cb->pfn_is_ram)) continue; ret = cb->pfn_is_ram(cb, pfn); @@ -118,9 +120,9 @@ static bool pfn_is_ram(unsigned long pfn) static int open_vmcore(struct inode *inode, struct file *file) { - down_read(&vmcore_cb_rwsem); + spin_lock(&vmcore_cb_lock); vmcore_opened = true; - up_read(&vmcore_cb_rwsem); + spin_unlock(&vmcore_cb_lock); return 0; } @@ -133,6 +135,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, unsigned long pfn, offset; size_t nr_bytes; ssize_t read = 0, tmp; + int idx; if (!count) return 0; @@ -140,7 +143,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = (unsigned long)(*ppos % PAGE_SIZE); pfn = (unsigned long)(*ppos / PAGE_SIZE); - down_read(&vmcore_cb_rwsem); + idx = srcu_read_lock(&vmcore_cb_srcu); do { if (count > (PAGE_SIZE - offset)) nr_bytes = PAGE_SIZE - offset; @@ -165,7 +168,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset, userbuf); } if (tmp < 0) { - up_read(&vmcore_cb_rwsem); + srcu_read_unlock(&vmcore_cb_srcu, idx); return tmp; } @@ -176,8 +179,8 @@ ssize_t read_from_oldmem(char *buf, size_t count, ++pfn; offset = 0; } while (count); + srcu_read_unlock(&vmcore_cb_srcu, idx); - up_read(&vmcore_cb_rwsem); return read; } @@ -477,7 +480,7 @@ static const struct vm_operations_struct vmcore_mmap_ops = { /** * vmcore_alloc_buf - allocate buffer in vmalloc memory - * @sizez: size of buffer + * @size: size of buffer * * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap * the buffer to user-space by means of remap_vmalloc_range(). @@ -568,18 +571,18 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { - int ret; + int ret, idx; /* - * Check if oldmem_pfn_is_ram was registered to avoid - * looping over all pages without a reason. + * Check if a callback was registered to avoid looping over all + * pages without a reason. */ - down_read(&vmcore_cb_rwsem); + idx = srcu_read_lock(&vmcore_cb_srcu); if (!list_empty(&vmcore_cb_list)) ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot); else ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot); - up_read(&vmcore_cb_rwsem); + srcu_read_unlock(&vmcore_cb_srcu, idx); return ret; } diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6093fa6db260..c9be1657f03d 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -19,6 +19,9 @@ * * Example: * + * #include <linux/bitfield.h> + * #include <linux/bits.h> + * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1bc760ba400c..1c2c33ae1b37 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -11,6 +11,7 @@ # define BTF_TYPE_TAG(value) /* nothing */ #endif +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) @@ -144,8 +145,6 @@ struct ftrace_likely_data { */ #define __naked __attribute__((__naked__)) notrace -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - /* * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 diff --git a/include/linux/init.h b/include/linux/init.h index d82b4b2e1d25..baf0b29a7010 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -320,12 +320,19 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } +/* + * NOTE: __setup functions return values: + * @fn returns 1 (or non-zero) if the option argument is "handled" + * and returns 0 if the option argument is "not handled". + */ #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) /* - * NOTE: fn is as per module_param, not __setup! - * Emits warning if fn returns non-zero. + * NOTE: @fn is as per module_param, not __setup! + * I.e., @fn returns 0 for no error or non-zero for error + * (possibly @fn returns a -errno value, but it does not matter). + * Emits warning if @fn returns non-zero. */ #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0c994ae37729..58d1b58a971e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -20,6 +20,12 @@ #include <uapi/linux/kexec.h> +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; +extern note_buf_t __percpu *crash_notes; + #ifdef CONFIG_KEXEC_CORE #include <linux/list.h> #include <linux/compat.h> @@ -350,12 +356,6 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; -extern note_buf_t __percpu *crash_notes; - /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/include/linux/log2.h b/include/linux/log2.h index df0b155c2141..9f30d087a128 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -18,7 +18,7 @@ * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; @@ -26,7 +26,7 @@ int __ilog2_u32(u32 n) #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; diff --git a/include/linux/stddef.h b/include/linux/stddef.h index ca507bd5f808..929d67710cc5 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -13,11 +13,7 @@ enum { }; #undef offsetof -#ifdef __compiler_offsetof -#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) -#else -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) /** * sizeof_field() - Report the size of a struct field in bytes diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index f6d2f83cbe29..c4dc597f3dcf 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -19,12 +19,12 @@ * any application/library that wants linux/types.h. */ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; diff --git a/init/main.c b/init/main.c index 0c064c2c79fd..98182c3c2c4b 100644 --- a/init/main.c +++ b/init/main.c @@ -1192,7 +1192,7 @@ static int __init initcall_blacklist(char *str) } } while (str_entry); - return 0; + return 1; } static bool __init_or_module initcall_blacklisted(initcall_t fn) @@ -1248,15 +1248,11 @@ trace_initcall_start_cb(void *data, initcall_t fn) static __init_or_module void trace_initcall_finish_cb(void *data, initcall_t fn, int ret) { - ktime_t *calltime = (ktime_t *)data; - ktime_t delta, rettime; - unsigned long long duration; + ktime_t rettime, *calltime = (ktime_t *)data; rettime = ktime_get(); - delta = ktime_sub(rettime, *calltime); - duration = (unsigned long long) ktime_to_ns(delta) >> 10; printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, duration); + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } static ktime_t initcall_calltime; @@ -1454,7 +1450,9 @@ static noinline void __init kernel_init_freeable(void); bool rodata_enabled __ro_after_init = true; static int __init set_debug_rodata(char *str) { - return strtobool(str, &rodata_enabled); + if (strtobool(str, &rodata_enabled)) + pr_warn("Invalid option string for rodata: '%s'\n", str); + return 1; } __setup("rodata=", set_debug_rodata); #endif diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 29ea74f0eab3..24b5c2ab5598 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -153,8 +153,17 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); struct cgroup *pos = NULL; + unsigned long flags; - raw_spin_lock(cpu_lock); + /* + * The _irqsave() is needed because cgroup_rstat_lock is + * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring + * this lock with the _irq() suffix only disables interrupts on + * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables + * interrupts on both configurations. The _irqsave() ensures + * that interrupts are always disabled and later restored. + */ + raw_spin_lock_irqsave(cpu_lock, flags); while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { struct cgroup_subsys_state *css; @@ -166,7 +175,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) css->ss->css_rstat_flush(css, cpu); rcu_read_unlock(); } - raw_spin_unlock(cpu_lock); + raw_spin_unlock_irqrestore(cpu_lock, flags); /* if @may_sleep, play nice and yield if necessary */ if (may_sleep && (need_resched() || diff --git a/kernel/kcov.c b/kernel/kcov.c index 36ca640c4f8e..475524bd900a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -459,37 +459,28 @@ void kcov_task_exit(struct task_struct *t) static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) { int res = 0; - void *area; struct kcov *kcov = vma->vm_file->private_data; unsigned long size, off; struct page *page; unsigned long flags; - area = vmalloc_user(vma->vm_end - vma->vm_start); - if (!area) - return -ENOMEM; - spin_lock_irqsave(&kcov->lock, flags); size = kcov->size * sizeof(unsigned long); - if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || + if (kcov->area == NULL || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; } - if (!kcov->area) { - kcov->area = area; - vma->vm_flags |= VM_DONTEXPAND; - spin_unlock_irqrestore(&kcov->lock, flags); - for (off = 0; off < size; off += PAGE_SIZE) { - page = vmalloc_to_page(kcov->area + off); - if (vm_insert_page(vma, vma->vm_start + off, page)) - WARN_ONCE(1, "vm_insert_page() failed"); - } - return 0; + spin_unlock_irqrestore(&kcov->lock, flags); + vma->vm_flags |= VM_DONTEXPAND; + for (off = 0; off < size; off += PAGE_SIZE) { + page = vmalloc_to_page(kcov->area + off); + if (vm_insert_page(vma, vma->vm_start + off, page)) + WARN_ONCE(1, "vm_insert_page() failed"); } + return 0; exit: spin_unlock_irqrestore(&kcov->lock, flags); - vfree(area); return res; } @@ -564,31 +555,12 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; - unsigned long size, unused; + unsigned long flags, unused; int mode, i; struct kcov_remote_arg *remote_arg; struct kcov_remote *remote; - unsigned long flags; switch (cmd) { - case KCOV_INIT_TRACE: - /* - * Enable kcov in trace mode and setup buffer size. - * Must happen before anything else. - */ - if (kcov->mode != KCOV_MODE_DISABLED) - return -EBUSY; - /* - * Size must be at least 2 to hold current position and one PC. - * Later we allocate size * sizeof(unsigned long) memory, - * that must not overflow. - */ - size = arg; - if (size < 2 || size > INT_MAX / sizeof(unsigned long)) - return -EINVAL; - kcov->size = size; - kcov->mode = KCOV_MODE_INIT; - return 0; case KCOV_ENABLE: /* * Enable coverage for the current task. @@ -692,9 +664,37 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) struct kcov_remote_arg *remote_arg = NULL; unsigned int remote_num_handles; unsigned long remote_arg_size; - unsigned long flags; + unsigned long size, flags; + void *area; - if (cmd == KCOV_REMOTE_ENABLE) { + kcov = filep->private_data; + switch (cmd) { + case KCOV_INIT_TRACE: + /* + * Enable kcov in trace mode and setup buffer size. + * Must happen before anything else. + * + * First check the size argument - it must be at least 2 + * to hold the current position and one PC. + */ + size = arg; + if (size < 2 || size > INT_MAX / sizeof(unsigned long)) + return -EINVAL; + area = vmalloc_user(size * sizeof(unsigned long)); + if (area == NULL) + return -ENOMEM; + spin_lock_irqsave(&kcov->lock, flags); + if (kcov->mode != KCOV_MODE_DISABLED) { + spin_unlock_irqrestore(&kcov->lock, flags); + vfree(area); + return -EBUSY; + } + kcov->area = area; + kcov->size = size; + kcov->mode = KCOV_MODE_INIT; + spin_unlock_irqrestore(&kcov->lock, flags); + return 0; + case KCOV_REMOTE_ENABLE: if (get_user(remote_num_handles, (unsigned __user *)(arg + offsetof(struct kcov_remote_arg, num_handles)))) return -EFAULT; @@ -710,16 +710,18 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) return -EINVAL; } arg = (unsigned long)remote_arg; + fallthrough; + default: + /* + * All other commands can be normally executed under a spin lock, so we + * obtain and release i |