diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/Makefile | 2 | ||||
| -rw-r--r-- | kernel/bpf/arena.c | 25 | ||||
| -rw-r--r-- | kernel/bpf/bloom_filter.c | 13 | ||||
| -rw-r--r-- | kernel/bpf/helpers.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 35 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 30 | ||||
| -rw-r--r-- | kernel/configs/hardening.config | 11 | ||||
| -rw-r--r-- | kernel/cpu.c | 3 | ||||
| -rw-r--r-- | kernel/crash_reserve.c | 7 | ||||
| -rw-r--r-- | kernel/dma/swiotlb.c | 91 | ||||
| -rw-r--r-- | kernel/fork.c | 33 | ||||
| -rw-r--r-- | kernel/irq/manage.c | 9 | ||||
| -rw-r--r-- | kernel/kprobes.c | 18 | ||||
| -rw-r--r-- | kernel/module/Kconfig | 5 | ||||
| -rw-r--r-- | kernel/power/suspend.c | 6 | ||||
| -rw-r--r-- | kernel/printk/printk.c | 6 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 20 | ||||
| -rw-r--r-- | kernel/sys.c | 7 | ||||
| -rw-r--r-- | kernel/time/posix-clock.c | 16 | ||||
| -rw-r--r-- | kernel/time/tick-common.c | 17 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 54 | ||||
| -rw-r--r-- | kernel/time/tick-sched.h | 2 | ||||
| -rw-r--r-- | kernel/time/timer.c | 22 | ||||
| -rw-r--r-- | kernel/time/timer_migration.c | 32 | ||||
| -rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 10 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.c | 2 |
29 files changed, 349 insertions, 141 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 368c5d86b5b7..e497011261b8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) # ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif -CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) +CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 86571e760dd6..343c3456c8dd 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -38,7 +38,7 @@ /* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ #define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8) -#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ) +#define KERN_VM_SZ (SZ_4G + GUARD_SZ) struct bpf_arena { struct bpf_map map; @@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr) return ERR_PTR(-EINVAL); vm_range = (u64)attr->max_entries * PAGE_SIZE; - if (vm_range > (1ull << 32)) + if (vm_range > SZ_4G) return ERR_PTR(-E2BIG); if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32)) @@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad if (pgoff) return -EINVAL; - if (len > (1ull << 32)) + if (len > SZ_4G) return -E2BIG; /* if user_vm_start was specified at arena creation time */ @@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad if (WARN_ON_ONCE(arena->user_vm_start)) /* checks at map creation time should prevent this */ return -EFAULT; - return round_up(ret, 1ull << 32); + return round_up(ret, SZ_4G); } static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) @@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) return -EBUSY; /* Earlier checks should prevent this */ - if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff)) + if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff)) return -EFAULT; if (remember_vma(arena, vma)) @@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt if (uaddr & ~PAGE_MASK) return 0; pgoff = compute_pgoff(arena, uaddr); - if (pgoff + page_cnt > page_cnt_max) + if (pgoff > page_cnt_max - page_cnt) /* requested address will be outside of user VMA */ return 0; } @@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt goto out; uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE); - /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */ + /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1 + * will not overflow 32-bit. Lower 32-bit need to represent + * contiguous user address range. + * Map these pages at kern_vm_start base. + * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow + * lower 32-bit and it's ok. + */ ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32, kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages); if (ret) { @@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt) if (!page) continue; if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */ + /* Optimization for the common case of page_cnt==1: + * If page wasn't mapped into some user vma there + * is no need to call zap_pages which is slow. When + * page_cnt is big it's faster to do the batched zap. + */ zap_pages(arena, full_uaddr, 1); vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE); __free_page(page); diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index addf3dd57b59..35e1ddca74d2 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key return -EOPNOTSUPP; } +/* Called from syscall */ +static int bloom_map_alloc_check(union bpf_attr *attr) +{ + if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return -E2BIG; + + return 0; +} + static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; @@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map) BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a89587859571..449b9a5d3fe3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..c287925471f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link) atomic64_inc(&link->refcnt); } +static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) +{ + struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); + + /* free bpf_link and its containing memory */ + link->ops->dealloc_deferred(link); +} + +static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_link_defer_dealloc_rcu_gp(rcu); + else + call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); +} + /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bool sleepable = false; + bpf_link_free_id(link->id); if (link->prog) { + sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ link->ops->release(link); bpf_prog_put(link->prog); } - /* free bpf_link and its containing memory */ - link->ops->dealloc(link); + if (link->ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period + */ + if (sleepable) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); + } + if (link->ops->dealloc) + link->ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, - .dealloc = bpf_raw_tp_link_dealloc, + .dealloc_deferred = bpf_raw_tp_link_dealloc, .show_fdinfo = bpf_raw_tp_link_show_fdinfo, .fill_link_info = bpf_raw_tp_link_fill_link_info, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 63749ad5ac6b..98188379d5c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_arena_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return reg->type == PTR_TO_ARENA; +} + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #ifdef CONFIG_NET [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], @@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { @@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg)) { + is_sk_reg(env, insn->dst_reg) || + is_arena_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str(env, reg_state(env, insn->dst_reg)->type)); @@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); return -EINVAL; } + if (!env->prog->aux->arena) { + verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); + return -EINVAL; + } } else { if ((insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) || insn->imm) { @@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->imm) { /* off == BPF_ADDR_SPACE_CAST */ mark_reg_unknown(env, regs, insn->dst_reg); - if (insn->imm == 1) /* cast from as(1) to as(0) */ + if (insn->imm == 1) { /* cast from as(1) to as(0) */ dst_reg->type = PTR_TO_ARENA; + /* PTR_TO_ARENA is 32-bit */ + dst_reg->subreg_def = env->insn_idx + 1; + } } else if (insn->off == 0) { /* case: R1 = R2 * copy register state to dest reg @@ -18359,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) } if (!env->prog->jit_requested) { verbose(env, "JIT is required to use arena\n"); + fdput(f); return -EOPNOTSUPP; } if (!bpf_jit_supports_arena()) { verbose(env, "JIT doesn't support arena\n"); + fdput(f); return -EOPNOTSUPP; } env->prog->aux->arena = (void *)map; if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { verbose(env, "arena's user address must be set via map_extra or mmap()\n"); + fdput(f); return -EINVAL; } } @@ -19601,8 +19624,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { /* convert to 32-bit mov that clears upper 32-bit */ insn->code = BPF_ALU | BPF_MOV | BPF_X; - /* clear off, so it's a normal 'wX = wY' from JIT pov */ + /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ insn->off = 0; + insn->imm = 0; } /* cast from as(0) to as(1) should be handled by JIT */ goto next_insn; } diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 7a5bbfc024b7..4b4cfcba3190 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -39,11 +39,12 @@ CONFIG_UBSAN=y CONFIG_UBSAN_TRAP=y CONFIG_UBSAN_BOUNDS=y # CONFIG_UBSAN_SHIFT is not set -# CONFIG_UBSAN_DIV_ZERO -# CONFIG_UBSAN_UNREACHABLE -# CONFIG_UBSAN_BOOL -# CONFIG_UBSAN_ENUM -# CONFIG_UBSAN_ALIGNMENT +# CONFIG_UBSAN_DIV_ZERO is not set +# CONFIG_UBSAN_UNREACHABLE is not set +# CONFIG_UBSAN_SIGNED_WRAP is not set +# CONFIG_UBSAN_BOOL is not set +# CONFIG_UBSAN_ENUM is not set +# CONFIG_UBSAN_ALIGNMENT is not set # Sampling-based heap out-of-bounds and use-after-free detection. CONFIG_KFENCE=y diff --git a/kernel/cpu.c b/kernel/cpu.c index 8f6affd051f7..07ad53b7f119 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3207,7 +3207,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index bbb6c3cb00e4..066668799f75 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -366,8 +366,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY insert_resource(&iomem_resource, &crashk_low_res); #endif +#endif return 0; } @@ -448,8 +450,12 @@ retry: crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; +#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + insert_resource(&iomem_resource, &crashk_res); +#endif } +#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY static __init int insert_crashkernel_resources(void) { if (crashk_res.start < crashk_res.end) @@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void) } early_initcall(insert_crashkernel_resources); #endif +#endif diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 86fe172b5958..a5e0dfc44d24 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -69,11 +69,14 @@ * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; - unsigned int list; + unsigned short list; + unsigned short pad_slots; }; static bool swiotlb_force_bounce; @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); @@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev) #endif } -/* - * Return the offset into a iotlb slot required to keep the device happy. +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. */ -static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) { - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* @@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; - unsigned int tlb_offset, orig_addr_offset; + int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; - tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); - orig_addr_offset = swiotlb_align_offset(dev, orig_addr); - if (tlb_offset < orig_addr_offset) { - dev_WARN_ONCE(dev, 1, - "Access before mapping start detected. orig offset %u, requested offset %u.\n", - orig_addr_offset, tlb_offset); - return; - } - - tlb_offset -= orig_addr_offset; - if (tlb_offset > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n", - alloc_size, size, tlb_offset); - return; - } + /* + * It's valid for tlb_offset to be negative. This can happen when the + * "offset" returned by swiotlb_align_offset() is non-zero, and the + * tlb_addr is pointing within the first "offset" bytes of the second + * or subsequent slots of the allocated swiotlb area. While it's not + * valid for tlb_addr to be pointing within the first "offset" bytes + * of the first slot, there's no way to check for such an error since + * this function can't distinguish the first slot from the second and + * subsequent slots. + */ + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; @@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; @@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset; struct io_tlb_pool *pool; unsigned int i; int index; phys_addr_t tlb_addr; + unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, @@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset, alloc_align_mask, &pool); if (index == -1) { @@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; for (i = 0; i < nr_slots(alloc_size + offset); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; @@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; - unsigned int offset = swiotlb_align_offset(dev, tlb_addr); - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; - int nslots = nr_slots(mem->slots[index].alloc_size + offset); - int aindex = index / mem->area_nslabs; - struct io_tlb_area *area = &mem->areas[aindex]; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; int count, i; + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; + /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. @@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } /* @@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { - atomic_long_set(&mem->total_used, 0); - atomic_long_set(&mem->used_hiwater, 0); - mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; @@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); #ifdef CONFIG_SWIOTLB_DYNAMIC - atomic_long_set(&mem->transient_nslabs, 0); debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, mem, &fops_io_tlb_transient_used); #endif diff --git a/kernel/fork.c b/kernel/fork.c index 39a5046c2f0b..aebb3e6c96dc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; vm_flags_clear(tmp, VM_LOCKED_MASK); + /* + * Copy/update hugetlb private vma information. + */ + if (is_vm_hugetlb_page(tmp)) + hugetlb_dup_vma_private(tmp); + + /* + * Link the vma into the MT. After using __mt_dup(), memory + * allocation is not necessary here, so it cannot fail. + */ + vma_iter_bulk_store(&vmi, tmp); + + mm->map_count++; + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; @@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, i_mmap_unlock_write(mapping); } - /* - * Copy/update hugetlb private vma information. - */ - if (is_vm_hugetlb_page(tmp)) - hugetlb_dup_vma_private(tmp); - - /* - * Link the vma into the MT. After using __mt_dup(), memory - * allocation is not necessary here, so it cannot fail. - */ - vma_iter_bulk_store(&vmi, tmp); - - mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); - if (tmp->vm_ops && tmp->vm_ops->open) - tmp->vm_ops->open(tmp); - if (retval) { mpnt = vma_next(&vmi); goto loop_out; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..bf9ae8a8686f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9d9095e81792..65adc815fc6e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. */ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index c3ced519e14b..f3e0329337f6 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -236,6 +236,10 @@ choice possible to load a signed module containing the algorithm to check the signature on that module. +config MODULE_SIG_SHA1 + bool "Sign modules with SHA-1" + select CRYPTO_SHA1 + config MODULE_SIG_SHA256 bool "Sign modules with SHA-256" select CRYPTO_SHA256 @@ -265,6 +269,7 @@ endchoice config MODULE_SIG_HASH string depends on MODULE_SIG || IMA_APPRAISE_MODSIG + default "sha1" if MODULE_SIG_SHA1 default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index e3ae93bbcb9b..09f8397bae15 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,6 +106,12 @@ static void s2idle_enter(void) swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); + /* + * Kick all CPUs to ensure that they resume their timers and restore + * consistent system state. + */ + wake_up_all_idle_cpus(); + cpus_read_unlock(); raw_spin_lock_irq(&s2idle_lock); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca5146006b94..adf99c05adca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2009,6 +2009,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. + */ + console_may_schedule = 0; + return 1; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d2242679239e..ae50f212775e 100644 --- a/kernel/sched/sched.h +++ b/ |
