diff options
| author | Ingo Molnar <mingo@kernel.org> | 2023-01-31 15:01:20 +0100 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2023-01-31 15:01:20 +0100 |
| commit | 57a30218fa25c469ed507964bbf028b7a064309a (patch) | |
| tree | 231c7ab6b08f567077b96ed760aaf4c685c899f3 /kernel | |
| parent | 68efe8f7a1c5168be2228bfb806ddc05475b7205 (diff) | |
| parent | 6d796c50f84ca79f1722bb131799e5a5710c4700 (diff) | |
| download | linux-57a30218fa25c469ed507964bbf028b7a064309a.tar.gz linux-57a30218fa25c469ed507964bbf028b7a064309a.tar.bz2 linux-57a30218fa25c469ed507964bbf028b7a064309a.zip | |
Merge tag 'v6.2-rc6' into sched/core, to pick up fixes
Pick up fixes before merging another batch of cpuidle updates.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
34 files changed, 330 insertions, 173 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 9ea42a45da47..a4a41ee3e80b 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -351,8 +351,10 @@ BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) +#endif /* CONFIG_SECURITY_NETWORK */ BTF_ID(func, bpf_lsm_task_free) BTF_SET_END(untrusted_lsm_hooks) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5aa2b5525f79..66bded144377 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,7 +152,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, { unsigned long flags; - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); preempt_disable(); if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { @@ -171,7 +171,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, struct bucket *b, u32 hash, unsigned long flags) { - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); raw_spin_unlock_irqrestore(&b->raw_lock, flags); __this_cpu_dec(*(htab->map_locked[hash])); preempt_enable(); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 13e4efc971e6..190d9f9dc987 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) offload->offdev->ops->destroy(prog); - /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ - bpf_prog_free_id(prog, true); - list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64131f88c553..ecca9366c7a6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1972,7 +1972,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) return; if (audit_enabled == AUDIT_OFF) return; - if (op == BPF_AUDIT_LOAD) + if (!in_irq() && !irqs_disabled()) ctx = audit_context(); ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); if (unlikely(!ab)) @@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +void bpf_prog_free_id(struct bpf_prog *prog) { unsigned long flags; @@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) if (!prog->aux->id) return; - if (do_idr_lock) - spin_lock_irqsave(&prog_idr_lock, flags); - else - __acquire(&prog_idr_lock); - + spin_lock_irqsave(&prog_idr_lock, flags); idr_remove(&prog_idr, prog->aux->id); prog->aux->id = 0; - - if (do_idr_lock) - spin_unlock_irqrestore(&prog_idr_lock, flags); - else - __release(&prog_idr_lock); + spin_unlock_irqrestore(&prog_idr_lock, flags); } static void __bpf_prog_put_rcu(struct rcu_head *rcu) @@ -2067,17 +2059,15 @@ static void bpf_prog_put_deferred(struct work_struct *work) prog = aux->prog; perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + bpf_prog_free_id(prog); __bpf_prog_put_noref(prog, true); } -static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) +static void __bpf_prog_put(struct bpf_prog *prog) { struct bpf_prog_aux *aux = prog->aux; if (atomic64_dec_and_test(&aux->refcnt)) { - /* bpf_prog_free_id() must be called first */ - bpf_prog_free_id(prog, do_idr_lock); - if (in_irq() || irqs_disabled()) { INIT_WORK(&aux->work, bpf_prog_put_deferred); schedule_work(&aux->work); @@ -2089,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) void bpf_prog_put(struct bpf_prog *prog) { - __bpf_prog_put(prog, true); + __bpf_prog_put(prog); } EXPORT_SYMBOL_GPL(bpf_prog_put); diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c2a2182ce570..c4ab9d6cdbe9 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; + struct mm_struct *mm; struct vm_area_struct *vma; u32 tid; unsigned long prev_vm_start; @@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; + struct mm_struct *curr_mm; u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to - * the task_struct, and holds read lock on vma->mm->mmap_lock. + * the task_struct, holds a refcount on mm->mm_users, and holds + * read lock on vma->mm->mmap_lock. * If this function returns NULL, it does not hold any reference or * lock. */ if (info->task) { curr_task = info->task; curr_vma = info->vma; + curr_mm = info->mm; /* In case of lock contention, drop mmap_lock to unblock * the writer. * @@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) * 4.2) VMA2 and VMA2' covers different ranges, process * VMA2'. */ - if (mmap_lock_is_contended(curr_task->mm)) { + if (mmap_lock_is_contended(curr_mm)) { info->prev_vm_start = curr_vma->vm_start; info->prev_vm_end = curr_vma->vm_end; op = task_vma_iter_find_vma; - mmap_read_unlock(curr_task->mm); - if (mmap_read_lock_killable(curr_task->mm)) + mmap_read_unlock(curr_mm); + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } else { op = task_vma_iter_next_vma; } @@ -535,42 +541,47 @@ again: op = task_vma_iter_find_vma; } - if (!curr_task->mm) + curr_mm = get_task_mm(curr_task); + if (!curr_mm) goto next_task; - if (mmap_read_lock_killable(curr_task->mm)) + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } switch (op) { case task_vma_iter_first_vma: - curr_vma = find_vma(curr_task->mm, 0); + curr_vma = find_vma(curr_mm, 0); break; case task_vma_iter_next_vma: - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; case task_vma_iter_find_vma: /* We dropped mmap_lock so it is necessary to use find_vma * to find the next vma. This is similar to the mechanism * in show_smaps_rollup(). */ - curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1); + curr_vma = find_vma(curr_mm, info->prev_vm_end - 1); /* case 1) and 4.2) above just use curr_vma */ /* check for case 2) or case 4.1) above */ if (curr_vma && curr_vma->vm_start == info->prev_vm_start && curr_vma->vm_end == info->prev_vm_end) - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; } if (!curr_vma) { /* case 3) above, or case 2) 4.1) with vma->next == NULL */ - mmap_read_unlock(curr_task->mm); + mmap_read_unlock(curr_mm); + mmput(curr_mm); goto next_task; } info->task = curr_task; info->vma = curr_vma; + info->mm = curr_mm; return curr_vma; next_task: @@ -579,6 +590,7 @@ next_task: put_task_struct(curr_task); info->task = NULL; + info->mm = NULL; info->tid++; goto again; @@ -587,6 +599,7 @@ finish: put_task_struct(curr_task); info->task = NULL; info->vma = NULL; + info->mm = NULL; return NULL; } @@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v) */ info->prev_vm_start = ~0UL; info->prev_vm_end = info->vma->vm_end; - mmap_read_unlock(info->task->mm); + mmap_read_unlock(info->mm); + mmput(info->mm); + info->mm = NULL; put_task_struct(info->task); info->task = NULL; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 11f5ec0b8016..d0ed7d6f5eec 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -488,6 +488,10 @@ again: /* reset fops->func and fops->trampoline for re-register */ tr->fops->func = NULL; tr->fops->trampoline = 0; + + /* reset im->image memory attr for arch_prepare_bpf_trampoline */ + set_memory_nx((long)im->image, 1); + set_memory_rw((long)im->image, 1); goto again; } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a5255a0dcbb6..dbef0b0967ae 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env, */ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { + size_t alloc_bytes; + void *orig = dst; size_t bytes; if (ZERO_OR_NULL_PTR(src)) @@ -1062,11 +1064,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < ksize(src)) { - kfree(dst); - dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); - if (!dst) - return NULL; + alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); + dst = krealloc(orig, alloc_bytes, flags); + if (!dst) { + kfree(orig); + return NULL; } memcpy(dst, src, bytes); @@ -2746,6 +2748,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg == 0 && is_callback_calling_function(insn->imm)) return -ENOTSUPP; + /* kfunc with imm==0 is invalid and fixup_kfunc_call will + * catch this error later. Make backtracking conservative + * with ENOTSUPP. + */ + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) + return -ENOTSUPP; /* regular helper call sets R0 */ *reg_mask &= ~1; if (*reg_mask & 0x3f) { @@ -3287,7 +3295,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - if (state->stack[spi].slot_type[i] == STACK_INVALID) { + u8 type = state->stack[spi].slot_type[i]; + + if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; break; } @@ -11822,10 +11832,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * register B - not null * for JNE A, B, ... - A is not null in the false branch; * for JEQ A, B, ... - A is not null in the true branch. + * + * Since PTR_TO_BTF_ID points to a kernel struct that does + * not need to be null checked by the BPF program, i.e., + * could be null even without PTR_MAYBE_NULL marking, so + * only propagate nullness when neither reg is that type. */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && - type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) { + type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) && + base_type(src_reg->type) != PTR_TO_BTF_ID && + base_type(dst_reg->type) != PTR_TO_BTF_ID) { eq_branch_regs = NULL; switch (opcode) { case BPF_JEQ: diff --git a/kernel/events/core.c b/kernel/events/core.c index eacc3702654d..d56328e5080e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -380,7 +380,6 @@ enum event_type_t { /* * perf_sched_events : >0 events exist - * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ static void perf_sched_delayed(struct work_struct *work); @@ -389,7 +388,6 @@ static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed); static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; -static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; @@ -844,9 +842,16 @@ static void perf_cgroup_switch(struct task_struct *task) struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_cgroup *cgrp; - cgrp = perf_cgroup_from_task(task, NULL); + /* + * cpuctx->cgrp is set when the first cgroup event enabled, + * and is cleared when the last cgroup event disabled. + */ + if (READ_ONCE(cpuctx->cgrp) == NULL) + return; WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + + cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; @@ -3631,8 +3636,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_switch(next); + perf_cgroup_switch(next); } static bool perf_less_group_idx(const void *l, const void *r) @@ -4974,15 +4978,6 @@ static void unaccount_pmu_sb_event(struct perf_event *event) detach_sb_event(event); } -static void unaccount_event_cpu(struct perf_event *event, int cpu) -{ - if (event->parent) - return; - - if (is_cgroup_event(event)) - atomic_dec(&per_cpu(perf_cgroup_events, cpu)); -} - #ifdef CONFIG_NO_HZ_FULL static DEFINE_SPINLOCK(nr_freq_lock); #endif @@ -5048,8 +5043,6 @@ static void unaccount_event(struct perf_event *event) schedule_delayed_work(&perf_sched_work, HZ); } - unaccount_event_cpu(event, event->cpu); - unaccount_pmu_sb_event(event); } @@ -11679,15 +11672,6 @@ static void account_pmu_sb_event(struct perf_event *event) attach_sb_event(event); } -static void account_event_cpu(struct perf_event *event, int cpu) -{ - if (event->parent) - return; - - if (is_cgroup_event(event)) - atomic_inc(&per_cpu(perf_cgroup_events, cpu)); -} - /* Freq events need the tick to stay alive (see perf_event_task_tick). */ static void account_freq_event_nohz(void) { @@ -11775,8 +11759,6 @@ static void account_event(struct perf_event *event) } enabled: - account_event_cpu(event, event->cpu); - account_pmu_sb_event(event); } @@ -12339,12 +12321,12 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; - err = perf_copy_attr(attr_uptr, &attr); + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) return err; @@ -12689,7 +12671,8 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - /* event->pmu_ctx freed by free_event() */ + put_pmu_ctx(event->pmu_ctx); + event->pmu_ctx = NULL; /* _free_event() */ err_locked: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); @@ -12802,6 +12785,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, err_pmu_ctx: put_pmu_ctx(pmu_ctx); + event->pmu_ctx = NULL; /* _free_event() */ err_unlock: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); @@ -12822,13 +12806,11 @@ static void __perf_pmu_remove(struct perf_event_context *ctx, perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) { perf_remove_from_context(event, 0); - unaccount_event_cpu(event, cpu); put_pmu_ctx(event->pmu_ctx); list_add(&event->migrate_entry, events); for_each_sibling_event(sibling, event) { perf_remove_from_context(sibling, 0); - unaccount_event_cpu(sibling, cpu); put_pmu_ctx(sibling->pmu_ctx); list_add(&sibling->migrate_entry, events); } @@ -12847,7 +12829,6 @@ static void __perf_pmu_install_event(struct pmu *pmu, if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; - account_event_cpu(event, cpu); perf_install_in_context(ctx, event, cpu); } @@ -13231,7 +13212,7 @@ inherit_event(struct perf_event *parent_event, pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); - return NULL; + return ERR_CAST(pmu_ctx); } child_event->pmu_ctx = pmu_ctx; @@ -13742,8 +13723,7 @@ static int __perf_cgroup_move(void *info) struct task_struct *task = info; preempt_disable(); - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_switch(task); + perf_cgroup_switch(task); preempt_enable(); return 0; diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 086a22d1adb7..a8074079b09e 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -286,19 +286,22 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, } futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); - if (!futexv) - return -ENOMEM; + if (!futexv) { + ret = -ENOMEM; + goto destroy_timer; + } ret = futex_parse_waitv(futexv, waiters, nr_futexes); if (!ret) ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); + kfree(futexv); + +destroy_timer: if (timeout) { hrtimer_cancel(&to.timer); destroy_hrtimer_on_stack(&to.timer); } - - kfree(futexv); return ret; } diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 473036b43c83..81b97f0f6556 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -14,6 +14,8 @@ include/ arch/$SRCARCH/include/ " +type cpio > /dev/null + # Support incremental builds by skipping archive generation # if timestamps of files being archived are not changed. diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8fe1da9614ee..5c3fb6168eef 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -114,7 +114,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; - if (WARN_ON(!is_fwnode_irqchip(fwnode))) + if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 955267bbc2be..783a3e6a0b10 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1000,7 +1000,7 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, fail: msi_unlock_descs(dev); free_fwnode: - kfree(fwnode); + irq_domain_free_fwnode(fwnode); free_bundle: kfree(bundle); return false; @@ -1013,6 +1013,7 @@ free_bundle: */ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) { + struct fwnode_handle *fwnode = NULL; struct msi_domain_info *info; struct irq_domain *domain; @@ -1025,7 +1026,10 @@ void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) dev->msi.data->__domains[domid].domain = NULL; info = domain->host_data; + if (irq_domain_is_msi_device(domain)) + fwnode = domain->fwnode; irq_domain_remove(domain); + irq_domain_free_fwnode(fwnode); kfree(container_of(info, struct msi_domain_template, info)); unlock: diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index f35d9cc1aab1..bfbc12da3326 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -157,14 +157,11 @@ static void test_kallsyms_compression_ratio(void) static int lookup_name(void *data, const char *name, struct module *mod, unsigned long addr) { u64 t0, t1, t; - unsigned long flags; struct test_stat *stat = (struct test_stat *)data; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); (void)kallsyms_lookup_name(name); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); t = t1 - t0; if (t < stat->min) @@ -234,18 +231,15 @@ static int find_symbol(void *data, const char *name, struct module *mod, unsigne static void test_perf_kallsyms_on_each_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; stat.perf = 1; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_symbol(find_symbol, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_symbol() traverse all: %lld ns\n", t1 - t0); } @@ -270,17 +264,14 @@ static int match_symbol(void *data, unsigned long addr) static void test_perf_kallsyms_on_each_match_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_match_symbol(match_symbol, stat.name, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_match_symbol() traverse all: %lld ns\n", t1 - t0); } diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index dcec1b743c69..a60c561724be 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -159,7 +159,7 @@ static bool __report_matches(const struct expect_report *r) const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; unsigned long flags; - typeof(observed.lines) expect; + typeof(*observed.lines) *expect; const char *end; char *cur; int i; @@ -168,6 +168,10 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; + expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); + if (WARN_ON(!expect)) + return false; + /* Generate expected report contents. */ /* Title */ @@ -253,6 +257,7 @@ static bool __report_matches(const struct expect_report *r) strstr(observed.lines[2], expect[1]))); out: spin_unlock_irqrestore(&observed.lock, flags); + kfree(expect); return ret; } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 7779ee8abc2a..010cf4e6d0b8 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -89,15 +89,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock, * set this bit before looking at the lock. */ -static __always_inline void -rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) +static __always_inline struct task_struct * +rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) { unsigned long val = (unsigned long)owner; if (rt_mutex_has_waiters(lock)) val |= RT_MUTEX_HAS_WAITERS; - WRITE_ONCE(lock->owner, (struct task_struct *)val); + return (struct task_struct *)val; +} + +static __always_inline void +rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) +{ + /* + * lock->wait_lock is held but explicit acquire semantics are needed + * for a new lock owner so WRITE_ONCE is insufficient. + */ + xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner)); +} + +static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) +{ + /* lock->wait_lock is held so the unlock provides release semantics. */ + WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); } static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) @@ -106,7 +122,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); } -static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) +static __always_inline void +fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) { unsigned long owner, *p = (unsigned long *) &lock->owner; @@ -172,8 +189,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) * still set. */ owner = READ_ONCE(*p); - if (owner & RT_MUTEX_HAS_WAITERS) - WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); + if (owner & RT_MUTEX_HAS_WAITERS) { + /* + * See rt_mutex_set_owner() and rt_mutex_clear_owner() on + * why xchg_acquire() is used for updating owner for + * locking and WRITE_ONCE() for unlocking. + * + * WRITE_ONCE() would work for the acquire case too, but + * in case that the lock acquisition failed it might + * force other lockers into the slow path unnecessarily. + */ + if (acqu |
