diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/arraymap.c | 20 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 14 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 24 | ||||
| -rw-r--r-- | kernel/bpf/stackmap.c | 20 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 34 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 2 | ||||
| -rw-r--r-- | kernel/capability.c | 1 | ||||
| -rw-r--r-- | kernel/cpu.c | 24 | ||||
| -rw-r--r-- | kernel/events/core.c | 175 | ||||
| -rw-r--r-- | kernel/jump_label.c | 7 | ||||
| -rw-r--r-- | kernel/memremap.c | 4 | ||||
| -rw-r--r-- | kernel/module.c | 2 | ||||
| -rw-r--r-- | kernel/panic.c | 4 | ||||
| -rw-r--r-- | kernel/pid_namespace.c | 10 | ||||
| -rw-r--r-- | kernel/power/suspend.c | 4 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 1 | ||||
| -rw-r--r-- | kernel/rcu/tiny.c | 4 | ||||
| -rw-r--r-- | kernel/rcu/tiny_plugin.h | 9 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 33 | ||||
| -rw-r--r-- | kernel/rcu/tree_exp.h | 52 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 2 | ||||
| -rw-r--r-- | kernel/rcu/update.c | 38 | ||||
| -rw-r--r-- | kernel/signal.c | 4 | ||||
| -rw-r--r-- | kernel/sysctl.c | 1 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 9 | ||||
| -rw-r--r-- | kernel/time/tick-sched.h | 2 | ||||
| -rw-r--r-- | kernel/ucount.c | 14 | ||||
| -rw-r--r-- | kernel/watchdog.c | 9 | ||||
| -rw-r--r-- | kernel/watchdog_hld.c | 3 |
29 files changed, 371 insertions, 155 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a2ac051c342f..3d55d95dcf49 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -11,7 +11,6 @@ */ #include <linux/bpf.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/filter.h> @@ -56,7 +55,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) attr->value_size == 0 || attr->map_flags) return ERR_PTR(-EINVAL); - if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) + if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. */ @@ -74,14 +73,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) if (array_size >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); - /* allocate all map elements and zero-initialize them */ - array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); - if (!array) { - array = vzalloc(array_size); - if (!array) - return ERR_PTR(-ENOMEM); - } + array = bpf_map_area_alloc(array_size); + if (!array) + return ERR_PTR(-ENOMEM); /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -97,7 +92,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) if (array_size >= U32_MAX - PAGE_SIZE || elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { - kvfree(array); + bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } out: @@ -262,7 +257,7 @@ static void array_map_free(struct bpf_map *map) if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); - kvfree(array); + bpf_map_area_free(array); } static const struct bpf_map_ops array_ops = { @@ -319,7 +314,8 @@ static void fd_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); - kvfree(array); + + bpf_map_area_free(array); } static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1eb4f1303756..503d4211988a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -146,10 +146,11 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } -int bpf_prog_calc_digest(struct bpf_prog *fp) +int bpf_prog_calc_tag(struct bpf_prog *fp) { const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); - u32 raw_size = bpf_prog_digest_scratch_size(fp); + u32 raw_size = bpf_prog_tag_scratch_size(fp); + u32 digest[SHA_DIGEST_WORDS]; u32 ws[SHA_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; struct bpf_insn *dst; @@ -162,7 +163,7 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) if (!raw) return -ENOMEM; - sha_init(fp->digest); + sha_init(digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation @@ -204,13 +205,14 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) *bits = cpu_to_be64((psize - 1) << 3); while (blocks--) { - sha_transform(fp->digest, todo, ws); + sha_transform(digest, todo, ws); todo += SHA_MESSAGE_BYTES; } - result = (__force __be32 *)fp->digest; + result = (__force __be32 *)digest; for (i = 0; i < SHA_DIGEST_WORDS; i++) - result[i] = cpu_to_be32(fp->digest[i]); + result[i] = cpu_to_be32(digest[i]); + memcpy(fp->tag, result, sizeof(fp->tag)); vfree(raw); return 0; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 34debc1a9641..a753bbe7df0a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,7 +13,6 @@ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> -#include <linux/vmalloc.h> #include "percpu_freelist.h" #include "bpf_lru_list.h" @@ -103,7 +102,7 @@ static void htab_free_elems(struct bpf_htab *htab) free_percpu(pptr); } free_elems: - vfree(htab->elems); + bpf_map_area_free(htab->elems); } static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, @@ -125,7 +124,8 @@ static int prealloc_init(struct bpf_htab *htab) { int err = -ENOMEM, i; - htab->elems = vzalloc(htab->elem_size * htab->map.max_entries); + htab->elems = bpf_map_area_alloc(htab->elem_size * + htab->map.max_entries); if (!htab->elems) return -ENOMEM; @@ -274,7 +274,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - + if (htab->map.value_size >= KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct htab_elem)) /* if value_size is bigger, the user space won't be able to * access the elements via bpf syscall. This check also makes @@ -320,14 +320,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; err = -ENOMEM; - htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket), - GFP_USER | __GFP_NOWARN); - - if (!htab->buckets) { - htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket)); - if (!htab->buckets) - goto free_htab; - } + htab->buckets = bpf_map_area_alloc(htab->n_buckets * + sizeof(struct bucket)); + if (!htab->buckets) + goto free_htab; for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_HEAD(&htab->buckets[i].head); @@ -354,7 +350,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) free_extra_elems: free_percpu(htab->extra_elems); free_buckets: - kvfree(htab->buckets); + bpf_map_area_free(htab->buckets); free_htab: kfree(htab); return ERR_PTR(err); @@ -1014,7 +1010,7 @@ static void htab_map_free(struct bpf_map *map) prealloc_destroy(htab); free_percpu(htab->extra_elems); - kvfree(htab->buckets); + bpf_map_area_free(htab->buckets); kfree(htab); } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 732ae16d12b7..be8519148c25 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -7,7 +7,6 @@ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> -#include <linux/vmalloc.h> #include <linux/stacktrace.h> #include <linux/perf_event.h> #include "percpu_freelist.h" @@ -32,7 +31,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; int err; - smap->elems = vzalloc(elem_size * smap->map.max_entries); + smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries); if (!smap->elems) return -ENOMEM; @@ -45,7 +44,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) return 0; free_elems: - vfree(smap->elems); + bpf_map_area_free(smap->elems); return err; } @@ -76,12 +75,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-E2BIG); - smap = kzalloc(cost, GFP_USER | __GFP_NOWARN); - if (!smap) { - smap = vzalloc(cost); - if (!smap) - return ERR_PTR(-ENOMEM); - } + smap = bpf_map_area_alloc(cost); + if (!smap) + return ERR_PTR(-ENOMEM); err = -E2BIG; cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); @@ -112,7 +108,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) put_buffers: put_callchain_buffers(); free_smap: - kvfree(smap); + bpf_map_area_free(smap); return ERR_PTR(err); } @@ -262,9 +258,9 @@ static void stack_map_free(struct bpf_map *map) /* wait for bpf programs to complete before freeing stack map */ synchronize_rcu(); - vfree(smap->elems); + bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); - kvfree(smap); + bpf_map_area_free(smap); put_callchain_buffers(); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e89acea22ecf..19b6129eab23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -12,6 +12,8 @@ #include <linux/bpf.h> #include <linux/syscalls.h> #include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mmzone.h> #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/license.h> @@ -49,6 +51,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +void *bpf_map_area_alloc(size_t size) +{ + /* We definitely need __GFP_NORETRY, so OOM killer doesn't + * trigger under memory pressure as we really just want to + * fail instead. + */ + const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + void *area; + + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + area = kmalloc(size, GFP_USER | flags); + if (area != NULL) + return area; + } + + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, + PAGE_KERNEL); +} + +void bpf_map_area_free(void *area) +{ + kvfree(area); +} + int bpf_map_precharge_memlock(u32 pages) { struct user_struct *user = get_current_user(); @@ -688,17 +714,17 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; - char prog_digest[sizeof(prog->digest) * 2 + 1] = { }; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_digest, prog->digest, sizeof(prog->digest)); + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" - "prog_digest:\t%s\n" + "prog_tag:\t%s\n" "memlock:\t%llu\n", prog->type, prog->jited, - prog_digest, + prog_tag, prog->pages * 1ULL << PAGE_SHIFT); } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83ed2f8f6f22..cdc43b899f28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2936,7 +2936,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i, j, err; - err = bpf_prog_calc_digest(env->prog); + err = bpf_prog_calc_tag(env->prog); if (err) return err; diff --git a/kernel/capability.c b/kernel/capability.c index a98e814f216f..f97fe77ceb88 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -318,6 +318,7 @@ bool has_capability(struct task_struct *t, int cap) { return has_ns_capability(t, &init_user_ns, cap); } +EXPORT_SYMBOL(has_capability); /** * has_ns_capability_noaudit - Does a task have a capability (unaudited) diff --git a/kernel/cpu.c b/kernel/cpu.c index f75c4d031eeb..0a5f630f5c54 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -764,7 +764,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int prev_state, ret = 0; - bool hasdied = false; if (num_online_cpus() == 1) return -EBUSY; @@ -809,7 +808,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, cpuhp_kick_ap_work(cpu); } - hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; out: cpu_hotplug_done(); return ret; @@ -1302,10 +1300,24 @@ static int cpuhp_cb_check(enum cpuhp_state state) */ static int cpuhp_reserve_state(enum cpuhp_state state) { - enum cpuhp_state i; + enum cpuhp_state i, end; + struct cpuhp_step *step; - for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { - if (!cpuhp_ap_states[i].name) + switch (state) { + case CPUHP_AP_ONLINE_DYN: + step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN; + end = CPUHP_AP_ONLINE_DYN_END; + break; + case CPUHP_BP_PREPARE_DYN: + step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN; + end = CPUHP_BP_PREPARE_DYN_END; + break; + default: + return -EINVAL; + } + + for (i = state; i <= end; i++, step++) { + if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); @@ -1323,7 +1335,7 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN) { + if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) goto out; diff --git a/kernel/events/core.c b/kernel/events/core.c index ab15509fab8c..110b38a58493 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info) struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); struct perf_event_context *task_ctx = cpuctx->task_ctx; - bool activate = true; + bool reprogram = true; int ret = 0; raw_spin_lock(&cpuctx->ctx.lock); @@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info) raw_spin_lock(&ctx->lock); task_ctx = ctx; - /* If we're on the wrong CPU, try again */ - if (task_cpu(ctx->task) != smp_processor_id()) { - ret = -ESRCH; - goto unlock; - } + reprogram = (ctx->task == current); /* - * If we're on the right CPU, see if the task we target is - * current, if not we don't have to activate the ctx, a future - * context switch will do that for us. + * If the task is running, it must be running on this CPU, + * otherwise we cannot reprogram things. + * + * If its not running, we don't care, ctx->lock will + * serialize against it becoming runnable. */ - if (ctx->task != current) - activate = false; - else - WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx); + if (task_curr(ctx->task) && !reprogram) { + ret = -ESRCH; + goto unlock; + } + WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx); } else if (task_ctx) { raw_spin_lock(&task_ctx->lock); } - if (activate) { + if (reprogram) { ctx_sched_out(ctx, cpuctx, EVENT_TIME); add_event_to_ctx(event, ctx); ctx_resched(cpuctx, task_ctx); @@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx, /* * Installing events is tricky because we cannot rely on ctx->is_active * to be set in case this is the nr_events 0 -> 1 transition. + * + * Instead we use task_curr(), which tells us if the task is running. + * However, since we use task_curr() outside of rq::lock, we can race + * against the actual state. This means the result can be wrong. + * + * If we get a false positive, we retry, this is harmless. + * + * If we get a false negative, things are complicated. If we are after + * perf_event_context_sched_in() ctx::lock will serialize us, and the + * value must be correct. If we're before, it doesn't matter since + * perf_event_context_sched_in() will program the counter. + * + * However, this hinges on the remote context switch having observed + * our task->perf_event_ctxp[] store, such that it will in fact take + * ctx::lock in perf_event_context_sched_in(). + * + * We do this by task_function_call(), if the IPI fails to hit the task + * we know any future context switch of task must see the + * perf_event_ctpx[] store. */ -again: + /* - * Cannot use task_function_call() because we need to run on the task's - * CPU regardless of whether its current or not. + * This smp_mb() orders the task->perf_event_ctxp[] store with the + * task_cpu() load, such that if the IPI then does not find the task + * running, a future context switch of that task must observe the + * store. */ - if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event)) + smp_mb(); +again: + if (!task_function_call(task, __perf_install_in_context, event)) return; raw_spin_lock_irq(&ctx->lock); @@ -2348,12 +2370,16 @@ again: raw_spin_unlock_irq(&ctx->lock); return; } - raw_spin_unlock_irq(&ctx->lock); /* - * Since !ctx->is_active doesn't mean anything, we must IPI - * unconditionally. + * If the task is not running, ctx->lock will avoid it becoming so, + * thus we can safely install the event. */ - goto again; + if (task_curr(task)) { + raw_spin_unlock_irq(&ctx->lock); + goto again; + } + add_event_to_ctx(event, ctx); + raw_spin_unlock_irq(&ctx->lock); } /* @@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } -/* - * Generic event overflow handling, sampling. - */ - -static int __perf_event_overflow(struct perf_event *event, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) +static int +__perf_event_account_interrupt(struct perf_event *event, int throttle) { - int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; - u64 seq; int ret = 0; - - /* - * Non-sampling counters might still use the PMI to fold short - * hardware counters, ignore those. - */ - if (unlikely(!is_sampling_event(event))) - return 0; + u64 seq; seq = __this_cpu_read(perf_throttled_seq); if (seq != hwc->interrupts_seq) { @@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event, perf_adjust_period(event, delta, hwc->last_period, true); } + return ret; +} + +int perf_event_account_interrupt(struct perf_event *event) +{ + return __perf_event_account_interrupt(event, 1); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + int ret = 0; + + /* + * Non-sampling counters might still use the PMI to fold short + * hardware counters, ignore those. + */ + if (unlikely(!is_sampling_event(event))) + return 0; + + ret = __perf_event_account_interrupt(event, throttle); + /* * XXX event_limit might not quite work as expected on inherited * events @@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) return 0; } +/* + * Variation on perf_event_ctx_lock_nested(), except we take two context + * mutexes. + */ +static struct perf_event_context * +__perf_event_ctx_lock_double(struct perf_event *group_leader, + struct perf_event_context *ctx) +{ + struct perf_event_context *gctx; + +again: + rcu_read_lock(); + gctx = READ_ONCE(group_leader->ctx); + if (!atomic_inc_not_zero(&gctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock_double(&gctx->mutex, &ctx->mutex); + + if (group_leader->ctx != gctx) { + mutex_unlock(&ctx->mutex); + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + goto again; + } + + return gctx; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - gctx = group_leader->ctx; - mutex_lock_double(&gctx->mutex, &ctx->mutex); + gctx = __perf_event_ctx_lock_double(group_leader, ctx); + if (gctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_locked; } + + /* + * Check if we raced against another sys_perf_event_open() call + * moving the software group underneath us. + */ + if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { + /* + * If someone moved the group out from under us, check + * if this new event wound up on the same ctx, if so + * its the regular !move_group case, otherwise fail. + */ + if (gctx != ctx) { + err = -EINVAL; + goto err_locked; + } else { + perf_event_ctx_unlock(group_leader, gctx); + move_group = 0; + } + } } else { mutex_lock(&ctx->mutex); } @@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); if (task) { @@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open, err_locked: if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); /* err_file: */ fput(event_file); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93ad6c1fb9b6..a9b8cf500591 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); +void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); + flush_delayed_work(&key->work); +} +EXPORT_SYMBOL_GPL(static_key_deferred_flush); + void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { diff --git a/kernel/memremap.c b/kernel/memremap.c index b501e390bb34..9ecedc28b928 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + mem_hotplug_begin(); arch_remove_memory(align_start, align_size); + mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); + mem_hotplug_done(); if (error) goto err_add_memory; diff --git a/kernel/module.c b/kernel/module.c index 5088784c0cf9..38d4270925d4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1145,7 +1145,7 @@ static size_t module_flags_taint(struct module *mod, char *buf) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { if (taint_flags[i].module && test_bit(i, &mod->taints)) - buf[l++] = taint_flags[i].true; + buf[l++] = taint_flags[i].c_true; } return l; diff --git a/kernel/panic.c b/kernel/panic.c index c51edaa04fce..08aa88dde7de 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -249,7 +249,7 @@ void panic(const char *fmt, ...) * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked. */ - pr_emerg("Rebooting in %d seconds..", panic_timeout); + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { touch_nmi_watchdog(); @@ -355,7 +355,7 @@ const char *print_tainted(void) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { const struct taint_flag *t = &taint_flags[i]; *s++ = test_bit(i, &tainted_mask) ? - t->true : t->false; + t->c_true : t->c_false; } *s = 0; } else diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index df9e8e9e0be7..eef2ce968636 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -151,8 +151,12 @@ out: static void delayed_free_pidns(struct rcu_head *p) { - kmem_cache_free(pid_ns_cachep, - container_of(p, struct pid_namespace, rcu)); + struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu); + + dec_pid_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + + kmem_cache_free(pid_ns_cachep, ns); } static void destroy_pid_namespace(struct pid_namespace *ns) @@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns) ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); - dec_pid_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); call_rcu(&ns->rcu, delayed_free_pidns); } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f67ceb7768b8..15e6baef5c73 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -46,7 +46,7 @@ static const char * const mem_sleep_labels[] = { const char *mem_sleep_states[PM_SUSPEND_MAX]; suspend_state_t mem_sleep_current = PM_SUSPEND_FREEZE; -suspend_state_t mem_sleep_default = PM_SUSPEND_MAX; +static suspend_state_t mem_sleep_default = PM_SUSPEND_MEM; unsigned int pm_suspend_global_flags; EXPORT_SYMBOL_GPL(pm_suspend_global_flags); @@ -168,7 +168,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops) } if (valid_state(PM_SUSPEND_MEM)) { mem_sleep_states[PM_SUSPEND_MEM] = mem_sleep_labels[PM_SUSPEND_MEM]; - if (mem_sleep_default >= PM_SUSPEND_MEM) + if (mem_sleep_default == PM_SUSPEND_MEM) mem_sleep_current = PM_SUSPEND_MEM; } diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 80adef7d4c3d..0d6ff3e471be 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void); #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); +void rcu_test_sync_prims(void); /* * This function really isn't for public consumption, but RCU is special in diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 1898559e6b60..b23a4d076f3d 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -185,9 +185,6 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused * benefits of doing might_sleep() to reduce latency.) * * Cool, huh? (Due to Josh Triplett.) - * - * But we want to make this a static inline later. The cond_resched() - * currently makes this problematic. */ void synchronize_sched(void) { @@ -195,7 +192,6 @@ void synchronize_sched(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_sched() in RCU read-side critical section"); - cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 196f0302e2f4..c64b827ecbca 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * During boot, we forgive RCU lockdep issues. After this function is - * invoked, we start taking RCU lockdep issues seriously. + * invoked, we start taking RCU lockdep issues seriously. Note that unlike + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. + * The reason for this is that Tiny RCU does not need kthreads, so does + * not have to care about the fact that the scheduler is half-initialized + * at a certain phase of the boot process. */ void __init r |
