diff options
Diffstat (limited to 'kernel')
48 files changed, 2174 insertions, 974 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index d8560ee3bab7..9ad37b9e44a7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -24,7 +24,7 @@ #define CREATE_TRACE_POINTS #include <trace/events/context_tracking.h> -struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; +DEFINE_STATIC_KEY_FALSE(context_tracking_enabled); EXPORT_SYMBOL_GPL(context_tracking_enabled); DEFINE_PER_CPU(struct context_tracking, context_tracking); @@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu) if (!per_cpu(context_tracking.active, cpu)) { per_cpu(context_tracking.active, cpu) = true; - static_key_slow_inc(&context_tracking_enabled); + static_branch_inc(&context_tracking_enabled); } if (initialized) diff --git a/kernel/events/core.c b/kernel/events/core.c index ef2d6ea10736..bf8244190d0f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -126,6 +126,37 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info) return data.ret; } +static void event_function_call(struct perf_event *event, + int (*active)(void *), + void (*inactive)(void *), + void *data) +{ + struct perf_event_context *ctx = event->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + cpu_function_call(event->cpu, active, data); + return; + } + +again: + if (!task_function_call(task, active, data)) + return; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + /* + * Reload the task pointer, it might have been changed by + * a concurrent perf_event_context_sched_out(). + */ + task = ctx->task; + raw_spin_unlock_irq(&ctx->lock); + goto again; + } + inactive(data); + raw_spin_unlock_irq(&ctx->lock); +} + #define EVENT_OWNER_KERNEL ((void *) -1) static bool is_kernel_event(struct perf_event *event) @@ -1629,6 +1660,17 @@ struct remove_event { bool detach_group; }; +static void ___perf_remove_from_context(void *info) +{ + struct remove_event *re = info; + struct perf_event *event = re->event; + struct perf_event_context *ctx = event->ctx; + + if (re->detach_group) + perf_group_detach(event); + list_del_event(event, ctx); +} + /* * Cross CPU call to remove a performance event * @@ -1656,7 +1698,6 @@ static int __perf_remove_from_context(void *info) return 0; } - /* * Remove the event from a task's (or a CPU's) list of events. * @@ -1673,7 +1714,6 @@ static int __perf_remove_from_context(void *info) static void perf_remove_from_context(struct perf_event *event, bool detach_group) { struct perf_event_context *ctx = event->ctx; - struct task_struct *task = ctx->task; struct remove_event re = { .event = event, .detach_group = detach_group, @@ -1681,44 +1721,8 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group lockdep_assert_held(&ctx->mutex); - if (!task) { - /* - * Per cpu events are removed via an smp call. The removal can - * fail if the CPU is currently offline, but in that case we - * already called __perf_remove_from_context from - * perf_event_exit_cpu. - */ - cpu_function_call(event->cpu, __perf_remove_from_context, &re); - return; - } - -retry: - if (!task_function_call(task, __perf_remove_from_context, &re)) - return; - - raw_spin_lock_irq(&ctx->lock); - /* - * If we failed to find a running task, but find the context active now - * that we've acquired the ctx->lock, retry. - */ - if (ctx->is_active) { - raw_spin_unlock_irq(&ctx->lock); - /* - * Reload the task pointer, it might have been changed by - * a concurrent perf_event_context_sched_out(). - */ - task = ctx->task; - goto retry; - } - - /* - * Since the task isn't running, its safe to remove the event, us - * holding the ctx->lock ensures the task won't get scheduled in. - */ - if (detach_group) - perf_group_detach(event); - list_del_event(event, ctx); - raw_spin_unlock_irq(&ctx->lock); + event_function_call(event, __perf_remove_from_context, + ___perf_remove_from_context, &re); } /* @@ -1762,6 +1766,20 @@ int __perf_event_disable(void *info) return 0; } +void ___perf_event_disable(void *info) +{ + struct perf_event *event = info; + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_group_times(event); + event->state = PERF_EVENT_STATE_OFF; + } +} + /* * Disable a event. * @@ -1778,43 +1796,16 @@ int __perf_event_disable(void *info) static void _perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Disable the event on the cpu that it's on - */ - cpu_function_call(event->cpu, __perf_event_disable, event); - return; - } - -retry: - if (!task_function_call(task, __perf_event_disable, event)) - return; raw_spin_lock_irq(&ctx->lock); - /* - * If the event is still active, we need to retry the cross-call. - */ - if (event->state == PERF_EVENT_STATE_ACTIVE) { + if (event->state <= PERF_EVENT_STATE_OFF) { raw_spin_unlock_irq(&ctx->lock); - /* - * Reload the task pointer, it might have been changed by - * a concurrent perf_event_context_sched_out(). - */ - task = ctx->task; - goto retry; - } - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. - */ - if (event->state == PERF_EVENT_STATE_INACTIVE) { - update_group_times(event); - event->state = PERF_EVENT_STATE_OFF; + return; } raw_spin_unlock_irq(&ctx->lock); + + event_function_call(event, __perf_event_disable, + ___perf_event_disable, event); } /* @@ -2067,6 +2058,18 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); } +static void ___perf_install_in_context(void *info) +{ + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + + /* + * Since the task isn't running, its safe to add the event, us holding + * the ctx->lock ensures the task won't get scheduled in. + */ + add_event_to_ctx(event, ctx); +} + /* * Cross CPU call to install and enable a performance event * @@ -2143,48 +2146,14 @@ perf_install_in_context(struct perf_event_context *ctx, struct perf_event *event, int cpu) { - struct task_struct *task = ctx->task; - lockdep_assert_held(&ctx->mutex); event->ctx = ctx; if (event->cpu != -1) event->cpu = cpu; - if (!task) { - /* - * Per cpu events are installed via an smp call and - * the install is always successful. - */ - cpu_function_call(cpu, __perf_install_in_context, event); - return; - } - -retry: - if (!task_function_call(task, __perf_install_in_context, event)) - return; - - raw_spin_lock_irq(&ctx->lock); - /* - * If we failed to find a running task, but find the context active now - * that we've acquired the ctx->lock, retry. - */ - if (ctx->is_active) { - raw_spin_unlock_irq(&ctx->lock); - /* - * Reload the task pointer, it might have been changed by - * a concurrent perf_event_context_sched_out(). - */ - task = ctx->task; - goto retry; - } - - /* - * Since the task isn't running, its safe to add the event, us holding - * the ctx->lock ensures the task won't get scheduled in. - */ - add_event_to_ctx(event, ctx); - raw_spin_unlock_irq(&ctx->lock); + event_function_call(event, __perf_install_in_context, + ___perf_install_in_context, event); } /* @@ -2287,6 +2256,11 @@ unlock: return 0; } +void ___perf_event_enable(void *info) +{ + __perf_event_mark_enabled((struct perf_event *)info); +} + /* * Enable a event. * @@ -2299,58 +2273,26 @@ unlock: static void _perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; - struct task_struct *task = ctx->task; - if (!task) { - /* - * Enable the event on the cpu that it's on - */ - cpu_function_call(event->cpu, __perf_event_enable, event); + raw_spin_lock_irq(&ctx->lock); + if (event->state >= PERF_EVENT_STATE_INACTIVE) { + raw_spin_unlock_irq(&ctx->lock); return; } - raw_spin_lock_irq(&ctx->lock); - if (event->state >= PERF_EVENT_STATE_INACTIVE) - goto out; - /* * If the event is in error state, clear that first. - * That way, if we see the event in error state below, we - * know that it has gone back into error state, as distinct - * from the task having been scheduled away before the - * cross-call arrived. + * + * That way, if we see the event in error state below, we know that it + * has gone back into error state, as distinct from the task having + * been scheduled away before the cross-call arrived. */ if (event->state == PERF_EVENT_STATE_ERROR) event->state = PERF_EVENT_STATE_OFF; - -retry: - if (!ctx->is_active) { - __perf_event_mark_enabled(event); - goto out; - } - raw_spin_unlock_irq(&ctx->lock); - if (!task_function_call(task, __perf_event_enable, event)) - return; - - raw_spin_lock_irq(&ctx->lock); - - /* - * If the context is active and the event is still off, - * we need to retry the cross-call. - */ - if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) { - /* - * task could have been flipped by a concurrent - * perf_event_context_sched_out() - */ - task = ctx->task; - goto retry; - } - -out: - raw_spin_unlock_irq(&ctx->lock); + event_function_call(event, __perf_event_enable, + ___perf_event_enable, event); } /* @@ -3154,15 +3096,16 @@ static int event_enable_on_exec(struct perf_event *event, * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ -static void perf_event_enable_on_exec(struct perf_event_context *ctx) +static void perf_event_enable_on_exec(int ctxn) { - struct perf_event_context *clone_ctx = NULL; + struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_event *event; unsigned long flags; int enabled = 0; int ret; local_irq_save(flags); + ctx = current->perf_event_ctxp[ctxn]; if (!ctx || !ctx->nr_events) goto out; @@ -3205,17 +3148,11 @@ out: void perf_event_exec(void) { - struct perf_event_context *ctx; int ctxn; rcu_read_lock(); - for_each_task_context_nr(ctxn) { - ctx = current->perf_event_ctxp[ctxn]; - if (!ctx) - continue; - - perf_event_enable_on_exec(ctx); - } + for_each_task_context_nr(ctxn) + perf_event_enable_on_exec(ctxn); rcu_read_unlock(); } @@ -4154,6 +4091,22 @@ struct period_event { u64 value; }; +static void ___perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + u64 value = pe->value; + + if (event->attr.freq) { + event->attr.sample_freq = value; + } else { + event->attr.sample_period = value; + event->hw.sample_period = value; + } + + local64_set(&event->hw.period_left, 0); +} + static int __perf_event_period(void *info) { struct period_event *pe = info; @@ -4190,8 +4143,6 @@ static int __perf_event_period(void *info) static int perf_event_period(struct perf_event *event, u64 __user *arg) { struct period_event pe = { .event = event, }; - struct perf_event_context *ctx = event->ctx; - struct task_struct *task; u64 value; if (!is_sampling_event(event)) @@ -4206,34 +4157,10 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (event->attr.freq && value > sysctl_perf_event_sample_rate) return -EINVAL; - task = ctx->task; pe.value = value; - if (!task) { - cpu_function_call(event->cpu, __perf_event_period, &pe); - return 0; - } - -retry: - if (!task_function_call(task, __perf_event_period, &pe)) - return 0; - - raw_spin_lock_irq(&ctx->lock); - if (ctx->is_active) { - raw_spin_unlock_irq(&ctx->lock); - task = ctx->task; - goto retry; - } - - if (event->attr.freq) { - event->attr.sample_freq = value; - } else { - event->attr.sample_period = value; - event->hw.sample_period = value; - } - - local64_set(&event->hw.period_left, 0); - raw_spin_unlock_irq(&ctx->lock); + event_function_call(event, __perf_event_period, + ___perf_event_period, &pe); return 0; } @@ -6493,9 +6420,6 @@ struct swevent_htable { /* Recursion avoidance in each contexts */ int recursion[PERF_NR_CONTEXTS]; - - /* Keeps track of cpu being initialized/exited */ - bool online; }; static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); @@ -6753,14 +6677,8 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); head = find_swevent_head(swhash, event); - if (!head) { - /* - * We can race with cpu hotplug code. Do not - * WARN if the cpu just got unplugged. - */ - WARN_ON_ONCE(swhash->online); + if (WARN_ON_ONCE(!head)) return -EINVAL; - } hlist_add_head_rcu(&event->hlist_entry, head); perf_event_update_userpage(event); @@ -6828,7 +6746,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) int err = 0; mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; @@ -9291,7 +9208,6 @@ static void perf_event_init_cpu(int cpu) struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); - swhash->online = true; if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -9333,14 +9249,7 @@ static void perf_event_exit_cpu_context(int cpu) static void perf_event_exit_cpu(int cpu) { - struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - perf_event_exit_cpu_context(cpu); - - mutex_lock(&swhash->hlist_mutex); - swhash->online = false; - swevent_hlist_release(swhash); - mutex_unlock(&swhash->hlist_mutex); } #else static inline void perf_event_exit_cpu(int cpu) { } diff --git a/kernel/fork.c b/kernel/fork.c index fce002ee3ddf..291b08cc817b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -380,6 +380,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) #endif tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; + tsk->wake_q.next = NULL; account_kernel_stack(ti, 1); @@ -1348,9 +1349,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, prev_cputime_init(&p->prev_cputime); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqlock_init(&p->vtime_seqlock); + seqcount_init(&p->vtime_seqcount); p->vtime_snap = 0; - p->vtime_snap_whence = VTIME_SLEEPING; + p->vtime_snap_whence = VTIME_INACTIVE; #endif #if defined(SPLIT_RSS_COUNTING) diff --git a/kernel/futex.c b/kernel/futex.c index 684d7549825a..8a310e240cda 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void) } /* + * Drops a reference to the pi_state object and frees or caches it + * when the last reference is gone. + * * Must be called with the hb lock held. */ -static void free_pi_state(struct futex_pi_state *pi_state) +static void put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) return; @@ -1706,31 +1709,35 @@ retry_private: * exist yet, look it up one more time to ensure we have a * reference to it. If the lock was taken, ret contains the * vpid of the top waiter task. + * If the lock was not taken, we have pi_state and an initial + * refcount on it. In case of an error we have nothing. */ if (ret > 0) { WARN_ON(pi_state); drop_count++; task_count++; /* - * If we acquired the lock, then the user - * space value of uaddr2 should be vpid. It - * cannot be changed by the top waiter as it - * is blocked on hb2 lock if it tries to do - * so. If something fiddled with it behind our - * back the pi state lookup might unearth - * it. So we rather use the known value than - * rereading and handing potential crap to - * lookup_pi_state. + * If we acquired the lock, then the user space value + * of uaddr2 should be vpid. It cannot be changed by + * the top waiter as it is blocked on hb2 lock if it + * tries to do so. If something fiddled with it behind + * our back the pi state lookup might unearth it. So + * we rather use the known value than rereading and + * handing potential crap to lookup_pi_state. + * + * If that call succeeds then we have pi_state and an + * initial refcount on it. */ ret = lookup_pi_state(ret, hb2, &key2, &pi_state); } switch (ret) { case 0: + /* We hold a reference on the pi state. */ break; + + /* If the above failed, then pi_state is NULL */ case -EFAULT: - free_pi_state(pi_state); - pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1746,8 +1753,6 @@ retry_private: * exit to complete. * - The user space value changed. */ - free_pi_state(pi_state); - pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1801,30 +1806,58 @@ retry_private: * of requeue_pi if we couldn't acquire the lock atomically. */ if (requeue_pi) { - /* Prepare the waiter to take the rt_mutex. */ + /* + * Prepare the waiter to take the rt_mutex. Take a + * refcount on the pi_state and store the pointer in + * the futex_q object of the waiter. + */ atomic_inc(&pi_state->refcount); this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, this->task); if (ret == 1) { - /* We got the lock. */ + /* + * We got the lock. We do neither drop the + * refcount on pi_state nor clear + * this->pi_state because the waiter needs the + * pi_state for cleaning up the user space + * value. It will drop the refcount after + * doing so. + */ requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; } else if (ret) { - /* -EDEADLK */ + /* + * rt_mutex_start_proxy_lock() detected a + * potential deadlock when we tried to queue + * that waiter. Drop the pi_state reference + * which we took above and remove the pointer + * to the state from the waiters futex_q + * object. + */ this->pi_state = NULL; - free_pi_state(pi_state); - goto out_unlock; + put_pi_state(pi_state); + /* + * We stop queueing more waiters and let user + * space deal with the mess. + */ + break; } } requeue_futex(this, hb1, hb2, &key2); drop_count++; } + /* + * We took an extra initial reference to the pi_state either + * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We + * need to drop it here again. + */ + put_pi_state(pi_state); + out_unlock: - free_pi_state(pi_state); double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); hb_waiters_dec(hb2); @@ -1973,7 +2006,7 @@ static void unqueue_me_pi(struct futex_q *q) __unqueue_futex(q); BUG_ON(!q->pi_state); - free_pi_state(q->pi_state); + put_pi_state(q->pi_state); q->pi_state = NULL; spin_unlock(q->lock_ptr); @@ -2755,6 +2788,11 @@ static int futex_wait_requeue_pi(u32 __user * |