summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c4
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/cgroup/cpuset.c15
-rw-r--r--kernel/events/core.c492
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/irq/Kconfig6
-rw-r--r--kernel/irq/Makefile1
-rw-r--r--kernel/irq/autoprobe.c2
-rw-r--r--kernel/irq/chip.c35
-rw-r--r--kernel/irq/debugfs.c12
-rw-r--r--kernel/irq/internals.h19
-rw-r--r--kernel/irq/irqdesc.c9
-rw-r--r--kernel/irq/irqdomain.c60
-rw-r--r--kernel/irq/manage.c23
-rw-r--r--kernel/irq/matrix.c443
-rw-r--r--kernel/irq/msi.c32
-rw-r--r--kernel/irq/proc.c5
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/irq/timings.c2
-rw-r--r--kernel/irq_work.c21
-rw-r--r--kernel/jump_label.c14
-rw-r--r--kernel/kexec_file.c5
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/kthread.c10
-rw-r--r--kernel/locking/lockdep.c23
-rw-r--r--kernel/locking/qrwlock.c86
-rw-r--r--kernel/locking/qspinlock_paravirt.h47
-rw-r--r--kernel/locking/rwsem.c16
-rw-r--r--kernel/locking/spinlock.c9
-rw-r--r--kernel/module.c10
-rw-r--r--kernel/rcu/rcu.h21
-rw-r--r--kernel/rcu/rcu_segcblist.c1
-rw-r--r--kernel/rcu/rcutorture.c28
-rw-r--r--kernel/rcu/tree.c175
-rw-r--r--kernel/rcu/tree.h5
-rw-r--r--kernel/rcu/tree_plugin.h36
-rw-r--r--kernel/rcu/update.c28
-rw-r--r--kernel/resource.c76
-rw-r--r--kernel/sched/Makefile1
-rw-r--r--kernel/sched/clock.c2
-rw-r--r--kernel/sched/core.c61
-rw-r--r--kernel/sched/cputime.c3
-rw-r--r--kernel/sched/deadline.c21
-rw-r--r--kernel/sched/debug.c18
-rw-r--r--kernel/sched/fair.c1049
-rw-r--r--kernel/sched/idle.c4
-rw-r--r--kernel/sched/isolation.c155
-rw-r--r--kernel/sched/rt.c316
-rw-r--r--kernel/sched/sched.h73
-rw-r--r--kernel/sched/topology.c49
-rw-r--r--kernel/seccomp.c2
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/softirq.c10
-rw-r--r--kernel/task_work.c2
-rw-r--r--kernel/test_kprobes.c29
-rw-r--r--kernel/time/Kconfig2
-rw-r--r--kernel/time/clockevents.c21
-rw-r--r--kernel/time/hrtimer.c4
-rw-r--r--kernel/time/ntp.c227
-rw-r--r--kernel/time/ntp_internal.h1
-rw-r--r--kernel/time/posix-cpu-timers.c6
-rw-r--r--kernel/time/posix-stubs.c20
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c38
-rw-r--r--kernel/time/time.c71
-rw-r--r--kernel/time/timekeeping.c182
-rw-r--r--kernel/time/timekeeping.h2
-rw-r--r--kernel/time/timer.c82
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_output.c12
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/user_namespace.c2
-rw-r--r--kernel/watchdog.c13
-rw-r--r--kernel/workqueue.c54
79 files changed, 2776 insertions, 1574 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 6670fbd3e466..d15c0ee4d955 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -147,7 +147,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
again:
smp_rmb();
rcu_read_lock();
- res = to_acct(ACCESS_ONCE(ns->bacct));
+ res = to_acct(READ_ONCE(ns->bacct));
if (!res) {
rcu_read_unlock();
return NULL;
@@ -159,7 +159,7 @@ again:
}
rcu_read_unlock();
mutex_lock(&res->lock);
- if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
+ if (res != to_acct(READ_ONCE(ns->bacct))) {
mutex_unlock(&res->lock);
acct_put(res);
goto again;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e2636737b69b..c4b9ab01bba5 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -492,7 +492,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
ee = ERR_PTR(-EOPNOTSUPP);
event = perf_file->private_data;
- if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
+ if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
goto err_out;
ee = bpf_event_entry_gen(perf_file, map_file);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4657e2924ecb..f7efa7b4d825 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -57,7 +57,7 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>
-
+#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
@@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
- cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
@@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
dattr = NULL;
csa = NULL;
- if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
- goto done;
- cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
@@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_and(doms[0], top_cpuset.effective_cpus,
- non_isolated_cpus);
+ housekeeping_cpumask(HK_FLAG_DOMAIN));
goto done;
}
@@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
*/
if (!cpumask_empty(cp->cpus_allowed) &&
!(is_sched_load_balance(cp) &&
- cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
+ cpumask_intersects(cp->cpus_allowed,
+ housekeeping_cpumask(HK_FLAG_DOMAIN))))
continue;
if (is_sched_load_balance(cp))
@@ -789,7 +785,7 @@ restart:
if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
- cpumask_and(dp, dp, non_isolated_cpus);
+ cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -802,7 +798,6 @@ restart:
BUG_ON(nslot != ndoms);
done:
- free_cpumask_var(non_isolated_cpus);
kfree(csa);
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 10cdb9c26b5d..4c39c05e029a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -209,7 +209,7 @@ static int event_function(void *info)
struct perf_event_context *task_ctx = cpuctx->task_ctx;
int ret = 0;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
perf_ctx_lock(cpuctx, task_ctx);
/*
@@ -306,7 +306,7 @@ static void event_function_local(struct perf_event *event, event_f func, void *d
struct task_struct *task = READ_ONCE(ctx->task);
struct perf_event_context *task_ctx = NULL;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
if (task) {
if (task == TASK_TOMBSTONE)
@@ -582,6 +582,88 @@ static inline u64 perf_event_clock(struct perf_event *event)
return event->clock();
}
+/*
+ * State based event timekeeping...
+ *
+ * The basic idea is to use event->state to determine which (if any) time
+ * fields to increment with the current delta. This means we only need to
+ * update timestamps when we change state or when they are explicitly requested
+ * (read).
+ *
+ * Event groups make things a little more complicated, but not terribly so. The
+ * rules for a group are that if the group leader is OFF the entire group is
+ * OFF, irrespecive of what the group member states are. This results in
+ * __perf_effective_state().
+ *
+ * A futher ramification is that when a group leader flips between OFF and
+ * !OFF, we need to update all group member times.
+ *
+ *
+ * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we
+ * need to make sure the relevant context time is updated before we try and
+ * update our timestamps.
+ */
+
+static __always_inline enum perf_event_state
+__perf_effective_state(struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+
+ if (leader->state <= PERF_EVENT_STATE_OFF)
+ return leader->state;
+
+ return event->state;
+}
+
+static __always_inline void
+__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
+{
+ enum perf_event_state state = __perf_effective_state(event);
+ u64 delta = now - event->tstamp;
+
+ *enabled = event->total_time_enabled;
+ if (state >= PERF_EVENT_STATE_INACTIVE)
+ *enabled += delta;
+
+ *running = event->total_time_running;
+ if (state >= PERF_EVENT_STATE_ACTIVE)
+ *running += delta;
+}
+
+static void perf_event_update_time(struct perf_event *event)
+{
+ u64 now = perf_event_time(event);
+
+ __perf_update_times(event, now, &event->total_time_enabled,
+ &event->total_time_running);
+ event->tstamp = now;
+}
+
+static void perf_event_update_sibling_time(struct perf_event *leader)
+{
+ struct perf_event *sibling;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry)
+ perf_event_update_time(sibling);
+}
+
+static void
+perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+{
+ if (event->state == state)
+ return;
+
+ perf_event_update_time(event);
+ /*
+ * If a group leader gets enabled/disabled all its siblings
+ * are affected too.
+ */
+ if ((event->state < 0) ^ (state < 0))
+ perf_event_update_sibling_time(event);
+
+ WRITE_ONCE(event->state, state);
+}
+
#ifdef CONFIG_CGROUP_PERF
static inline bool
@@ -841,40 +923,6 @@ perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
event->shadow_ctx_time = now - t->timestamp;
}
-static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
- /*
- * when the current task's perf cgroup does not match
- * the event's, we need to remember to call the
- * perf_mark_enable() function the first time a task with
- * a matching perf cgroup is scheduled in.
- */
- if (is_cgroup_event(event) && !perf_cgroup_match(event))
- event->cgrp_defer_enabled = 1;
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- struct perf_event *sub;
- u64 tstamp = perf_event_time(event);
-
- if (!event->cgrp_defer_enabled)
- return;
-
- event->cgrp_defer_enabled = 0;
-
- event->tstamp_enabled = tstamp - event->total_time_enabled;
- list_for_each_entry(sub, &event->sibling_list, group_entry) {
- if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- sub->tstamp_enabled = tstamp - sub->total_time_enabled;
- sub->cgrp_defer_enabled = 0;
- }
- }
-}
-
/*
* Update cpuctx->cgrp so that it is set when first cgroup event is added and
* cleared when last cgroup event is removed.
@@ -975,17 +1023,6 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
}
static inline void
-perf_cgroup_defer_enabled(struct perf_event *event)
-{
-}
-
-static inline void
-perf_cgroup_mark_enabled(struct perf_event *event,
- struct perf_event_context *ctx)
-{
-}
-
-static inline void
list_update_cgroup_event(struct perf_event *event,
struct perf_event_context *ctx, bool add)
{
@@ -1006,7 +1043,7 @@ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
struct perf_cpu_context *cpuctx;
int rotations = 0;
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
rotations = perf_rotate_context(cpuctx);
@@ -1093,7 +1130,7 @@ static void perf_event_ctx_activate(struct perf_event_context *ctx)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
WARN_ON(!list_empty(&ctx->active_ctx_list));
@@ -1102,7 +1139,7 @@ static void perf_event_ctx_activate(struct perf_event_context *ctx)
static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
{
- WARN_ON(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
WARN_ON(list_empty(&ctx->active_ctx_list));
@@ -1202,7 +1239,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
again:
rcu_read_lock();
- ctx = ACCESS_ONCE(event->ctx);
+ ctx = READ_ONCE(event->ctx);
if (!atomic_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
@@ -1398,60 +1435,6 @@ static u64 perf_event_time(struct perf_event *event)
return ctx ? ctx->time : 0;
}
-/*
- * Update the total_time_enabled and total_time_running fields for a event.
- */
-static void update_event_times(struct perf_event *event)
-{
- struct perf_event_context *ctx = event->ctx;
- u64 run_end;
-
- lockdep_assert_held(&ctx->lock);
-
- if (event->state < PERF_EVENT_STATE_INACTIVE ||
- event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
- return;
-
- /*
- * in cgroup mode, time_enabled represents
- * the time the event was enabled AND active
- * tasks were in the monitored cgroup. This is
- * independent of the activity of the context as
- * there may be a mix of cgroup and non-cgroup events.
- *
- * That is why we treat cgroup events differently
- * here.
- */
- if (is_cgroup_event(event))
- run_end = perf_cgroup_event_time(event);
- else if (ctx->is_active)
- run_end = ctx->time;
- else
- run_end = event->tstamp_stopped;
-
- event->total_time_enabled = run_end - event->tstamp_enabled;
-
- if (event->state == PERF_EVENT_STATE_INACTIVE)
- run_end = event->tstamp_stopped;
- else
- run_end = perf_event_time(event);
-
- event->total_time_running = run_end - event->tstamp_running;
-
-}
-
-/*
- * Update total_time_enabled and total_time_running for all events in a group.
- */
-static void update_group_times(struct perf_event *leader)
-{
- struct perf_event *event;
-
- update_event_times(leader);
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- update_event_times(event);
-}
-
static enum event_type_t get_event_type(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
@@ -1494,6 +1477,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
event->attach_state |= PERF_ATTACH_CONTEXT;
+ event->tstamp = perf_event_time(event);
+
/*
* If we're a stand alone event or group leader, we go to the context
* list, group events are kept attached to the group so that
@@ -1701,8 +1686,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
if (event->group_leader == event)
list_del_init(&event->group_entry);
- update_group_times(event);
-
/*
* If event was in error state, then keep it
* that way, otherwise bogus counts will be
@@ -1711,7 +1694,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
* of the event
*/
if (event->state > PERF_EVENT_STATE_OFF)
- event->state = PERF_EVENT_STATE_OFF;
+ perf_event_set_state(event, PERF_EVENT_STATE_OFF);
ctx->generation++;
}
@@ -1810,38 +1793,24 @@ event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
- u64 tstamp = perf_event_time(event);
- u64 delta;
+ enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
WARN_ON_ONCE(event->ctx != ctx);
lockdep_assert_held(&ctx->lock);
- /*
- * An event which could not be activated because of
- * filter mismatch still needs to have its timings
- * maintained, otherwise bogus information is return
- * via read() for time_enabled, time_running:
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE &&
- !event_filter_match(event)) {
- delta = tstamp - event->tstamp_stopped;
- event->tstamp_running += delta;
- event->tstamp_stopped = tstamp;
- }
-
if (event->state != PERF_EVENT_STATE_ACTIVE)
return;
perf_pmu_disable(event->pmu);
- event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
- event->state = PERF_EVENT_STATE_INACTIVE;
+
if (event->pending_disable) {
event->pending_disable = 0;
- event->state = PERF_EVENT_STATE_OFF;
+ state = PERF_EVENT_STATE_OFF;
}
+ perf_event_set_state(event, state);
if (!is_software_event(event))
cpuctx->active_oncpu--;
@@ -1861,7 +1830,9 @@ group_sched_out(struct perf_event *group_event,
struct perf_event_context *ctx)
{
struct perf_event *event;
- int state = group_event->state;
+
+ if (group_event->state != PERF_EVENT_STATE_ACTIVE)
+ return;
perf_pmu_disable(ctx->pmu);
@@ -1875,7 +1846,7 @@ group_sched_out(struct perf_event *group_event,
perf_pmu_enable(ctx->pmu);
- if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
+ if (group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
@@ -1895,6 +1866,11 @@ __perf_remove_from_context(struct perf_event *event,
{
unsigned long flags = (unsigned long)info;
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+ update_cgrp_time_from_cpuctx(cpuctx);
+ }
+
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
@@ -1957,14 +1933,17 @@ static void __perf_event_disable(struct perf_event *event,
if (event->state < PERF_EVENT_STATE_INACTIVE)
return;
- update_context_time(ctx);
- update_cgrp_time_from_event(event);
- update_group_times(event);
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+ update_cgrp_time_from_event(event);
+ }
+
if (event == event->group_leader)
group_sched_out(event, cpuctx, ctx);
else
event_sched_out(event, cpuctx, ctx);
- event->state = PERF_EVENT_STATE_OFF;
+
+ perf_event_set_state(event, PERF_EVENT_STATE_OFF);
}
/*
@@ -2021,8 +2000,7 @@ void perf_event_disable_inatomic(struct perf_event *event)
}
static void perf_set_shadow_time(struct perf_event *event,
- struct perf_event_context *ctx,
- u64 tstamp)
+ struct perf_event_context *ctx)
{
/*
* use the correct time source for the time snapshot
@@ -2050,9 +2028,9 @@ static void perf_set_shadow_time(struct perf_event *event,
* is cleaner and simpler to understand.
*/
if (is_cgroup_event(event))</