summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt7
-rw-r--r--Documentation/scheduler/schedutil.txt169
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/preempt.h48
-rw-r--r--arch/x86/include/asm/static_call.h7
-rw-r--r--arch/x86/kernel/static_call.c17
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--drivers/thermal/cpufreq_cooling.c69
-rw-r--r--include/asm-generic/vmlinux.lds.h5
-rw-r--r--include/linux/cgroup.h4
-rw-r--r--include/linux/entry-common.h4
-rw-r--r--include/linux/entry-kvm.h14
-rw-r--r--include/linux/kernel.h23
-rw-r--r--include/linux/rbtree.h206
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--include/linux/sched.h34
-rw-r--r--include/linux/sched/prio.h18
-rw-r--r--include/linux/static_call.h77
-rw-r--r--include/linux/static_call_types.h50
-rw-r--r--include/linux/topology.h1
-rw-r--r--init/Kconfig2
-rw-r--r--kernel/Kconfig.preempt19
-rw-r--r--kernel/entry/common.c17
-rw-r--r--kernel/events/core.c195
-rw-r--r--kernel/events/uprobes.c80
-rw-r--r--kernel/locking/rtmutex.c54
-rw-r--r--kernel/rcu/tree.c53
-rw-r--r--kernel/rcu/tree.h2
-rw-r--r--kernel/rcu/tree_plugin.h31
-rw-r--r--kernel/sched/core.c357
-rw-r--r--kernel/sched/cpufreq_schedutil.c108
-rw-r--r--kernel/sched/deadline.c94
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/fair.c322
-rw-r--r--kernel/sched/features.h2
-rw-r--r--kernel/sched/idle.c1
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h51
-rw-r--r--kernel/sched/topology.c99
-rw-r--r--kernel/smp.c4
-rw-r--r--kernel/static_call.c60
-rw-r--r--lib/timerqueue.c28
-rw-r--r--tools/include/linux/rbtree.h192
-rw-r--r--tools/include/linux/static_call_types.h50
-rw-r--r--tools/objtool/check.c17
-rw-r--r--tools/objtool/elf.c73
48 files changed, 1898 insertions, 785 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 36d6ce7cc886..b93aaa374266 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3903,6 +3903,13 @@
Format: {"off"}
Disable Hardware Transactional Memory
+ preempt= [KNL]
+ Select preemption mode if you have CONFIG_PREEMPT_DYNAMIC
+ none - Limited to cond_resched() calls
+ voluntary - Limited to cond_resched() and might_sleep() calls
+ full - Any section that isn't explicitly preempt disabled
+ can be preempted anytime.
+
print-fatal-signals=
[KNL] debug: print fatal signals
diff --git a/Documentation/scheduler/schedutil.txt b/Documentation/scheduler/schedutil.txt
new file mode 100644
index 000000000000..78f6b91e2291
--- /dev/null
+++ b/Documentation/scheduler/schedutil.txt
@@ -0,0 +1,169 @@
+
+
+NOTE; all this assumes a linear relation between frequency and work capacity,
+we know this is flawed, but it is the best workable approximation.
+
+
+PELT (Per Entity Load Tracking)
+-------------------------------
+
+With PELT we track some metrics across the various scheduler entities, from
+individual tasks to task-group slices to CPU runqueues. As the basis for this
+we use an Exponentially Weighted Moving Average (EWMA), each period (1024us)
+is decayed such that y^32 = 0.5. That is, the most recent 32ms contribute
+half, while the rest of history contribute the other half.
+
+Specifically:
+
+ ewma_sum(u) := u_0 + u_1*y + u_2*y^2 + ...
+
+ ewma(u) = ewma_sum(u) / ewma_sum(1)
+
+Since this is essentially a progression of an infinite geometric series, the
+results are composable, that is ewma(A) + ewma(B) = ewma(A+B). This property
+is key, since it gives the ability to recompose the averages when tasks move
+around.
+
+Note that blocked tasks still contribute to the aggregates (task-group slices
+and CPU runqueues), which reflects their expected contribution when they
+resume running.
+
+Using this we track 2 key metrics: 'running' and 'runnable'. 'Running'
+reflects the time an entity spends on the CPU, while 'runnable' reflects the
+time an entity spends on the runqueue. When there is only a single task these
+two metrics are the same, but once there is contention for the CPU 'running'
+will decrease to reflect the fraction of time each task spends on the CPU
+while 'runnable' will increase to reflect the amount of contention.
+
+For more detail see: kernel/sched/pelt.c
+
+
+Frequency- / CPU Invariance
+---------------------------
+
+Because consuming the CPU for 50% at 1GHz is not the same as consuming the CPU
+for 50% at 2GHz, nor is running 50% on a LITTLE CPU the same as running 50% on
+a big CPU, we allow architectures to scale the time delta with two ratios, one
+Dynamic Voltage and Frequency Scaling (DVFS) ratio and one microarch ratio.
+
+For simple DVFS architectures (where software is in full control) we trivially
+compute the ratio as:
+
+ f_cur
+ r_dvfs := -----
+ f_max
+
+For more dynamic systems where the hardware is in control of DVFS we use
+hardware counters (Intel APERF/MPERF, ARMv8.4-AMU) to provide us this ratio.
+For Intel specifically, we use:
+
+ APERF
+ f_cur := ----- * P0
+ MPERF
+
+ 4C-turbo; if available and turbo enabled
+ f_max := { 1C-turbo; if turbo enabled
+ P0; otherwise
+
+ f_cur
+ r_dvfs := min( 1, ----- )
+ f_max
+
+We pick 4C turbo over 1C turbo to make it slightly more sustainable.
+
+r_cpu is determined as the ratio of highest performance level of the current
+CPU vs the highest performance level of any other CPU in the system.
+
+ r_tot = r_dvfs * r_cpu
+
+The result is that the above 'running' and 'runnable' metrics become invariant
+of DVFS and CPU type. IOW. we can transfer and compare them between CPUs.
+
+For more detail see:
+
+ - kernel/sched/pelt.h:update_rq_clock_pelt()
+ - arch/x86/kernel/smpboot.c:"APERF/MPERF frequency ratio computation."
+ - Documentation/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
+
+
+UTIL_EST / UTIL_EST_FASTUP
+--------------------------
+
+Because periodic tasks have their averages decayed while they sleep, even
+though when running their expected utilization will be the same, they suffer a
+(DVFS) ramp-up after they are running again.
+
+To alleviate this (a default enabled option) UTIL_EST drives an Infinite
+Impulse Response (IIR) EWMA with the 'running' value on dequeue -- when it is
+highest. A further default enabled option UTIL_EST_FASTUP modifies the IIR
+filter to instantly increase and only decay on decrease.
+
+A further runqueue wide sum (of runnable tasks) is maintained of:
+
+ util_est := \Sum_t max( t_running, t_util_est_ewma )
+
+For more detail see: kernel/sched/fair.c:util_est_dequeue()
+
+
+UCLAMP
+------
+
+It is possible to set effective u_min and u_max clamps on each CFS or RT task;
+the runqueue keeps an max aggregate of these clamps for all running tasks.
+
+For more detail see: include/uapi/linux/sched/types.h
+
+
+Schedutil / DVFS
+----------------
+
+Every time the scheduler load tracking is updated (task wakeup, task
+migration, time progression) we call out to schedutil to update the hardware
+DVFS state.
+
+The basis is the CPU runqueue's 'running' metric, which per the above it is
+the frequency invariant utilization estimate of the CPU. From this we compute
+a desired frequency like:
+
+ max( running, util_est ); if UTIL_EST
+ u_cfs := { running; otherwise
+
+ clamp( u_cfs + u_rt , u_min, u_max ); if UCLAMP_TASK
+ u_clamp := { u_cfs + u_rt; otherwise
+
+ u := u_clamp + u_irq + u_dl; [approx. see source for more detail]
+
+ f_des := min( f_max, 1.25 u * f_max )
+
+XXX IO-wait; when the update is due to a task wakeup from IO-completion we
+boost 'u' above.
+
+This frequency is then used to select a P-state/OPP or directly munged into a
+CPPC style request to the hardware.
+
+XXX: deadline tasks (Sporadic Task Model) allows us to calculate a hard f_min
+required to satisfy the workload.
+
+Because these callbacks are directly from the scheduler, the DVFS hardware
+interaction should be 'fast' and non-blocking. Schedutil supports
+rate-limiting DVFS requests for when hardware interaction is slow and
+expensive, this reduces effectiveness.
+
+For more information see: kernel/sched/cpufreq_schedutil.c
+
+
+NOTES
+-----
+
+ - On low-load scenarios, where DVFS is most relevant, the 'running' numbers
+ will closely reflect utilization.
+
+ - In saturated scenarios task movement will cause some transient dips,
+ suppose we have a CPU saturated with 4 tasks, then when we migrate a task
+ to an idle CPU, the old CPU will have a 'running' value of 0.75 while the
+ new CPU will gain 0.25. This is inevitable and time progression will
+ correct this. XXX do we still guarantee f_max due to no idle-time?
+
+ - Much of the above is about avoiding DVFS dips, and independent DVFS domains
+ having to re-learn / ramp-up when load shifts.
+
diff --git a/arch/Kconfig b/arch/Kconfig
index 87608c2fa027..4790a5f23d9f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1058,6 +1058,15 @@ config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
+config HAVE_PREEMPT_DYNAMIC
+ bool
+ depends on HAVE_STATIC_CALL
+ depends on GENERIC_ENTRY
+ help
+ Select this if the architecture support boot time preempt setting
+ on top of static calls. It is strongly advised to support inline
+ static call to avoid any overhead.
+
config ARCH_WANT_LD_ORPHAN_WARN
bool
help
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 9d06fffb1526..369206489895 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -72,7 +72,7 @@ static struct timer_list spuloadavg_timer;
#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))
#define SCALE_PRIO(x, prio) \
- max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+ max(x * (MAX_PRIO - prio) / (NICE_WIDTH / 2), MIN_SPU_TIMESLICE)
/*
* scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b934a591df2..595193bc2d31 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -224,6 +224,7 @@ config X86
select HAVE_STACK_VALIDATION if X86_64
select HAVE_STATIC_CALL
select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
+ select HAVE_PREEMPT_DYNAMIC
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 69485ca13665..f8cb8af4de5c 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -5,6 +5,7 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
#include <linux/thread_info.h>
+#include <linux/static_call_types.h>
DECLARE_PER_CPU(int, __preempt_count);
@@ -103,16 +104,45 @@ static __always_inline bool should_resched(int preempt_offset)
}
#ifdef CONFIG_PREEMPTION
- extern asmlinkage void preempt_schedule_thunk(void);
-# define __preempt_schedule() \
- asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT)
- extern asmlinkage void preempt_schedule(void);
- extern asmlinkage void preempt_schedule_notrace_thunk(void);
-# define __preempt_schedule_notrace() \
- asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT)
+extern asmlinkage void preempt_schedule(void);
+extern asmlinkage void preempt_schedule_thunk(void);
- extern asmlinkage void preempt_schedule_notrace(void);
-#endif
+#define __preempt_schedule_func preempt_schedule_thunk
+
+extern asmlinkage void preempt_schedule_notrace(void);
+extern asmlinkage void preempt_schedule_notrace_thunk(void);
+
+#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
+
+#define __preempt_schedule() \
+do { \
+ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \
+ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
+} while (0)
+
+DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
+
+#define __preempt_schedule_notrace() \
+do { \
+ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \
+ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \
+} while (0)
+
+#else /* PREEMPT_DYNAMIC */
+
+#define __preempt_schedule() \
+ asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT);
+
+#define __preempt_schedule_notrace() \
+ asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT);
+
+#endif /* PREEMPT_DYNAMIC */
+
+#endif /* PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index c37f11999d0c..cbb67b6030f9 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -37,4 +37,11 @@
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
+
+#define ARCH_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ca9a380d9c0b..9442c4136c38 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -11,14 +11,26 @@ enum insn_type {
RET = 3, /* tramp / site cond-tail-call */
};
+/*
+ * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
+ * The REX.W cancels the effect of any data16.
+ */
+static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
+
static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
{
+ const void *emulate = NULL;
int size = CALL_INSN_SIZE;
const void *code;
switch (type) {
case CALL:
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+ if (func == &__static_call_return0) {
+ emulate = code;
+ code = &xor5rax;
+ }
+
break;
case NOP:
@@ -41,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
if (unlikely(system_state == SYSTEM_BOOTING))
return text_poke_early(insn, code, size);
- text_poke_bp(insn, code, size, NULL);
+ text_poke_bp(insn, code, size, emulate);
}
static void __static_call_validate(void *insn, bool tail)
@@ -54,7 +66,8 @@ static void __static_call_validate(void *insn, bool tail)
return;
} else {
if (opcode == CALL_INSN_OPCODE ||
- !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5))
+ !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
+ !memcmp(insn, xor5rax, 5))
return;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b404e4d7dd8..b967c1c774a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1782,6 +1782,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
+ xfer_to_guest_mode_prepare();
return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending();
}
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 612f063c1cfc..f5af2571f9b7 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -76,7 +76,9 @@ struct cpufreq_cooling_device {
struct em_perf_domain *em;
struct cpufreq_policy *policy;
struct list_head node;
+#ifndef CONFIG_SMP
struct time_in_idle *idle_time;
+#endif
struct freq_qos_request qos_req;
};
@@ -132,14 +134,25 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
}
/**
- * get_load() - get load for a cpu since last updated
- * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
- * @cpu: cpu number
- * @cpu_idx: index of the cpu in time_in_idle*
+ * get_load() - get load for a cpu
+ * @cpufreq_cdev: struct cpufreq_cooling_device for the cpu
+ * @cpu: cpu number
+ * @cpu_idx: index of the cpu in time_in_idle array
*
* Return: The average load of cpu @cpu in percentage since this
* function was last called.
*/
+#ifdef CONFIG_SMP
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
+ int cpu_idx)
+{
+ unsigned long max = arch_scale_cpu_capacity(cpu);
+ unsigned long util;
+
+ util = sched_cpu_util(cpu, max);
+ return (util * 100) / max;
+}
+#else /* !CONFIG_SMP */
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
int cpu_idx)
{
@@ -161,6 +174,7 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
return load;
}
+#endif /* CONFIG_SMP */
/**
* get_dynamic_power() - calculate the dynamic power
@@ -346,6 +360,36 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
}
#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
+#ifdef CONFIG_SMP
+static inline int allocate_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
+{
+ return 0;
+}
+
+static inline void free_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
+{
+}
+#else
+static int allocate_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
+{
+ unsigned int num_cpus = cpumask_weight(cpufreq_cdev->policy->related_cpus);
+
+ cpufreq_cdev->idle_time = kcalloc(num_cpus,
+ sizeof(*cpufreq_cdev->idle_time),
+ GFP_KERNEL);
+ if (!cpufreq_cdev->idle_time)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void free_idle_time(struct cpufreq_cooling_device *cpufreq_cdev)
+{
+ kfree(cpufreq_cdev->idle_time);
+ cpufreq_cdev->idle_time = NULL;
+}
+#endif /* CONFIG_SMP */
+
static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
unsigned long state)
{
@@ -485,7 +529,7 @@ __cpufreq_cooling_register(struct device_node *np,
struct thermal_cooling_device *cdev;
struct cpufreq_cooling_device *cpufreq_cdev;
char dev_name[THERMAL_NAME_LENGTH];
- unsigned int i, num_cpus;
+ unsigned int i;
struct device *dev;
int ret;
struct thermal_cooling_device_ops *cooling_ops;
@@ -496,7 +540,6 @@ __cpufreq_cooling_register(struct device_node *np,
return ERR_PTR(-ENODEV);
}
-
if (IS_ERR_OR_NULL(policy)) {
pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
return ERR_PTR(-EINVAL);
@@ -514,12 +557,10 @@ __cpufreq_cooling_register(struct device_node *np,
return ERR_PTR(-ENOMEM);
cpufreq_cdev->policy = policy;
- num_cpus = cpumask_weight(policy->related_cpus);
- cpufreq_cdev->idle_time = kcalloc(num_cpus,
- sizeof(*cpufreq_cdev->idle_time),
- GFP_KERNEL);
- if (!cpufreq_cdev->idle_time) {
- cdev = ERR_PTR(-ENOMEM);
+
+ ret = allocate_idle_time(cpufreq_cdev);
+ if (ret) {
+ cdev = ERR_PTR(ret);
goto free_cdev;
}
@@ -579,7 +620,7 @@ remove_qos_req:
remove_ida:
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_idle_time:
- kfree(cpufreq_cdev->idle_time);
+ free_idle_time(cpufreq_cdev);
free_cdev:
kfree(cpufreq_cdev);
return cdev;
@@ -672,7 +713,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
thermal_cooling_device_unregister(cdev);
freq_qos_remove_request(&cpufreq_cdev->qos_req);
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
- kfree(cpufreq_cdev->idle_time);
+ free_idle_time(cpufreq_cdev);
kfree(cpufreq_cdev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 52dbd58f6810..a54e08d77789 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -403,7 +403,10 @@
. = ALIGN(8); \
__start_static_call_sites = .; \
KEEP(*(.static_call_sites)) \
- __stop_static_call_sites = .;
+ __stop_static_call_sites = .; \
+ __start_static_call_tramp_key = .; \
+ KEEP(*(.static_call_tramp_key)) \
+ __stop_static_call_tramp_key = .;
/*
* Allow architectures to handle ro_after_init data on their
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 451c2d26a5db..4f2f79de083e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -307,7 +307,7 @@ void css_task_iter_end(struct css_task_iter *it);
* Inline functions.
*/
-static inline u64 cgroup_id(struct cgroup *cgrp)
+static inline u64 cgroup_id(const struct cgroup *cgrp)
{
return cgrp->kn->id;
}
@@ -701,7 +701,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
struct cgroup_subsys_state;
struct cgroup;
-static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; }
+static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index a104b298019a..883acef895bc 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -2,6 +2,7 @@
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H
+#include <linux/static_call_types.h>
#include <linux/tracehook.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
@@ -454,6 +455,9 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
* Conditional reschedule with additional sanity checks.
*/
void irqentry_exit_cond_resched(void);
+#ifdef CONFIG_PREEMPT_DYNAMIC
+DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
+#endif
/**
* irqentry_exit - Handle return from exception that used irqentry_enter()
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 9b93f8584ff7..8b2b1d68b954 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -47,6 +47,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu,
int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
/**
+ * xfer_to_guest_mode_prepare - Perform last minute preparation work that
+ * need to be handled while IRQs are disabled
+ * upon entering to guest.
+ *
+ * Has to be invoked with interrupts disabled before the last call
+ * to xfer_to_guest_mode_work_pending().
+ */
+static inline void xfer_to_guest_mode_prepare(void)
+{
+ lockdep_assert_irqs_disabled();
+ rcu_nocb_flush_deferred_wakeup();
+}
+
+/**
* __xfer_to_guest_mode_work_pending - Check if work is pending
*
* Returns: True if work pending, False otherwise.
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f7902d8c1048..5b7ed6dc99ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,7 +15,7 @@
#include <linux/typecheck.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
-
+#include <linux/static_call_types.h>
#include <asm/byteorder.h>
#include <uapi/linux/kernel.h>
@@ -81,11 +81,26 @@ struct pt_regs;
struct user;
#ifdef CONFIG_PREEMPT_VOLUNTARY
-extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+
+extern int __cond_resched(void);
+# define might_resched() __cond_resched()
+
+#elif defined(CONFIG_PREEMPT_DYNAMIC)
+
+extern int __cond_resched(void);
+
+DECLARE_STATIC_CALL(might_resched, __cond_resched);
+
+static __always_inline void might_resched(void)
+{
+ static_call_mod(might_resched)();
+}
+