diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 13:37:52 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 13:37:52 -0800 |
| commit | 3e2014637c50e5d6a77cd63d5db6c209fe29d1b1 (patch) | |
| tree | a672ed603262aeddda4490056b27b09791d0cbbb | |
| parent | f2be8bd52e7410c70145f73511a2e80f4797e1a5 (diff) | |
| parent | 765cc3a4b224e22bf524fabe40284a524f37cdd0 (diff) | |
| download | linux-3e2014637c50e5d6a77cd63d5db6c209fe29d1b1.tar.gz linux-3e2014637c50e5d6a77cd63d5db6c209fe29d1b1.tar.bz2 linux-3e2014637c50e5d6a77cd63d5db6c209fe29d1b1.zip | |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main updates in this cycle were:
- Group balancing enhancements and cleanups (Brendan Jackman)
- Move CPU isolation related functionality into its separate
kernel/sched/isolation.c file, with related 'housekeeping_*()'
namespace and nomenclature et al. (Frederic Weisbecker)
- Improve the interactive/cpu-intense fairness calculation (Josef
Bacik)
- Improve the PELT code and related cleanups (Peter Zijlstra)
- Improve the logic of pick_next_task_fair() (Uladzislau Rezki)
- Improve the RT IPI based balancing logic (Steven Rostedt)
- Various micro-optimizations:
- better !CONFIG_SCHED_DEBUG optimizations (Patrick Bellasi)
- better idle loop (Cheng Jian)
- ... plus misc fixes, cleanups and updates"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
sched/core: Optimize sched_feat() for !CONFIG_SCHED_DEBUG builds
sched/sysctl: Fix attributes of some extern declarations
sched/isolation: Document isolcpus= boot parameter flags, mark it deprecated
sched/isolation: Add basic isolcpus flags
sched/isolation: Move isolcpus= handling to the housekeeping code
sched/isolation: Handle the nohz_full= parameter
sched/isolation: Introduce housekeeping flags
sched/isolation: Split out new CONFIG_CPU_ISOLATION=y config from CONFIG_NO_HZ_FULL
sched/isolation: Rename is_housekeeping_cpu() to housekeeping_cpu()
sched/isolation: Use its own static key
sched/isolation: Make the housekeeping cpumask private
sched/isolation: Provide a dynamic off-case to housekeeping_any_cpu()
sched/isolation, watchdog: Use housekeeping_cpumask() instead of ad-hoc version
sched/isolation: Move housekeeping related code to its own file
sched/idle: Micro-optimize the idle loop
sched/isolcpus: Fix "isolcpus=" boot parameter handling when !CONFIG_CPUMASK_OFFSTACK
x86/tsc: Append the 'tsc=' description for the 'tsc=unstable' boot parameter
sched/rt: Simplify the IPI based RT balancing logic
block/ioprio: Use a helper to check for RT prio
sched/rt: Add a helper to test for a RT task
...
31 files changed, 1270 insertions, 774 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 116e798b61e6..38ed8787261b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1730,20 +1730,33 @@ isapnp= [ISAPNP] Format: <RDP>,<reset>,<pci_scan>,<verbosity> - isolcpus= [KNL,SMP] Isolate CPUs from the general scheduler. - The argument is a cpu list, as described above. + isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. + [Deprecated - use cpusets instead] + Format: [flag-list,]<cpu-list> + + Specify one or more CPUs to isolate from disturbances + specified in the flag list (default: domain): + + nohz + Disable the tick when a single task runs. + domain + Isolate from the general SMP balancing and scheduling + algorithms. Note that performing domain isolation this way + is irreversible: it's not possible to bring back a CPU to + the domains once isolated through isolcpus. It's strongly + advised to use cpusets instead to disable scheduler load + balancing through the "cpuset.sched_load_balance" file. + It offers a much more flexible interface where CPUs can + move in and out of an isolated set anytime. + + You can move a process onto or off an "isolated" CPU via + the CPU affinity syscalls or cpuset. + <cpu number> begins at 0 and the maximum value is + "number of CPUs in system - 1". + + The format of <cpu-list> is described above. - This option can be used to specify one or more CPUs - to isolate from the general SMP balancing and scheduling - algorithms. You can move a process onto or off an - "isolated" CPU via the CPU affinity syscalls or cpuset. - <cpu number> begins at 0 and the maximum value is - "number of CPUs in system - 1". - This option is the preferred way to isolate CPUs. The - alternative -- manually setting the CPU mask of all - tasks in the system -- can cause problems and - suboptimal load balancer performance. iucv= [HW,NET] @@ -4209,6 +4222,9 @@ Used to run time disable IRQ_TIME_ACCOUNTING on any platforms where RDTSC is slow and this accounting can add overhead. + [x86] unstable: mark the TSC clocksource as unstable, this + marks the TSC unconditionally unstable at bootup and + avoids any further wobbles once the TSC watchdog notices. turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b4d817..a73ab95558f5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -18,6 +18,7 @@ #include <linux/cpufeature.h> #include <linux/tick.h> #include <linux/pm_qos.h> +#include <linux/sched/isolation.h> #include "base.h" @@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { int n = 0, len = PAGE_SIZE-2; + cpumask_var_t isolated; - n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map)); + if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) + return -ENOMEM; + + cpumask_andnot(isolated, cpu_possible_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); + + free_cpumask_var(isolated); return n; } diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index c00102b8145a..b3e5816a4678 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -40,7 +40,7 @@ #include <linux/tcp.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> -#include <linux/tick.h> +#include <linux/sched/isolation.h> #include <asm/checksum.h> #include <asm/homecache.h> @@ -2270,8 +2270,8 @@ static int __init tile_net_init_module(void) tile_net_dev_init(name, mac); if (!network_cpus_init()) - cpumask_and(&network_cpus_map, housekeeping_cpumask(), - cpu_online_mask); + cpumask_and(&network_cpus_map, + housekeeping_cpumask(HK_FLAG_MISC), cpu_online_mask); return 0; } diff --git a/fs/proc/array.c b/fs/proc/array.c index d82549e80402..6f6fc1672ad1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -138,7 +138,7 @@ static const char * const task_state_array[] = { static inline const char *get_task_state(struct task_struct *tsk) { BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array)); - return task_state_array[__get_task_state(tsk)]; + return task_state_array[task_state_index(tsk)]; } static inline int get_task_umask(struct task_struct *tsk) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8d3125c493b2..75b565194437 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -131,6 +131,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return 0; +} + /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { @@ -179,6 +184,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_last - get the last CPU in a cpumask + * @srcp: - the cpumask pointer + * + * Returns >= nr_cpumask_bits if no CPUs set. + */ +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + unsigned int cpumask_next(int n, const struct cpumask *srcp); /** diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 2cdd74809899..627efac73e6d 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -3,6 +3,7 @@ #define IOPRIO_H #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/iocontext.h> /* @@ -63,7 +64,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR) + else if (task_is_realtime(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/sched.h b/include/linux/sched.h index fdf74f27acf1..a5dc7c98b0a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -166,8 +166,6 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX @@ -332,9 +330,11 @@ struct load_weight { struct sched_avg { u64 last_update_time; u64 load_sum; + u64 runnable_load_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; + unsigned long runnable_load_avg; unsigned long util_avg; }; @@ -377,6 +377,7 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; + unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; @@ -472,10 +473,10 @@ struct sched_dl_entity { * conditions between the inactive timer handler and the wakeup * code. */ - int dl_throttled; - int dl_boosted; - int dl_yielded; - int dl_non_contending; + int dl_throttled : 1; + int dl_boosted : 1; + int dl_yielded : 1; + int dl_non_contending : 1; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -1246,7 +1247,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int __get_task_state(struct task_struct *tsk) +static inline unsigned int task_state_index(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; @@ -1259,7 +1260,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) return fls(state); } -static inline char __task_state_to_char(unsigned int state) +static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1270,7 +1271,7 @@ static inline char __task_state_to_char(unsigned int state) static inline char task_state_to_char(struct task_struct *tsk) { - return __task_state_to_char(__get_task_state(tsk)); + return task_index_to_char(task_state_index(tsk)); } /** diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h new file mode 100644 index 000000000000..d849431c8060 --- /dev/null +++ b/include/linux/sched/isolation.h @@ -0,0 +1,51 @@ +#ifndef _LINUX_SCHED_ISOLATION_H +#define _LINUX_SCHED_ISOLATION_H + +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/tick.h> + +enum hk_flags { + HK_FLAG_TIMER = 1, + HK_FLAG_RCU = (1 << 1), + HK_FLAG_MISC = (1 << 2), + HK_FLAG_SCHED = (1 << 3), + HK_FLAG_TICK = (1 << 4), + HK_FLAG_DOMAIN = (1 << 5), +}; + +#ifdef CONFIG_CPU_ISOLATION +DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); +extern int housekeeping_any_cpu(enum hk_flags flags); +extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); +extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); +extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); +extern void __init housekeeping_init(void); + +#else + +static inline int housekeeping_any_cpu(enum hk_flags flags) +{ + return smp_processor_id(); +} + +static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) +{ + return cpu_possible_mask; +} + +static inline void housekeeping_affine(struct task_struct *t, + enum hk_flags flags) { } +static inline void housekeeping_init(void) { } +#endif /* CONFIG_CPU_ISOLATION */ + +static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) +{ +#ifdef CONFIG_CPU_ISOLATION + if (static_branch_unlikely(&housekeeping_overriden)) + return housekeeping_test_cpu(cpu, flags); +#endif + return true; +} + +#endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index db865ed25ef3..e5af028c08b4 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,6 +18,17 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } +static inline bool task_is_realtime(struct task_struct *tsk) +{ + int policy = tsk->policy; + + if (policy == SCHED_FIFO || policy == SCHED_RR) + return true; + if (policy == SCHED_DEADLINE) + return true; + return false; +} + #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d6a18a3839cc..1c1a1512ec55 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -38,9 +38,9 @@ extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; +extern __read_mostly unsigned int sysctl_sched_migration_cost; +extern __read_mostly unsigned int sysctl_sched_nr_migrate; +extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, diff --git a/include/linux/tick.h b/include/linux/tick.h index cf413b344ddb..f442d1a42025 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -138,7 +138,6 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; extern cpumask_var_t tick_nohz_full_mask; -extern cpumask_var_t housekeeping_mask; static inline bool tick_nohz_full_enabled(void) { @@ -162,11 +161,6 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -235,11 +229,8 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); +extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else -static inline int housekeeping_any_cpu(void) -{ - return smp_processor_id(); -} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } @@ -259,35 +250,9 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } +static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif -static inline const struct cpumask *housekeeping_cpumask(void) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif - return cpu_possible_mask; -} - -static inline bool is_housekeeping_cpu(int cpu) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); -#endif - return true; -} - -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - static inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index da10aa21bebc..306b31de5194 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -118,7 +118,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * if (preempt) return TASK_STATE_MAX; - return __get_task_state(p); + return task_state_index(p); } #endif /* CREATE_TRACE_POINTS */ diff --git a/init/Kconfig b/init/Kconfig index 3c1faaa2af4a..c1fd2863d4ba 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -472,6 +472,13 @@ config TASK_IO_ACCOUNTING endmenu # "CPU/Task time and stats accounting" +config CPU_ISOLATION + bool "CPU isolation" + help + Make sure that CPUs running critical tasks are not disturbed by + any source of "noise" such as unbound workqueues, timers, kthreads... + Unbound jobs get offloaded to housekeeping CPUs. + source "kernel/rcu/Kconfig" config BUILD_BIN2C diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..4610c99ae306 100644 --- a/init/main.c +++ b/init/main.c @@ -46,6 +46,7 @@ #include <linux/cgroup.h> #include <linux/efi.h> #include <linux/tick.h> +#include <linux/sched/isolation.h> #include <linux/interrupt.h> #include <linux/taskstats_kern.h> #include <linux/delayacct.h> @@ -606,6 +607,7 @@ asmlinkage __visible void __init start_kernel(void) early_irq_init(); init_IRQ(); tick_init(); + housekeeping_init(); rcu_init_nohz(); init_timers(); hrtimers_init(); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4657e2924ecb..f7efa7b4d825 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -57,7 +57,7 @@ #include <linux/backing-dev.h> #include <linux/sort.h> #include <linux/oom.h> - +#include <linux/sched/isolation.h> #include <linux/uaccess.h> #include <linux/atomic.h> #include <linux/mutex.h> @@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains, int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ - cpumask_var_t non_isolated_cpus; /* load balanced CPUs */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains, dattr = NULL; csa = NULL; - if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL)) - goto done; - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { ndoms = 1; @@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains, update_domain_attr_tree(dattr, &top_cpuset); } cpumask_and(doms[0], top_cpuset.effective_cpus, - non_isolated_cpus); + housekeeping_cpumask(HK_FLAG_DOMAIN)); goto done; } @@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains, */ if (!cpumask_empty(cp->cpus_allowed) && !(is_sched_load_balance(cp) && - cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) + cpumask_intersects(cp->cpus_allowed, + housekeeping_cpumask(HK_FLAG_DOMAIN)))) continue; if (is_sched_load_balance(cp)) @@ -789,7 +785,7 @@ restart: if (apn == b->pn) { cpumask_or(dp, dp, b->effective_cpus); - cpumask_and(dp, dp, non_isolated_cpus); + cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN)); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -802,7 +798,6 @@ restart: BUG_ON(nslot != ndoms); done: - free_cpumask_var(non_isolated_cpus); kfree(csa); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index dd4d0d390e5b..910405dc6e5c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -29,6 +29,7 @@ #include <linux/oom.h> #include <linux/sched/debug.h> #include <linux/smpboot.h> +#include <linux/sched/isolation.h> #include <uapi/linux/sched/types.h> #include "../time/tick-internal.h" @@ -2587,7 +2588,7 @@ static void rcu_bind_gp_kthread(void) if (!tick_nohz_full_enabled()) return; - housekeeping_affine(current); + housekeeping_affine(current, HK_FLAG_RCU); } /* Record the current task on dyntick-idle entry. */ diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 27694561f769..fbd56d6e575b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -51,6 +51,7 @@ #include <linux/kthread.h> #include <linux/tick.h> #include <linux/rcupdate_wait.h> +#include <linux/sched/isolation.h> #define CREATE_TRACE_POINTS @@ -714,7 +715,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) LIST_HEAD(rcu_tasks_holdouts); /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ - housekeeping_affine(current); + housekeeping_affine(current, HK_FLAG_RCU); /* * Each pass through the following loop makes one check for diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index a9ee16bbc693..e2f9d4feff40 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -27,3 +27,4 @@ obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o +obj-$(CONFIG_CPU_ISOLATION) += isolation.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9446b2e5eac5..5b82a0073532 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -26,6 +26,7 @@ #include <linux/profile.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/sched/isolation.h> #include <asm/switch_to.h> #include <asm/tlb.h> @@ -42,18 +43,21 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); +#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) /* * Debugging: various feature bits + * + * If SCHED_DEBUG is disabled, each compilation unit has its own copy of + * sysctl_sched_features, defined in sched.h, to allow constants propagation + * at compile time and compiler optimization based on features default. */ - #define SCHED_FEAT(name, enabled) \ (1UL << __SCHED_FEAT_##name) * enabled | - const_debug unsigned int sysctl_sched_features = #include "features.h" 0; - #undef SCHED_FEAT +#endif /* * Number of tasks to iterate in a single balance run. @@ -83,9 +87,6 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* CPUs with isolated domains */ -cpumask_var_t cpu_isolated_map; - /* * __task_rq_lock - lock the rq @p resides on. */ @@ -525,7 +526,7 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) + if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) return cpu; rcu_read_lock(); @@ -534,15 +535,15 @@ int get_nohz_timer_target(void) if (cpu == i) continue; - if (!idle_cpu(i) && is_housekeeping_cpu(i)) { + if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { cpu = i; goto unlock; } } } - if (!is_housekeeping_cpu(cpu)) - cpu = housekeeping_any_cpu(); + if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) + cpu = housekeeping_any_cpu(HK_FLAG_TIMER); unlock: rcu_read_unlock(); return cpu; @@ -732,7 +733,7 @@ int tg_nop(struct task_group *tg, void *data) } #endif -static void set_load_weight(struct task_struct *p) +static void set_load_weight(struct task_struct *p, bool update_load) { int prio = p->static_prio - MAX_RT_PRIO; struct load_weight *load = &p->se.load; @@ -746,8 +747,16 @@ static void set_load_weight(struct task_struct *p) return; } - load->weight = scale_load(sched_prio_to_weight[prio]); - load->inv_weight = sched_prio_to_wmult[prio]; + /* + * SCHED_OTHER tasks have to update their load when changing their + * weight + */ + if (update_load && p->sched_class == &fair_sched_class) { + reweight_task(p, prio); + } else { + load->weight = scale_load(sched_prio_to_weight[prio]); + load->inv_weight = sched_prio_to_wmult[prio]; + } } static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) @@ -2357,7 +2366,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->static_prio = NICE_TO_PRIO(0); p->prio = p->normal_prio = __normal_prio(p); - set_load_weight(p); + set_load_weight(p, false); /* * We don't need the reset flag anymore after the fork. It has @@ -3804,7 +3813,7 @@ void set_user_nice(struct task_struct *p, long nice) put_prev_task(rq, p); p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p); + set_load_weight(p, true); old_prio = p->prio; p->prio = effective_prio(p); delta = p->prio - old_prio; @@ -3961,7 +3970,7 @@ static void __setscheduler_params(struct task_struct *p, */ p->rt_priority = attr->sched_priority; p->normal_prio = normal_prio(p); - set_load_weight(p); + set_load_weight(p, true); } /* Actually do priority change: must hold pi & rq lock. */ @@ -5727,10 +5736,6 @@ static inline void sched_init_smt(void) { } void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); - sched_init_numa(); /* @@ -5740,16 +5745,12 @@ void __init sched_init_smp(void) */ mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); /* Move init over to a non-iso |
