#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/stop_machine.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include "cpupri.h"
#include "cpudeadline.h"
#include "cpuacct.h"
struct rq;
extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;
extern long calc_load_fold_active(struct rq *this_rq);
extern void update_cpu_load_active(struct rq *this_rq);
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
* The extra resolution improves shares distribution and load balancing of
* low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
* hierarchies, especially on larger systems. This is not a user-visible change
* and does not change the user-interface for setting shares/weights.
*
* We increase resolution only if we have enough bits to allow this increased
* resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
* when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
* increased costs.
*/
#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */
# define SCHED_LOAD_RESOLUTION 10
# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION)
# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION)
#else
# define SCHED_LOAD_RESOLUTION 0
# define scale_load(w) (w)
# define scale_load_down(w) (w)
#endif
#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION)
#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
#define NICE_0_LOAD SCHED_LOAD_SCALE
#define NICE_0_SHIFT SCHED_LOAD_SHIFT
/*
* Single value that decides SCHED_DEADLINE internal math precision.
* 10 -> just above 1us
* 9 -> just above 0.5us
*/
#define DL_SCALE (10)
/*
* These are the 'tuning knobs' of the scheduler:
*/
/*
* single value that denotes runtime == period, ie unlimited time.
*/
#define RUNTIME_INF ((u64)~0ULL)
static inline int fair_policy(int policy)
{
return policy == SCHED_NORMAL || policy == SCHED_BATCH;
}
static inline int rt_policy(int policy)
{
return policy == SCHED_FIFO || policy == SCHED_RR;
}
static inline int dl_policy(int policy)
{
return policy == SCHED_DEADLINE;
}
static inline int task_has_rt_policy(struct task_struct *p)
{
return rt_policy(p->policy);
}
static inline int task_has_dl_policy(struct task_struct *p)
{
return dl_policy(p->policy);
}
static inline bool dl_time_before(u64 a, u64 b)
{
return (s64)(a - b) < 0;
}
/*
* Tells if entity @a should preempt entity @b.
*/
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
return dl_time_before(a->deadline, b->deadline);
}
/*
* This is the priority-queue data structure of the RT scheduling class:
*/
struct rt_prio_array {
DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
struct list_head queue[MAX_RT_PRIO];
};
struct rt_bandwidth {
/* nests inside the rq lock: */
raw_spinlock_t rt_runtime_lock;
ktime_t rt_period;
u64 rt_runtime;
struct hrtimer rt_period_timer;
};
/*
* To keep the bandwidth of -deadline tasks and groups under control
* we need some place where:
* - store the maximum -deadline bandwidth of the system (the group);
* - cache the fraction of that bandwidth that is currently allocated.
*
* This is all done in the data structure below. It is similar to the
* one used for RT-throttling (rt_bandwidth), with the main difference
* that, since here we are only interested in admission control, we
* do not decrease any runtime while the group "executes", neither we
* need a timer to replenish it.
*
* With respect to SMP, the bandwidth is given on a per-CPU basis,
* meaning that:
* - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
* - dl_total_bw array contains, in the i-eth element, the currently
* allocated bandwidth on the i-eth CPU.
* Moreover, groups consume bandwidth on each CPU, while tasks only
* consume bandwidth on the CPU they're running on.
* Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
* that will be shown the next time the proc or cgroup controls will
* be red. It on its turn can be changed by writing on its own
* control.
*/
struct dl_bandwidth {
raw_spinlock_t dl_runtime_lock;
u64 dl_runtime;
u64 dl_period;
};
static inline int dl_bandwidth_enabled(void)
{
return sysctl_sched_rt_runtime >= 0;
}
extern struct dl_bw *dl_bw_of(int i);
struct dl_bw {
raw_spinlock_t lock;
u64 bw, total_bw;
};
extern struct mutex sched_domains_mutex;
#ifdef CONFIG_CGROUP_SCHED
#include <linux/cgroup.h>
struct cfs_rq;
struct rt_rq;
extern struct list_head task_groups;
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
raw_spinlock_t lock;
ktime_t period;
u64 quota, runtime;
s64 hierarchal_quota;
u64 runtime_expires;
int idle, timer_active;
struct hrtimer period_timer, slack_timer;
struct list_head throttled_cfs_rq;
/* statistics */
int nr_periods, nr_throttled;
u64 throttled_time;
#endif
};
/* task group related information */
struct task_group {
struct cgroup_subsys_state css;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
unsigned long shares;
#ifdef CONFIG_SMP
atomic_long_t load_avg;
atomic_t runnable_avg;
#endif
#endif
#ifdef CONFIG_RT_GROUP_SCHED
struct sched_rt_entity **rt_se;
struct rt_rq **rt_rq;
struct rt_bandwidth rt_bandwidth;
#endif
struct rcu_head rcu;
struct list_head list;
struct task_group *parent;
struct list_head siblings;
struct list_head children;
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup *autogroup;
#endif
struct cfs_bandwidth cfs_bandwidth;
};
#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
/*
* A weight of 0 or 1 can cause arithmetics problems.
* A weight of a cfs_rq is the sum of weights of which entities
* are queued on this cfs_rq, so a weight of a entity should not be
* too large, so as the shares value of a task group.
* (The default weight is 1024 - so there's no practical
* limitation from this.)
*/
#define MIN_SHARES (1UL << 1)
#define MAX_SHARES (1UL << 18)
#endif
typedef int (*tg_visitor)(struct task_group *, void *);
extern int walk_tg_tree_from(struct task_group *from,
tg_visitor down, tg_visitor up, void *data);
/*
* Iterate the full tree, cal
|