summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/scheduler/sched-design-CFS.rst8
-rw-r--r--Documentation/scheduler/schedutil.rst7
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst8
-rw-r--r--Documentation/translations/zh_CN/scheduler/schedutil.rst7
-rw-r--r--arch/arm/include/asm/topology.h1
-rw-r--r--arch/arm64/include/asm/topology.h1
-rw-r--r--arch/arm64/kernel/topology.c26
-rw-r--r--arch/riscv/include/asm/topology.h1
-rw-r--r--drivers/acpi/cppc_acpi.c104
-rw-r--r--drivers/base/arch_topology.c56
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c139
-rw-r--r--drivers/cpufreq/cpufreq.c4
-rw-r--r--include/acpi/cppc_acpi.h2
-rw-r--r--include/linux/arch_topology.h8
-rw-r--r--include/linux/cpufreq.h1
-rw-r--r--include/linux/energy_model.h7
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--include/linux/sched.h75
-rw-r--r--include/linux/sched/topology.h8
-rw-r--r--include/trace/events/sched.h15
-rw-r--r--kernel/freezer.c1
-rw-r--r--kernel/sched/core.c140
-rw-r--r--kernel/sched/cpufreq_schedutil.c90
-rw-r--r--kernel/sched/deadline.c479
-rw-r--r--kernel/sched/debug.c18
-rw-r--r--kernel/sched/fair.c449
-rw-r--r--kernel/sched/features.h1
-rw-r--r--kernel/sched/idle.c30
-rw-r--r--kernel/sched/pelt.h4
-rw-r--r--kernel/sched/rt.c15
-rw-r--r--kernel/sched/sched.h135
-rw-r--r--kernel/sched/stop_task.c13
32 files changed, 1055 insertions, 801 deletions
diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst
index f68919800f05..6cffffe26500 100644
--- a/Documentation/scheduler/sched-design-CFS.rst
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -180,7 +180,7 @@ This is the (partial) list of the hooks:
compat_yield sysctl is turned on; in that case, it places the scheduling
entity at the right-most end of the red-black tree.
- - check_preempt_curr(...)
+ - wakeup_preempt(...)
This function checks if a task that entered the runnable state should
preempt the currently running task.
@@ -189,10 +189,10 @@ This is the (partial) list of the hooks:
This function chooses the most appropriate task eligible to run next.
- - set_curr_task(...)
+ - set_next_task(...)
- This function is called when a task changes its scheduling class or changes
- its task group.
+ This function is called when a task changes its scheduling class, changes
+ its task group or is scheduled.
- task_tick(...)
diff --git a/Documentation/scheduler/schedutil.rst b/Documentation/scheduler/schedutil.rst
index 32c7d69fc86c..803fba8fc714 100644
--- a/Documentation/scheduler/schedutil.rst
+++ b/Documentation/scheduler/schedutil.rst
@@ -90,8 +90,8 @@ For more detail see:
- Documentation/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
-UTIL_EST / UTIL_EST_FASTUP
-==========================
+UTIL_EST
+========
Because periodic tasks have their averages decayed while they sleep, even
though when running their expected utilization will be the same, they suffer a
@@ -99,8 +99,7 @@ though when running their expected utilization will be the same, they suffer a
To alleviate this (a default enabled option) UTIL_EST drives an Infinite
Impulse Response (IIR) EWMA with the 'running' value on dequeue -- when it is
-highest. A further default enabled option UTIL_EST_FASTUP modifies the IIR
-filter to instantly increase and only decay on decrease.
+highest. UTIL_EST filters to instantly increase and only decay on decrease.
A further runqueue wide sum (of runnable tasks) is maintained of:
diff --git a/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst b/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst
index 3076402406c4..abc6709ec3b2 100644
--- a/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst
+++ b/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst
@@ -80,7 +80,7 @@ p->se.vruntime。一旦p->se.vruntime变得足够大,其它的任务将成为
CFS使用纳秒粒度的计时,不依赖于任何jiffies或HZ的细节。因此CFS并不像之前的调度器那样
有“时间片”的概念,也没有任何启发式的设计。唯一可调的参数(你需要打开CONFIG_SCHED_DEBUG)是:
- /sys/kernel/debug/sched/min_granularity_ns
+ /sys/kernel/debug/sched/base_slice_ns
它可以用来将调度器从“桌面”模式(也就是低时延)调节为“服务器”(也就是高批处理)模式。
它的默认设置是适合桌面的工作负载。SCHED_BATCH也被CFS调度器模块处理。
@@ -147,7 +147,7 @@ array)。
这个函数的行为基本上是出队,紧接着入队,除非compat_yield sysctl被开启。在那种情况下,
它将调度实体放在红黑树的最右端。
- - check_preempt_curr(...)
+ - wakeup_preempt(...)
这个函数检查进入可运行状态的任务能否抢占当前正在运行的任务。
@@ -155,9 +155,9 @@ array)。
这个函数选择接下来最适合运行的任务。
- - set_curr_task(...)
+ - set_next_task(...)
- 这个函数在任务改变调度类或改变任务组时被调用。
+ 这个函数在任务改变调度类,改变任务组时,或者任务被调度时被调用。
- task_tick(...)
diff --git a/Documentation/translations/zh_CN/scheduler/schedutil.rst b/Documentation/translations/zh_CN/scheduler/schedutil.rst
index d1ea68007520..7c8d87f21c42 100644
--- a/Documentation/translations/zh_CN/scheduler/schedutil.rst
+++ b/Documentation/translations/zh_CN/scheduler/schedutil.rst
@@ -89,16 +89,15 @@ r_cpu被定义为当前CPU的最高性能水平与系统中任何其它CPU的最
- Documentation/translations/zh_CN/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
-UTIL_EST / UTIL_EST_FASTUP
-==========================
+UTIL_EST
+========
由于周期性任务的平均数在睡眠时会衰减,而在运行时其预期利用率会和睡眠前相同,
因此它们在再次运行后会面临(DVFS)的上涨。
为了缓解这个问题,(一个默认使能的编译选项)UTIL_EST驱动一个无限脉冲响应
(Infinite Impulse Response,IIR)的EWMA,“运行”值在出队时是最高的。
-另一个默认使能的编译选项UTIL_EST_FASTUP修改了IIR滤波器,使其允许立即增加,
-仅在利用率下降时衰减。
+UTIL_EST滤波使其在遇到更高值时立刻增加,而遇到低值时会缓慢衰减。
进一步,运行队列的(可运行任务的)利用率之和由下式计算:
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index c7d2510e5a78..853c4f81ba4a 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -13,6 +13,7 @@
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
#endif
/* Replace task scheduler's default cpu-invariant accounting */
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 9fab663dd2de..a323b109b9c4 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -23,6 +23,7 @@ void update_freq_counters_refs(void);
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
#ifdef CONFIG_ACPI_CPPC_LIB
#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 817d788cd866..1a2c72f3e7f8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -82,7 +82,12 @@ int __init parse_acpi_topology(void)
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+/*
+ * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
+ * the CPU capacity and its associated frequency have been correctly
+ * initialized.
+ */
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
@@ -112,14 +117,14 @@ static inline bool freq_counters_valid(int cpu)
return true;
}
-static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+void freq_inv_set_max_ratio(int cpu, u64 max_rate)
{
- u64 ratio;
+ u64 ratio, ref_rate = arch_timer_get_rate();
if (unlikely(!max_rate || !ref_rate)) {
- pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+ WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n",
cpu);
- return -EINVAL;
+ return;
}
/*
@@ -139,12 +144,10 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
ratio = div64_u64(ratio, max_rate);
if (!ratio) {
WARN_ONCE(1, "Reference frequency too low.\n");
- return -EINVAL;
+ return;
}
- per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
-
- return 0;
+ WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio);
}
static void amu_scale_freq_tick(void)
@@ -195,10 +198,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
return;
for_each_cpu(cpu, cpus) {
- if (!freq_counters_valid(cpu) ||
- freq_inv_set_max_ratio(cpu,
- cpufreq_get_hw_max_freq(cpu) * 1000ULL,
- arch_timer_get_rate()))
+ if (!freq_counters_valid(cpu))
return;
}
diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h
index e316ab3b77f3..61183688bdd5 100644
--- a/arch/riscv/include/asm/topology.h
+++ b/arch/riscv/include/asm/topology.h
@@ -9,6 +9,7 @@
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7ff269a78c20..d155a86a8614 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -39,6 +39,9 @@
#include <linux/rwsem.h>
#include <linux/wait.h>
#include <linux/topology.h>
+#include <linux/dmi.h>
+#include <linux/units.h>
+#include <asm/unaligned.h>
#include <acpi/cppc_acpi.h>
@@ -1760,3 +1763,104 @@ unsigned int cppc_get_transition_latency(int cpu_num)
return latency_ns;
}
EXPORT_SYMBOL_GPL(cppc_get_transition_latency);
+
+/* Minimum struct length needed for the DMI processor entry we want */
+#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48
+
+/* Offset in the DMI processor structure for the max frequency */
+#define DMI_PROCESSOR_MAX_SPEED 0x14
+
+/* Callback function used to retrieve the max frequency from DMI */
+static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
+{
+ const u8 *dmi_data = (const u8 *)dm;
+ u16 *mhz = (u16 *)private;
+
+ if (dm->type == DMI_ENTRY_PROCESSOR &&
+ dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
+ u16 val = (u16)get_unaligned((const u16 *)
+ (dmi_data + DMI_PROCESSOR_MAX_SPEED));
+ *mhz = val > *mhz ? val : *mhz;
+ }
+}
+
+/* Look up the max frequency in DMI */
+static u64 cppc_get_dmi_max_khz(void)
+{
+ u16 mhz = 0;
+
+ dmi_walk(cppc_find_dmi_mhz, &mhz);
+
+ /*
+ * Real stupid fallback value, just in case there is no
+ * actual value set.
+ */
+ mhz = mhz ? mhz : 1;
+
+ return KHZ_PER_MHZ * mhz;
+}
+
+/*
+ * If CPPC lowest_freq and nominal_freq registers are exposed then we can
+ * use them to convert perf to freq and vice versa. The conversion is
+ * extrapolated as an affine function passing by the 2 points:
+ * - (Low perf, Low freq)
+ * - (Nominal perf, Nominal freq)
+ */
+unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf)
+{
+ s64 retval, offset = 0;
+ static u64 max_khz;
+ u64 mul, div;
+
+ if (caps->lowest_freq && caps->nominal_freq) {
+ mul = caps->nominal_freq - caps->lowest_freq;
+ mul *= KHZ_PER_MHZ;
+ div = caps->nominal_perf - caps->lowest_perf;
+ offset = caps->nominal_freq * KHZ_PER_MHZ -
+ div64_u64(caps->nominal_perf * mul, div);
+ } else {
+ if (!max_khz)
+ max_khz = cppc_get_dmi_max_khz();
+ mul = max_khz;
+ div = caps->highest_perf;
+ }
+
+ retval = offset + div64_u64(perf * mul, div);
+ if (retval >= 0)
+ return retval;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cppc_perf_to_khz);
+
+unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq)
+{
+ s64 retval, offset = 0;
+ static u64 max_khz;
+ u64 mul, div;
+
+ if (caps->lowest_freq && caps->nominal_freq) {
+ mul = caps->nominal_perf - caps->lowest_perf;
+ div = caps->nominal_freq - caps->lowest_freq;
+ /*
+ * We don't need to convert to kHz for computing offset and can
+ * directly use nominal_freq and lowest_freq as the div64_u64
+ * will remove the frequency unit.
+ */
+ offset = caps->nominal_perf -
+ div64_u64(caps->nominal_freq * mul, div);
+ /* But we need it for computing the perf level. */
+ div *= KHZ_PER_MHZ;
+ } else {
+ if (!max_khz)
+ max_khz = cppc_get_dmi_max_khz();
+ mul = caps->highest_perf;
+ div = max_khz;
+ }
+
+ retval = offset + div64_u64(freq * mul, div);
+ if (retval >= 0)
+ return retval;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cppc_khz_to_perf);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index b741b5ba82bd..5aaa0865625d 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
+#include <linux/units.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thermal_pressure.h>
@@ -26,7 +27,8 @@
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
-static DEFINE_PER_CPU(u32, freq_factor) = 1;
+DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
+EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
static bool supports_scale_freq_counters(const struct cpumask *cpus)
{
@@ -170,9 +172,9 @@ DEFINE_PER_CPU(unsigned long, thermal_pressure);
* operating on stale data when hot-plug is used for some CPUs. The
* @capped_freq reflects the currently allowed max CPUs frequency due to
* thermal capping. It might be also a boost frequency value, which is bigger
- * than the internal 'freq_factor' max frequency. In such case the pressure
- * value should simply be removed, since this is an indication that there is
- * no thermal throttling. The @capped_freq must be provided in kHz.
+ * than the internal 'capacity_freq_ref' max frequency. In such case the
+ * pressure value should simply be removed, since this is an indication that
+ * there is no thermal throttling. The @capped_freq must be provided in kHz.
*/
void topology_update_thermal_pressure(const struct cpumask *cpus,
unsigned long capped_freq)
@@ -183,10 +185,7 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
cpu = cpumask_first(cpus);
max_capacity = arch_scale_cpu_capacity(cpu);
- max_freq = per_cpu(freq_factor, cpu);
-
- /* Convert to MHz scale which is used in 'freq_factor' */
- capped_freq /= 1000;
+ max_freq = arch_scale_freq_ref(cpu);
/*
* Handle properly the boost frequencies, which should simply clean
@@ -279,13 +278,13 @@ void topology_normalize_cpu_scale(void)
capacity_scale = 1;
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+ capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
capacity_scale = max(capacity, capacity_scale);
}
pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+ capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
capacity_scale);
topology_set_cpu_scale(cpu, capacity);
@@ -321,15 +320,15 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
cpu_node, raw_capacity[cpu]);
/*
- * Update freq_factor for calculating early boot cpu capacities.
+ * Update capacity_freq_ref for calculating early boot CPU capacities.
* For non-clk CPU DVFS mechanism, there's no way to get the
* frequency value now, assuming they are running at the same
- * frequency (by keeping the initial freq_factor value).
+ * frequency (by keeping the initial capacity_freq_ref value).
*/
cpu_clk = of_clk_get(cpu_node, 0);
if (!PTR_ERR_OR_ZERO(cpu_clk)) {
- per_cpu(freq_factor, cpu) =
- clk_get_rate(cpu_clk) / 1000;
+ per_cpu(capacity_freq_ref, cpu) =
+ clk_get_rate(cpu_clk) / HZ_PER_KHZ;
clk_put(cpu_clk);
}
} else {
@@ -345,11 +344,16 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
return !ret;
}
+void __weak freq_inv_set_max_ratio(int cpu, u64 max_rate)
+{
+}
+
#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
void topology_init_cpu_capacity_cppc(void)
{
+ u64 capacity, capacity_scale = 0;
struct cppc_perf_caps perf_caps;
int cpu;
@@ -366,6 +370,10 @@ void topology_init_cpu_capacity_cppc(void)
(perf_caps.highest_perf >= perf_caps.nominal_perf) &&
(perf_caps.highest_perf >= perf_caps.lowest_perf)) {
raw_capacity[cpu] = perf_caps.highest_perf;
+ capacity_scale = max_t(u64, capacity_scale, raw_capacity[cpu]);
+
+ per_cpu(capacity_freq_ref, cpu) = cppc_perf_to_khz(&perf_caps, raw_capacity[cpu]);
+
pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n",
cpu, raw_capacity[cpu]);
continue;
@@ -376,7 +384,18 @@ void topology_init_cpu_capacity_cppc(void)
goto exit;
}
- topology_normalize_cpu_scale();
+ for_each_possible_cpu(cpu) {
+ freq_inv_set_max_ratio(cpu,
+ per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ);
+
+ capacity = raw_capacity[cpu];
+ capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
+ capacity_scale);
+ topology_set_cpu_scale(cpu, capacity);
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+ cpu, topology_get_cpu_scale(cpu));
+ }
+
schedule_work(&update_topology_flags_work);
pr_debug("cpu_capacity: cpu_capacity initialization done\n");
@@ -410,8 +429,11 @@ init_cpu_capacity_callback(struct notifier_block *nb,
cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
- for_each_cpu(cpu, policy->related_cpus)
- per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
+ for_each_cpu(cpu, policy->related_cpus) {
+ per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq;
+ freq_inv_set_max_ratio(cpu,
+ per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ);
+ }
if (cpumask_empty(cpus_to_visit)) {
topology_normalize_cpu_scale();
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index fe08ca419b3d..64420d9cfd1e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -16,7 +16,6 @@
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
-#include <linux/dmi.h>
#include <linux/irq_work.h>
#include <linux/kthread.h>
#include <linux/time.h>
@@ -27,12 +26,6 @@
#include <acpi/cppc_acpi.h>
-/* Minimum struct length needed for the DMI processor entry we want */
-#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48
-
-/* Offset in the DMI processor structure for the max frequency */
-#define DMI_PROCESSOR_MAX_SPEED 0x14
-
/*
* This list contains information parsed from per CPU ACPI _CPC and _PSD
* structures: e.g. the highest and lowest supported performance, capabilities,
@@ -291,97 +284,9 @@ static inline void cppc_freq_invariance_exit(void)
}
#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
-/* Callback function used to retrieve the max frequency from DMI */
-static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
-{
- const u8 *dmi_data = (const u8 *)dm;
- u16 *mhz = (u16 *)private;
-
- if (dm->type == DMI_ENTRY_PROCESSOR &&
- dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
- u16 val = (u16)get_unaligned((const u16 *)
- (dmi_data + DMI_PROCESSOR_MAX_SPEED));
- *mhz = val > *mhz ? val : *mhz;
- }
-}
-
-/* Look up the max frequency in DMI */
-static u64 cppc_get_dmi_max_khz(void)
-{
- u16 mhz = 0;
-
- dmi_walk(cppc_find_dmi_mhz, &mhz);
-
- /*
- * Real stupid fallback value, just in case there is no
- * actual value set.
- */
- mhz = mhz ? mhz : 1;
-
- return (1000 * mhz);
-}
-
-/*
- * If CPPC lowest_freq and nominal_freq registers are exposed then we can
- * use them to convert perf to freq and vice versa. The conversion is
- * extrapolated as an affine function passing by the 2 points:
- * - (Low perf, Low freq)
- * - (Nominal perf, Nominal perf)
- */
-static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data,
- unsigned int perf)
-{
- struct cppc_perf_caps *caps = &cpu_data->perf_caps;
- s64 retval, offset = 0;
- static u64 max_khz;
- u64 mul, div;
-
- if (caps->lowest_freq && caps->nominal_freq) {
- mul = caps->nominal_freq - caps->lowest_freq;
- div = caps->nominal_perf - caps->lowest_perf;
- offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div);
- } else {
- if (!max_khz)
- max_khz = cppc_get_dmi_max_khz();
- mul = max_khz;
- div = caps->highest_perf;
- }
-
- retval = offset + div64_u64(perf * mul, div);
- if (retval >= 0)
- return retval;
- return 0;
-}
-
-static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data,
- unsigned int freq)
-{
- struct cppc_perf_caps *caps = &cpu_data->perf_caps;
- s64 retval, offset = 0;
- static u64 max_khz;
- u64 mul, div;
-
- if (caps->lowest_freq && caps->nominal_freq) {
- mul = caps->nominal_perf - caps->lowest_perf;
- div = caps->nominal_freq - caps->lowest_freq;
- offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div);
- } else {
- if (!max_khz)
- max_khz = cppc_get_dmi_max_khz();
- mul = caps->highest_perf;
- div = max_khz;
- }
-
- retval = offset + div64_u64(freq * mul, div);
- if (retval >= 0)
- return retval;
- return 0;
-}
-
static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
-
{
struct cppc_cpudata *cpu_data = policy->driver_data;
unsigned int cpu = policy->cpu;
@@ -389,7 +294,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
u32 desired_perf;
int ret = 0;
- desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
+ desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq);
/* Return if it is exactly the same perf */
if (desired_perf == cpu_data->perf_ctrls.desired_perf)
return ret;
@@ -417,7 +322,7 @@ static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
u32 desired_perf;
int ret;
- desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
+ desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq);
cpu_data->perf_ctrls.desired_perf = desired_perf;
ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
@@ -530,7 +435,7 @@ static int cppc_get_cpu_power(struct device *cpu_dev,
min_step = min_cap / CPPC_EM_CAP_STEP;
max_step = max_cap / CPPC_EM_CAP_STEP;
- perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ perf_prev = cppc_khz_to_perf(perf_caps, *KHz);
step = perf_prev / perf_step;
if (step > max_step)
@@ -550,8 +455,8 @@ static int cppc_get_cpu_power(struct device *cpu_dev,
perf = step * perf_step;
}
- *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
- perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ *KHz = cppc_perf_to_khz(perf_caps, perf);
+ perf_check = cppc_khz_to_perf(perf_caps, *KHz);
step_check = perf_check / perf_step;
/*
@@ -561,8 +466,8 @@ static int cppc_get_cpu_power(struct device *cpu_dev,
*/
while ((*KHz == prev_freq) || (step_check != step)) {
perf++;
- *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
- perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ *KHz = cppc_perf_to_khz(perf_caps, perf);
+ perf_check = cppc_khz_to_perf(perf_caps, *KHz);
step_check = perf_check / perf_step;
}
@@ -591,7 +496,7 @@ static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
perf_caps = &cpu_data->perf_caps;
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
- perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+ perf_prev = cppc_khz_to_perf(perf_caps, KHz);
perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
step = perf_prev / perf_step;
@@ -679,10 +584,6 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu)
goto free_mask;
}
- /* Convert the lowest and nominal freq from MHz to KHz */
- cpu_data->perf_caps.lowest_freq *= 1000;
- cpu_data->perf_caps.nominal_freq *= 1000;
-
list_add(&cpu_data->node, &cpu_data_list);
return cpu_data;
@@ -724,20 +625,16 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
* Set min to lowest nonlinear perf to avoid any efficiency penalty (see
* Section 8.4.7.1.1.5 of ACPI 6.1 spec)
*/
- policy->min = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->lowest_nonlinear_perf);
- policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->nominal_perf);
+ policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
+ policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
/*
* Set cpuinfo.min_freq to Lowest to make the full range of performance
* available if userspace wants to use any perf between lowest & lowest
* nonlinear perf
*/
- policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->lowest_perf);
- policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->nominal_perf);
+ policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
+ policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
policy->shared_type = cpu_data->shared_type;
@@ -773,7 +670,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
boost_supported = true;
/* Set policy->cur to max now. The governors will adjust later. */
- policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, caps->highest_perf);
+ policy->cur = cppc_perf_to_khz(caps, caps->highest_perf);
cpu_data->perf_ctrls.desired_perf = caps->highest_perf;
ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
@@ -863,7 +760,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
&fb_ctrs_t1);
- return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
+ return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
}
static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
@@ -878,11 +775,9 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
}
if (state)
- policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->highest_perf);
+ policy->max = cppc_perf_to_khz(caps, caps->highest_perf);
else
- policy->max = cppc_cpufreq_perf_to_khz(cpu_data,
- caps->nominal_perf);
+ policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
policy->cpuinfo.max_freq = policy->max;
ret = freq_qos_update_request(policy->max_freq_req, policy->max);
@@ -937,7 +832,7 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
if (ret < 0)
return -EIO;
- return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf);
+ return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf);
}
static void cppc_check_hisi_workaround(void)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 934d35f570b7..44db4f59c4cc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -454,7 +454,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
arch_set_freq_scale(policy->related_cpus,
policy->cur,
- policy->cpuinfo.max_freq);
+ arch_scale_freq_ref(policy->cpu));
spin_lock(&policy->transition_lock);
policy->transition_ongoing = false;
@@ -2174,7 +2174,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,