summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-01-12 01:11:25 +0100
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-01-12 01:11:25 +0100
commitb366f976323d5d9ccb612e20afaaddf1fb84418d (patch)
tree3d3a357a309d7ed82dbd14ce7de1ea6c34653856
parent7f4a3702bda0f9f5d34f0241cc81467a55162d7a (diff)
parenta032d2de0b5f17631844b34481c61cb799d0af6b (diff)
downloadlinux-b366f976323d5d9ccb612e20afaaddf1fb84418d.tar.gz
linux-b366f976323d5d9ccb612e20afaaddf1fb84418d.tar.bz2
linux-b366f976323d5d9ccb612e20afaaddf1fb84418d.zip
Merge branch 'pm-cpufreq'
* pm-cpufreq: (30 commits) Documentation: cpufreq: intel_pstate: enhance documentation cpufreq-dt: fix handling regulator_get_voltage() result cpufreq: governor: Fix negative idle_time when configured with CONFIG_HZ_PERIODIC cpufreq: mt8173: migrate to use operating-points-v2 bindings cpufreq: Simplify core code related to boost support cpufreq: acpi-cpufreq: Simplify boost-related code cpufreq: Make cpufreq_boost_supported() static blackfin-cpufreq: Mark cpu_set_cclk() as static blackfin-cpufreq: Change return type of cpu_set_cclk() to that of clk_set_rate() dt: cpufreq: st: Provide bindings for ST's CPUFreq implementation cpufreq: st: Provide runtime initialised driver for ST's platforms cpufreq: mt8173: Move resources allocation into ->probe() cpufreq: intel_pstate: Account for IO wait time cpufreq: intel_pstate: Account for non C0 time cpufreq: intel_pstate: Configurable algorithm to get target pstate cpufreq: mt8173: check return value of regulator_get_voltage() call cpufreq: mt8173: remove redundant regulator_get_voltage() call cpufreq: mt8173: add CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag cpufreq: qoriq: Register cooling device based on device tree cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency ...
-rw-r--r--Documentation/cpu-freq/intel-pstate.txt241
-rw-r--r--Documentation/cpu-freq/pcc-cpufreq.txt4
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.txt17
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt91
-rw-r--r--drivers/cpufreq/Kconfig.arm12
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c24
-rw-r--r--drivers/cpufreq/arm_big_little.c41
-rw-r--r--drivers/cpufreq/blackfin-cpufreq.c2
-rw-r--r--drivers/cpufreq/cpufreq-dt.c14
-rw-r--r--drivers/cpufreq/cpufreq.c28
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c6
-rw-r--r--drivers/cpufreq/cpufreq_governor.c146
-rw-r--r--drivers/cpufreq/cpufreq_governor.h18
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c61
-rw-r--r--drivers/cpufreq/intel_pstate.c73
-rw-r--r--drivers/cpufreq/mt8173-cpufreq.c135
-rw-r--r--drivers/cpufreq/pcc-cpufreq.c2
-rw-r--r--drivers/cpufreq/qoriq-cpufreq.c24
-rw-r--r--drivers/cpufreq/sti-cpufreq.c294
-rw-r--r--include/linux/cpufreq.h6
21 files changed, 1014 insertions, 226 deletions
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
index be8d4006bf76..f7b12c071d53 100644
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ b/Documentation/cpu-freq/intel-pstate.txt
@@ -1,61 +1,131 @@
-Intel P-state driver
+Intel P-State driver
--------------------
-This driver provides an interface to control the P state selection for
-SandyBridge+ Intel processors. The driver can operate two different
-modes based on the processor model, legacy mode and Hardware P state (HWP)
-mode.
-
-In legacy mode, the Intel P-state implements two internal governors,
-performance and powersave, that differ from the general cpufreq governors of
-the same name (the general cpufreq governors implement target(), whereas the
-internal Intel P-state governors implement setpolicy()). The internal
-performance governor sets the max_perf_pct and min_perf_pct to 100; that is,
-the governor selects the highest available P state to maximize the performance
-of the core. The internal powersave governor selects the appropriate P state
-based on the current load on the CPU.
-
-In HWP mode P state selection is implemented in the processor
-itself. The driver provides the interfaces between the cpufreq core and
-the processor to control P state selection based on user preferences
-and reporting frequency to the cpufreq core. In this mode the
-internal Intel P-state governor code is disabled.
-
-In addition to the interfaces provided by the cpufreq core for
-controlling frequency the driver provides sysfs files for
-controlling P state selection. These files have been added to
-/sys/devices/system/cpu/intel_pstate/
-
- max_perf_pct: limits the maximum P state that will be requested by
- the driver stated as a percentage of the available performance. The
- available (P states) performance may be reduced by the no_turbo
+This driver provides an interface to control the P-State selection for the
+SandyBridge+ Intel processors.
+
+The following document explains P-States:
+http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+As stated in the document, P-State doesn’t exactly mean a frequency. However, for
+the sake of the relationship with cpufreq, P-State and frequency are used
+interchangeably.
+
+Understanding the cpufreq core governors and policies are important before
+discussing more details about the Intel P-State driver. Based on what callbacks
+a cpufreq driver provides to the cpufreq core, it can support two types of
+drivers:
+- with target_index() callback: In this mode, the drivers using cpufreq core
+simply provide the minimum and maximum frequency limits and an additional
+interface target_index() to set the current frequency. The cpufreq subsystem
+has a number of scaling governors ("performance", "powersave", "ondemand",
+etc.). Depending on which governor is in use, cpufreq core will call for
+transitions to a specific frequency using target_index() callback.
+- setpolicy() callback: In this mode, drivers do not provide target_index()
+callback, so cpufreq core can't request a transition to a specific frequency.
+The driver provides minimum and maximum frequency limits and callbacks to set a
+policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
+The cpufreq core can request the driver to operate in any of the two policies:
+"performance: and "powersave". The driver decides which frequency to use based
+on the above policy selection considering minimum and maximum frequency limits.
+
+The Intel P-State driver falls under the latter category, which implements the
+setpolicy() callback. This driver decides what P-State to use based on the
+requested policy from the cpufreq core. If the processor is capable of
+selecting its next P-State internally, then the driver will offload this
+responsibility to the processor (aka HWP: Hardware P-States). If not, the
+driver implements algorithms to select the next P-State.
+
+Since these policies are implemented in the driver, they are not same as the
+cpufreq scaling governors implementation, even if they have the same name in
+the cpufreq sysfs (scaling_governors). For example the "performance" policy is
+similar to cpufreq’s "performance" governor, but "powersave" is completely
+different than the cpufreq "powersave" governor. The strategy here is similar
+to cpufreq "ondemand", where the requested P-State is related to the system load.
+
+Sysfs Interface
+
+In addition to the frequency-controlling interfaces provided by the cpufreq
+core, the driver provides its own sysfs files to control the P-State selection.
+These files have been added to /sys/devices/system/cpu/intel_pstate/.
+Any changes made to these files are applicable to all CPUs (even in a
+multi-package system).
+
+ max_perf_pct: Limits the maximum P-State that will be requested by
+ the driver. It states it as a percentage of the available performance. The
+ available (P-State) performance may be reduced by the no_turbo
setting described below.
- min_perf_pct: limits the minimum P state that will be requested by
- the driver stated as a percentage of the max (non-turbo)
+ min_perf_pct: Limits the minimum P-State that will be requested by
+ the driver. It states it as a percentage of the max (non-turbo)
performance level.
- no_turbo: limits the driver to selecting P states below the turbo
+ no_turbo: Limits the driver to selecting P-State below the turbo
frequency range.
- turbo_pct: displays the percentage of the total performance that
- is supported by hardware that is in the turbo range. This number
+ turbo_pct: Displays the percentage of the total performance that
+ is supported by hardware that is in the turbo range. This number
is independent of whether turbo has been disabled or not.
- num_pstates: displays the number of pstates that are supported
- by hardware. This number is independent of whether turbo has
+ num_pstates: Displays the number of P-States that are supported
+ by hardware. This number is independent of whether turbo has
been disabled or not.
+For example, if a system has these parameters:
+ Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
+ Max non turbo ratio: 0x17
+ Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
+
+Sysfs will show :
+ max_perf_pct:100, which corresponds to 1 core ratio
+ min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
+ no_turbo:0, turbo is not disabled
+ num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
+ turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
+
+Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
+Volume 3: System Programming Guide" to understand ratios.
+
+cpufreq sysfs for Intel P-State
+
+Since this driver registers with cpufreq, cpufreq sysfs is also presented.
+There are some important differences, which need to be considered.
+
+scaling_cur_freq: This displays the real frequency which was used during
+the last sample period instead of what is requested. Some other cpufreq driver,
+like acpi-cpufreq, displays what is requested (Some changes are on the
+way to fix this for acpi-cpufreq driver). The same is true for frequencies
+displayed at /proc/cpuinfo.
+
+scaling_governor: This displays current active policy. Since each CPU has a
+cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
+is not possible with Intel P-States, as there is one common policy for all
+CPUs. Here, the last requested policy will be applicable to all CPUs. It is
+suggested that one use the cpupower utility to change policy to all CPUs at the
+same time.
+
+scaling_setspeed: This attribute can never be used with Intel P-State.
+
+scaling_max_freq/scaling_min_freq: This interface can be used similarly to
+the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
+are converted to nearest possible P-State, this is prone to rounding errors.
+This method is not preferred to limit performance.
+
+affected_cpus: Not used
+related_cpus: Not used
+
For contemporary Intel processors, the frequency is controlled by the
-processor itself and the P-states exposed to software are related to
+processor itself and the P-State exposed to software is related to
performance levels. The idea that frequency can be set to a single
-frequency is fiction for Intel Core processors. Even if the scaling
-driver selects a single P state the actual frequency the processor
+frequency is fictional for Intel Core processors. Even if the scaling
+driver selects a single P-State, the actual frequency the processor
will run at is selected by the processor itself.
-For legacy mode debugfs files have also been added to allow tuning of
-the internal governor algorythm. These files are located at
-/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode.
+Tuning Intel P-State driver
+
+When HWP mode is not used, debugfs files have also been added to allow the
+tuning of the internal governor algorithm. These files are located at
+/sys/kernel/debug/pstate_snb/. The algorithm uses a PID (Proportional
+Integral Derivative) controller. The PID tunable parameters are:
deadband
d_gain_pct
@@ -63,3 +133,90 @@ the internal governor algorythm. These files are located at
p_gain_pct
sample_rate_ms
setpoint
+
+To adjust these parameters, some understanding of driver implementation is
+necessary. There are some tweeks described here, but be very careful. Adjusting
+them requires expert level understanding of power and performance relationship.
+These limits are only useful when the "powersave" policy is active.
+
+-To make the system more responsive to load changes, sample_rate_ms can
+be adjusted (current default is 10ms).
+-To make the system use higher performance, even if the load is lower, setpoint
+can be adjusted to a lower number. This will also lead to faster ramp up time
+to reach the maximum P-State.
+If there are no derivative and integral coefficients, The next P-State will be
+equal to:
+ current P-State - ((setpoint - current cpu load) * p_gain_pct)
+
+For example, if the current PID parameters are (Which are defaults for the core
+processors like SandyBridge):
+ deadband = 0
+ d_gain_pct = 0
+ i_gain_pct = 0
+ p_gain_pct = 20
+ sample_rate_ms = 10
+ setpoint = 97
+
+If the current P-State = 0x08 and current load = 100, this will result in the
+next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
+goes up by only 1. If during next sample interval the current load doesn't
+change and still 100, then P-State goes up by one again. This process will
+continue as long as the load is more than the setpoint until the maximum P-State
+is reached.
+
+For the same load at setpoint = 60, this will result in the next P-State
+= 0x08 - ((60 - 100) * 0.2) = 16
+So by changing the setpoint from 97 to 60, there is an increase of the
+next P-State from 9 to 16. So this will make processor execute at higher
+P-State for the same CPU load. If the load continues to be more than the
+setpoint during next sample intervals, then P-State will go up again till the
+maximum P-State is reached. But the ramp up time to reach the maximum P-State
+will be much faster when the setpoint is 60 compared to 97.
+
+Debugging Intel P-State driver
+
+Event tracing
+To debug P-State transition, the Linux event tracing interface can be used.
+There are two specific events, which can be enabled (Provided the kernel
+configs related to event tracing are enabled).
+
+# cd /sys/kernel/debug/tracing/
+# echo 1 > events/power/pstate_sample/enable
+# echo 1 > events/power/cpu_frequency/enable
+# cat trace
+gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107
+ scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
+ freq=2474476
+cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
+
+
+Using ftrace
+
+If function level tracing is required, the Linux ftrace interface can be used.
+For example if we want to check how often a function to set a P-State is
+called, we can set ftrace filter to intel_pstate_set_pstate.
+
+# cd /sys/kernel/debug/tracing/
+# cat available_filter_functions | grep -i pstate
+intel_pstate_set_pstate
+intel_pstate_cpu_init
+...
+
+# echo intel_pstate_set_pstate > set_ftrace_filter
+# echo function > current_tracer
+# cat trace | head -15
+# tracer: function
+#
+# entries-in-buffer/entries-written: 80/80 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
+ gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
+ gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
+ <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
index 9e3c3b33514c..0a94224ad296 100644
--- a/Documentation/cpu-freq/pcc-cpufreq.txt
+++ b/Documentation/cpu-freq/pcc-cpufreq.txt
@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
2.2 cpuinfo_transition_latency:
-------------------------------
-The cpuinfo_transition_latency field is 0. The PCC specification does
-not include a field to expose this value currently.
+The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
+does not include a field to expose this value currently.
2.3 cpuinfo_cur_freq:
---------------------
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index 3a07a87fef20..6aca64f289b6 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -242,6 +242,23 @@ nodes to be present and contain the properties described below.
Definition: Specifies the syscon node controlling the cpu core
power domains.
+ - dynamic-power-coefficient
+ Usage: optional
+ Value type: <prop-encoded-array>
+ Definition: A u32 value that represents the running time dynamic
+ power coefficient in units of mW/MHz/uVolt^2. The
+ coefficient can either be calculated from power
+ measurements or derived by analysis.
+
+ The dynamic power consumption of the CPU is
+ proportional to the square of the Voltage (V) and
+ the clock frequency (f). The coefficient is used to
+ calculate the dynamic power as below -
+
+ Pdyn = dynamic-power-coefficient * V^2 * f
+
+ where voltage is in uV, frequency is in MHz.
+
Example 1 (dual-cluster big.LITTLE system 32-bit):
cpus {
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
new file mode 100644
index 000000000000..d91a02a3b6b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
@@ -0,0 +1,91 @@
+Binding for ST's CPUFreq driver
+===============================
+
+ST's CPUFreq driver attempts to read 'process' and 'version' attributes
+from the SoC, then supplies the OPP framework with 'prop' and 'supported
+hardware' information respectively. The framework is then able to read
+the DT and operate in the usual way.
+
+For more information about the expected DT format [See: ../opp/opp.txt].
+
+Frequency Scaling only
+----------------------
+
+No vendor specific driver required for this.
+
+Located in CPU's node:
+
+- operating-points : [See: ../power/opp.txt]
+
+Example [safe]
+--------------
+
+cpus {
+ cpu@0 {
+ /* kHz uV */
+ operating-points = <1500000 0
+ 1200000 0
+ 800000 0
+ 500000 0>;
+ };
+};
+
+Dynamic Voltage and Frequency Scaling (DVFS)
+--------------------------------------------
+
+This requires the ST CPUFreq driver to supply 'process' and 'version' info.
+
+Located in CPU's node:
+
+- operating-points-v2 : [See ../power/opp.txt]
+
+Example [unsafe]
+----------------
+
+cpus {
+ cpu@0 {
+ operating-points-v2 = <&cpu0_opp_table>;
+ };
+};
+
+cpu0_opp_table: opp_table {
+ compatible = "operating-points-v2";
+
+ /* ############################################################### */
+ /* # WARNING: Do not attempt to copy/replicate these nodes, # */
+ /* # they are only to be supplied by the bootloader !!! # */
+ /* ############################################################### */
+ opp0 {
+ /* Major Minor Substrate */
+ /* 2 all all */
+ opp-supported-hw = <0x00000004 0xffffffff 0xffffffff>;
+ opp-hz = /bits/ 64 <1500000000>;
+ clock-latency-ns = <10000000>;
+
+ opp-microvolt-pcode0 = <1200000>;
+ opp-microvolt-pcode1 = <1200000>;
+ opp-microvolt-pcode2 = <1200000>;
+ opp-microvolt-pcode3 = <1200000>;
+ opp-microvolt-pcode4 = <1170000>;
+ opp-microvolt-pcode5 = <1140000>;
+ opp-microvolt-pcode6 = <1100000>;
+ opp-microvolt-pcode7 = <1070000>;
+ };
+
+ opp1 {
+ /* Major Minor Substrate */
+ /* all all all */
+ opp-supported-hw = <0xffffffff 0xffffffff 0xffffffff>;
+ opp-hz = /bits/ 64 <1200000000>;
+ clock-latency-ns = <10000000>;
+
+ opp-microvolt-pcode0 = <1110000>;
+ opp-microvolt-pcode1 = <1150000>;
+ opp-microvolt-pcode2 = <1100000>;
+ opp-microvolt-pcode3 = <1080000>;
+ opp-microvolt-pcode4 = <1040000>;
+ opp-microvolt-pcode5 = <1020000>;
+ opp-microvolt-pcode6 = <980000>;
+ opp-microvolt-pcode7 = <930000>;
+ };
+};
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index b1f8a73e5a94..0031069b64c9 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -6,6 +6,8 @@
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK
+ # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y
+ depends on !CPU_THERMAL || THERMAL
select PM_OPP
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
@@ -217,6 +219,16 @@ config ARM_SPEAR_CPUFREQ
help
This adds the CPUFreq driver support for SPEAr SOCs.
+config ARM_STI_CPUFREQ
+ tristate "STi CPUFreq support"
+ depends on SOC_STIH407
+ help
+ This driver uses the generic OPP framework to match the running
+ platform with a predefined set of suitable values. If not provided
+ we will fall-back so safe-values contained in Device Tree. Enable
+ this config option if you wish to add CPUFreq support for STi based
+ SoCs.
+
config ARM_TEGRA20_CPUFREQ
bool "Tegra20 CPUFreq support"
depends on ARCH_TEGRA
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index c0af1a1281c8..9e63fb1b09f8 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -73,6 +73,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o
obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o
obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o
obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o
+obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o
obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o
obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o
obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index cec1ee2d2f74..51eef87bbc37 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -135,7 +135,7 @@ static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
wrmsr_on_cpus(cpumask, msr_addr, msrs);
}
-static int _store_boost(int val)
+static int set_boost(int val)
{
get_online_cpus();
boost_set_msrs(val, cpu_online_mask);
@@ -158,29 +158,24 @@ static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
cpufreq_freq_attr_ro(freqdomain_cpus);
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
-static ssize_t store_boost(const char *buf, size_t count)
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
{
int ret;
- unsigned long val = 0;
+ unsigned int val = 0;
- if (!acpi_cpufreq_driver.boost_supported)
+ if (!acpi_cpufreq_driver.set_boost)
return -EINVAL;
- ret = kstrtoul(buf, 10, &val);
- if (ret || (val > 1))
+ ret = kstrtouint(buf, 10, &val);
+ if (ret || val > 1)
return -EINVAL;
- _store_boost((int) val);
+ set_boost(val);
return count;
}
-static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
- size_t count)
-{
- return store_boost(buf, count);
-}
-
static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
@@ -905,7 +900,6 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
.resume = acpi_cpufreq_resume,
.name = "acpi-cpufreq",
.attr = acpi_cpufreq_attr,
- .set_boost = _store_boost,
};
static void __init acpi_cpufreq_boost_init(void)
@@ -916,7 +910,7 @@ static void __init acpi_cpufreq_boost_init(void)
if (!msrs)
return;
- acpi_cpufreq_driver.boost_supported = true;
+ acpi_cpufreq_driver.set_boost = set_boost;
acpi_cpufreq_driver.boost_enabled = boost_state(0);
cpu_notifier_register_begin();
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index c5d256caa664..c251247ae661 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -23,6 +23,7 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -55,6 +56,7 @@ static bool bL_switching_enabled;
#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
+static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
@@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
static int bL_cpufreq_exit(struct cpufreq_policy *policy)
{
struct device *cpu_dev;
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpufreq_cooling_unregister(cdev[cur_cluster]);
+ cdev[cur_cluster] = NULL;
+ }
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy)
return 0;
}
+static void bL_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ struct device *cpu_dev = get_cpu_device(policy->cpu);
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+ struct device_node *np;
+
+ /* Do not register a cpu_cooling device if we are in IKS mode */
+ if (cur_cluster >= MAX_CLUSTERS)
+ return;
+
+ np = of_node_get(cpu_dev->of_node);
+ if (WARN_ON(!np))
+ return;
+
+ if (of_find_property(np, "#cooling-cells", NULL)) {
+ u32 power_coefficient = 0;
+
+ of_property_read_u32(np, "dynamic-power-coefficient",
+ &power_coefficient);
+
+ cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
+ policy->related_cpus, power_coefficient, NULL);
+ if (IS_ERR(cdev[cur_cluster])) {
+ dev_err(cpu_dev,
+ "running cpufreq without cooling device: %ld\n",
+ PTR_ERR(cdev[cur_cluster]));
+ cdev[cur_cluster] = NULL;
+ }
+ }
+ of_node_put(np);
+}
+
static struct cpufreq_driver bL_cpufreq_driver = {
.name = "arm-big-little",
.flags = CPUFREQ_STICKY |
@@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
.exit = bL_cpufreq_exit,
+ .ready = bL_cpufreq_ready,
.attr = cpufreq_generic_attr,
};
diff --git a/drivers/cpufreq/blackfin-cpufreq.c b/drivers/cpufreq/blackfin-cpufreq.c
index a9f8e5bd0716..12e97d8a9db0 100644
--- a/drivers/cpufreq/blackfin-cpufreq.c
+++ b/drivers/cpufreq/blackfin-cpufreq.c
@@ -112,7 +112,7 @@ static unsigned int bfin_getfreq_khz(unsigned int cpu)
}
#ifdef CONFIG_BF60x
-unsigned long cpu_set_cclk(int cpu, unsigned long new)
+static int cpu_set_cclk(int cpu, unsigned long new)
{
struct clk *clk;
int ret;
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 90d64081ddb3..9bc37c437874 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -50,7 +50,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
struct private_data *priv = policy->driver_data;
struct device *cpu_dev = priv->cpu_dev;
struct regulator *cpu_reg = priv->cpu_reg;
- unsigned long volt = 0, volt_old = 0, tol = 0;
+ unsigned long volt = 0, tol = 0;
+ int volt_old = 0;
unsigned int old_freq, new_freq;
long freq_Hz, freq_exact;
int ret;
@@ -83,7 +84,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
opp_freq / 1000, volt);
}
- dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n",
+ dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n",
old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
new_freq / 1000, volt ? volt / 1000 : -1);
@@ -407,8 +408,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
* thermal DT code takes care of matching them.
*/
if (of_find_property(np, "#cooling-cells", NULL)) {
- priv->cdev = of_cpufreq_cooling_register(np,
- policy->related_cpus);
+ u32 power_coefficient = 0;
+
+ of_property_read_u32(np, "dynamic-power-coefficient",
+ &power_coefficient);
+
+ priv->cdev = of_cpufreq_power_cooling_register(np,
+ policy->related_cpus, power_coefficient, NULL);
if (IS_ERR(priv->cdev)) {
dev_err(priv->cpu_dev,
"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8412ce5f93a7..c35e7da1ed7a 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2330,29 +2330,15 @@ int cpufreq_boost_trigger_state(int state)
return ret;
}
-int cpufreq_boost_supported(void)
+static bool cpufreq_boost_supported(void)
{
- if (likely(cpufreq_driver))
- return cpufreq_driver->boost_supported;
-
- return 0;
+ return likely(cpufreq_driver) && cpufreq_driver->set_boost;
}
-EXPORT_SYMBOL_GPL(cpufreq_boost_supported);
static int create_boost_sysfs_file(void)
{
int ret;
- if (!cpufreq_boost_supported())
- return 0;
-
- /*
- * Check if driver provides function to enable boost -
- * if not, use cpufreq_boost_set_sw as default
- */
- if (!cpufreq_driver->set_boost)
- cpufreq_driver->set_boost = cpufreq_boost_set_sw;
-
ret = sysfs_create_file(cpufreq_global_kobject, &boost.attr);
if (ret)
pr_err("%s: cannot register global BOOST sysfs file\n",
@@ -2375,7 +2361,7 @@ int cpufreq_enable_boost_support(void)
if (cpufreq_boost_supported())
return 0;
- cpufreq_driver->boost_supported = true;
+ cpufreq_driver->set_boost = cpufreq_boost_set_sw;
/* This will get removed on driver unregister */
return create_boost_sysfs_file();
@@ -2435,9 +2421,11 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
if (driver_data->setpolicy)
driver_data->flags |= CPUFREQ_CONST_LOOPS;
- ret = create_boost_sysfs_file();
- if (ret)
- goto err_null_driver;
+ if (cpufreq_boost_supported()) {
+ ret = create_boost_sysfs_file();
+ if (ret)
+ goto err_null_driver;
+ }
ret = subsys_interface_register(&cpufreq_interface);
if (ret)
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 1fa1deb6e91f..606ad74abe6e 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -115,13 +115,13 @@ static void cs_check_cpu(int cpu, unsigned int load)
}
}
-static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs,
- struct dbs_data *dbs_data, bool modify_all)
+static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
{
+ struct dbs_data *dbs_data = policy->governor_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
if (modify_all)
- dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu);
+ dbs_check_cpu(dbs_data, policy->cpu);
return delay_for_sampling_rate(cs_tuners->sampling_rate);
}
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index b260576ddb12..bab3a514ec12 100644
--- a/