diff options
| author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-01-12 01:11:25 +0100 |
|---|---|---|
| committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-01-12 01:11:25 +0100 |
| commit | b366f976323d5d9ccb612e20afaaddf1fb84418d (patch) | |
| tree | 3d3a357a309d7ed82dbd14ce7de1ea6c34653856 | |
| parent | 7f4a3702bda0f9f5d34f0241cc81467a55162d7a (diff) | |
| parent | a032d2de0b5f17631844b34481c61cb799d0af6b (diff) | |
| download | linux-b366f976323d5d9ccb612e20afaaddf1fb84418d.tar.gz linux-b366f976323d5d9ccb612e20afaaddf1fb84418d.tar.bz2 linux-b366f976323d5d9ccb612e20afaaddf1fb84418d.zip | |
Merge branch 'pm-cpufreq'
* pm-cpufreq: (30 commits)
Documentation: cpufreq: intel_pstate: enhance documentation
cpufreq-dt: fix handling regulator_get_voltage() result
cpufreq: governor: Fix negative idle_time when configured with CONFIG_HZ_PERIODIC
cpufreq: mt8173: migrate to use operating-points-v2 bindings
cpufreq: Simplify core code related to boost support
cpufreq: acpi-cpufreq: Simplify boost-related code
cpufreq: Make cpufreq_boost_supported() static
blackfin-cpufreq: Mark cpu_set_cclk() as static
blackfin-cpufreq: Change return type of cpu_set_cclk() to that of clk_set_rate()
dt: cpufreq: st: Provide bindings for ST's CPUFreq implementation
cpufreq: st: Provide runtime initialised driver for ST's platforms
cpufreq: mt8173: Move resources allocation into ->probe()
cpufreq: intel_pstate: Account for IO wait time
cpufreq: intel_pstate: Account for non C0 time
cpufreq: intel_pstate: Configurable algorithm to get target pstate
cpufreq: mt8173: check return value of regulator_get_voltage() call
cpufreq: mt8173: remove redundant regulator_get_voltage() call
cpufreq: mt8173: add CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag
cpufreq: qoriq: Register cooling device based on device tree
cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency
...
| -rw-r--r-- | Documentation/cpu-freq/intel-pstate.txt | 241 | ||||
| -rw-r--r-- | Documentation/cpu-freq/pcc-cpufreq.txt | 4 | ||||
| -rw-r--r-- | Documentation/devicetree/bindings/arm/cpus.txt | 17 | ||||
| -rw-r--r-- | Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt | 91 | ||||
| -rw-r--r-- | drivers/cpufreq/Kconfig.arm | 12 | ||||
| -rw-r--r-- | drivers/cpufreq/Makefile | 1 | ||||
| -rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 24 | ||||
| -rw-r--r-- | drivers/cpufreq/arm_big_little.c | 41 | ||||
| -rw-r--r-- | drivers/cpufreq/blackfin-cpufreq.c | 2 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq-dt.c | 14 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq.c | 28 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 6 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 146 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_governor.h | 18 | ||||
| -rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 61 | ||||
| -rw-r--r-- | drivers/cpufreq/intel_pstate.c | 73 | ||||
| -rw-r--r-- | drivers/cpufreq/mt8173-cpufreq.c | 135 | ||||
| -rw-r--r-- | drivers/cpufreq/pcc-cpufreq.c | 2 | ||||
| -rw-r--r-- | drivers/cpufreq/qoriq-cpufreq.c | 24 | ||||
| -rw-r--r-- | drivers/cpufreq/sti-cpufreq.c | 294 | ||||
| -rw-r--r-- | include/linux/cpufreq.h | 6 |
21 files changed, 1014 insertions, 226 deletions
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index be8d4006bf76..f7b12c071d53 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -1,61 +1,131 @@ -Intel P-state driver +Intel P-State driver -------------------- -This driver provides an interface to control the P state selection for -SandyBridge+ Intel processors. The driver can operate two different -modes based on the processor model, legacy mode and Hardware P state (HWP) -mode. - -In legacy mode, the Intel P-state implements two internal governors, -performance and powersave, that differ from the general cpufreq governors of -the same name (the general cpufreq governors implement target(), whereas the -internal Intel P-state governors implement setpolicy()). The internal -performance governor sets the max_perf_pct and min_perf_pct to 100; that is, -the governor selects the highest available P state to maximize the performance -of the core. The internal powersave governor selects the appropriate P state -based on the current load on the CPU. - -In HWP mode P state selection is implemented in the processor -itself. The driver provides the interfaces between the cpufreq core and -the processor to control P state selection based on user preferences -and reporting frequency to the cpufreq core. In this mode the -internal Intel P-state governor code is disabled. - -In addition to the interfaces provided by the cpufreq core for -controlling frequency the driver provides sysfs files for -controlling P state selection. These files have been added to -/sys/devices/system/cpu/intel_pstate/ - - max_perf_pct: limits the maximum P state that will be requested by - the driver stated as a percentage of the available performance. The - available (P states) performance may be reduced by the no_turbo +This driver provides an interface to control the P-State selection for the +SandyBridge+ Intel processors. + +The following document explains P-States: +http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf +As stated in the document, P-State doesn’t exactly mean a frequency. However, for +the sake of the relationship with cpufreq, P-State and frequency are used +interchangeably. + +Understanding the cpufreq core governors and policies are important before +discussing more details about the Intel P-State driver. Based on what callbacks +a cpufreq driver provides to the cpufreq core, it can support two types of +drivers: +- with target_index() callback: In this mode, the drivers using cpufreq core +simply provide the minimum and maximum frequency limits and an additional +interface target_index() to set the current frequency. The cpufreq subsystem +has a number of scaling governors ("performance", "powersave", "ondemand", +etc.). Depending on which governor is in use, cpufreq core will call for +transitions to a specific frequency using target_index() callback. +- setpolicy() callback: In this mode, drivers do not provide target_index() +callback, so cpufreq core can't request a transition to a specific frequency. +The driver provides minimum and maximum frequency limits and callbacks to set a +policy. The policy in cpufreq sysfs is referred to as the "scaling governor". +The cpufreq core can request the driver to operate in any of the two policies: +"performance: and "powersave". The driver decides which frequency to use based +on the above policy selection considering minimum and maximum frequency limits. + +The Intel P-State driver falls under the latter category, which implements the +setpolicy() callback. This driver decides what P-State to use based on the +requested policy from the cpufreq core. If the processor is capable of +selecting its next P-State internally, then the driver will offload this +responsibility to the processor (aka HWP: Hardware P-States). If not, the +driver implements algorithms to select the next P-State. + +Since these policies are implemented in the driver, they are not same as the +cpufreq scaling governors implementation, even if they have the same name in +the cpufreq sysfs (scaling_governors). For example the "performance" policy is +similar to cpufreq’s "performance" governor, but "powersave" is completely +different than the cpufreq "powersave" governor. The strategy here is similar +to cpufreq "ondemand", where the requested P-State is related to the system load. + +Sysfs Interface + +In addition to the frequency-controlling interfaces provided by the cpufreq +core, the driver provides its own sysfs files to control the P-State selection. +These files have been added to /sys/devices/system/cpu/intel_pstate/. +Any changes made to these files are applicable to all CPUs (even in a +multi-package system). + + max_perf_pct: Limits the maximum P-State that will be requested by + the driver. It states it as a percentage of the available performance. The + available (P-State) performance may be reduced by the no_turbo setting described below. - min_perf_pct: limits the minimum P state that will be requested by - the driver stated as a percentage of the max (non-turbo) + min_perf_pct: Limits the minimum P-State that will be requested by + the driver. It states it as a percentage of the max (non-turbo) performance level. - no_turbo: limits the driver to selecting P states below the turbo + no_turbo: Limits the driver to selecting P-State below the turbo frequency range. - turbo_pct: displays the percentage of the total performance that - is supported by hardware that is in the turbo range. This number + turbo_pct: Displays the percentage of the total performance that + is supported by hardware that is in the turbo range. This number is independent of whether turbo has been disabled or not. - num_pstates: displays the number of pstates that are supported - by hardware. This number is independent of whether turbo has + num_pstates: Displays the number of P-States that are supported + by hardware. This number is independent of whether turbo has been disabled or not. +For example, if a system has these parameters: + Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State) + Max non turbo ratio: 0x17 + Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio) + +Sysfs will show : + max_perf_pct:100, which corresponds to 1 core ratio + min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio + no_turbo:0, turbo is not disabled + num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1) + turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates + +Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual +Volume 3: System Programming Guide" to understand ratios. + +cpufreq sysfs for Intel P-State + +Since this driver registers with cpufreq, cpufreq sysfs is also presented. +There are some important differences, which need to be considered. + +scaling_cur_freq: This displays the real frequency which was used during +the last sample period instead of what is requested. Some other cpufreq driver, +like acpi-cpufreq, displays what is requested (Some changes are on the +way to fix this for acpi-cpufreq driver). The same is true for frequencies +displayed at /proc/cpuinfo. + +scaling_governor: This displays current active policy. Since each CPU has a +cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this +is not possible with Intel P-States, as there is one common policy for all +CPUs. Here, the last requested policy will be applicable to all CPUs. It is +suggested that one use the cpupower utility to change policy to all CPUs at the +same time. + +scaling_setspeed: This attribute can never be used with Intel P-State. + +scaling_max_freq/scaling_min_freq: This interface can be used similarly to +the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies +are converted to nearest possible P-State, this is prone to rounding errors. +This method is not preferred to limit performance. + +affected_cpus: Not used +related_cpus: Not used + For contemporary Intel processors, the frequency is controlled by the -processor itself and the P-states exposed to software are related to +processor itself and the P-State exposed to software is related to performance levels. The idea that frequency can be set to a single -frequency is fiction for Intel Core processors. Even if the scaling -driver selects a single P state the actual frequency the processor +frequency is fictional for Intel Core processors. Even if the scaling +driver selects a single P-State, the actual frequency the processor will run at is selected by the processor itself. -For legacy mode debugfs files have also been added to allow tuning of -the internal governor algorythm. These files are located at -/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode. +Tuning Intel P-State driver + +When HWP mode is not used, debugfs files have also been added to allow the +tuning of the internal governor algorithm. These files are located at +/sys/kernel/debug/pstate_snb/. The algorithm uses a PID (Proportional +Integral Derivative) controller. The PID tunable parameters are: deadband d_gain_pct @@ -63,3 +133,90 @@ the internal governor algorythm. These files are located at p_gain_pct sample_rate_ms setpoint + +To adjust these parameters, some understanding of driver implementation is +necessary. There are some tweeks described here, but be very careful. Adjusting +them requires expert level understanding of power and performance relationship. +These limits are only useful when the "powersave" policy is active. + +-To make the system more responsive to load changes, sample_rate_ms can +be adjusted (current default is 10ms). +-To make the system use higher performance, even if the load is lower, setpoint +can be adjusted to a lower number. This will also lead to faster ramp up time +to reach the maximum P-State. +If there are no derivative and integral coefficients, The next P-State will be +equal to: + current P-State - ((setpoint - current cpu load) * p_gain_pct) + +For example, if the current PID parameters are (Which are defaults for the core +processors like SandyBridge): + deadband = 0 + d_gain_pct = 0 + i_gain_pct = 0 + p_gain_pct = 20 + sample_rate_ms = 10 + setpoint = 97 + +If the current P-State = 0x08 and current load = 100, this will result in the +next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State +goes up by only 1. If during next sample interval the current load doesn't +change and still 100, then P-State goes up by one again. This process will +continue as long as the load is more than the setpoint until the maximum P-State +is reached. + +For the same load at setpoint = 60, this will result in the next P-State += 0x08 - ((60 - 100) * 0.2) = 16 +So by changing the setpoint from 97 to 60, there is an increase of the +next P-State from 9 to 16. So this will make processor execute at higher +P-State for the same CPU load. If the load continues to be more than the +setpoint during next sample intervals, then P-State will go up again till the +maximum P-State is reached. But the ramp up time to reach the maximum P-State +will be much faster when the setpoint is 60 compared to 97. + +Debugging Intel P-State driver + +Event tracing +To debug P-State transition, the Linux event tracing interface can be used. +There are two specific events, which can be enabled (Provided the kernel +configs related to event tracing are enabled). + +# cd /sys/kernel/debug/tracing/ +# echo 1 > events/power/pstate_sample/enable +# echo 1 > events/power/cpu_frequency/enable +# cat trace +gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 + scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 + freq=2474476 +cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 + + +Using ftrace + +If function level tracing is required, the Linux ftrace interface can be used. +For example if we want to check how often a function to set a P-State is +called, we can set ftrace filter to intel_pstate_set_pstate. + +# cd /sys/kernel/debug/tracing/ +# cat available_filter_functions | grep -i pstate +intel_pstate_set_pstate +intel_pstate_cpu_init +... + +# echo intel_pstate_set_pstate > set_ftrace_filter +# echo function > current_tracer +# cat trace | head -15 +# tracer: function +# +# entries-in-buffer/entries-written: 80/80 #P:4 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func + <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 9e3c3b33514c..0a94224ad296 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is 0. The PCC specification does -not include a field to expose this value currently. +The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification +does not include a field to expose this value currently. 2.3 cpuinfo_cur_freq: --------------------- diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index 3a07a87fef20..6aca64f289b6 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -242,6 +242,23 @@ nodes to be present and contain the properties described below. Definition: Specifies the syscon node controlling the cpu core power domains. + - dynamic-power-coefficient + Usage: optional + Value type: <prop-encoded-array> + Definition: A u32 value that represents the running time dynamic + power coefficient in units of mW/MHz/uVolt^2. The + coefficient can either be calculated from power + measurements or derived by analysis. + + The dynamic power consumption of the CPU is + proportional to the square of the Voltage (V) and + the clock frequency (f). The coefficient is used to + calculate the dynamic power as below - + + Pdyn = dynamic-power-coefficient * V^2 * f + + where voltage is in uV, frequency is in MHz. + Example 1 (dual-cluster big.LITTLE system 32-bit): cpus { diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt new file mode 100644 index 000000000000..d91a02a3b6b0 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt @@ -0,0 +1,91 @@ +Binding for ST's CPUFreq driver +=============================== + +ST's CPUFreq driver attempts to read 'process' and 'version' attributes +from the SoC, then supplies the OPP framework with 'prop' and 'supported +hardware' information respectively. The framework is then able to read +the DT and operate in the usual way. + +For more information about the expected DT format [See: ../opp/opp.txt]. + +Frequency Scaling only +---------------------- + +No vendor specific driver required for this. + +Located in CPU's node: + +- operating-points : [See: ../power/opp.txt] + +Example [safe] +-------------- + +cpus { + cpu@0 { + /* kHz uV */ + operating-points = <1500000 0 + 1200000 0 + 800000 0 + 500000 0>; + }; +}; + +Dynamic Voltage and Frequency Scaling (DVFS) +-------------------------------------------- + +This requires the ST CPUFreq driver to supply 'process' and 'version' info. + +Located in CPU's node: + +- operating-points-v2 : [See ../power/opp.txt] + +Example [unsafe] +---------------- + +cpus { + cpu@0 { + operating-points-v2 = <&cpu0_opp_table>; + }; +}; + +cpu0_opp_table: opp_table { + compatible = "operating-points-v2"; + + /* ############################################################### */ + /* # WARNING: Do not attempt to copy/replicate these nodes, # */ + /* # they are only to be supplied by the bootloader !!! # */ + /* ############################################################### */ + opp0 { + /* Major Minor Substrate */ + /* 2 all all */ + opp-supported-hw = <0x00000004 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1500000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1200000>; + opp-microvolt-pcode1 = <1200000>; + opp-microvolt-pcode2 = <1200000>; + opp-microvolt-pcode3 = <1200000>; + opp-microvolt-pcode4 = <1170000>; + opp-microvolt-pcode5 = <1140000>; + opp-microvolt-pcode6 = <1100000>; + opp-microvolt-pcode7 = <1070000>; + }; + + opp1 { + /* Major Minor Substrate */ + /* all all all */ + opp-supported-hw = <0xffffffff 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1200000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1110000>; + opp-microvolt-pcode1 = <1150000>; + opp-microvolt-pcode2 = <1100000>; + opp-microvolt-pcode3 = <1080000>; + opp-microvolt-pcode4 = <1040000>; + opp-microvolt-pcode5 = <1020000>; + opp-microvolt-pcode6 = <980000>; + opp-microvolt-pcode7 = <930000>; + }; +}; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index b1f8a73e5a94..0031069b64c9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -6,6 +6,8 @@ config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK + # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y + depends on !CPU_THERMAL || THERMAL select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. @@ -217,6 +219,16 @@ config ARM_SPEAR_CPUFREQ help This adds the CPUFreq driver support for SPEAr SOCs. +config ARM_STI_CPUFREQ + tristate "STi CPUFreq support" + depends on SOC_STIH407 + help + This driver uses the generic OPP framework to match the running + platform with a predefined set of suitable values. If not provided + we will fall-back so safe-values contained in Device Tree. Enable + this config option if you wish to add CPUFreq support for STi based + SoCs. + config ARM_TEGRA20_CPUFREQ bool "Tegra20 CPUFreq support" depends on ARCH_TEGRA diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index c0af1a1281c8..9e63fb1b09f8 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -73,6 +73,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o +obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index cec1ee2d2f74..51eef87bbc37 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -135,7 +135,7 @@ static void boost_set_msrs(bool enable, const struct cpumask *cpumask) wrmsr_on_cpus(cpumask, msr_addr, msrs); } -static int _store_boost(int val) +static int set_boost(int val) { get_online_cpus(); boost_set_msrs(val, cpu_online_mask); @@ -158,29 +158,24 @@ static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) cpufreq_freq_attr_ro(freqdomain_cpus); #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB -static ssize_t store_boost(const char *buf, size_t count) +static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, + size_t count) { int ret; - unsigned long val = 0; + unsigned int val = 0; - if (!acpi_cpufreq_driver.boost_supported) + if (!acpi_cpufreq_driver.set_boost) return -EINVAL; - ret = kstrtoul(buf, 10, &val); - if (ret || (val > 1)) + ret = kstrtouint(buf, 10, &val); + if (ret || val > 1) return -EINVAL; - _store_boost((int) val); + set_boost(val); return count; } -static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, - size_t count) -{ - return store_boost(buf, count); -} - static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) { return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled); @@ -905,7 +900,6 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .resume = acpi_cpufreq_resume, .name = "acpi-cpufreq", .attr = acpi_cpufreq_attr, - .set_boost = _store_boost, }; static void __init acpi_cpufreq_boost_init(void) @@ -916,7 +910,7 @@ static void __init acpi_cpufreq_boost_init(void) if (!msrs) return; - acpi_cpufreq_driver.boost_supported = true; + acpi_cpufreq_driver.set_boost = set_boost; acpi_cpufreq_driver.boost_enabled = boost_state(0); cpu_notifier_register_begin(); diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index c5d256caa664..c251247ae661 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -23,6 +23,7 @@ #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/cpumask.h> +#include <linux/cpu_cooling.h> #include <linux/export.h> #include <linux/module.h> #include <linux/mutex.h> @@ -55,6 +56,7 @@ static bool bL_switching_enabled; #define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) #define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) +static struct thermal_cooling_device *cdev[MAX_CLUSTERS]; static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; @@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) static int bL_cpufreq_exit(struct cpufreq_policy *policy) { struct device *cpu_dev; + int cur_cluster = cpu_to_cluster(policy->cpu); + + if (cur_cluster < MAX_CLUSTERS) { + cpufreq_cooling_unregister(cdev[cur_cluster]); + cdev[cur_cluster] = NULL; + } cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy) return 0; } +static void bL_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct device *cpu_dev = get_cpu_device(policy->cpu); + int cur_cluster = cpu_to_cluster(policy->cpu); + struct device_node *np; + + /* Do not register a cpu_cooling device if we are in IKS mode */ + if (cur_cluster >= MAX_CLUSTERS) + return; + + np = of_node_get(cpu_dev->of_node); + if (WARN_ON(!np)) + return; + + if (of_find_property(np, "#cooling-cells", NULL)) { + u32 power_coefficient = 0; + + of_property_read_u32(np, "dynamic-power-coefficient", + &power_coefficient); + + cdev[cur_cluster] = of_cpufreq_power_cooling_register(np, + policy->related_cpus, power_coefficient, NULL); + if (IS_ERR(cdev[cur_cluster])) { + dev_err(cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(cdev[cur_cluster])); + cdev[cur_cluster] = NULL; + } + } + of_node_put(np); +} + static struct cpufreq_driver bL_cpufreq_driver = { .name = "arm-big-little", .flags = CPUFREQ_STICKY | @@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, .exit = bL_cpufreq_exit, + .ready = bL_cpufreq_ready, .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/blackfin-cpufreq.c b/drivers/cpufreq/blackfin-cpufreq.c index a9f8e5bd0716..12e97d8a9db0 100644 --- a/drivers/cpufreq/blackfin-cpufreq.c +++ b/drivers/cpufreq/blackfin-cpufreq.c @@ -112,7 +112,7 @@ static unsigned int bfin_getfreq_khz(unsigned int cpu) } #ifdef CONFIG_BF60x -unsigned long cpu_set_cclk(int cpu, unsigned long new) +static int cpu_set_cclk(int cpu, unsigned long new) { struct clk *clk; int ret; diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 90d64081ddb3..9bc37c437874 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -50,7 +50,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) struct private_data *priv = policy->driver_data; struct device *cpu_dev = priv->cpu_dev; struct regulator *cpu_reg = priv->cpu_reg; - unsigned long volt = 0, volt_old = 0, tol = 0; + unsigned long volt = 0, tol = 0; + int volt_old = 0; unsigned int old_freq, new_freq; long freq_Hz, freq_exact; int ret; @@ -83,7 +84,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index) opp_freq / 1000, volt); } - dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n", + dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, new_freq / 1000, volt ? volt / 1000 : -1); @@ -407,8 +408,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy) * thermal DT code takes care of matching them. */ if (of_find_property(np, "#cooling-cells", NULL)) { - priv->cdev = of_cpufreq_cooling_register(np, - policy->related_cpus); + u32 power_coefficient = 0; + + of_property_read_u32(np, "dynamic-power-coefficient", + &power_coefficient); + + priv->cdev = of_cpufreq_power_cooling_register(np, + policy->related_cpus, power_coefficient, NULL); if (IS_ERR(priv->cdev)) { dev_err(priv->cpu_dev, "running cpufreq without cooling device: %ld\n", diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8412ce5f93a7..c35e7da1ed7a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2330,29 +2330,15 @@ int cpufreq_boost_trigger_state(int state) return ret; } -int cpufreq_boost_supported(void) +static bool cpufreq_boost_supported(void) { - if (likely(cpufreq_driver)) - return cpufreq_driver->boost_supported; - - return 0; + return likely(cpufreq_driver) && cpufreq_driver->set_boost; } -EXPORT_SYMBOL_GPL(cpufreq_boost_supported); static int create_boost_sysfs_file(void) { int ret; - if (!cpufreq_boost_supported()) - return 0; - - /* - * Check if driver provides function to enable boost - - * if not, use cpufreq_boost_set_sw as default - */ - if (!cpufreq_driver->set_boost) - cpufreq_driver->set_boost = cpufreq_boost_set_sw; - ret = sysfs_create_file(cpufreq_global_kobject, &boost.attr); if (ret) pr_err("%s: cannot register global BOOST sysfs file\n", @@ -2375,7 +2361,7 @@ int cpufreq_enable_boost_support(void) if (cpufreq_boost_supported()) return 0; - cpufreq_driver->boost_supported = true; + cpufreq_driver->set_boost = cpufreq_boost_set_sw; /* This will get removed on driver unregister */ return create_boost_sysfs_file(); @@ -2435,9 +2421,11 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; - ret = create_boost_sysfs_file(); - if (ret) - goto err_null_driver; + if (cpufreq_boost_supported()) { + ret = create_boost_sysfs_file(); + if (ret) + goto err_null_driver; + } ret = subsys_interface_register(&cpufreq_interface); if (ret) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 1fa1deb6e91f..606ad74abe6e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -115,13 +115,13 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, - struct dbs_data *dbs_data, bool modify_all) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) { + struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; if (modify_all) - dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); + dbs_check_cpu(dbs_data, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } |
