-rw-r--r--  drivers/acpi/x86/s2idle.c                                       |  52
-rw-r--r--  drivers/base/power/domain.c                                     |  15
-rw-r--r--  drivers/base/power/wakeup.c                                     |   5
-rw-r--r--  drivers/cpufreq/Kconfig                                         |   2
-rw-r--r--  drivers/cpufreq/Kconfig.x86                                     |  17
-rw-r--r--  drivers/cpufreq/amd-pstate.c                                    | 131
-rw-r--r--  drivers/cpufreq/cpufreq.c                                       |   3
-rw-r--r--  drivers/cpufreq/intel_pstate.c                                  |   2
-rw-r--r--  drivers/devfreq/exynos-bus.c                                    |   1
-rw-r--r--  drivers/devfreq/mtk-cci-devfreq.c                               |   3
-rw-r--r--  drivers/idle/intel_idle.c                                       | 231
-rw-r--r--  drivers/pinctrl/pinctrl-amd.c                                   |   6
-rw-r--r--  drivers/platform/x86/amd/pmc.c                                  |   4
-rw-r--r--  drivers/powercap/Kconfig                                        |  18
-rw-r--r--  drivers/powercap/Makefile                                       |   1
-rw-r--r--  drivers/powercap/intel_rapl_common.c                            | 883
-rw-r--r--  drivers/powercap/intel_rapl_msr.c                               |  31
-rw-r--r--  drivers/powercap/intel_rapl_tpmi.c                              | 325
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c |  11
-rw-r--r--  include/acpi/actbl.h                                            |   3
-rw-r--r--  include/linux/amd-pstate.h                                      |   4
-rw-r--r--  include/linux/cpufreq.h                                         |   5
-rw-r--r--  include/linux/devfreq.h                                         |   3
-rw-r--r--  include/linux/intel_rapl.h                                      |  40
-rw-r--r--  include/linux/suspend.h                                         |  14
-rw-r--r--  kernel/power/main.c                                             |   6
-rw-r--r--  kernel/power/snapshot.c                                         |   2
27 files changed, 1324 insertions(+), 494 deletions(-)
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 7214197c15a0..ce62e61a9605 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -59,6 +59,7 @@ static int lps0_dsm_func_mask;
static guid_t lps0_dsm_guid_microsoft;
static int lps0_dsm_func_mask_microsoft;
+static int lps0_dsm_state;
/* Device constraint entry structure */
struct lpi_device_info {
@@ -320,6 +321,44 @@ static void lpi_check_constraints(void)
}
}
+static bool acpi_s2idle_vendor_amd(void)
+{
+ return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+}
+
+static const char *acpi_sleep_dsm_state_to_str(unsigned int state)
+{
+ if (lps0_dsm_func_mask_microsoft || !acpi_s2idle_vendor_amd()) {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_OFF:
+ return "screen off";
+ case ACPI_LPS0_SCREEN_ON:
+ return "screen on";
+ case ACPI_LPS0_ENTRY:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT:
+ return "lps0 exit";
+ case ACPI_LPS0_MS_ENTRY:
+ return "lps0 ms entry";
+ case ACPI_LPS0_MS_EXIT:
+ return "lps0 ms exit";
+ }
+ } else {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_ON_AMD:
+ return "screen on";
+ case ACPI_LPS0_SCREEN_OFF_AMD:
+ return "screen off";
+ case ACPI_LPS0_ENTRY_AMD:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT_AMD:
+ return "lps0 exit";
+ }
+ }
+
+ return "unknown";
+}
+
static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid)
{
union acpi_object *out_obj;
@@ -331,14 +370,15 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, g
rev_id, func, NULL);
ACPI_FREE(out_obj);
- acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
- func, out_obj ? "successful" : "failed");
+ lps0_dsm_state = func;
+ if (pm_debug_messages_on) {
+ acpi_handle_info(lps0_device_handle,
+ "%s transitioned to state %s\n",
+ out_obj ? "Successfully" : "Failed to",
+ acpi_sleep_dsm_state_to_str(lps0_dsm_state));
+ }
}
-static bool acpi_s2idle_vendor_amd(void)
-{
- return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
-}
static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid)
{
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 32084e38b73d..5cb2023581d4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1632,9 +1632,6 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_dbg(dev, "%s()\n", __func__);
- if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
- return -EINVAL;
-
gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
@@ -1676,6 +1673,9 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
{
int ret;
+ if (!genpd || !dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
ret = genpd_add_device(genpd, dev, dev);
mutex_unlock(&gpd_list_lock);
@@ -2523,6 +2523,9 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
struct generic_pm_domain *genpd;
int ret;
+ if (!dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
genpd = genpd_get_from_provider(genpdspec);
@@ -2939,10 +2942,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
err = of_property_read_u32(state_node, "min-residency-us", &residency);
if (!err)
- genpd_state->residency_ns = 1000 * residency;
+ genpd_state->residency_ns = 1000LL * residency;
- genpd_state->power_on_latency_ns = 1000 * exit_latency;
- genpd_state->power_off_latency_ns = 1000 * entry_latency;
+ genpd_state->power_on_latency_ns = 1000LL * exit_latency;
+ genpd_state->power_off_latency_ns = 1000LL * entry_latency;
genpd_state->fwnode = &state_node->fwnode;
return 0;
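
The LL suffixes are the substance of this hunk: residency, exit_latency and entry_latency are 32-bit values read from DT, so the old `1000 * x` multiply was performed in 32-bit arithmetic and wrapped before the result was widened into the 64-bit *_ns fields. A standalone userspace sketch of the failure mode (illustrative, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* A firmware-reported residency of 5 s, in microseconds. */
	uint32_t residency_us = 5000000;

	/* 32-bit multiply: wraps modulo 2^32 before the widening
	 * assignment, producing a bogus residency. */
	int64_t wrong = 1000 * residency_us;

	/* 64-bit multiply: the LL suffix widens the arithmetic first. */
	int64_t right = 1000LL * residency_us;

	printf("wrong: %lld ns\n", (long long)wrong);	/* 705032704 */
	printf("right: %lld ns\n", (long long)right);	/* 5000000000 */
	return 0;
}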
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 7cc0c0cf8eaa..a917219feea6 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -19,11 +19,6 @@
#include "power.h"
-#ifndef CONFIG_SUSPEND
-suspend_state_t pm_suspend_target_state;
-#define pm_suspend_target_state (PM_SUSPEND_ON)
-#endif
-
#define list_for_each_entry_rcu_locked(pos, head, member) \
list_for_each_entry_rcu(pos, head, member, \
srcu_read_lock_held(&wakeup_srcu))
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c839bd2b051..a1c51abddbc5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -38,7 +38,7 @@ choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM
- default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP
+ default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP
default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
help
This option sets which CPUFreq governor shall be loaded at
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 00476e94db90..438c9e75a04d 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -51,6 +51,23 @@ config X86_AMD_PSTATE
If in doubt, say N.
+config X86_AMD_PSTATE_DEFAULT_MODE
+ int "AMD Processor P-State default mode"
+ depends on X86_AMD_PSTATE
+ default 3 if X86_AMD_PSTATE
+ range 1 4
+ help
+ Select the default mode the amd-pstate driver will use on
+ supported hardware.
+ The value set has the following meanings:
+ 1 -> Disabled
+ 2 -> Passive
+ 3 -> Active (EPP)
+ 4 -> Guided
+
+ For details, take a look at:
+ <file:Documentation/admin-guide/pm/amd-pstate.rst>.
+
config X86_AMD_PSTATE_UT
tristate "selftest for AMD Processor P-State driver"
depends on X86 && ACPI_PROCESSOR
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ddd346a239e0..81fba0dcbee9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -62,7 +62,8 @@
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
-static int cppc_state = AMD_PSTATE_DISABLE;
+static int cppc_state = AMD_PSTATE_UNDEFINED;
+static bool cppc_enabled;
/*
* AMD Energy Preference Performance (EPP)
@@ -228,7 +229,28 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
static inline int pstate_enable(bool enable)
{
- return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
+ int ret, cpu;
+ unsigned long logical_proc_id_mask = 0;
+
+ if (enable == cppc_enabled)
+ return 0;
+
+ for_each_present_cpu(cpu) {
+ unsigned long logical_id = topology_logical_die_id(cpu);
+
+ if (test_bit(logical_id, &logical_proc_id_mask))
+ continue;
+
+ set_bit(logical_id, &logical_proc_id_mask);
+
+ ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
+ enable);
+ if (ret)
+ return ret;
+ }
+
+ cppc_enabled = enable;
+ return 0;
}
static int cppc_enable(bool enable)
@@ -236,6 +258,9 @@ static int cppc_enable(bool enable)
int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
+ if (enable == cppc_enabled)
+ return 0;
+
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
@@ -251,6 +276,7 @@ static int cppc_enable(bool enable)
}
}
+ cppc_enabled = enable;
return ret;
}
@@ -1045,6 +1071,26 @@ static const struct attribute_group amd_pstate_global_attr_group = {
.attrs = pstate_global_attributes,
};
+static bool amd_pstate_acpi_pm_profile_server(void)
+{
+ switch (acpi_gbl_FADT.preferred_profile) {
+ case PM_ENTERPRISE_SERVER:
+ case PM_SOHO_SERVER:
+ case PM_PERFORMANCE_SERVER:
+ return true;
+ }
+ return false;
+}
+
+static bool amd_pstate_acpi_pm_profile_undefined(void)
+{
+ if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
+ return true;
+ if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
+ return true;
+ return false;
+}
+
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -1102,10 +1148,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->max = policy->cpuinfo.max_freq;
/*
- * Set the policy to powersave to provide a valid fallback value in case
+ * Set the policy to provide a valid fallback value in case
* the default cpufreq governor is neither powersave nor performance.
*/
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ if (amd_pstate_acpi_pm_profile_server() ||
+ amd_pstate_acpi_pm_profile_undefined())
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1356,10 +1406,29 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
- .name = "amd_pstate_epp",
+ .name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
+static int __init amd_pstate_set_driver(int mode_idx)
+{
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
static int __init amd_pstate_init(void)
{
struct device *dev_root;
@@ -1367,15 +1436,6 @@ static int __init amd_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
- /*
- * by default the pstate driver is disabled to load
- * enable the amd_pstate passive mode driver explicitly
- * with amd_pstate=passive or other modes in kernel command line
- */
- if (cppc_state == AMD_PSTATE_DISABLE) {
- pr_info("driver load is disabled, boot with specific mode to enable this\n");
- return -ENODEV;
- }
if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
@@ -1386,6 +1446,33 @@ static int __init amd_pstate_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
+ switch (cppc_state) {
+ case AMD_PSTATE_UNDEFINED:
+ /* Disable on the following configs by default:
+ * 1. Undefined platforms
+ * 2. Server platforms
+ * 3. Shared memory designs
+ */
+ if (amd_pstate_acpi_pm_profile_undefined() ||
+ amd_pstate_acpi_pm_profile_server() ||
+ !boot_cpu_has(X86_FEATURE_CPPC)) {
+ pr_info("driver load is disabled, boot with specific mode to enable this\n");
+ return -ENODEV;
+ }
+ ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
+ if (ret)
+ return ret;
+ break;
+ case AMD_PSTATE_DISABLE:
+ return -ENODEV;
+ case AMD_PSTATE_PASSIVE:
+ case AMD_PSTATE_ACTIVE:
+ case AMD_PSTATE_GUIDED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
@@ -1438,21 +1525,7 @@ static int __init amd_pstate_param(char *str)
size = strlen(str);
mode_idx = get_mode_idx_from_str(str, size);
- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
- cppc_state = mode_idx;
- if (cppc_state == AMD_PSTATE_DISABLE)
- pr_info("driver is explicitly disabled\n");
-
- if (cppc_state == AMD_PSTATE_ACTIVE)
- current_pstate_driver = &amd_pstate_epp_driver;
-
- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
- current_pstate_driver = &amd_pstate_driver;
-
- return 0;
- }
-
- return -EINVAL;
+ return amd_pstate_set_driver(mode_idx);
}
early_param("amd_pstate", amd_pstate_param);
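
The amd_pstate_acpi_pm_profile_server()/_undefined() helpers above key off the FADT preferred PM profile. For orientation, those values come from enum acpi_preferred_pm_profiles in include/acpi/actbl.h (the actbl.h entry in the diffstat touches this enum); the sketch below follows the ACPI-defined table and should be treated as illustrative:

/* Preferred PM profiles as reported by the ACPI FADT. */
enum acpi_preferred_pm_profiles {
	PM_UNSPECIFIED = 0,		/* -> treated as undefined */
	PM_DESKTOP = 1,
	PM_MOBILE = 2,
	PM_WORKSTATION = 3,
	PM_ENTERPRISE_SERVER = 4,	/* -> treated as server */
	PM_SOHO_SERVER = 5,		/* -> treated as server */
	PM_APPLIANCE_PC = 6,
	PM_PERFORMANCE_SERVER = 7,	/* -> treated as server */
	PM_TABLET = 8,
	NR_PM_PROFILES = 9		/* anything >= this is undefined too */
};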
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6b52ebe5a890..50bbc969ffe5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2828,7 +2828,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)) ||
(!driver_data->get_intermediate != !driver_data->target_intermediate) ||
- (!driver_data->online != !driver_data->offline))
+ (!driver_data->online != !driver_data->offline) ||
+ (driver_data->adjust_perf && !driver_data->fast_switch))
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
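
The added condition rejects a driver that implements .adjust_perf without .fast_switch, apparently because the scheduler-driven frequency-update path that uses adjust_perf relies on fast switching being available. A hypothetical driver fragment that the new check would refuse (all example_* names are made up for illustration):

/* Registration of this driver now fails with -EINVAL:
 * .adjust_perf is set but .fast_switch is not. */
static struct cpufreq_driver example_driver = {
	.name		= "example",
	.flags		= CPUFREQ_CONST_LOOPS,
	.init		= example_cpu_init,		/* hypothetical */
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= example_target_index,		/* hypothetical */
	.adjust_perf	= example_adjust_perf,		/* hypothetical */
	/* .fast_switch deliberately missing */
};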
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2548ec92faa2..f29182512b98 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -824,6 +824,8 @@ static ssize_t store_energy_performance_preference(
err = cpufreq_start_governor(policy);
if (!ret)
ret = err;
+ } else {
+ ret = 0;
}
}
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 88414445adf3..245898f1a88e 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -518,6 +518,7 @@ static struct platform_driver exynos_bus_platdrv = {
};
module_platform_driver(exynos_bus_platdrv);
+MODULE_SOFTDEP("pre: exynos_ppmu");
MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c
index e5458ada5197..6354622eda65 100644
--- a/drivers/devfreq/mtk-cci-devfreq.c
+++ b/drivers/devfreq/mtk-cci-devfreq.c
@@ -127,7 +127,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev);
- struct clk *cci_pll = clk_get_parent(drv->cci_clk);
+ struct clk *cci_pll;
struct dev_pm_opp *opp;
unsigned long opp_rate;
int voltage, pre_voltage, inter_voltage, target_voltage, ret;
@@ -139,6 +139,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
return 0;
inter_voltage = drv->inter_voltage;
+ cci_pll = clk_get_parent(drv->cci_clk);
opp_rate = *freq;
opp = devfreq_recommended_opp(dev, &opp_rate, 1);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index aa2d19db2b1d..34201d7ef33e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
+static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ raw_safe_halt();
+ raw_local_irq_disable();
+ return index;
+}
+
+/**
+ * intel_idle_hlt - Ask the processor to enter the given idle state using HLT.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the HLT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * Must be called under local_irq_disable().
+ */
+static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ return __intel_idle_hlt(dev, drv, index);
+}
+
+static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
+
+ raw_local_irq_enable();
+ ret = __intel_idle_hlt(dev, drv, index);
+ raw_local_irq_disable();
+
+ return ret;
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
+static struct cpuidle_state vmguest_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
+ .exit_latency = 5,
+ .target_residency = 10,
+ .enter = &intel_idle_hlt, },
+ {
+ .name = "C1L",
+ .desc = "Long HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 5,
+ .target_residency = 200,
+ .enter = &intel_idle_hlt, },
+ {
+ .enter = NULL }
+};
+
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
+static void state_update_enter_method(struct cpuidle_state *state, int cstate)
+{
+ if (state->enter == intel_idle_hlt) {
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_hlt_irq_on;
+ }
+ return;
+ }
+ if (state->enter == intel_idle_hlt_irq_on)
+ return; /* no update scenarios */
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
+ /*
+ * Combining CPUIDLE_FLAG_INIT_XSTATE with the IBRS or
+ * IRQ_ENABLE flags is not currently supported by this driver.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_xstate;
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ state->flags & CPUIDLE_FLAG_IBRS) {
+ /*
+ * IBRS mitigation requires that C-states are entered
+ * with interrupts disabled.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_ibrs;
+ return;
+ }
+
+ if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
+ state->enter = intel_idle_irq;
+ return;
+ }
+
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_irq;
+ }
+}
+
+/*
+ * For MWAIT-based states, we want to verify the CPUID data to see if the state
+ * is actually supported by this specific CPU.
+ * For non-MWAIT-based states (e.g. HLT), this check should be skipped.
+ */
+static bool should_verify_mwait(struct cpuidle_state *state)
+{
+ if (state->enter == intel_idle_hlt)
+ return false;
+ if (state->enter == intel_idle_hlt_irq_on)
+ return false;
+
+ return true;
+}
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
- if (!intel_idle_verify_cstate(mwait_hint))
+ if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
state = &drv->states[drv->state_count];
- if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
- /*
- * Combining with XSTATE with IBRS or IRQ_ENABLE flags
- * is not currently supported but this driver.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_xstate;
- } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
- state->flags & CPUIDLE_FLAG_IBRS) {
- /*
- * IBRS mitigation requires that C-states are entered
- * with interrupts disabled.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_ibrs;
- } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
- state->enter = intel_idle_irq;
- } else if (force_irq_on) {
- pr_info("forced intel_idle_irq for state %d\n", cstate);
- state->enter = intel_idle_irq;
- }
+ state_update_enter_method(state, cstate);
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
@@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+/*
+ * Match up the latency and break-even point of the bare-metal (CPU-based)
+ * states with the deepest available VM state.
+ *
+ * We only want to do this for the deepest states, the ones that have
+ * the CPUIDLE_FLAG_TLB_FLUSHED flag set.
+ *
+ * All our short idle states are dominated by vmexit/vmenter latencies,
+ * not the underlying hardware latencies, so we keep our values for these.
+ */
+static void matchup_vm_state_with_baremetal(void)
+{
+ int cstate;
+
+ for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+ int matching_cstate;
+
+ if (intel_idle_max_cstate_reached(cstate))
+ break;
+
+ if (!cpuidle_state_table[cstate].enter)
+ break;
+
+ if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
+ continue;
+
+ for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
+ if (!icpu->state_table[matching_cstate].enter)
+ break;
+ if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
+ cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
+ cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
+ }
+ }
+
+ }
+}
+
+
+static int __init intel_idle_vminit(const struct x86_cpu_id *id)
+{
+ int retval;
+
+ cpuidle_state_table = vmguest_cstates;
+
+ icpu = (const struct idle_cpu *)id->driver_data;
+
+ pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
+ boot_cpu_data.x86_model);
+
+ intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (!intel_idle_cpuidle_devices)
+ return -ENOMEM;
+
+ /*
+ * We don't know exactly what the host will do when we go idle, but as a
+ * worst-case estimate we can assume that the exit latency of the deepest
+ * host state will be hit for our deep (long-duration) guest idle state.
+ * The same logic applies to the break-even point for the long-duration
+ * guest idle state. So let's copy these two properties from the table we
+ * found for the host CPU type.
+ */
+ matchup_vm_state_with_baremetal();
+
+ intel_idle_cpuidle_driver_init(&intel_idle_driver);
+
+ retval = cpuidle_register_driver(&intel_idle_driver);
+ if (retval) {
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
+ drv ? drv->name : "none");
+ goto init_driver_fail;
+ }
+
+ retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
+ intel_idle_cpu_online, NULL);
+ if (retval < 0)
+ goto hp_setup_fail;
+
+ return 0;
+hp_setup_fail:
+ intel_idle_cpuidle_devices_uninit();
+ cpuidle_unregister_driver(&intel_idle_driver);
+init_driver_fail:
+ free_percpu(intel_idle_cpuidle_devices);
+ return retval;
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index f279b360c20d..43d3530bab48 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -30,6 +30,7 @@
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinmux.h>
+#include <linux/suspend.h>
#include "core.h"
#include "pinctrl-utils.h"
@@ -636,9 +637,8 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
regval = readl(regs + i);
if (regval & PIN_IRQ_PENDING)
- dev_dbg(&gpio_dev->pdev->dev,
- "GPIO %d is active: 0x%x",
- irqnr + i, regval);
+ pm_pr_dbg("GPIO %d is active: 0x%x",
+ irqnr + i, regval);
/* caused wake on resume context for shared IRQ */
if (irq < 0 && (regval & BIT(WAKE_STS_OFF)))
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index 427905714f79..1304cd6f13f6 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -543,7 +543,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
}
if (dev)
- dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
+ pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val);
if (s)
seq_printf(s, "SMU idlemask : 0x%x\n", val);
@@ -769,7 +769,7 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
*arg |= (duration << 16);
rc = rtc_alarm_irq_enable(rtc_device, 0);
- dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ pm_pr_dbg("wakeup timer programmed for %lld seconds\n", duration);
return rc;
}
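
The dev_dbg() -> pm_pr_dbg() conversions here and in pinctrl-amd above tie these messages to the PM framework's debug switch: pm_pr_dbg() (from include/linux/suspend.h) prints only while pm_debug_messages is enabled, so suspend/resume flows can be traced without enabling generic dynamic debug. A minimal usage sketch with a made-up function name:

#include <linux/suspend.h>

/* Hypothetical helper: the message reaches the log only when PM debug
 * messages are enabled (e.g. by writing 1 to /sys/power/pm_debug_messages). */
static void example_report_idlemask(u32 val)
{
	pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val);
}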
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 90d33cd1b670..69ef8d081c98 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -18,10 +18,12 @@ if POWERCAP
# Client driver configurations go here.
config INTEL_RAPL_CORE
tristate
+ depends on PCI
+ select IOSF_MBI
config INTEL_RAPL
tristate "Intel RAPL Support via MSR Interface"
- depends on X86 && IOSF_MBI
+ depends on X86 && PCI
select INTEL_RAPL_CORE
help
This enables support for the Intel Running Average Power Limit (RAPL)
@@ -33,6 +35,20 @@ config INTEL_RAPL
controller, CPU core (Power Plane 0), graphics uncore (Power Plane
1), etc.
+config INTEL_RAPL_TPMI
+ tristate "Intel RAPL Support via TPMI Interface"
+ depends on X86
+ depends on INTEL_TPMI
+ select INTEL_RAPL_CORE
+ help
+ This enables support for the Intel Running Average Power Limit (RAPL)
+ technology via the TPMI interface, which allows power limits to be enforced
+ and monitored.
+
+ In RAPL, the platform level settings are divided into domains for
+ fine grained control. These domains include processor package, DRAM
+ controller, platform, etc.
+
config IDLE_INJECT
bool "Idle injection framework"
depends on CPU_IDLE
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 4474201b4aa7..5ab0dce565b9 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -5,5 +5,6 @@ obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
+obj-$(CONFIG_INTEL_RAPL_TPMI) += intel_rapl_tpmi.o
obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 8970c7b80884..4e646e5e48f6 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -75,6 +75,15 @@
#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
+/* bitmasks for RAPL TPMI, used by primitive access functions */
+#define TPMI_POWER_LIMIT_MASK 0x3FFFF
+#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
+#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
+#define TPMI_INFO_SPEC_MASK 0x3FFFF
+#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
+#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
+#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
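
A small decode sketch for these masks (illustrative only, assuming the layout the masks imply: power limit in bits 17:0, time window in bits 24:18, enable in bit 62), using the kernel's FIELD_GET() helper:

#include <linux/bitfield.h>
#include <linux/types.h>

static inline u64 tpmi_power_limit(u64 reg)
{
	/* Raw 18-bit power-limit field, bits 17:0. */
	return FIELD_GET(TPMI_POWER_LIMIT_MASK, reg);
}

static inline u64 tpmi_time_window(u64 reg)
{
	/* 7-bit time-window field, bits 24:18. */
	return FIELD_GET(TPMI_TIME_WINDOW_MASK, reg);
}

static inline bool tpmi_limit_enabled(u64 reg)
{
	/* Enable bit lives at bit 62. */
	return !!(reg & TPMI_POWER_LIMIT_ENABLE);
}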