27 files changed, 1324 insertions, 494 deletions
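Several of the hunks below are small correctness fixes. One worth calling out is in drivers/base/power/domain.c, further down, where 1000 * residency (residency is a u32 read with of_property_read_u32()) is widened to 1000LL * residency so the microsecond-to-nanosecond conversion is done in 64-bit arithmetic before being stored in the 64-bit residency_ns field. A minimal userspace sketch of the difference, using a hypothetical large min-residency-us value (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical value: large enough that us-to-ns conversion
	 * no longer fits in 32 bits. */
	uint32_t residency_us = 5000000;		/* 5 seconds */

	int64_t wraps   = 1000 * residency_us;		/* 32-bit multiply, wraps */
	int64_t widened = 1000LL * residency_us;	/* 64-bit multiply */

	printf("1000 * u32   = %lld\n", (long long)wraps);
	printf("1000LL * u32 = %lld\n", (long long)widened);
	return 0;
}

With the plain int constant the multiplication wraps at 2^32 before the assignment; the LL suffix keeps the full value, which is what the domain.c change relies on.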
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 7214197c15a0..ce62e61a9605 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -59,6 +59,7 @@ static int lps0_dsm_func_mask; static guid_t lps0_dsm_guid_microsoft; static int lps0_dsm_func_mask_microsoft; +static int lps0_dsm_state; /* Device constraint entry structure */ struct lpi_device_info { @@ -320,6 +321,44 @@ static void lpi_check_constraints(void) } } +static bool acpi_s2idle_vendor_amd(void) +{ + return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; +} + +static const char *acpi_sleep_dsm_state_to_str(unsigned int state) +{ + if (lps0_dsm_func_mask_microsoft || !acpi_s2idle_vendor_amd()) { + switch (state) { + case ACPI_LPS0_SCREEN_OFF: + return "screen off"; + case ACPI_LPS0_SCREEN_ON: + return "screen on"; + case ACPI_LPS0_ENTRY: + return "lps0 entry"; + case ACPI_LPS0_EXIT: + return "lps0 exit"; + case ACPI_LPS0_MS_ENTRY: + return "lps0 ms entry"; + case ACPI_LPS0_MS_EXIT: + return "lps0 ms exit"; + } + } else { + switch (state) { + case ACPI_LPS0_SCREEN_ON_AMD: + return "screen on"; + case ACPI_LPS0_SCREEN_OFF_AMD: + return "screen off"; + case ACPI_LPS0_ENTRY_AMD: + return "lps0 entry"; + case ACPI_LPS0_EXIT_AMD: + return "lps0 exit"; + } + } + + return "unknown"; +} + static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid) { union acpi_object *out_obj; @@ -331,14 +370,15 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, g rev_id, func, NULL); ACPI_FREE(out_obj); - acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", - func, out_obj ? "successful" : "failed"); + lps0_dsm_state = func; + if (pm_debug_messages_on) { + acpi_handle_info(lps0_device_handle, + "%s transitioned to state %s\n", + out_obj ? 
"Successfully" : "Failed to", + acpi_sleep_dsm_state_to_str(lps0_dsm_state)); + } } -static bool acpi_s2idle_vendor_amd(void) -{ - return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; -} static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid) { diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 32084e38b73d..5cb2023581d4 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1632,9 +1632,6 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, dev_dbg(dev, "%s()\n", __func__); - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) - return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev, gd); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); @@ -1676,6 +1673,9 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { int ret; + if (!genpd || !dev) + return -EINVAL; + mutex_lock(&gpd_list_lock); ret = genpd_add_device(genpd, dev, dev); mutex_unlock(&gpd_list_lock); @@ -2523,6 +2523,9 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) struct generic_pm_domain *genpd; int ret; + if (!dev) + return -EINVAL; + mutex_lock(&gpd_list_lock); genpd = genpd_get_from_provider(genpdspec); @@ -2939,10 +2942,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, err = of_property_read_u32(state_node, "min-residency-us", &residency); if (!err) - genpd_state->residency_ns = 1000 * residency; + genpd_state->residency_ns = 1000LL * residency; - genpd_state->power_on_latency_ns = 1000 * exit_latency; - genpd_state->power_off_latency_ns = 1000 * entry_latency; + genpd_state->power_on_latency_ns = 1000LL * exit_latency; + genpd_state->power_off_latency_ns = 1000LL * entry_latency; genpd_state->fwnode = &state_node->fwnode; return 0; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7cc0c0cf8eaa..a917219feea6 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -19,11 +19,6 @@ #include "power.h" -#ifndef CONFIG_SUSPEND -suspend_state_t pm_suspend_target_state; -#define pm_suspend_target_state (PM_SUSPEND_ON) -#endif - #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ srcu_read_lock_held(&wakeup_srcu)) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 2c839bd2b051..a1c51abddbc5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -38,7 +38,7 @@ choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM - default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP + default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP default CPU_FREQ_DEFAULT_GOV_PERFORMANCE help This option sets which CPUFreq governor shall be loaded at diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 00476e94db90..438c9e75a04d 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -51,6 +51,23 @@ config X86_AMD_PSTATE If in doubt, say N. +config X86_AMD_PSTATE_DEFAULT_MODE + int "AMD Processor P-State default mode" + depends on X86_AMD_PSTATE + default 3 if X86_AMD_PSTATE + range 1 4 + help + Select the default mode the amd-pstate driver will use on + supported hardware. 
+ The value set has the following meanings: + 1 -> Disabled + 2 -> Passive + 3 -> Active (EPP) + 4 -> Guided + + For details, take a look at: + <file:Documentation/admin-guide/pm/amd-pstate.rst>. + config X86_AMD_PSTATE_UT tristate "selftest for AMD Processor P-State driver" depends on X86 && ACPI_PROCESSOR diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ddd346a239e0..81fba0dcbee9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -62,7 +62,8 @@ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; -static int cppc_state = AMD_PSTATE_DISABLE; +static int cppc_state = AMD_PSTATE_UNDEFINED; +static bool cppc_enabled; /* * AMD Energy Preference Performance (EPP) @@ -228,7 +229,28 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, static inline int pstate_enable(bool enable) { - return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); + int ret, cpu; + unsigned long logical_proc_id_mask = 0; + + if (enable == cppc_enabled) + return 0; + + for_each_present_cpu(cpu) { + unsigned long logical_id = topology_logical_die_id(cpu); + + if (test_bit(logical_id, &logical_proc_id_mask)) + continue; + + set_bit(logical_id, &logical_proc_id_mask); + + ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, + enable); + if (ret) + return ret; + } + + cppc_enabled = enable; + return 0; } static int cppc_enable(bool enable) @@ -236,6 +258,9 @@ static int cppc_enable(bool enable) int cpu, ret = 0; struct cppc_perf_ctrls perf_ctrls; + if (enable == cppc_enabled) + return 0; + for_each_present_cpu(cpu) { ret = cppc_set_enable(cpu, enable); if (ret) @@ -251,6 +276,7 @@ static int cppc_enable(bool enable) } } + cppc_enabled = enable; return ret; } @@ -1045,6 +1071,26 @@ static const struct attribute_group amd_pstate_global_attr_group = { .attrs = pstate_global_attributes, }; +static bool amd_pstate_acpi_pm_profile_server(void) +{ + switch (acpi_gbl_FADT.preferred_profile) { + case PM_ENTERPRISE_SERVER: + case PM_SOHO_SERVER: + case PM_PERFORMANCE_SERVER: + return true; + } + return false; +} + +static bool amd_pstate_acpi_pm_profile_undefined(void) +{ + if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED) + return true; + if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES) + return true; + return false; +} + static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -1102,10 +1148,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->max = policy->cpuinfo.max_freq; /* - * Set the policy to powersave to provide a valid fallback value in case + * Set the policy to provide a valid fallback value in case * the default cpufreq governor is neither powersave nor performance. 
*/ - policy->policy = CPUFREQ_POLICY_POWERSAVE; + if (amd_pstate_acpi_pm_profile_server() || + amd_pstate_acpi_pm_profile_undefined()) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; if (boot_cpu_has(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); @@ -1356,10 +1406,29 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, - .name = "amd_pstate_epp", + .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; +static int __init amd_pstate_set_driver(int mode_idx) +{ + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { + cppc_state = mode_idx; + if (cppc_state == AMD_PSTATE_DISABLE) + pr_info("driver is explicitly disabled\n"); + + if (cppc_state == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) + current_pstate_driver = &amd_pstate_driver; + + return 0; + } + + return -EINVAL; +} + static int __init amd_pstate_init(void) { struct device *dev_root; @@ -1367,15 +1436,6 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; - /* - * by default the pstate driver is disabled to load - * enable the amd_pstate passive mode driver explicitly - * with amd_pstate=passive or other modes in kernel command line - */ - if (cppc_state == AMD_PSTATE_DISABLE) { - pr_info("driver load is disabled, boot with specific mode to enable this\n"); - return -ENODEV; - } if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); @@ -1386,6 +1446,33 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; + switch (cppc_state) { + case AMD_PSTATE_UNDEFINED: + /* Disable on the following configs by default: + * 1. Undefined platforms + * 2. Server platforms + * 3. 
Shared memory designs + */ + if (amd_pstate_acpi_pm_profile_undefined() || + amd_pstate_acpi_pm_profile_server() || + !boot_cpu_has(X86_FEATURE_CPPC)) { + pr_info("driver load is disabled, boot with specific mode to enable this\n"); + return -ENODEV; + } + ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); + if (ret) + return ret; + break; + case AMD_PSTATE_DISABLE: + return -ENODEV; + case AMD_PSTATE_PASSIVE: + case AMD_PSTATE_ACTIVE: + case AMD_PSTATE_GUIDED: + break; + default: + return -EINVAL; + } + /* capability check */ if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); @@ -1438,21 +1525,7 @@ static int __init amd_pstate_param(char *str) size = strlen(str); mode_idx = get_mode_idx_from_str(str, size); - if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { - cppc_state = mode_idx; - if (cppc_state == AMD_PSTATE_DISABLE) - pr_info("driver is explicitly disabled\n"); - - if (cppc_state == AMD_PSTATE_ACTIVE) - current_pstate_driver = &amd_pstate_epp_driver; - - if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) - current_pstate_driver = &amd_pstate_driver; - - return 0; - } - - return -EINVAL; + return amd_pstate_set_driver(mode_idx); } early_param("amd_pstate", amd_pstate_param); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6b52ebe5a890..50bbc969ffe5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2828,7 +2828,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) (driver_data->setpolicy && (driver_data->target_index || driver_data->target)) || (!driver_data->get_intermediate != !driver_data->target_intermediate) || - (!driver_data->online != !driver_data->offline)) + (!driver_data->online != !driver_data->offline) || + (driver_data->adjust_perf && !driver_data->fast_switch)) return -EINVAL; pr_debug("trying to register driver %s\n", driver_data->name); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2548ec92faa2..f29182512b98 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -824,6 +824,8 @@ static ssize_t store_energy_performance_preference( err = cpufreq_start_governor(policy); if (!ret) ret = err; + } else { + ret = 0; } } diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 88414445adf3..245898f1a88e 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -518,6 +518,7 @@ static struct platform_driver exynos_bus_platdrv = { }; module_platform_driver(exynos_bus_platdrv); +MODULE_SOFTDEP("pre: exynos_ppmu"); MODULE_DESCRIPTION("Generic Exynos Bus frequency driver"); MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c index e5458ada5197..6354622eda65 100644 --- a/drivers/devfreq/mtk-cci-devfreq.c +++ b/drivers/devfreq/mtk-cci-devfreq.c @@ -127,7 +127,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq, u32 flags) { struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev); - struct clk *cci_pll = clk_get_parent(drv->cci_clk); + struct clk *cci_pll; struct dev_pm_opp *opp; unsigned long opp_rate; int voltage, pre_voltage, inter_voltage, target_voltage, ret; @@ -139,6 +139,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq, return 0; inter_voltage = drv->inter_voltage; + cci_pll = clk_get_parent(drv->cci_clk); opp_rate = *freq; opp = 
devfreq_recommended_opp(dev, &opp_rate, 1); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index aa2d19db2b1d..34201d7ef33e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, return __intel_idle(dev, drv, index); } +static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + raw_safe_halt(); + raw_local_irq_disable(); + return index; +} + +/** + * intel_idle_hlt - Ask the processor to enter the given idle state using hlt. + * @dev: cpuidle device of the target CPU. + * @drv: cpuidle driver (assumed to point to intel_idle_driver). + * @index: Target idle state index. + * + * Use the HLT instruction to notify the processor that the CPU represented by + * @dev is idle and it can try to enter the idle state corresponding to @index. + * + * Must be called under local_irq_disable(). + */ +static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + return __intel_idle_hlt(dev, drv, index); +} + +static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int ret; + + raw_local_irq_enable(); + ret = __intel_idle_hlt(dev, drv, index); + raw_local_irq_disable(); + + return ret; +} + /** * intel_idle_s2idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state vmguest_cstates[] __initdata = { + { + .name = "C1", + .desc = "HLT", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, + .exit_latency = 5, + .target_residency = 10, + .enter = &intel_idle_hlt, }, + { + .name = "C1L", + .desc = "Long HLT", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 5, + .target_residency = 200, + .enter = &intel_idle_hlt, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) return true; } +static void state_update_enter_method(struct cpuidle_state *state, int cstate) +{ + if (state->enter == intel_idle_hlt) { + if (force_irq_on) { + pr_info("forced intel_idle_irq for state %d\n", cstate); + state->enter = intel_idle_hlt_irq_on; + } + return; + } + if (state->enter == intel_idle_hlt_irq_on) + return; /* no update scenarios */ + + if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { + /* + * Combining with XSTATE with IBRS or IRQ_ENABLE flags + * is not currently supported but this driver. + */ + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); + state->enter = intel_idle_xstate; + return; + } + + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + state->flags & CPUIDLE_FLAG_IBRS) { + /* + * IBRS mitigation requires that C-states are entered + * with interrupts disabled. 
+ */ + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); + state->enter = intel_idle_ibrs; + return; + } + + if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { + state->enter = intel_idle_irq; + return; + } + + if (force_irq_on) { + pr_info("forced intel_idle_irq for state %d\n", cstate); + state->enter = intel_idle_irq; + } +} + +/* + * For mwait based states, we want to verify the cpuid data to see if the state + * is actually supported by this specific CPU. + * For non-mwait based states, this check should be skipped. + */ +static bool should_verify_mwait(struct cpuidle_state *state) +{ + if (state->enter == intel_idle_hlt) + return false; + if (state->enter == intel_idle_hlt_irq_on) + return false; + + return true; +} + static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; @@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) } mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); - if (!intel_idle_verify_cstate(mwait_hint)) + if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint)) continue; /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; state = &drv->states[drv->state_count]; - if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { - /* - * Combining with XSTATE with IBRS or IRQ_ENABLE flags - * is not currently supported but this driver. - */ - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); - state->enter = intel_idle_xstate; - } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && - state->flags & CPUIDLE_FLAG_IBRS) { - /* - * IBRS mitigation requires that C-states are entered - * with interrupts disabled. - */ - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); - state->enter = intel_idle_ibrs; - } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { - state->enter = intel_idle_irq; - } else if (force_irq_on) { - pr_info("forced intel_idle_irq for state %d\n", cstate); - state->enter = intel_idle_irq; - } + state_update_enter_method(state, cstate); + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && @@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void) cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); } +/* + * Match up the latency and break even point of the bare metal (cpu based) + * states with the deepest VM available state. + * + * We only want to do this for the deepest state, the ones that has + * the TLB_FLUSHED flag set on the . + * + * All our short idle states are dominated by vmexit/vmenter latencies, + * not the underlying hardware latencies so we keep our values for these. 
+ */ +static void matchup_vm_state_with_baremetal(void) +{ + int cstate; + + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int matching_cstate; + + if (intel_idle_max_cstate_reached(cstate)) + break; + + if (!cpuidle_state_table[cstate].enter) + break; + + if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED)) + continue; + + for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) { + if (!icpu->state_table[matching_cstate].enter) + break; + if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) { + cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency; + cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency; + } + } + + } +} + + +static int __init intel_idle_vminit(const struct x86_cpu_id *id) +{ + int retval; + + cpuidle_state_table = vmguest_cstates; + + icpu = (const struct idle_cpu *)id->driver_data; + + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", + boot_cpu_data.x86_model); + + intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (!intel_idle_cpuidle_devices) + return -ENOMEM; + + /* + * We don't know exactly what the host will do when we go idle, but as a worst estimate + * we can assume that the exit latency of the deepest host state will be hit for our + * deep (long duration) guest idle state. + * The same logic applies to the break even point for the long duration guest idle state. + * So lets copy these two properties from the table we found for the host CPU type. + */ + matchup_vm_state_with_baremetal(); + + intel_idle_cpuidle_driver_init(&intel_idle_driver); + + retval = cpuidle_register_driver(&intel_idle_driver); + if (retval) { + struct cpuidle_driver *drv = cpuidle_get_driver(); + printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), + drv ? 
drv->name : "none"); + goto init_driver_fail; + } + + retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", + intel_idle_cpu_online, NULL); + if (retval < 0) + goto hp_setup_fail; + + return 0; +hp_setup_fail: + intel_idle_cpuidle_devices_uninit(); + cpuidle_unregister_driver(&intel_idle_driver); +init_driver_fail: + free_percpu(intel_idle_cpuidle_devices); + return retval; +} + static int __init intel_idle_init(void) { const struct x86_cpu_id *id; @@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void) id = x86_match_cpu(intel_idle_ids); if (id) { if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return intel_idle_vminit(id); pr_debug("Please enable MWAIT in BIOS SETUP\n"); return -ENODEV; } diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index f279b360c20d..43d3530bab48 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -30,6 +30,7 @@ #include <linux/pinctrl/pinconf.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/pinctrl/pinmux.h> +#include <linux/suspend.h> #include "core.h" #include "pinctrl-utils.h" @@ -636,9 +637,8 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id) regval = readl(regs + i); if (regval & PIN_IRQ_PENDING) - dev_dbg(&gpio_dev->pdev->dev, - "GPIO %d is active: 0x%x", - irqnr + i, regval); + pm_pr_dbg("GPIO %d is active: 0x%x", + irqnr + i, regval); /* caused wake on resume context for shared IRQ */ if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 427905714f79..1304cd6f13f6 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -543,7 +543,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev, } if (dev) - dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val); + pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val); if (s) seq_printf(s, "SMU idlemask : 0x%x\n", val); @@ -769,7 +769,7 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg) *arg |= (duration << 16); rc = rtc_alarm_irq_enable(rtc_device, 0); - dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration); + pm_pr_dbg("wakeup timer programmed for %lld seconds\n", duration); return rc; } diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 90d33cd1b670..69ef8d081c98 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -18,10 +18,12 @@ if POWERCAP # Client driver configurations go here. config INTEL_RAPL_CORE tristate + depends on PCI + select IOSF_MBI config INTEL_RAPL tristate "Intel RAPL Support via MSR Interface" - depends on X86 && IOSF_MBI + depends on X86 && PCI select INTEL_RAPL_CORE help This enables support for the Intel Running Average Power Limit (RAPL) @@ -33,6 +35,20 @@ config INTEL_RAPL controller, CPU core (Power Plane 0), graphics uncore (Power Plane 1), etc. +config INTEL_RAPL_TPMI + tristate "Intel RAPL Support via TPMI Interface" + depends on X86 + depends on INTEL_TPMI + select INTEL_RAPL_CORE + help + This enables support for the Intel Running Average Power Limit (RAPL) + technology via TPMI interface, which allows power limits to be enforced + and monitored. + + In RAPL, the platform level settings are divided into domains for + fine grained control. These domains include processor package, DRAM + controller, platform, etc. 
+ config IDLE_INJECT bool "Idle injection framework" depends on CPU_IDLE diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 4474201b4aa7..5ab0dce565b9 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o +obj-$(CONFIG_INTEL_RAPL_TPMI) += intel_rapl_tpmi.o obj-$(CONFIG_IDLE_INJECT) += idle_inject.o obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 8970c7b80884..4e646e5e48f6 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -75,6 +75,15 @@ #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) +/* bitmasks for RAPL TPMI, used by primitive access functions */ +#define TPMI_POWER_LIMIT_MASK 0x3FFFF +#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62) +#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18) +#define TPMI_INFO_SPEC_MASK 0x3FFFF +#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18) +#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36) +#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
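The new TPMI_* masks above pack an 18-bit limit field, a 7-bit time-window field and an enable bit into a single 64-bit register. A rough sketch of how such a register would be unpacked follows; it is illustrative only, the field semantics are inferred from the mask layout, and the helper is not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Mask values copied from the hunk above; BIT_ULL(62) written out by hand. */
#define TPMI_POWER_LIMIT_MASK	0x3FFFFULL
#define TPMI_POWER_LIMIT_ENABLE	(1ULL << 62)
#define TPMI_TIME_WINDOW_MASK	(0x7FULL << 18)

/* Hypothetical decoder for a TPMI RAPL power-limit register value. */
static void tpmi_decode_power_limit(uint64_t reg)
{
	uint64_t limit  = reg & TPMI_POWER_LIMIT_MASK;
	uint64_t window = (reg & TPMI_TIME_WINDOW_MASK) >> 18;
	int enabled     = !!(reg & TPMI_POWER_LIMIT_ENABLE);

	printf("limit=0x%llx window=0x%llx enabled=%d\n",
	       (unsigned long long)limit,
	       (unsigned long long)window, enabled);
}

int main(void)
{
	/* Made-up register value: enable bit set, window 0x10, limit 0x1F40. */
	tpmi_decode_power_limit((1ULL << 62) | (0x10ULL << 18) | 0x1F40);
	return 0;
}

A real driver would additionally convert the raw fields using the platform's RAPL unit information before reporting watts or seconds; the sketch ignores that step.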
