diff options
| -rw-r--r-- | Documentation/ABI/testing/sysfs-class-devfreq | 3 | ||||
| -rw-r--r-- | Documentation/power/freezing-of-tasks.rst | 85 | ||||
| -rw-r--r-- | drivers/base/power/main.c | 148 | ||||
| -rw-r--r-- | drivers/cpufreq/armada-8k-cpufreq.c | 4 | ||||
| -rw-r--r-- | drivers/cpufreq/intel_pstate.c | 15 | ||||
| -rw-r--r-- | drivers/cpufreq/scmi-cpufreq.c | 7 | ||||
| -rw-r--r-- | drivers/cpuidle/cpuidle-haltpoll.c | 9 | ||||
| -rw-r--r-- | drivers/devfreq/devfreq.c | 80 | ||||
| -rw-r--r-- | drivers/idle/intel_idle.c | 114 | ||||
| -rw-r--r-- | drivers/opp/core.c | 294 | ||||
| -rw-r--r-- | drivers/opp/of.c | 57 | ||||
| -rw-r--r-- | drivers/opp/opp.h | 24 | ||||
| -rw-r--r-- | drivers/opp/ti-opp-supply.c | 13 | ||||
| -rw-r--r-- | include/linux/async.h | 2 | ||||
| -rw-r--r-- | include/linux/pm_opp.h | 28 | ||||
| -rw-r--r-- | kernel/async.c | 85 | ||||
| -rw-r--r-- | kernel/power/hibernate.c | 10 | ||||
| -rw-r--r-- | kernel/power/main.c | 16 | ||||
| -rw-r--r-- | kernel/power/power.h | 2 | ||||
| -rw-r--r-- | kernel/power/snapshot.c | 16 | ||||
| -rw-r--r-- | kernel/power/swap.c | 41 |
21 files changed, 658 insertions, 395 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f30406..1e7e0bb4c14e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon <nm@ti.com> diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst index 53b6a56c4635..df9755bfbd94 100644 --- a/Documentation/power/freezing-of-tasks.rst +++ b/Documentation/power/freezing-of-tasks.rst @@ -14,27 +14,28 @@ architectures). II. How does it work? ===================== -There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN -and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have -PF_NOFREEZE unset (all user space processes and some kernel threads) are -regarded as 'freezable' and treated in a special way before the system enters a -suspend state as well as before a hibernation image is created (in what follows -we only consider hibernation, but the description also applies to suspend). +There is one per-task flag (PF_NOFREEZE) and three per-task states +(TASK_FROZEN, TASK_FREEZABLE and __TASK_FREEZABLE_UNSAFE) used for that. +The tasks that have PF_NOFREEZE unset (all user space tasks and some kernel +threads) are regarded as 'freezable' and treated in a special way before the +system enters a sleep state as well as before a hibernation image is created +(hibernation is directly covered by what follows, but the description applies +to system-wide suspend too). Namely, as the first step of the hibernation procedure the function freeze_processes() (defined in kernel/power/process.c) is called. A system-wide -variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate -whether the system is to undergo a freezing operation. And freeze_processes() -sets this variable. After this, it executes try_to_freeze_tasks() that sends a -fake signal to all user space processes, and wakes up all the kernel threads. -All freezable tasks must react to that by calling try_to_freeze(), which -results in a call to __refrigerator() (defined in kernel/freezer.c), which sets -the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes -it loop until PF_FROZEN is cleared for it. Then, we say that the task is -'frozen' and therefore the set of functions handling this mechanism is referred -to as 'the freezer' (these functions are defined in kernel/power/process.c, -kernel/freezer.c & include/linux/freezer.h). User space processes are generally -frozen before kernel threads. +static key freezer_active (as opposed to a per-task flag or state) is used to +indicate whether the system is to undergo a freezing operation. And +freeze_processes() sets this static key. After this, it executes +try_to_freeze_tasks() that sends a fake signal to all user space processes, and +wakes up all the kernel threads. All freezable tasks must react to that by +calling try_to_freeze(), which results in a call to __refrigerator() (defined +in kernel/freezer.c), which changes the task's state to TASK_FROZEN, and makes +it loop until it is woken by an explicit TASK_FROZEN wakeup. Then, that task +is regarded as 'frozen' and so the set of functions handling this mechanism is +referred to as 'the freezer' (these functions are defined in +kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). User space +tasks are generally frozen before kernel threads. __refrigerator() must not be called directly. Instead, use the try_to_freeze() function (defined in include/linux/freezer.h), that checks @@ -43,31 +44,40 @@ if the task is to be frozen and makes the task enter __refrigerator(). For user space processes try_to_freeze() is called automatically from the signal-handling code, but the freezable kernel threads need to call it explicitly in suitable places or use the wait_event_freezable() or -wait_event_freezable_timeout() macros (defined in include/linux/freezer.h) -that combine interruptible sleep with checking if the task is to be frozen and -calling try_to_freeze(). The main loop of a freezable kernel thread may look +wait_event_freezable_timeout() macros (defined in include/linux/wait.h) +that put the task to sleep (TASK_INTERRUPTIBLE) or freeze it (TASK_FROZEN) if +freezer_active is set. The main loop of a freezable kernel thread may look like the following one:: set_freezable(); - do { - hub_events(); - wait_event_freezable(khubd_wait, - !list_empty(&hub_event_list) || - kthread_should_stop()); - } while (!kthread_should_stop() || !list_empty(&hub_event_list)); - -(from drivers/usb/core/hub.c::hub_thread()). - -If a freezable kernel thread fails to call try_to_freeze() after the freezer has -initiated a freezing operation, the freezing of tasks will fail and the entire -hibernation operation will be cancelled. For this reason, freezable kernel -threads must call try_to_freeze() somewhere or use one of the + + while (true) { + struct task_struct *tsk = NULL; + + wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); + spin_lock_irq(&oom_reaper_lock); + if (oom_reaper_list != NULL) { + tsk = oom_reaper_list; + oom_reaper_list = tsk->oom_reaper_list; + } + spin_unlock_irq(&oom_reaper_lock); + + if (tsk) + oom_reap_task(tsk); + } + +(from mm/oom_kill.c::oom_reaper()). + +If a freezable kernel thread is not put to the frozen state after the freezer +has initiated a freezing operation, the freezing of tasks will fail and the +entire system-wide transition will be cancelled. For this reason, freezable +kernel threads must call try_to_freeze() somewhere or use one of the wait_event_freezable() and wait_event_freezable_timeout() macros. After the system memory state has been restored from a hibernation image and devices have been reinitialized, the function thaw_processes() is called in -order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that -have been frozen leave __refrigerator() and continue running. +order to wake up each frozen task. Then, the tasks that have been frozen leave +__refrigerator() and continue running. Rationale behind the functions dealing with freezing and thawing of tasks @@ -96,7 +106,8 @@ III. Which kernel threads are freezable? Kernel threads are not freezable by default. However, a kernel thread may clear PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE directly is not allowed). From this point it is regarded as freezable -and must call try_to_freeze() in a suitable place. +and must call try_to_freeze() or variants of wait_event_freezable() in a +suitable place. IV. Why do we do that? ====================== diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f85f3515c258..9c5a5f4dba5a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * device_resume_noirq - Execute a "noirq resume" callback for given device. + * __device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -655,7 +655,13 @@ Skip: Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; + + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -811,21 +807,31 @@ Out: pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); - while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); + get_device(dev); - if (!is_async(dev)) { - int error; - mutex_unlock(&dpm_list_mtx); + mutex_unlock(&dpm_list_mtx); + + device_resume(dev); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + mutex_lock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 8afefdea4d80..ce5a5641b6dd 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -57,7 +57,7 @@ static void __init armada_8k_get_sharing_cpus(struct clk *cur_clk, continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_warn("Cannot get clock for CPU %d\n", cpu); } else { @@ -165,7 +165,7 @@ static int __init armada_8k_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a534a1f7f1ee..3c69040920b8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1692,13 +1692,6 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); /* - * If this CPU gen doesn't call for change in balance_perf - * EPP return. - */ - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) - return; - - /* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) * - But less performance oriented than performance EPP @@ -1711,6 +1704,13 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) } /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + + /* * Use hard coded value per gen to update the balance_perf * and default EPP. */ @@ -2406,6 +2406,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c8a7ccc42c16..4ee23f4ebf4a 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -334,8 +334,11 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) #ifdef CONFIG_COMMON_CLK /* dummy clock provider as needed by OPP if clocks property is used */ - if (of_property_present(dev->of_node, "#clock-cells")) - devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (of_property_present(dev->of_node, "#clock-cells")) { + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (ret) + return dev_err_probe(dev, ret, "%s: registering clock provider failed\n", __func__); + } #endif ret = cpufreq_register_driver(&scmi_cpufreq_driver); diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index e66df22f9695..d8515d5c0853 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -25,13 +25,12 @@ MODULE_PARM_DESC(force, "Load unconditionally"); static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; static enum cpuhp_state haltpoll_hp_state; -static int default_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int default_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { - if (current_clr_polling_and_test()) { - local_irq_enable(); + if (current_clr_polling_and_test()) return index; - } + arch_cpu_idle(); return index; } diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index b3a68d5833bd..98657d3b9435 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -483,6 +487,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -491,12 +499,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -513,6 +525,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); @@ -1688,7 +1708,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1697,7 +1717,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return sysfs_emit(buf, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1707,31 +1727,49 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += sysfs_emit_at(buf, len, " From : To\n"); + len += sysfs_emit_at(buf, len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10lu", + df->freq_table[i]); + } - len += sprintf(buf + len, " time(ms)\n"); + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; + len += sysfs_emit_at(buf, len, " time(ms)\n"); for (i = 0; i < max_state; i++) { - if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + if (len >= PAGE_SIZE - 1) + break; + if (df->freq_table[2] == df->previous_freq) + len += sysfs_emit_at(buf, len, "*"); else - len += sprintf(buf + len, " "); - - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", + len += sysfs_emit_at(buf, len, " "); + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10lu:", df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10u", df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + if (len < PAGE_SIZE - 1) + len += sysfs_emit_at(buf, len, "Total transition : %u\n", + df->stats.total_trans); + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3e01a6b23e75..bcf1198e8991 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -918,6 +918,35 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state mtl_l_cstates[] __initdata = { + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 420, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 310, + .target_residency = 930, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state gmt_cstates[] __initdata = { { .name = "C1", @@ -1237,6 +1266,72 @@ static struct cpuidle_state snr_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state grr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 10, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6S", + .desc = "MWAIT 0x22", + .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +static struct cpuidle_state srf_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 10, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6S", + .desc = "MWAIT 0x22", + .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 270, + .target_residency = 700, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6SP", + .desc = "MWAIT 0x23", + .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 310, + .target_residency = 900, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1344,6 +1439,10 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = { .state_table = adl_l_cstates, }; +static const struct idle_cpu idle_cpu_mtl_l __initconst = { + .state_table = mtl_l_cstates, +}; + static const struct idle_cpu idle_cpu_gmt __initconst = { .state_table = gmt_cstates, }; @@ -1382,6 +1481,18 @@ static const struct idle_cpu idle_cpu_snr __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_grr __initconst = { + .state_table = grr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + +static const struct idle_cpu idle_cpu_srf __initconst = { + .state_table = srf_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1418,6 +1529,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &idle_cpu_mtl_l), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), @@ -1427,6 +1539,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &idle_cpu_grr), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &idle_cpu_srf), {} }; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84f345c69ea5..c4e0432ae42a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed); * @opp: opp for which level value has to be returned for * * Return: level read from device tree corresponding to the opp, else - * return 0. + * return U32_MAX. */ unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) { @@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); * @index: index of the required opp * * Return: performance state read from device tree corresponding to the - * required opp, else return 0. + * required opp, else return U32_MAX. */ unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, unsigned int index) @@ -808,6 +808,16 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, struct dev_pm_opp *opp; opp = _find_key_ceil(dev, &temp, 0, true, _read_level, NULL); + if (IS_ERR(opp)) + return opp; + + /* False match */ + if (temp == OPP_LEVEL_UNSET) { + dev_err(dev, "%s: OPP levels aren't available\n", __func__); + dev_pm_opp_put(opp); + return ERR_PTR(-ENODEV); + } + *level = temp; return opp; } @@ -832,9 +842,14 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); * use. */ struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level) + unsigned int *level) { - return _find_key_floor(dev, level, 0, true, _read_level, NULL); |
