From b407460ee99033503993ac7437d593451fcdfe44 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Jun 2023 10:50:36 +0200 Subject: iopoll: Call cpu_relax() in busy loops It is considered good practice to call cpu_relax() in busy loops, see Documentation/process/volatile-considered-harmful.rst. This can not only lower CPU power consumption or yield to a hyperthreaded twin processor, but also allows an architecture to mitigate hardware issues (e.g. ARM Erratum 754327 for Cortex-A9 prior to r2p0) in the architecture-specific cpu_relax() implementation. In addition, cpu_relax() is also a compiler barrier. It is not immediately obvious that the @op argument "function" will result in an actual function call (e.g. in case of inlining). Where a function call is a C sequence point, this is lost on inlining. Therefore, with agressive enough optimization it might be possible for the compiler to hoist the: (val) = op(args); "load" out of the loop because it doesn't see the value changing. The addition of cpu_relax() would inhibit this. As the iopoll helpers lack calls to cpu_relax(), people are sometimes reluctant to use them, and may fall back to open-coded polling loops (including cpu_relax() calls) instead. Fix this by adding calls to cpu_relax() to the iopoll helpers: - For the non-atomic case, it is sufficient to call cpu_relax() in case of a zero sleep-between-reads value, as a call to usleep_range() is a safe barrier otherwise. However, it doesn't hurt to add the call regardless, for simplicity, and for similarity with the atomic case below. - For the atomic case, cpu_relax() must be called regardless of the sleep-between-reads value, as there is no guarantee all architecture-specific implementations of udelay() handle this. Signed-off-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Arnd Bergmann Reviewed-by: Tony Lindgren Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/45c87bec3397fdd704376807f0eec5cc71be440f.1685692810.git.geert+renesas@glider.be --- include/linux/iopoll.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 2c8860e406bd..0417360a6db9 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -53,6 +53,7 @@ } \ if (__sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ + cpu_relax(); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -95,6 +96,7 @@ } \ if (__delay_us) \ udelay(__delay_us); \ + cpu_relax(); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) -- cgit v1.2.3 From 7349a69cf3125e92d48e442d9f400ba446fa314f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Jun 2023 10:50:37 +0200 Subject: iopoll: Do not use timekeeping in read_poll_timeout_atomic() read_poll_timeout_atomic() uses ktime_get() to implement the timeout feature, just like its non-atomic counterpart. However, there are several issues with this, due to its use in atomic contexts: 1. When called in the s2ram path (as typically done by clock or PM domain drivers), timekeeping may be suspended, triggering the WARN_ON(timekeeping_suspended) in ktime_get(): WARNING: CPU: 0 PID: 654 at kernel/time/timekeeping.c:843 ktime_get+0x28/0x78 Calling ktime_get_mono_fast_ns() instead of ktime_get() would get rid of that warning. However, that would break timeout handling, as (at least on systems with an ARM architectured timer), the time returned by ktime_get_mono_fast_ns() does not advance while timekeeping is suspended. Interestingly, (on the same ARM systems) the time returned by ktime_get() does advance while timekeeping is suspended, despite the warning. 2. Depending on the actual clock source, and especially before a high-resolution clocksource (e.g. the ARM architectured timer) becomes available, time may not advance in atomic contexts, thus breaking timeout handling. Fix this by abandoning the idea that one can rely on timekeeping to implement timeout handling in all atomic contexts, and switch from a global time-based to a locally-estimated timeout handling. In most (all?) cases the timeout condition is exceptional and an error condition, hence any additional delays due to underestimating wall clock time are irrelevant. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Reviewed-by: Tony Lindgren Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/3d2a2f4e553489392d871108797c3be08f88300b.1685692810.git.geert+renesas@glider.be --- include/linux/iopoll.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 0417360a6db9..19a7b00baff4 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -74,6 +74,10 @@ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. * + * This macro does not rely on timekeeping. Hence it is safe to call even when + * timekeeping is suspended, at the expense of an underestimation of wall clock + * time, which is rather minimal with a non-zero delay_us. + * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. */ @@ -81,22 +85,30 @@ delay_before_read, args...) \ ({ \ u64 __timeout_us = (timeout_us); \ + s64 __left_ns = __timeout_us * NSEC_PER_USEC; \ unsigned long __delay_us = (delay_us); \ - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ - if (delay_before_read && __delay_us) \ + u64 __delay_ns = __delay_us * NSEC_PER_USEC; \ + if (delay_before_read && __delay_us) { \ udelay(__delay_us); \ + if (__timeout_us) \ + __left_ns -= __delay_ns; \ + } \ for (;;) { \ (val) = op(args); \ if (cond) \ break; \ - if (__timeout_us && \ - ktime_compare(ktime_get(), __timeout) > 0) { \ + if (__timeout_us && __left_ns < 0) { \ (val) = op(args); \ break; \ } \ - if (__delay_us) \ + if (__delay_us) { \ udelay(__delay_us); \ + if (__timeout_us) \ + __left_ns -= __delay_ns; \ + } \ cpu_relax(); \ + if (__timeout_us) \ + __left_ns--; \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) -- cgit v1.2.3 From 14e53669cbf4ab361c849efbe12aa0f5bb5db5f6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Jun 2023 10:50:38 +0200 Subject: clk: renesas: cpg-mssr: Convert to readl_poll_timeout_atomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use readl_poll_timeout_atomic() instead of open-coding the same operation. As typically no retries are needed, 10 µs is a suitable timeout value. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/832d29fd9aa3239ea949535309d2bdb003d40c9e.1685692810.git.geert+renesas@glider.be --- drivers/clk/renesas/renesas-cpg-mssr.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index e9c0e341380e..2772499d2016 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -196,8 +197,8 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) struct device *dev = priv->dev; u32 bitmask = BIT(bit); unsigned long flags; - unsigned int i; u32 value; + int error; dev_dbg(dev, "MSTP %u%02u/%pC %s\n", reg, bit, hw->clk, enable ? "ON" : "OFF"); @@ -228,19 +229,13 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) if (!enable || priv->reg_layout == CLK_REG_LAYOUT_RZ_A) return 0; - for (i = 1000; i > 0; --i) { - if (!(readl(priv->base + priv->status_regs[reg]) & bitmask)) - break; - cpu_relax(); - } - - if (!i) { + error = readl_poll_timeout_atomic(priv->base + priv->status_regs[reg], + value, !(value & bitmask), 0, 10); + if (error) dev_err(dev, "Failed to enable SMSTP %p[%d]\n", priv->base + priv->control_regs[reg], bit); - return -ETIMEDOUT; - } - return 0; + return error; } static int cpg_mstp_clock_enable(struct clk_hw *hw) @@ -896,8 +891,9 @@ static int cpg_mssr_suspend_noirq(struct device *dev) static int cpg_mssr_resume_noirq(struct device *dev) { struct cpg_mssr_priv *priv = dev_get_drvdata(dev); - unsigned int reg, i; + unsigned int reg; u32 mask, oldval, newval; + int error; /* This is the best we can do to check for the presence of PSCI */ if (!psci_ops.cpu_suspend) @@ -935,14 +931,9 @@ static int cpg_mssr_resume_noirq(struct device *dev) if (!mask) continue; - for (i = 1000; i > 0; --i) { - oldval = readl(priv->base + priv->status_regs[reg]); - if (!(oldval & mask)) - break; - cpu_relax(); - } - - if (!i) + error = readl_poll_timeout_atomic(priv->base + priv->status_regs[reg], + oldval, !(oldval & mask), 0, 10); + if (error) dev_warn(dev, "Failed to enable SMSTP%u[0x%x]\n", reg, oldval & mask); } -- cgit v1.2.3 From d0414e762f4d2151e29c1a89fafe0368d8362419 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Jun 2023 10:50:39 +0200 Subject: clk: renesas: mstp: Convert to readl_poll_timeout_atomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use readl_poll_timeout_atomic() instead of open-coding the same operation. As typically no retries are needed, 10 µs is a suitable timeout value. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/bce7d0bdd80c800aa150f1868b610b7d94f4cc66.1685692810.git.geert+renesas@glider.be --- drivers/clk/renesas/clk-mstp.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 90804ac06fa5..6280f4dfed71 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -78,8 +79,8 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) struct mstp_clock_group *group = clock->group; u32 bitmask = BIT(clock->bit_index); unsigned long flags; - unsigned int i; u32 value; + int ret; spin_lock_irqsave(&group->lock, flags); @@ -101,19 +102,14 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) if (!enable || !group->mstpsr) return 0; - for (i = 1000; i > 0; --i) { - if (!(cpg_mstp_read(group, group->mstpsr) & bitmask)) - break; - cpu_relax(); - } - - if (!i) { + /* group->width_8bit is always false if group->mstpsr is present */ + ret = readl_poll_timeout_atomic(group->mstpsr, value, + !(value & bitmask), 0, 10); + if (ret) pr_err("%s: failed to enable %p[%d]\n", __func__, group->smstpcr, clock->bit_index); - return -ETIMEDOUT; - } - return 0; + return ret; } static int cpg_mstp_clock_enable(struct clk_hw *hw) -- cgit v1.2.3 From 7df8eea64a417f1db6777cddc1d7eda3634b7175 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Jun 2023 10:50:40 +0200 Subject: clk: renesas: rzg2l: Convert to readl_poll_timeout_atomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use readl_poll_timeout_atomic() instead of open-coding the same operation. As typically no retries are needed, 10 µs is a suitable timeout value. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/900543d4b9abc1004e6aecdb676f23e5508ae96f.1685692810.git.geert+renesas@glider.be --- drivers/clk/renesas/rzg2l-cpg.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/clk/renesas/rzg2l-cpg.c b/drivers/clk/renesas/rzg2l-cpg.c index ca8b921c7762..bc623515ad84 100644 --- a/drivers/clk/renesas/rzg2l-cpg.c +++ b/drivers/clk/renesas/rzg2l-cpg.c @@ -903,9 +903,9 @@ static int rzg2l_mod_clock_endisable(struct clk_hw *hw, bool enable) unsigned int reg = clock->off; struct device *dev = priv->dev; unsigned long flags; - unsigned int i; u32 bitmask = BIT(clock->bit); u32 value; + int error; if (!clock->off) { dev_dbg(dev, "%pC does not support ON/OFF\n", hw->clk); @@ -930,19 +930,13 @@ static int rzg2l_mod_clock_endisable(struct clk_hw *hw, bool enable) if (!priv->info->has_clk_mon_regs) return 0; - for (i = 1000; i > 0; --i) { - if (((readl(priv->base + CLK_MON_R(reg))) & bitmask)) - break; - cpu_relax(); - } - - if (!i) { + error = readl_poll_timeout_atomic(priv->base + CLK_MON_R(reg), value, + value & bitmask, 0, 10); + if (error) dev_err(dev, "Failed to enable CLK_ON %p\n", priv->base + CLK_ON_R(reg)); - return -ETIMEDOUT; - } - return 0; + return error; } static int rzg2l_mod_clock_enable(struct clk_hw *hw) -- cgit v1.2.3