From 2d0d656700d67239a57afaf617439143d8dac9be Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:39 +0100 Subject: arm64: Add Neoverse-N2, Cortex-A710 CPU part definition Add the CPU Partnumbers for the new Arm designs. Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Acked-by: Catalin Marinas Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-2-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6231e1f0abe7..19b8441aa8f2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,8 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -113,6 +115,8 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- cgit v1.2.3 From b9d216fcef4298de76519e2baeed69ba482467bd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:40 +0100 Subject: arm64: errata: Add detection for TRBE overwrite in FILL mode Arm Neoverse-N2 and the Cortex-A710 cores are affected by a CPU erratum where the TRBE will overwrite the trace buffer in FILL mode. The TRBE doesn't stop (as expected in FILL mode) when it reaches the limit and wraps to the base to continue writing upto 3 cache lines. This will overwrite any trace that was written previously. Add the Neoverse-N2 erratum(#2139208) and Cortex-A710 erratum (#2119858) to the detection logic. This will be used by the TRBE driver in later patches to work around the issue. The detection has been kept with the core arm64 errata framework list to make sure : - We don't duplicate the framework in TRBE driver - The errata detection is advertised like the rest of the CPU errata. Note that the Kconfig entries are not fully active until the TRBE driver implements the work around. Cc: Will Deacon Cc: Mark Rutland Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Mathieu Poirier Cc: Mike Leach cc: Leo Yan Acked-by: Catalin Marinas Reviewed-by: Mathieu Poirier Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-3-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 41 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 25 +++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 71 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d410a47ffa57..2f99229d993c 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -92,12 +92,16 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..26bd128935bc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -666,6 +666,47 @@ config ARM64_ERRATUM_1508412 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + bool + +config ARM64_ERRATUM_2119858 + bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Cortex-A710 erratum 2119858. + + Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_ERRATUM_2139208 + bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2139208. + + Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..ccd757373f36 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,18 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -533,6 +545,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP, ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + { + /* + * The erratum work around is handled within the TRBE + * driver and can be applied per-cpu. So, we can allow + * a late CPU to come online with this erratum. + */ + .desc = "ARM erratum 2119858 or 2139208", + .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..1ccb92165bd8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From fa82d0b4b833790ac4572377fb777dcea24a9d69 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:41 +0100 Subject: arm64: errata: Add workaround for TSB flush failures Arm Neoverse-N2 (#2067961) and Cortex-A710 (#2054223) suffers from errata, where a TSB (trace synchronization barrier) fails to flush the trace data completely, when executed from a trace prohibited region. In Linux we always execute it after we have moved the PE to trace prohibited region. So, we can apply the workaround every time a TSB is executed. The work around is to issue two TSB consecutively. NOTE: This errata is defined as LOCAL_CPU_ERRATUM, implying that a late CPU could be blocked from booting if it is the first CPU that requires the workaround. This is because we do not allow setting a cpu_hwcaps after the SMP boot. The other alternative is to use "this_cpu_has_cap()" instead of the faster system wide check, which may be a bit of an overhead, given we may have to do this in nvhe KVM host before a guest entry. Cc: Will Deacon Cc: Catalin Marinas Cc: Mathieu Poirier Cc: Mike Leach Cc: Mark Rutland Cc: Anshuman Khandual Cc: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Mathieu Poirier Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-4-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 33 +++++++++++++++++++++++++++++++++ arch/arm64/include/asm/barrier.h | 16 +++++++++++++++- arch/arm64/kernel/cpu_errata.c | 19 +++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 5 files changed, 72 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 2f99229d993c..569a92411dcd 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,6 +94,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -102,6 +104,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 26bd128935bc..ec3bb346957f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -707,6 +707,39 @@ config ARM64_ERRATUM_2139208 If unsure, say Y. +config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 451e11e5fd23..1c5a00598458 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") #ifdef CONFIG_ARM64_PSEUDO_NMI @@ -46,6 +46,20 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) + +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. The barriers \ + * don't have to be strictly back to back, as long as the \ + * CPU is in trace prohibited state. \ + */ \ + if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \ + __tsb_csync(); \ + __tsb_csync(); \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ccd757373f36..aaa66c9eee24 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -352,6 +352,18 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -558,6 +570,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1ccb92165bd8..2102e15af43d 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -54,6 +54,7 @@ WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_TRBE_OVERWRITE_FILL_MODE +WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From 8d81b2a38ddfc4b03662d2359765648c8b4cc73c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:42 +0100 Subject: arm64: errata: Add detection for TRBE write to out-of-range Arm Neoverse-N2 and Cortex-A710 cores are affected by an erratum where the trbe, under some circumstances, might write upto 64bytes to an address after the Limit as programmed by the TRBLIMITR_EL1.LIMIT. This might - - Corrupt a page in the ring buffer, which may corrupt trace from a previous session, consumed by userspace. - Hit the guard page at the end of the vmalloc area and raise a fault. To keep the handling simpler, we always leave the last page from the range, which TRBE is allowed to write. This can be achieved by ensuring that we always have more than a PAGE worth space in the range, while calculating the LIMIT for TRBE. And then the LIMIT pointer can be adjusted to leave the PAGE (TRBLIMITR.LIMIT -= PAGE_SIZE), out of the TRBE range while enabling it. This makes sure that the TRBE will only write to an area within its allowed limit (i.e, [head-head+size]) and we do not have to handle address faults within the driver. Cc: Anshuman Khandual Cc: Mathieu Poirier Cc: Mike Leach Cc: Leo Yan Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Anshuman Khandual Reviewed-by: Mathieu Poirier Acked-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-5-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 41 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 20 +++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 66 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 569a92411dcd..5342e895fb60 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -96,6 +96,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -106,6 +108,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ec3bb346957f..a500af15ebd5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -740,6 +740,47 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + bool + +config ARM64_ERRATUM_2253138 + bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2253138. + + Affected Neoverse-N2 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + +config ARM64_ERRATUM_2224489 + bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Cortex-A710 erratum 2224489. + + Affected Cortex-A710 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index aaa66c9eee24..9e1c1aef9ebd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -364,6 +364,18 @@ static const struct midr_range tsb_flush_fail_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +static struct midr_range trbe_write_out_of_range_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2253138 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2224489 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -577,6 +589,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + { + .desc = "ARM erratum 2253138 or 2224489", + .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 2102e15af43d..90628638e0f9 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -55,6 +55,7 @@ WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE +WORKAROUND_TRBE_WRITE_OUT_OF_RANGE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From 692c9a499b286ea478f41b23a91fe3873b9e1326 Mon Sep 17 00:00:00 2001 From: Tao Zhang Date: Thu, 19 Aug 2021 17:29:37 +0800 Subject: coresight: cti: Correct the parameter for pm_runtime_put The input parameter of the function pm_runtime_put should be the same in the function cti_enable_hw and cti_disable_hw. The correct parameter to use here should be dev->parent. Signed-off-by: Tao Zhang Reviewed-by: Leo Yan Fixes: 835d722ba10a ("coresight: cti: Initial CoreSight CTI Driver") Cc: stable Link: https://lore.kernel.org/r/1629365377-5937-1-git-send-email-quic_taozha@quicinc.com Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-cti-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index e2a3620cbf48..8988b2ed2ea6 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -175,7 +175,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); spin_unlock(&drvdata->spinlock); - pm_runtime_put(dev); + pm_runtime_put(dev->parent); return 0; /* not disabled this call */ -- cgit v1.2.3 From 204879e6990d2a57d7a6e26792cec34f97a63c0e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 3 Sep 2021 18:28:54 -0700 Subject: coresight: cpu-debug: Control default behavior via Kconfig Debugfs is nice and so are module parameters, but * debugfs doesn't take effect early (e.g., if drivers are locking up before user space gets anywhere) * module parameters either add a lot to the kernel command line, or else take effect late as well (if you build =m and configure in /etc/modprobe.d/) So in the same spirit as these CONFIG_PANIC_ON_OOPS (also available via cmdline or modparam) CONFIG_INTEL_IOMMU_DEFAULT_ON (also available via cmdline) add a new Kconfig option. Module parameters and debugfs can still override. Signed-off-by: Brian Norris Reviewed-by: Leo Yan [Fixed missing double quote in Kconfig title] Link: https://lore.kernel.org/r/20210903182839.1.I20856983f2841b78936134dcf9cdf6ecafe632b9@changeid Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/Kconfig | 13 +++++++++++++ drivers/hwtracing/coresight/coresight-cpu-debug.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index f026e5c0e777..514a9b8086e3 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -150,6 +150,19 @@ config CORESIGHT_CPU_DEBUG To compile this driver as a module, choose M here: the module will be called coresight-cpu-debug. +config CORESIGHT_CPU_DEBUG_DEFAULT_ON + bool "Enable CoreSight CPU Debug by default" + depends on CORESIGHT_CPU_DEBUG + help + Say Y here to enable the CoreSight Debug panic-debug by default. This + can also be enabled via debugfs, but this ensures the debug feature + is enabled as early as possible. + + Has the same effect as setting coresight_cpu_debug.enable=1 on the + kernel command line. + + Say N if unsure. + config CORESIGHT_CTI tristate "CoreSight Cross Trigger Interface (CTI) driver" depends on ARM || ARM64 diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 00de46565bc4..8845ec4b4402 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -105,7 +105,7 @@ static DEFINE_PER_CPU(struct debug_drvdata *, debug_drvdata); static int debug_count; static struct dentry *debug_debugfs_dir; -static bool debug_enable; +static bool debug_enable = IS_ENABLED(CONFIG_CORESIGHT_CPU_DEBUG_DEFAULT_ON); module_param_named(enable, debug_enable, bool, 0600); MODULE_PARM_DESC(enable, "Control to enable coresight CPU debug functionality"); -- cgit v1.2.3 From 0ab47f8079f27edc44ea0bcc67078561bcfdf542 Mon Sep 17 00:00:00 2001 From: Tanmay Jagdale Date: Wed, 1 Sep 2021 18:40:48 +0530 Subject: dt-bindings: coresight: Add burst size for TMC Add "arm,max-burst-size" optional property for TMC ETR. If specified, this value indicates the maximum burst size that can be initiated by TMC on the AXI bus. Signed-off-by: Tanmay Jagdale Reviewed-by: Mike Leach Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210901131049.1365367-2-tanmay@marvell.com Signed-off-by: Mathieu Poirier --- Documentation/devicetree/bindings/arm/coresight.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 7f9c1ca87487..c68d93a35b6c 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -127,6 +127,11 @@ its hardware characteristcs. * arm,scatter-gather: boolean. Indicates that the TMC-ETR can safely use the SG mode on this system. + * arm,max-burst-size: The maximum burst size initiated by TMC on the + AXI master interface. The burst size can be in the range [0..15], + the setting supports one data transfer per burst up to a maximum of + 16 data transfers per burst. + * Optional property for CATU : * interrupts : Exactly one SPI may be listed for reporting the address error -- cgit v1.2.3 From 4d5d88baa6c838bf92ed6a63c50dd3167c5a4956 Mon Sep 17 00:00:00 2001 From: Tanmay Jagdale Date: Wed, 1 Sep 2021 18:40:49 +0530 Subject: coresight: tmc: Configure AXI write burst size The current driver sets the write burst size initiated by TMC-ETR on AXI bus to a fixed value of 16. Make this configurable by reading the value specified in fwnode. If not specified, then default to 16. Introduced a "max_burst_size" variable in tmc_drvdata structure to facilitate this change. Signed-off-by: Tanmay Jagdale Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20210901131049.1365367-3-tanmay@marvell.com Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-tmc-core.c | 21 +++++++++++++++++++-- drivers/hwtracing/coresight/coresight-tmc-etr.c | 3 ++- drivers/hwtracing/coresight/coresight-tmc.h | 6 +++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 74c6323d4d6a..d0276af82494 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -432,6 +432,21 @@ static u32 tmc_etr_get_default_buffer_size(struct device *dev) return size; } +static u32 tmc_etr_get_max_burst_size(struct device *dev) +{ + u32 burst_size; + + if (fwnode_property_read_u32(dev->fwnode, "arm,max-burst-size", + &burst_size)) + return TMC_AXICTL_WR_BURST_16; + + /* Only permissible values are 0 to 15 */ + if (burst_size > 0xF) + burst_size = TMC_AXICTL_WR_BURST_16; + + return burst_size; +} + static int tmc_probe(struct amba_device *adev, const struct amba_id *id) { int ret = 0; @@ -469,10 +484,12 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) /* This device is not associated with a session */ drvdata->pid = -1; - if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { drvdata->size = tmc_etr_get_default_buffer_size(dev); - else + drvdata->max_burst_size = tmc_etr_get_max_burst_size(dev); + } else { drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4; + } desc.dev = dev; desc.groups = coresight_tmc_groups; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index acdb59e0e661..0ac2a611110b 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -982,7 +982,8 @@ static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) axictl = readl_relaxed(drvdata->base + TMC_AXICTL); axictl &= ~TMC_AXICTL_CLEAR_MASK; - axictl |= (TMC_AXICTL_PROT_CTL_B1 | TMC_AXICTL_WR_BURST_16); + axictl |= TMC_AXICTL_PROT_CTL_B1; + axictl |= TMC_AXICTL_WR_BURST(drvdata->max_burst_size); axictl |= TMC_AXICTL_AXCACHE_OS; if (tmc_etr_has_cap(drvdata, TMC_ETR_AXI_ARCACHE)) { diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index b91ec7dde7bc..6bec20a392b3 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -70,7 +70,8 @@ #define TMC_AXICTL_PROT_CTL_B0 BIT(0) #define TMC_AXICTL_PROT_CTL_B1 BIT(1) #define TMC_AXICTL_SCT_GAT_MODE BIT(7) -#define TMC_AXICTL_WR_BURST_16 0xF00 +#define TMC_AXICTL_WR_BURST(v) (((v) & 0xf) << 8) +#define TMC_AXICTL_WR_BURST_16 0xf /* Write-back Read and Write-allocate */ #define TMC_AXICTL_AXCACHE_OS (0xf << 2) #define TMC_AXICTL_ARCACHE_OS (0xf << 16) @@ -174,6 +175,8 @@ struct etr_buf { * @etr_buf: details of buffer used in TMC-ETR * @len: size of the available trace for ETF/ETB. * @size: trace buffer size for this TMC (common for all modes). + * @max_burst_size: The maximum burst size that can be initiated by + * TMC-ETR on AXI bus. * @mode: how this TMC is being used. * @config_type: TMC variant, must be of type @tmc_config_type. * @memwidth: width of the memory interface databus, in bytes. @@ -198,6 +201,7 @@ struct tmc_drvdata { }; u32 len; u32 size; + u32 max_burst_size; u32 mode; enum tmc_config_type config_type; enum tmc_mem_intf_width memwidth; -- cgit v1.2.3 From 26701ceb4c2c6bfa5f8c8984fa1a1ea08fa0f02c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 9 Aug 2021 19:14:00 +0800 Subject: coresight: tmc-etr: Add barrier after updating AUX ring buffer Since a memory barrier is required between AUX trace data store and aux_head store, and the AUX trace data is filled with memcpy(), it's sufficient to use smp_wmb() so can ensure the trace data is visible prior to updating aux_head. Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210809111407.596077-3-leo.yan@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 0ac2a611110b..7f4654b72314 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1564,6 +1564,14 @@ tmc_update_etr_buffer(struct coresight_device *csdev, */ if (etr_perf->snapshot) handle->head += size; + + /* + * Ensure that the AUX trace data is visible before the aux_head + * is updated via perf_aux_output_end(), as expected by the + * perf ring buffer. + */ + smp_wmb(); + out: /* * Don't set the TRUNCATED flag in snapshot mode because 1) the -- cgit v1.2.3 From bd8d06886d0a9b59d020fdb2496c76db77816768 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 9 Aug 2021 19:14:01 +0800 Subject: coresight: tmc-etf: Add comment for store ordering Since the function CS_LOCK() has contained memory barrier mb(), it ensures the visibility of the AUX trace data before updating the aux_head, thus it's needless to add any explicit barrier anymore. Add comment to make clear for the barrier usage for ETF. Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20210809111407.596077-4-leo.yan@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index cd0fb7bfba68..8debd4f40f06 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -553,6 +553,11 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, if (buf->snapshot) handle->head += to_read; + /* + * CS_LOCK() contains mb() so it can ensure visibility of the AUX trace + * data before the aux_head is updated via perf_aux_output_end(), which + * is expected by the perf ring buffer. + */ CS_LOCK(drvdata->base); out: spin_unlock_irqrestore(&drvdata->spinlock, flags); -- cgit v1.2.3 From f36dec8da1a4a083c51e4d2297985d3648b6740f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 12 Sep 2021 20:57:47 +0800 Subject: coresight: tmc-etr: Use perf_output_handle::head for AUX ring buffer When enable the Arm CoreSight PMU event, the context for AUX ring buffer is prepared in the structure perf_output_handle, and its field "head" points the head of the AUX ring buffer and it is updated after filling AUX trace data into buffer. Current code uses an extra field etr_perf_buffer::head to maintain the header for the AUX ring buffer which is not necessary; alternatively, it's better to directly use perf_output_handle::head. This patch removes the field etr_perf_buffer::head and directly uses perf_output_handle::head for the head of AUX ring buffer. Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210912125748.2816606-2-leo.yan@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 7f4654b72314..14a86a232a18 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -32,7 +32,6 @@ struct etr_flat_buf { * @etr_buf - Actual buffer used by the ETR * @pid - The PID this etr_perf_buffer belongs to. * @snaphost - Perf session mode - * @head - handle->head at the beginning of the session. * @nr_pages - Number of pages in the ring buffer. * @pages - Array of Pages in the ring buffer. */ @@ -41,7 +40,6 @@ struct etr_perf_buffer { struct etr_buf *etr_buf; pid_t pid; bool snapshot; - unsigned long head; int nr_pages; void **pages; }; @@ -1438,16 +1436,16 @@ free_etr_perf_buffer: * buffer to the perf ring buffer. */ static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf, + unsigned long head, unsigned long src_offset, unsigned long to_copy) { long bytes; long pg_idx, pg_offset; - unsigned long head = etr_perf->head; char **dst_pages, *src_buf; struct etr_buf *etr_buf = etr_perf->etr_buf; - head = etr_perf->head; + head = PERF_IDX2OFF(head, etr_perf); pg_idx = head >> PAGE_SHIFT; pg_offset = head & (PAGE_SIZE - 1); dst_pages = (char **)etr_perf->pages; @@ -1554,7 +1552,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev, /* Insert barrier packets at the beginning, if there was an overflow */ if (lost) tmc_etr_buf_insert_barrier_packet(etr_buf, offset); - tmc_etr_sync_perf_buffer(etr_perf, offset, size); + tmc_etr_sync_perf_buffer(etr_perf, handle->head, offset, size); /* * In snapshot mode we simply increment the head by the number of byte @@ -1614,8 +1612,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) goto unlock_out; } - etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); - /* * No HW configuration is needed if the sink is already in * use for this session. -- cgit v1.2.3 From 7ba7ae1d5a4737ebea582526ada9d79c74dfd75d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 12 Sep 2021 20:57:48 +0800 Subject: coresight: Update comments for removing cs_etm_find_snapshot() Commit 2f01c200d440 ("perf cs-etm: Remove callback cs_etm_find_snapshot()") has removed the function cs_etm_find_snapshot() from the perf tool in the user space, now CoreSight trace directly uses the perf common function __auxtrace_mmap__read() to calcualte the head and size for AUX trace data in snapshot mode. This patch updates the comments in drivers to make them generic and not stick to any specific function from perf tool. Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20210912125748.2816606-3-leo.yan@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-etb10.c | 5 ++--- drivers/hwtracing/coresight/coresight-tmc-etf.c | 5 ++--- drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index f775cbee12b8..efa39820acec 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -557,9 +557,8 @@ static unsigned long etb_update_buffer(struct coresight_device *csdev, /* * In snapshot mode we simply increment the head by the number of byte - * that were written. User space function cs_etm_find_snapshot() will - * figure out how many bytes to get from the AUX buffer based on the - * position of the head. + * that were written. User space will figure out how many bytes to get + * from the AUX buffer based on the position of the head. */ if (buf->snapshot) handle->head += to_read; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 8debd4f40f06..4c4cbd1f7258 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -546,9 +546,8 @@ static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, /* * In snapshot mode we simply increment the head by the number of byte - * that were written. User space function cs_etm_find_snapshot() will - * figure out how many bytes to get from the AUX buffer based on the - * position of the head. + * that were written. User space will figure out how many bytes to get + * from the AUX buffer based on the position of the head. */ if (buf->snapshot) handle->head += to_read; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 14a86a232a18..d6da38e7c682 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -1556,9 +1556,8 @@ tmc_update_etr_buffer(struct coresight_device *csdev, /* * In snapshot mode we simply increment the head by the number of byte - * that were written. User space function cs_etm_find_snapshot() will - * figure out how many bytes to get from the AUX buffer based on the - * position of the head. + * that were written. User space will figure out how many bytes to get + * from the AUX buffer based on the position of the head. */ if (etr_perf->snapshot) handle->head += size; -- cgit v1.2.3 From 0abd076217a39c4abc47dcd84d0c8f491f87cbe7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 5 Sep 2021 11:21:44 +0800 Subject: coresight: tmc-etr: Speed up for bounce buffer in flat mode The AUX bounce buffer is allocated with API dma_alloc_coherent(), in the low level's architecture code, e.g. for Arm64, it maps the memory with the attribution "Normal non-cacheable"; this can be concluded from the definition for pgprot_dmacoherent() in arch/arm64/include/asm/pgtable.h. Later when access the AUX bounce buffer, since the memory mapping is non-cacheable, it's low efficiency due to every load instruction must reach out DRAM. This patch changes to allocate pages with dma_alloc_noncoherent(), the driver can access the memory via cacheable mapping; therefore, load instructions can fetch data from cache lines rather than always read data from DRAM, the driver can boost memory performance. After using the cacheable mapping, the driver uses dma_sync_single_for_cpu() to invalidate cacheline prior to read bounce buffer so can avoid read stale trace data. By measurement the duration for function tmc_update_etr_buffer() with ftrace function_graph tracer, it shows the performance significant improvement for copying 4MiB data from bounce buffer: # echo tmc_etr_get_data_flat_buf > set_graph_notrace // avoid noise # echo tmc_update_etr_buffer > set_graph_function # echo function_graph > current_tracer before: # CPU DURATION FUNCTION CALLS # | | | | | | | 2) | tmc_update_etr_buffer() { ... 2) # 8148.320 us | } after: # CPU DURATION FUNCTION CALLS # | | | | | | | 2) | tmc_update_etr_buffer() { ... 2) # 2525.420 us | } Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210905032144.966766-1-leo.yan@linaro.org Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 26 +++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index d6da38e7c682..867ad8bb9b0c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -607,8 +607,9 @@ static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, if (!flat_buf) return -ENOMEM; - flat_buf->vaddr = dma_alloc_coherent(real_dev, etr_buf->size, - &flat_buf->daddr, GFP_KERNEL); + flat_buf->vaddr = dma_alloc_noncoherent(real_dev, etr_buf->size, + &flat_buf->daddr, + DMA_FROM_DEVICE, GFP_KERNEL); if (!flat_buf->vaddr) { kfree(flat_buf); return -ENOMEM; @@ -629,14 +630,18 @@ static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf) if (flat_buf && flat_buf->daddr) { struct device *real_dev = flat_buf->dev->parent; - dma_free_coherent(real_dev, flat_buf->size, - flat_buf->vaddr, flat_buf->daddr); + dma_free_noncoherent(real_dev, etr_buf->size, + flat_buf->vaddr, flat_buf->daddr, + DMA_FROM_DEVICE); } kfree(flat_buf); } static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) { + struct etr_flat_buf *flat_buf = etr_buf->private; + struct device *real_dev = flat_buf->dev->parent; + /* * Adjust the buffer to point to the beginning of the trace data * and update the available trace data. @@ -646,6 +651,19 @@ static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) etr_buf->len = etr_buf->size; else etr_buf->len = rwp - rrp; + + /* + * The driver always starts tracing at the beginning of the buffer, + * the only reason why we would get a wrap around is when the buffer + * is full. Sync the entire buffer in one go for this case. + */ + if (etr_buf->offset + etr_buf->len > etr_buf->size) + dma_sync_single_for_cpu(real_dev, flat_buf->daddr, + etr_buf->size, DMA_FROM_DEVICE); + else + dma_sync_single_for_cpu(real_dev, + flat_buf->daddr + etr_buf->offset, + etr_buf->len, DMA_FROM_DEVICE); } static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf, -- cgit v1.2.3 From 8c60acbcb982bfff14d1e85094474671c4f3d006 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 22 Sep 2021 13:51:44 +0100 Subject: coresight: Don't immediately close events that are run on invalid CPU/sink combos When a traced process runs on a CPU that can't reach the selected sink, the event will be stopped with PERF_HES_STOPPED. This means that even if the process migrates to a valid CPU, tracing will not resume. This can be reproduced (on N1SDP) by using taskset to start the process on CPU 0, and then switching it to CPU 2 (ETF 1 is only reachable from CPU 2): taskset --cpu-list 0 ./perf record -e cs_etm/@tmc_etf1/ --per-thread -- taskset --cpu-list 2 ls This produces a single 0 length AUX record, and then no more trace: 0x3c8 [0x30]: PERF_RECORD_AUX offset: 0 size: 0 flags: 0x1 [T] After the fix, the same command produces normal AUX records. The perf self test "89: Check Arm CoreSight trace data recording and synthesized samples" no longer fails intermittently. This was because the taskset in the test is after the fork, so there is a period where the task is scheduled on a random CPU rather than forced to a valid one. Specifically selecting an invalid CPU will still result in a failure to open the event because it will never produce trace: ./perf record -C 2 -e cs_etm/@tmc_etf0/ failed to mmap with 12 (Cannot allocate memory) The only scenario that has changed is if the CPU mask has a valid CPU sink combo in it. Testing ======= * Coresight self test passes consistently: ./perf test Coresight * CPU wide mode still produces trace: ./perf record -e cs_etm// -a * Invalid -C options still fail to open: ./perf record -C 2,3 -e cs_etm/@tmc_etf0/ failed to mmap with 12 (Cannot allocate memory) * Migrating a task to a valid sink/CPU now produces trace: taskset --cpu-list 0 ./perf record -e cs_etm/@tmc_etf1/ --per-thread -- taskset --cpu-list 2 ls * If the task remains on an invalid CPU, no trace is emitted: taskset --cpu-list 0 ./perf record -e cs_etm/@tmc_etf1/ --per-thread -- ls Reviewed-by: Suzuki K Poulose Signed-off-by: James Clark Link: https://lore.kernel.org/r/20210922125144.133872-2-james.clark@arm.com Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-etm-perf.c | 29 +++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8ebd728d3a80..3bce1ec4c4bf 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -452,9 +452,14 @@ static void etm_event_start(struct perf_event *event, int flags) * sink from this ETM. We can't do much in this case if * the sink was specified or hinted to the driver. For * now, simply don't record anything on this ETM. + * + * As such we pretend that everything is fine, and let + * it continue without actually tracing. The event could + * continue tracing when it moves to a CPU where it is + * reachable to a sink. */ if (!cpumask_test_cpu(cpu, &event_data->mask)) - goto fail_end_stop; + goto out; path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ @@ -466,16 +471,15 @@ static void etm_event_start(struct perf_event *event, int flags) if (coresight_enable_path(path, CS_MODE_PERF, handle)) goto fail_end_stop; - /* Tell the perf core the event is alive */ - event->hw.state = 0; - /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path; +out: + /* Tell the perf core the event is alive */ + event->hw.state = 0; /* Save the event_data for this ETM */ ctxt->event_data = event_data; -out: return; fail_disable_path: @@ -485,7 +489,7 @@ fail_end_stop: perf_aux_output_end(handle, 0); fail: event->hw.state = PERF_HES_STOPPED; - goto out; + return; } static void etm_event_stop(struct perf_event *event, int mode) @@ -517,6 +521,19 @@ static void etm_event_stop(struct perf_event *event, int mode) if (WARN_ON(!event_data)) return; + /* + * Check if this ETM was allowed to trace, as decided at + * etm_setup_aux(). If it wasn't allowed to trace, then + * nothing needs to be torn down other than outputting a + * zero sized record. + */ + if (handle->event && (mode & PERF_EF_UPDATE) && + !cpumask_test_cpu(cpu, &event_data->mask)) { + event->hw.state = PERF_HES_STOPPED; + perf_aux_output_end(handle, 0); + return; + } + if (!csdev) return; -- cgit v1.2.3 From 937d3f58cacf377cab7c32e475e1ffa91d611dce Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Sep 2021 11:26:32 +0100 Subject: coresight: etm4x: Save restore TRFCR_EL1 When the CPU enters a low power mode, the TRFCR_EL1 contents could be reset. Thus we need to save/restore the TRFCR_EL1 along with the ETM4x registers to allow the tracing. The TRFCR related helpers are in a new header file, as we need to use them for TRBE in the later patches. Cc: Mathieu Poirier Cc: Anshuman Khandual Cc: Mike Leach Cc: Leo Yan Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210914102641.1852544-2-suzuki.poulose@arm.com [Fixed cosmetic details] Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 43 ++++++++++++++++------ drivers/hwtracing/coresight/coresight-etm4x.h | 2 + .../coresight/coresight-self-hosted-trace.h | 24 ++++++++++++ 3 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 drivers/hwtracing/coresight/coresight-self-hosted-trace.h diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index e24252eaf8e4..537c0d7ee1ed 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -40,6 +40,7 @@ #include "coresight-etm4x.h" #include "coresight-etm-perf.h" #include "coresight-etm4x-cfg.h" +#include "coresight-self-hosted-trace.h" #include "coresight-syscfg.h" static int boot_enable; @@ -1011,7 +1012,7 @@ static void cpu_enable_tracing(struct etmv4_drvdata *drvdata) if (is_kernel_in_hyp_mode()) trfcr |= TRFCR_EL2_CX; - write_sysreg_s(trfcr, SYS_TRFCR_EL1); + write_trfcr(trfcr); } static void etm4_init_arch_data(void *info) @@ -1554,7 +1555,7 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } -static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) { int i, ret = 0; struct etmv4_save_state *state; @@ -1693,7 +1694,23 @@ out: return ret; } -static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +{ + int ret = 0; + + /* Save the TRFCR irrespective of whether the ETM is ON */ + if (drvdata->trfc) + drvdata->save_trfcr = read_trfcr(); + /* + * Save and restore the ETM Trace registers only if + * the ETM is active. + */ + if (local_read(&drvdata->mode) && drvdata->save_state) + ret = __etm4_cpu_save(drvdata); + return ret; +} + +static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) { int i; struct etmv4_save_state *state = drvdata->save_state; @@ -1789,6 +1806,14 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4_cs_lock(drvdata, csa); } +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +{ + if (drvdata->trfc) + write_trfcr(drvdata->save_trfcr); + if (drvdata->state_needs_restore) + __etm4_cpu_restore(drvdata); +} + static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, void *v) { @@ -1800,23 +1825,17 @@ static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, drvdata = etmdrvdata[cpu]; - if (!drvdata->save_state) - return NOTIFY_OK; - if (WARN_ON_ONCE(drvdata->cpu != cpu)) return NOTIFY_BAD; switch (cmd) { case CPU_PM_ENTER: - /* save the state if self-hosted coresight is in use */ - if (local_read(&drvdata->mode)) - if (etm4_cpu_save(drvdata)) - return NOTIFY_BAD; + if (etm4_cpu_save(drvdata)) + return NOTIFY_BAD; break; case CPU_PM_EXIT: case CPU_PM_ENTER_FAILED: - if (drvdata->state_needs_restore) - etm4_cpu_restore(drvdata); + etm4_cpu_restore(drvdata); break; default: return NOTIFY_DONE; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index e5b79bdb9851..82cba16b73a6 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -921,6 +921,7 @@ struct etmv4_save_state { * @lpoverride: If the implementation can support low-power state over. * @trfc: If the implementation supports Arm v8.4 trace filter controls. * @config: structure holding configuration parameters. + * @save_trfcr: Saved TRFCR_EL1 register during a CPU PM event. * @save_state: State to be preserved across power loss * @state_needs_restore: True when there is context to restore after PM exit * @skip_power_up: Indicates if an implementation can skip powering up @@ -973,6 +974,7 @@ struct etmv4_drvdata { bool lpoverride; bool trfc; struct etmv4_config config; + u64 save_trfcr; struct etmv4_save_state *save_state; bool state_needs_restore; bool skip_power_up; diff --git a/drivers/hwtracing/coresight/coresight-self-hosted-trace.h b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h new file mode 100644 index 000000000000..303d71911870 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Arm v8 Self-Hosted trace support. + * + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef __CORESIGHT_SELF_HOSTED_TRACE_H +#define __CORESIGHT_SELF_HOSTED_TRACE_H + +#include + +static inline u64 read_trfcr(void) +{ + return read_sysreg_s(SYS_TRFCR_EL1); +} + +static inline void write_trfcr(u64 val) +{ + write_sysreg_s(val, SYS_TRFCR_EL1); + isb(); +} + +#endif /* __CORESIGHT_SELF_HOSTED_TRACE_H */ -- cgit v1.2.3 From 5f6fd1aa8cc147b111af1a833574487a87237dc0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Sep 2021 11:26:33 +0100 Subject: coresight: etm4x: Use Trace Filtering controls dynamically The Trace Filtering support (FEAT_TRF) ensures that the ETM can be prohibited from generating any trace for a given EL. This is much stricter knob, than the TRCVICTLR exception level masks, which doesn't prevent the ETM from generating Context packets for an "excluded" EL. At the moment, we do a onetime enable trace at user and kernel and leave it untouched for the kernel life time. This implies that the ETM could potentially generate trace packets containing the kernel addresses, and thus leaking the kernel virtual address in the trace. This patch makes the switch dynamic, by honoring the filters set by the user and enforcing them in the TRFCR controls. We also rename the cpu_enable_tracing() appropriately to cpu_detect_trace_filtering() and the drvdata member trfc => trfcr to indicate the "value" of the TRFCR_EL1. Cc: Mathieu Poirier Cc: Al Grant Cc: Mike Leach Cc: Leo Yan Signed-off-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210914102641.1852544-3-suzuki.poulose@arm.com Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 63 ++++++++++++++++------ drivers/hwtracing/coresight/coresight-etm4x.h | 7 ++- .../coresight/coresight-self-hosted-trace.h | 7 +++ 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 537c0d7ee1ed..d9a85ca25b60 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -239,6 +239,45 @@ struct etm4_enable_arg { int rc; }; +/* + * etm4x_prohibit_trace - Prohibit the CPU from tracing at all ELs. + * When the CPU supports FEAT_TRF, we could move the ETM to a trace + * prohibited state by filtering the Exception levels via TRFCR_EL1. + */ +static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata) +{ + /* If the CPU doesn't support FEAT_TRF, nothing to do */ + if (!drvdata->trfcr) + return; + cpu_prohibit_trace(); +} + +/* + * etm4x_allow_trace - Allow CPU tracing in the respective ELs, + * as configured by the drvdata->config.mode for the current + * session. Even though we have TRCVICTLR bits to filter the + * trace in the ELs, it doesn't prevent the ETM from generating + * a packet (e.g, TraceInfo) that might contain the addresses from + * the excluded levels. Thus we use the additional controls provided + * via the Trace Filtering controls (FEAT_TRF) to make sure no trace + * is generated for the excluded ELs. + */ +static void etm4x_allow_trace(struct etmv4_drvdata *drvdata) +{ + u64 trfcr = drvdata->trfcr; + + /* If the CPU doesn't support FEAT_TRF, nothing to do */ + if (!trfcr) + return; + + if (drvdata->config.mode & ETM_MODE_EXCL_KERN) + trfcr &= ~TRFCR_ELx_ExTRE; + if (drvdata->config.mode & ETM_MODE_EXCL_USER) + trfcr &= ~TRFCR_ELx_E0TRE; + + write_trfcr(trfcr); +} + #ifdef CONFIG_ETM4X_IMPDEF_FEATURE #define HISI_HIP08_AMBA_ID 0x000b6d01 @@ -443,6 +482,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) if (etm4x_is_ete(drvdata)) etm4x_relaxed_write32(csa, TRCRSR_TA, TRCRSR); + etm4x_allow_trace(drvdata); /* Enable the trace unit */ etm4x_relaxed_write32(csa, 1, TRCPRGCTLR); @@ -738,7 +778,6 @@ static int etm4_enable(struct coresight_device *csdev, static void etm4_disable_hw(void *info) { u32 control; - u64 trfcr; struct etmv4_drvdata *drvdata = info; struct etmv4_config *config = &drvdata->config; struct coresight_device *csdev = drvdata->csdev; @@ -765,12 +804,7 @@ static void etm4_disable_hw(void *info) * If the CPU supports v8.4 Trace filter Control, * set the ETM to trace prohibited region. */ - if (drvdata->trfc) { - trfcr = read_sysreg_s(SYS_TRFCR_EL1); - write_sysreg_s(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE), - SYS_TRFCR_EL1); - isb(); - } + etm4x_prohibit_trace(drvdata); /* * Make sure everything completes before disabling, as recommended * by section 7.3.77 ("TRCVICTLR, ViewInst Main Control Register, @@ -786,9 +820,6 @@ static void etm4_disable_hw(void *info) if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) dev_err(etm_dev, "timeout while waiting for PM stable Trace Status\n"); - if (drvdata->trfc) - write_sysreg_s(trfcr, SYS_TRFCR_EL1); - /* read the status of the single shot comparators */ for (i = 0; i < drvdata->nr_ss_cmp; i++) { config->ss_status[i] = @@ -990,15 +1021,15 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata, return false; } -static void cpu_enable_tracing(struct etmv4_drvdata *drvdata) +static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata) { u64 dfr0 = read_sysreg(id_aa64dfr0_el1); u64 trfcr; + drvdata->trfcr = 0; if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT)) return; - drvdata->trfc = true; /* * If the CPU supports v8.4 SelfHosted Tracing, enable * tracing at the kernel EL and EL0, forcing to use the @@ -1012,7 +1043,7 @@ static void cpu_enable_tracing(struct etmv4_drvdata *drvdata) if (is_kernel_in_hyp_mode()) trfcr |= TRFCR_EL2_CX; - write_trfcr(trfcr); + drvdata->trfcr = trfcr; } static void etm4_init_arch_data(void *info) @@ -1203,7 +1234,7 @@ static