From d93277b9839b0bde06238a7a7f644114edb2ad4a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 May 2018 13:25:49 +0100 Subject: Revert "arm64: Increase the max granular size" This reverts commit 97303480753e48fb313dc0e15daaf11b0451cdb8. Commit 97303480753e ("arm64: Increase the max granular size") increased the cache line size to 128 to match Cavium ThunderX, apparently for some performance benefit which could not be confirmed. This change, however, has an impact on the network packet allocation in certain circumstances, requiring slightly over a 4K page with a significant performance degradation. The patch reverts L1_CACHE_SHIFT back to 6 (64-byte cache line). Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 9bbffc7a301f..1dd2c2db0010 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -33,7 +33,7 @@ #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 -#define L1_CACHE_SHIFT 7 +#define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* -- cgit v1.2.3 From ebc7e21e0fa28c46b938baed292c77e2d3ef8165 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 May 2018 13:33:12 +0100 Subject: arm64: Increase ARCH_DMA_MINALIGN to 128 This patch increases the ARCH_DMA_MINALIGN to 128 so that it covers the currently known Cache Writeback Granule (CTR_EL0.CWG) on arm64 and moves the fallback in cache_line_size() from L1_CACHE_BYTES to this constant. In addition, it warns (and taints) if the CWG is larger than ARCH_DMA_MINALIGN as this is not safe with non-coherent DMA. 
Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 9 ++------- arch/arm64/mm/dma-mapping.c | 5 +++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 1dd2c2db0010..5df5cfe1c143 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -43,7 +43,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifndef __ASSEMBLY__ @@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void) static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); - return cwg ? 4 << cwg : L1_CACHE_BYTES; + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9d1b06d67c53..fbee8c17a4e6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void) void __init setup_cpu_features(void) { u32 cwg; - int cls; setup_system_capabilities(); mark_const_caps_ready(); @@ -1627,13 +1626,9 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. 
*/ cwg = cache_type_cwg(); - cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } static bool __maybe_unused diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..ed84432264de 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -504,6 +504,11 @@ static int __init arm64_dma_init(void) max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; + WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), + TAINT_CPU_OUT_OF_SPEC, + "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + ARCH_DMA_MINALIGN, cache_line_size()); + return atomic_pool_init(); } arch_initcall(arm64_dma_init); -- cgit v1.2.3 From 5c636aa015c644a3889044270b98c33a8a87734d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 9 May 2018 16:46:26 +0900 Subject: arm64: remove no-op macro VMLINUX_SYMBOL() VMLINUX_SYMBOL() is no-op unless CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX is defined. It has ever been selected only by BLACKFIN and METAG. VMLINUX_SYMBOL() is unneeded for ARM64-specific code. Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 0221aca6493d..605d1b60469c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -34,25 +34,25 @@ jiffies = jiffies_64; * 4 KB (see related ASSERT() below) \ */ \ . 
= ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + __hyp_idmap_text_start = .; \ *(.hyp.idmap.text) \ - VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ - VMLINUX_SYMBOL(__hyp_text_start) = .; \ + __hyp_idmap_text_end = .; \ + __hyp_text_start = .; \ *(.hyp.text) \ - VMLINUX_SYMBOL(__hyp_text_end) = .; + __hyp_text_end = .; #define IDMAP_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__idmap_text_start) = .; \ + __idmap_text_start = .; \ *(.idmap.text) \ - VMLINUX_SYMBOL(__idmap_text_end) = .; + __idmap_text_end = .; #ifdef CONFIG_HIBERNATION #define HIBERNATE_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + __hibernate_exit_text_start = .; \ *(.hibernate_exit.text) \ - VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; + __hibernate_exit_text_end = .; #else #define HIBERNATE_TEXT #endif @@ -60,10 +60,10 @@ jiffies = jiffies_64; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_end) = .; + __entry_tramp_text_end = .; #else #define TRAMP_TEXT #endif -- cgit v1.2.3 From 92faa7bea3e7592673109e32c75d50f8ce6d5ec6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 13 Apr 2018 15:44:35 +0100 Subject: arm64: Remove duplicate include "make includecheck" detected a few duplicated includes in arch/arm64. This patch removes the double inclusions. 
Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_mmu.h | 1 - arch/arm64/kernel/armv8_deprecated.c | 3 +-- arch/arm64/kernel/fpsimd.c | 1 - arch/arm64/kernel/ptrace.c | 2 -- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 082110993647..f74987b76d91 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,7 +72,6 @@ #ifdef __ASSEMBLY__ #include -#include /* * Convert a kernel VA into a HYP VA. diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6e47fc3ab549..97d45d5151d4 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -20,8 +21,6 @@ #include #include #include -#include -#include #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 87a35364e750..3db8ed530e56 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7ff81fed46e1..f847285d96f3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1046,8 +1046,6 @@ static const struct user_regset_view user_aarch64_view = { }; #ifdef CONFIG_COMPAT -#include - enum compat_regset { REGSET_COMPAT_GPR, REGSET_COMPAT_VFP, -- cgit v1.2.3 From e75bef2a4fe259b779765a85589e92657d26fdc9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 24 Apr 2018 16:25:47 +0100 Subject: arm64: Select ARCH_HAS_FAST_MULTIPLIER It is probably safe to assume that all Armv8-A implementations have a multiplier whose efficiency is comparable or better than a sequence of three or so register-dependent arithmetic instructions. 
Select ARCH_HAS_FAST_MULTIPLIER to get ever-so-slightly nicer codegen in the few dusty old corners which care. In a contrived benchmark calling hweight64() in a loop, this does indeed turn out to be a small win overall, with no measurable impact on Cortex-A57 but about 5% performance improvement on Cortex-A53. Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb2cf4938f6d..9c850f3b398f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,6 +12,7 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA -- cgit v1.2.3 From 1cfc63b5ae60fe7e01773f38132f98d8b13a99a0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Apr 2018 13:56:32 +0100 Subject: arm64: cmpwait: Clear event register before arming exclusive monitor When waiting for a cacheline to change state in cmpwait, we may immediately wake-up the first time around the outer loop if the event register was already set (for example, because of the event stream). Avoid these spurious wakeups by explicitly clearing the event register before loading the cacheline and setting the exclusive monitor. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cmpxchg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 4f5fd2a36e6e..3b0938281541 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -204,7 +204,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ - " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " sevl\n" \ + " wfe\n" \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ -- cgit v1.2.3 From d529a18a61f3f497328f096ddf757af928d6105b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:56 -0500 Subject: drivers: base: cacheinfo: move cache_setup_of_node() In preparation for the next patch, and to aid in review of that patch, let's move cache_setup_of_node further down in the module without any changes. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 80 ++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index edf726267282..09ccef7ddc99 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) } #ifdef CONFIG_OF -static int cache_setup_of_node(unsigned int cpu) -{ - struct device_node *np; - struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - unsigned int index = 0; - - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) - return 0; - - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; - if (!np) { - pr_err("Failed to find cpu%d device node\n", cpu); - return -ENOENT; - } - - while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; - if (this_leaf->level != 1) - np = of_find_next_cache_node(np); - else - np = of_node_get(np);/* cpu node itself */ - if (!np) - break; - this_leaf->of_node = np; - index++; - } - - if (index != cache_leaves(cpu)) /* not all OF nodes populated */ - return -ENOENT; - - return 0; -} - static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { @@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu) cache_associativity(this_leaf); } } + +static int cache_setup_of_node(unsigned int cpu) +{ + struct device_node *np; + struct cacheinfo *this_leaf; + struct device *cpu_dev = get_cpu_device(cpu); + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + 
unsigned int index = 0; + + /* skip if of_node is already populated */ + if (this_cpu_ci->info_list->of_node) + return 0; + + if (!cpu_dev) { + pr_err("No cpu device for CPU %d\n", cpu); + return -ENODEV; + } + np = cpu_dev->of_node; + if (!np) { + pr_err("Failed to find cpu%d device node\n", cpu); + return -ENOENT; + } + + while (index < cache_leaves(cpu)) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level != 1) + np = of_find_next_cache_node(np); + else + np = of_node_get(np);/* cpu node itself */ + if (!np) + break; + this_leaf->of_node = np; + index++; + } + + if (index != cache_leaves(cpu)) /* not all OF nodes populated */ + return -ENOENT; + + return 0; +} #else static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } -- cgit v1.2.3 From 2ff075c7dfd4705de12d687daede2dd664386b1c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:57 -0500 Subject: drivers: base: cacheinfo: setup DT cache properties early The original intent in cacheinfo was that an architecture specific populate_cache_leaves() would probe the hardware and then cache_shared_cpu_map_setup() and cache_override_properties() would provide firmware help to extend/expand upon what was probed. Arm64 was really the only architecture that was working this way, and with the removal of most of the hardware probing logic it became clear that it was possible to simplify the logic a bit. This patch combines the walk of the DT nodes with the code updating the cache size/line_size and nr_sets. cache_override_properties() (which was DT specific) is then removed. The result is that cacheinfo.of_node is no longer used as a temporary place to hold DT references for future calls that update cache properties. That change helps to clarify its one remaining use (matching cacheinfo nodes that represent shared caches) which will be used by the ACPI/PPTT code in the following patches. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/riscv/kernel/cacheinfo.c | 1 - drivers/base/cacheinfo.c | 65 +++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 10ed2749e246..0bc86e5f8f3f 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, struct device_node *node, enum cache_type type, unsigned int level) { - this_leaf->of_node = node; this_leaf->level = level; this_leaf->type = type; /* not a sector cache */ diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 09ccef7ddc99..a872523e8951 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type) return type; } -static void cache_size(struct cacheinfo *this_leaf) +static void cache_size(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *cache_size; @@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].size_prop; - cache_size = of_get_property(this_leaf->of_node, propname, NULL); + cache_size = of_get_property(np, propname, NULL); if (cache_size) this_leaf->size = of_read_number(cache_size, 1); } /* not cache_line_size() because that's a macro in include/linux/cache.h */ -static void cache_get_line_size(struct cacheinfo *this_leaf) +static void cache_get_line_size(struct cacheinfo *this_leaf, + struct device_node *np) { const __be32 *line_size; int i, lim, ct_idx; @@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) const char 
*propname; propname = cache_type_info[ct_idx].line_size_props[i]; - line_size = of_get_property(this_leaf->of_node, propname, NULL); + line_size = of_get_property(np, propname, NULL); if (line_size) break; } @@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) this_leaf->coherency_line_size = of_read_number(line_size, 1); } -static void cache_nr_sets(struct cacheinfo *this_leaf) +static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *nr_sets; @@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].nr_sets_prop; - nr_sets = of_get_property(this_leaf->of_node, propname, NULL); + nr_sets = of_get_property(np, propname, NULL); if (nr_sets) this_leaf->number_of_sets = of_read_number(nr_sets, 1); } @@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } -static bool cache_node_is_unified(struct cacheinfo *this_leaf) +static bool cache_node_is_unified(struct cacheinfo *this_leaf, + struct device_node *np) { - return of_property_read_bool(this_leaf->of_node, "cache-unified"); + return of_property_read_bool(np, "cache-unified"); } -static void cache_of_override_properties(unsigned int cpu) +static void cache_of_set_props(struct cacheinfo *this_leaf, + struct device_node *np) { - int index; - struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - - for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; - /* - * init_cache_level must setup the cache level correctly - * overriding the architecturally specified levels, so - * if type is NONE at this stage, it should be unified - */ - if (this_leaf->type == CACHE_TYPE_NOCACHE && - cache_node_is_unified(this_leaf)) - this_leaf->type = CACHE_TYPE_UNIFIED; - 
cache_size(this_leaf); - cache_get_line_size(this_leaf); - cache_nr_sets(this_leaf); - cache_associativity(this_leaf); - } + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf, np)) + this_leaf->type = CACHE_TYPE_UNIFIED; + cache_size(this_leaf, np); + cache_get_line_size(this_leaf, np); + cache_nr_sets(this_leaf, np); + cache_associativity(this_leaf); } static int cache_setup_of_node(unsigned int cpu) @@ -193,6 +189,7 @@ static int cache_setup_of_node(unsigned int cpu) np = of_node_get(np);/* cpu node itself */ if (!np) break; + cache_of_set_props(this_leaf, np); this_leaf->of_node = np; index++; } @@ -203,7 +200,6 @@ static int cache_setup_of_node(unsigned int cpu) return 0; } #else -static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) @@ -286,12 +282,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) } } -static void cache_override_properties(unsigned int cpu) -{ - if (of_have_populated_dt()) - return cache_of_override_properties(cpu); -} - static void free_cache_attributes(unsigned int cpu) { if (!per_cpu_cacheinfo(cpu)) @@ -325,6 +315,10 @@ static int detect_cache_attributes(unsigned int cpu) if (per_cpu_cacheinfo(cpu) == NULL) return -ENOMEM; + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. 
+ */ ret = populate_cache_leaves(cpu); if (ret) goto free_ci; @@ -338,7 +332,6 @@ static int detect_cache_attributes(unsigned int cpu) goto free_ci; } - cache_override_properties(cpu); return 0; free_ci: -- cgit v1.2.3 From 9b97387c5c4260ffcdf3b913bdef0d98cb2d4a74 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:58 -0500 Subject: cacheinfo: rename of_node to fw_token Rename and change the type of of_node to indicate it is a generic pointer which is generally only used for comparison purposes. In a later patch we will put an ACPI/PPTT token pointer in fw_token so that the code which builds the shared cpu masks can be reused. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 16 +++++++++------- include/linux/cacheinfo.h | 8 +++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index a872523e8951..597aacb233fc 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -35,7 +35,7 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { - return sib_leaf->of_node == this_leaf->of_node; + return sib_leaf->fw_token == this_leaf->fw_token; } /* OF properties to query for a given cache type */ @@ -167,9 +167,10 @@ static int cache_setup_of_node(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) + /* skip if fw_token is already populated */ + if (this_cpu_ci->info_list->fw_token) { return 0; + } if (!cpu_dev) { pr_err("No cpu device for CPU %d\n", cpu); @@ -190,7 +191,7 @@ static int cache_setup_of_node(unsigned 
int cpu) if (!np) break; cache_of_set_props(this_leaf, np); - this_leaf->of_node = np; + this_leaf->fw_token = np; index++; } @@ -278,7 +279,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->of_node); + of_node_put(this_leaf->fw_token); } } @@ -323,8 +324,9 @@ static int detect_cache_attributes(unsigned int cpu) if (ret) goto free_ci; /* - * For systems using DT for cache hierarchy, of_node and shared_cpu_map - * will be set up here only if they are not populated already + * For systems using DT for cache hierarchy, fw_token + * and shared_cpu_map will be set up here only if they are + * not populated already */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 3d9805297cda..0c6f658054d2 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -34,9 +34,8 @@ enum cache_type { * @shared_cpu_map: logical cpumask representing all the cpus sharing * this cache node * @attributes: bitfield representing various cache attributes - * @of_node: if devicetree is used, this represents either the cpu node in - * case there's no explicit cache node or the cache node itself in the - * device tree + * @fw_token: Unique value used to determine if different cacheinfo + * structures represent a single hardware cache instance. 
* @disable_sysfs: indicates whether this node is visible to the user via * sysfs or not * @priv: pointer to any private data structure specific to particular @@ -65,8 +64,7 @@ struct cacheinfo { #define CACHE_ALLOCATE_POLICY_MASK \ (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) #define CACHE_ID BIT(4) - - struct device_node *of_node; + void *fw_token; bool disable_sysfs; void *priv; }; -- cgit v1.2.3 From 30d87bfacbee396646975a00959764a7c49510ec Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:59 -0500 Subject: arm64/acpi: Create arch specific cpu to acpi id helper It's helpful to be able to look up the acpi_processor_id associated with a logical cpu. Provide an arm64 helper to do this. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Ard Biesheuvel Acked-by: Sudeep Holla Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 32f465a80e4e..0db62a4cbce2 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void) } struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu); +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_cpu_get_madt_gicc(cpu)->uid; +} static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); -- cgit v1.2.3 From 2bd00bcd73e5edd5769e2a5f24c59a517582d862 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:00 -0500 Subject: ACPI/PPTT: Add Processor Properties Topology Table parsing ACPI 6.2 adds a new table, which describes how processing units are related to each other in a tree-like fashion. Caches are also sprinkled throughout the tree and describe the properties of the caches in relation to other caches and processing units. 
Add the code to parse the cache hierarchy and report the total number of levels of cache for a given core using acpi_find_last_cache_level() as well as fill out the individual cores cache information with cache_setup_acpi() once the cpu_cacheinfo structure has been populated by the arch specific code. An additional patch later in the set adds the ability to report peers in the topology using find_acpi_cpu_topology() to report a unique ID for each processing unit at a given level in the tree. These unique id's can then be used to match related processing units which exist as threads, within a given package, etc. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 655 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 4 + 2 files changed, 659 insertions(+) create mode 100644 drivers/acpi/pptt.c diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c new file mode 100644 index 000000000000..e5ea1974d1e3 --- /dev/null +++ b/drivers/acpi/pptt.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * pptt.c - parsing of Processor Properties Topology Table (PPTT) + * + * Copyright (C) 2018, ARM + * + * This file implements parsing of the Processor Properties Topology Table + * which is optionally used to describe the processor and cache topology. + * Due to the relative pointers used throughout the table, this doesn't + * leverage the existing subtable parsing in the kernel. + * + * The PPTT structure is an inverted tree, with each node potentially + * holding one or two inverted tree data structures describing + * the caches available at that level. Each cache structure optionally + * contains properties describing the cache at a given level which can be + * used to override hardware probed values. 
+ */ +#define pr_fmt(fmt) "ACPI PPTT: " fmt + +#include +#include +#include + +static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + struct acpi_subtable_header *entry; + + /* there isn't a subtable at reference 0 */ + if (pptt_ref < sizeof(struct acpi_subtable_header)) + return NULL; + + if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length) + return NULL; + + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref); + + if (entry->length == 0) + return NULL; + + if (pptt_ref + entry->length > table_hdr->length) + return NULL; + + return entry; +} + +static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, pptt_ref); +} + +static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref); +} + +static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *node, + int resource) +{ + u32 *ref; + + if (resource >= node->number_of_priv_resources) + return NULL; + + ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor)); + ref += resource; + + return fetch_pptt_subtable(table_hdr, *ref); +} + +static inline bool acpi_pptt_match_type(int table_type, int type) +{ + return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type || + table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type); +} + +/** + * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache + * @table_hdr: Pointer to the head of the PPTT table + * @local_level: passed res reflects this cache level + * @res: cache resource in the PPTT we want to walk + * @found: returns a pointer to the requested level if found + * @level: the requested cache level + * @type: the requested cache type + * + * Attempt to find a 
given cache level, while counting the max number + * of cache levels for the cache node. + * + * Given a pptt resource, verify that it is a cache node, then walk + * down each level of caches, counting how many levels are found + * as well as checking the cache type (icache, dcache, unified). If a + * level & type match, then we set found, and continue the search. + * Once the entire cache branch has been walked return its max + * depth. + * + * Return: The cache structure and the level we terminated with. + */ +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr, + int local_level, + struct acpi_subtable_header *res, + struct acpi_pptt_cache **found, + int level, int type) +{ + struct acpi_pptt_cache *cache; + + if (res->type != ACPI_PPTT_TYPE_CACHE) + return 0; + + cache = (struct acpi_pptt_cache *) res; + while (cache) { + local_level++; + + if (local_level == level && + cache->flags & ACPI_PPTT_CACHE_TYPE_VALID && + acpi_pptt_match_type(cache->attributes, type)) { + if (*found != NULL && cache != *found) + pr_warn("Found duplicate cache level/type unable to determine uniqueness\n"); + + pr_debug("Found cache @ level %d\n", level); + *found = cache; + /* + * continue looking at this node's resource list + * to verify that we don't find a duplicate + * cache node. 
+ */ + } + cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache); + } + return local_level; +} + +static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node, + int *starting_level, int level, + int type) +{ + struct acpi_subtable_header *res; + int number_of_levels = *starting_level; + int resource = 0; + struct acpi_pptt_cache *ret = NULL; + int local_level; + + /* walk down from processor node */ + while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) { + resource++; + + local_level = acpi_pptt_walk_cache(table_hdr, *starting_level, + res, &ret, level, type); + /* + * we are looking for the max depth. Since its potentially + * possible for a given node to have resources with differing + * depths verify that the depth we have found is the largest. + */ + if (number_of_levels < local_level) + number_of_levels = local_level; + } + if (number_of_levels > *starting_level) + *starting_level = number_of_levels; + + return ret; +} + +/** + * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches + * @table_hdr: Pointer to the head of the PPTT table + * @cpu_node: processor node we wish to count caches for + * + * Given a processor node containing a processing unit, walk into it and count + * how many levels exist solely for it, and then walk up each level until we hit + * the root node (ignore the package level because it may be possible to have + * caches that exist across packages). Count the number of cache levels that + * exist at each level on the way up. + * + * Return: Total number of levels found. 
+ */ +static int acpi_count_levels(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node) +{ + int total_levels = 0; + + do { + acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0); + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } while (cpu_node); + + return total_levels; +} + +/** + * acpi_pptt_leaf_node() - Given a processor node, determine if its a leaf + * @table_hdr: Pointer to the head of the PPTT table + * @node: passed node is checked to see if its a leaf + * + * Determine if the *node parameter is a leaf node by iterating the + * PPTT table, looking for nodes which reference it. + * + * Return: 0 if we find a node referencing the passed node (or table error), + * or 1 if we don't. + */ +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *node) +{ + struct acpi_subtable_header *entry; + unsigned long table_end; + u32 node_entry; + struct acpi_pptt_processor *cpu_node; + u32 proc_sz; + + table_end = (unsigned long)table_hdr + table_hdr->length; + node_entry = ACPI_PTR_DIFF(node, table_hdr); + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + proc_sz = sizeof(struct acpi_pptt_processor *); + + while ((unsigned long)entry + proc_sz < table_end) { + cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && + cpu_node->parent == node_entry) + return 0; + if (entry->length == 0) + return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + + } + return 1; +} + +/** + * acpi_find_processor_node() - Given a PPTT table find the requested processor + * @table_hdr: Pointer to the head of the PPTT table + * @acpi_cpu_id: cpu we are searching for + * + * Find the subtable entry describing the provided processor. 
+ * This is done by iterating the PPTT table looking for processor nodes + * which have an acpi_processor_id that matches the acpi_cpu_id parameter + * passed into the function. If we find a node that matches these criteria + * we verify that it's a leaf node in the topology rather than depending + * on the valid flag, which doesn't need to be set for leaf nodes. + * + * Return: NULL, or the processor's acpi_pptt_processor* + */ +static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id) +{ + struct acpi_subtable_header *entry; + unsigned long table_end; + struct acpi_pptt_processor *cpu_node; + u32 proc_sz; + + table_end = (unsigned long)table_hdr + table_hdr->length; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + /* size of a processor node itself, not of a pointer to one */ + proc_sz = sizeof(struct acpi_pptt_processor); + + /* + * Find the processor structure associated with this cpuid. Entries + * are only examined while a complete processor node fits inside the + * table, so a truncated trailing entry is never dereferenced. + */ + while ((unsigned long)entry + proc_sz <= table_end) { + cpu_node = (struct acpi_pptt_processor *)entry; + + if (entry->length == 0) { + pr_warn("Invalid zero length subtable\n"); + break; + } + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && + acpi_cpu_id == cpu_node->acpi_processor_id && + acpi_pptt_leaf_node(table_hdr, cpu_node)) { + return (struct acpi_pptt_processor *)entry; + } + + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + } + + return NULL; +} + +static int acpi_find_cache_levels(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id) +{ + int number_of_levels = 0; + struct acpi_pptt_processor *cpu; + + cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id); + if (cpu) + number_of_levels = acpi_count_levels(table_hdr, cpu); + + return number_of_levels; +} + +static u8 acpi_cache_type(enum cache_type type) +{ + switch (type) { + case CACHE_TYPE_DATA: + pr_debug("Looking for data cache\n"); + return ACPI_PPTT_CACHE_TYPE_DATA; + case CACHE_TYPE_INST: + pr_debug("Looking for instruction cache\n"); +
return ACPI_PPTT_CACHE_TYPE_INSTR; + default: + case CACHE_TYPE_UNIFIED: + pr_debug("Looking for unified cache\n"); + /* + * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED + * contains the bit pattern that will match both + * ACPI unified bit patterns because we use it later + * to match both cases. + */ + return ACPI_PPTT_CACHE_TYPE_UNIFIED; + } +} + +static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id, + enum cache_type type, + unsigned int level, + struct acpi_pptt_processor **node) +{ + int total_levels = 0; + struct acpi_pptt_cache *found = NULL; + struct acpi_pptt_processor *cpu_node; + u8 acpi_type = acpi_cache_type(type); + + pr_debug("Looking for CPU %d's level %d cache type %d\n", + acpi_cpu_id, level, acpi_type); + + cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id); + + while (cpu_node && !found) { + found = acpi_find_cache_level(table_hdr, cpu_node, + &total_levels, level, acpi_type); + *node = cpu_node; + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } + + return found; +} + +/* + * Number of properties (size, line size, number of sets, associativity) + * that must all be flagged valid before a NOCACHE leaf is retyped as unified. + */ +#define PPTT_CHECKED_ATTRIBUTES 4 + +/** + * update_cache_properties() - Update cacheinfo for the given processor + * @this_leaf: Kernel cache info structure being updated + * @found_cache: The PPTT node describing this cache instance + * @cpu_node: A unique reference to describe this cache instance, stored as + * the leaf's fw_token + * + * The ACPI spec implies that the fields in the cache structures are used to + * extend and correct the information probed from the hardware. Let's only + * set fields that we determine are VALID. + * + * Return: nothing.
As a side effect the cacheinfo leaf passed in is updated + */ +static void update_cache_properties(struct cacheinfo *this_leaf, + struct acpi_pptt_cache *found_cache, + struct acpi_pptt_processor *cpu_node) +{ + int valid_flags = 0; + + this_leaf->fw_token = cpu_node; + if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) { + this_leaf->size = found_cache->size; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) { + this_leaf->coherency_line_size = found_cache->line_size; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) { + this_leaf->number_of_sets = found_cache->number_of_sets; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) { + this_leaf->ways_of_associativity = found_cache->associativity; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) { + switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) { + case ACPI_PPTT_CACHE_POLICY_WT: + this_leaf->attributes = CACHE_WRITE_THROUGH; + break; + case ACPI_PPTT_CACHE_POLICY_WB: + this_leaf->attributes = CACHE_WRITE_BACK; + break; + } + } + if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) { + switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) { + case ACPI_PPTT_CACHE_READ_ALLOCATE: + this_leaf->attributes |= CACHE_READ_ALLOCATE; + break; + case ACPI_PPTT_CACHE_WRITE_ALLOCATE: + this_leaf->attributes |= CACHE_WRITE_ALLOCATE; + break; + case ACPI_PPTT_CACHE_RW_ALLOCATE: + case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT: + this_leaf->attributes |= + CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE; + break; + } + } + /* + * If all PPTT_CHECKED_ATTRIBUTES counted properties above were valid + * and the cache type is NOCACHE, update the cache type as well.
+ */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + valid_flags == PPTT_CHECKED_ATTRIBUTES) + this_leaf->type = CACHE_TYPE_UNIFIED; +} + +static void cache_setup_acpi_cpu(struct acpi_table_header *table, + unsigned int cpu) +{ + struct acpi_pptt_cache *found_cache; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct cacheinfo *this_leaf; + unsigned int index = 0; + struct acpi_pptt_processor *cpu_node = NULL; + + while (index < get_cpu_cacheinfo(cpu)->num_leaves) { + this_leaf = this_cpu_ci->info_list + index; + found_cache = acpi_find_cache_node(table, acpi_cpu_id, + this_leaf->type, + this_leaf->level, + &cpu_node); + pr_debug("found = %p %p\n", found_cache, cpu_node); + if (found_cache) + update_cache_properties(this_leaf, + found_cache, + cpu_node); + + index++; + } +} + +/* + * Level passed when the search is meant to be ended by a flag match: + * the upward walk gives up after this many parent hops. + */ +#define PPTT_ABORT_PACKAGE 0xFF + +static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu, + int level, int flag) +{ + struct acpi_pptt_processor *prev_node; + + while (cpu && level) { + if (cpu->flags & flag) + break; + pr_debug("level %d\n", level); + prev_node = fetch_pptt_node(table_hdr, cpu->parent); + if (prev_node == NULL) + break; + cpu = prev_node; + level--; + } + return cpu; +} + +/** + * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature + * @table: Pointer to the head of the PPTT table + * @cpu: Kernel logical cpu number + * @level: A level that terminates the search + * @flag: A flag which terminates the search + * + * Get a unique value given a cpu, and a topology level, that can be + * matched to determine which cpus share common topological features + * at that level.
+ * + * Return: Unique value, or -ENOENT if unable to locate cpu + */ +static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, + unsigned int cpu, int level, int flag) +{ + struct acpi_pptt_processor *cpu_node; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (cpu_node) { + cpu_node = acpi_find_processor_package_id(table, cpu_node, + level, flag); + /* + * Only level 0 has a guaranteed id (the acpi processor id); + * every other level is identified by its node's table offset. + */ + if (level == 0) + return cpu_node->acpi_processor_id; + return ACPI_PTR_DIFF(cpu_node, table); + } + pr_warn_once("PPTT table found, but unable to locate core %d (%d)\n", + cpu, acpi_cpu_id); + return -ENOENT; +} + +static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) +{ + struct acpi_table_header *table; + acpi_status status; + int retval; + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cpu topology may be inaccurate\n"); + return -ENOENT; + } + retval = topology_get_acpi_cpu_tag(table, cpu, level, flag); + pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n", + cpu, level, retval); + acpi_put_table(table); + + return retval; +} + +/** + * acpi_find_last_cache_level() - Determines the number of cache levels for a PE + * @cpu: Kernel logical cpu number + * + * Given a logical cpu number, returns the number of levels of cache represented + * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0 + * indicating we didn't find any cache levels. + * + * Return: Cache levels visible to this core.
+ */ +int acpi_find_last_cache_level(unsigned int cpu) +{ + u32 acpi_cpu_id; + struct acpi_table_header *table; + int number_of_levels = 0; + acpi_status status; + + pr_debug("Cache Setup find last level cpu=%d\n", cpu); + + acpi_cpu_id = get_acpi_id_for_cpu(cpu); + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cache topology may be inaccurate\n"); + } else { + number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id); + acpi_put_table(table); + } + pr_debug("Cache Setup find last level level=%d\n", number_of_levels); + + return number_of_levels; +} + +/** + * cache_setup_acpi() - Override CPU cache topology with data from the PPTT + * @cpu: Kernel logical cpu number + * + * Updates the global cache info provided by get_cpu_cacheinfo() + * when there are valid properties in the acpi_pptt_cache nodes. A + * successful parse may not result in any updates if none of the + * cache levels have any valid flags set. Further, a unique value is + * associated with each known CPU cache entry. This unique value + * can be used to determine whether caches are shared between cpus. + * + * Return: -ENOENT on failure to find table, or 0 on success + */ +int cache_setup_acpi(unsigned int cpu) +{ + struct acpi_table_header *table; + acpi_status status; + + pr_debug("Cache Setup ACPI cpu %d\n", cpu); + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cache topology may be inaccurate\n"); + return -ENOENT; + } + + cache_setup_acpi_cpu(table, cpu); + acpi_put_table(table); + + /* the table lookup succeeded: report errno-style success, not an acpi_status */ + return 0; +} + +/** + * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu + * @cpu: Kernel logical cpu number + * @level: The topological level for which we would like a unique ID + * + * Determine a topology unique ID for each thread/core/cluster/mc_grouping + * /socket/etc.
This ID can then be used to group peers, which will have + * matching ids. + * + * The search terminates when either the requested level is found or + * we reach a root node. Levels beyond the termination point will return the + * same unique ID. The unique id for level 0 is the acpi processor id. All + * other levels beyond this use a generated value to uniquely identify + * a topological feature. + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents a unique topological feature. + */ +int find_acpi_cpu_topology(unsigned int cpu, int level) +{ + return find_acpi_cpu_topology_tag(cpu, level, 0); +} + +/** + * find_acpi_cpu_cache_topology() - Determine a unique cache topology value + * @cpu: Kernel logical cpu number + * @level: The cache level for which we would like a unique ID + * + * Determine a unique ID for each unified cache in the system. + * + * Return: -ENOENT if the PPTT doesn't exist, or -1 if no unified cache is + * found at @level for this cpu (which includes the cpu not being found). + * Otherwise returns the PPTT table offset of the processor node the cache + * was found under, which serves as the unique value for that cache. + */ +int find_acpi_cpu_cache_topology(unsigned int cpu, int level) +{ + struct acpi_table_header *table; + struct acpi_pptt_cache *found_cache; + acpi_status status; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_processor *cpu_node = NULL; + int ret = -1; + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, topology may be inaccurate\n"); + return -ENOENT; + } + + found_cache = acpi_find_cache_node(table, acpi_cpu_id, + CACHE_TYPE_UNIFIED, + level, + &cpu_node); + if (found_cache) + ret = ACPI_PTR_DIFF(cpu_node, table); + + acpi_put_table(table); + + return ret; +} + + +/** + * find_acpi_cpu_topology_package() - Determine a unique cpu package value + * @cpu: Kernel logical cpu number + * + * Determine a topology unique package ID for the given cpu.
+ * This ID can then be used to group peers, which will have matching ids. + * + * The search terminates when either a level is found with the PHYSICAL_PACKAGE + * flag set or we reach a root node. + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents the package for this cpu. + */ +int find_acpi_cpu_topology_package(unsigned int cpu) +{ + return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, + ACPI_PPTT_PHYSICAL_PACKAGE); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 15bfb15c2fa5..032e12a2fdc2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1297,4 +1297,8 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +int find_acpi_cpu_topology(unsigned int cpu, int level); +int find_acpi_cpu_topology_package(unsigned int cpu); +int find_acpi_cpu_cache_topology(unsigned int cpu, int level); + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 0ce82232232a2f76128e9bfcc6e8b662e110a671 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:01 -0500 Subject: ACPI: Enable PPTT support on ARM64 Now that we have a PPTT parser, in preparation for its use on arm64, let's build it.
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + drivers/acpi/Kconfig | 3 +++ drivers/acpi/Makefile | 1 + 3 files changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9c850f3b398f..4d98774cf3c7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI + select ACPI_PPTT if ACPI select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 516d7b36d6fb..b533eeb6139d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -547,6 +547,9 @@ config ACPI_CONFIGFS if ARM64 source "drivers/acpi/arm64/Kconfig" + +config ACPI_PPTT + bool endif config TPS68470_PMIC_OPREGION diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 48e202752754..6d59aa109a91 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_ACPI_BGRT) += bgrt.o obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o +obj-$(CONFIG_ACPI_PPTT) += pptt.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o -- cgit v1.2.3 From 582b468bdc6d9c287a432a63225cf7922e985e15 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:02 -0500 Subject: drivers: base cacheinfo: Add support for ACPI based firmware tables Call ACPI cache parsing routines from base cacheinfo code if ACPI is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level so that individual architectures can enable ACPI topology parsing. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 14 ++++++++++---- include/linux/cacheinfo.h | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 597aacb233fc..2880e2ab01f5 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { /* - * For non-DT systems, assume unique level 1 cache, system-wide + * For non-DT/ACPI systems, assume unique level 1 caches, system-wide * shared caches for all other levels. This will be used only if * arch specific code has not populated shared_cpu_map */ @@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, } #endif +int __weak cache_setup_acpi(unsigned int cpu) +{ + return -ENOTSUPP; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (of_have_populated_dt()) ret = cache_setup_of_node(cpu); else if (!acpi_disabled) - /* No cache property/hierarchy support yet in ACPI */ - ret = -ENOTSUPP; + ret = cache_setup_acpi(cpu); + if (ret) return ret; @@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->fw_token); + if (of_have_populated_dt()) + of_node_put(this_leaf->fw_token); } } diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 0c6f658054d2..89397e30e269 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -97,6 
+97,23 @@ int func(unsigned int cpu) \ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); +int cache_setup_acpi(unsigned int cpu); +#ifndef CONFIG_ACPI +/* + * acpi_find_last_cache_level is only called on ACPI enabled + * platforms using the PPTT for topology. This means that if + * the platform supports other firmware configuration methods + * we need to stub out the call when ACPI is disabled. + * ACPI enabled platforms not using PPTT won't be making calls + * to this function so we need not worry about them. + */ +static inline int acpi_find_last_cache_level(unsigned int cpu) +{ + return 0; +} +#else +int acpi_find_last_cache_level(unsigned int cpu); +#endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); -- cgit v1.2.3 From 8571890e1513bc6768495b6541fb8064e046a61c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:03 -0500 Subject: arm64: Add support for ACPI based firmware tables The /sys cache entries should support ACPI/PPTT generated cache topology information. For arm64, if ACPI is enabled, determine the max number of cache levels and populate them using the PPTT table if one is available. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cacheinfo.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 380f2e2fbed5..0bf0a835122f 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,7 @@ * along with this program. If not, see . 
*/ +#include #include #include @@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, static int __init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, of_level; + unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu) leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; } - of_level = of_find_last_cache_level(cpu); - if (level < of_level) { + if (acpi_disabled) + fw_level = of_find_last_cache_level(cpu); + else + fw_level = acpi_find_last_cache_level(cpu); + + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 * the information may be available in the device tree * only unified external caches are considered here */ - leaves += (of_level - level); - level = of_level; + leaves += (fw_level - level); + level = fw_level; } this_cpu_ci->num_levels = level; -- cgit v1.2.3 From 868abc07680c2c8b7f85ae883f9f1b90bf4ef4bf Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:04 -0500 Subject: arm64: topology: rename cluster_id The cluster concept isn't architecturally defined for arm64. Lets match the name of the arm64 topology field to the kernel macro that uses it. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Morten Rasmussen Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/topology.h | 4 ++-- arch/arm64/kernel/topology.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index c4f2d50491eb..6b10459e6905 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -7,14 +7,14 @@ struct cpu_topology { int thread_id; int core_id; - int cluster_id; + int package_id; cpumask_t thread_sibling; cpumask_t core_sibling; }; extern struct cpu_topology cpu_topology[NR_CPUS]; -#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) +#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 21868530018e..dc18b1e53194 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -47,7 +47,7 @@ static int __init get_cpu_for_node(struct device_node *node) return cpu; } -static int __init parse_core(struct device_node *core, int cluster_id, +static int __init parse_core(struct device_node *core, int package_id, int core_id) { char name[10]; @@ -63,7 +63,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, leaf = false; cpu = get_cpu_for_node(t); if (cpu >= 0) { - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { @@ -85,7 +85,7 @@ static int __init parse_core(struct device_node *core, int 
cluster_id, return -EINVAL; } - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -101,7 +101,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int cluster_id __initdata; + static int package_id __initdata; int core_id = 0; int i, ret; @@ -140,7 +140,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, cluster_id, core_id++); + ret = parse_core(c, package_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -158,7 +158,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) pr_warn("%pOF: empty cluster\n", cluster); if (leaf) - cluster_id++; + package_id++; return 0; } @@ -194,7 +194,7 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. 
*/ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].cluster_id == -1) + if (cpu_topology[cpu].package_id == -1) ret = -EINVAL; out_map: @@ -224,7 +224,7 @@ static void update_siblings_masks(unsigned int cpuid) for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + if (cpuid_topo->package_id != cpu_topo->package_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); @@ -245,7 +245,7 @@ void store_cpu_topology(unsigned int cpuid) struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; u64 mpidr; - if (cpuid_topo->cluster_id != -1) + if (cpuid_topo->package_id != -1) goto topology_populated; mpidr = read_cpuid_mpidr(); @@ -259,19 +259,19 @@ void store_cpu_topology(unsigned int cpuid) /* Multiprocessor system : Multi-threads per core */ cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; } else { /* Multiprocessor system : Single-thread per core */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | + cpuid_topo->package_id = MPIDR_AFFIN