From d93277b9839b0bde06238a7a7f644114edb2ad4a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 11 May 2018 13:25:49 +0100
Subject: Revert "arm64: Increase the max granular size"

This reverts commit 97303480753e48fb313dc0e15daaf11b0451cdb8.

Commit 97303480753e ("arm64: Increase the max granular size") increased
the cache line size to 128 to match Cavium ThunderX, apparently for some
performance benefit which could not be confirmed. This change, however,
has an impact on the network packet allocation in certain circumstances,
requiring slightly over a 4K page with a significant performance
degradation. The patch reverts L1_CACHE_SHIFT back to 6 (64-byte cache
line).

Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 9bbffc7a301f..1dd2c2db0010 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -33,7 +33,7 @@
 #define ICACHE_POLICY_VIPT	2
 #define ICACHE_POLICY_PIPT	3
 
-#define L1_CACHE_SHIFT		7
+#define L1_CACHE_SHIFT		(6)
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
 /*
-- 
cgit v1.2.3


From ebc7e21e0fa28c46b938baed292c77e2d3ef8165 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 11 May 2018 13:33:12 +0100
Subject: arm64: Increase ARCH_DMA_MINALIGN to 128

This patch increases the ARCH_DMA_MINALIGN to 128 so that it covers the
currently known Cache Writeback Granule (CTR_EL0.CWG) on arm64 and moves
the fallback in cache_line_size() from L1_CACHE_BYTES to this constant.
In addition, it warns (and taints) if the CWG is larger than
ARCH_DMA_MINALIGN as this is not safe with non-coherent DMA.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cache.h | 4 ++--
 arch/arm64/kernel/cpufeature.c | 9 ++-------
 arch/arm64/mm/dma-mapping.c    | 5 +++++
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 1dd2c2db0010..5df5cfe1c143 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -43,7 +43,7 @@
  * cache before the transfer is done, causing old data to be seen by
  * the CPU.
  */
-#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+#define ARCH_DMA_MINALIGN	(128)
 
 #ifndef __ASSEMBLY__
 
@@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void)
 static inline int cache_line_size(void)
 {
 	u32 cwg = cache_type_cwg();
-	return cwg ? 4 << cwg : L1_CACHE_BYTES;
+	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
 }
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9d1b06d67c53..fbee8c17a4e6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void)
 void __init setup_cpu_features(void)
 {
 	u32 cwg;
-	int cls;
 
 	setup_system_capabilities();
 	mark_const_caps_ready();
@@ -1627,13 +1626,9 @@ void __init setup_cpu_features(void)
 	 * Check for sane CTR_EL0.CWG value.
 	 */
 	cwg = cache_type_cwg();
-	cls = cache_line_size();
 	if (!cwg)
-		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
-			cls);
-	if (L1_CACHE_BYTES < cls)
-		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
-			L1_CACHE_BYTES, cls);
+		pr_warn("No Cache Writeback Granule information, assuming %d\n",
+			ARCH_DMA_MINALIGN);
 }
 
 static bool __maybe_unused
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..ed84432264de 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -504,6 +504,11 @@ static int __init arm64_dma_init(void)
 	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
 		swiotlb = 1;
 
+	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
+		   TAINT_CPU_OUT_OF_SPEC,
+		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+		   ARCH_DMA_MINALIGN, cache_line_size());
+
 	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
-- 
cgit v1.2.3


From 5c636aa015c644a3889044270b98c33a8a87734d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 9 May 2018 16:46:26 +0900
Subject: arm64: remove no-op macro VMLINUX_SYMBOL()

VMLINUX_SYMBOL() is a no-op unless CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
is defined.  It has only ever been selected by BLACKFIN and METAG, so
VMLINUX_SYMBOL() is unneeded for ARM64-specific code.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 0221aca6493d..605d1b60469c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -34,25 +34,25 @@ jiffies = jiffies_64;
 	 *    4 KB (see related ASSERT() below)		\
 	 */						\
 	. = ALIGN(SZ_4K);				\
-	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
+	__hyp_idmap_text_start = .;			\
 	*(.hyp.idmap.text)				\
-	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
-	VMLINUX_SYMBOL(__hyp_text_start) = .;		\
+	__hyp_idmap_text_end = .;			\
+	__hyp_text_start = .;				\
 	*(.hyp.text)					\
-	VMLINUX_SYMBOL(__hyp_text_end) = .;
+	__hyp_text_end = .;
 
 #define IDMAP_TEXT					\
 	. = ALIGN(SZ_4K);				\
-	VMLINUX_SYMBOL(__idmap_text_start) = .;		\
+	__idmap_text_start = .;				\
 	*(.idmap.text)					\
-	VMLINUX_SYMBOL(__idmap_text_end) = .;
+	__idmap_text_end = .;
 
 #ifdef CONFIG_HIBERNATION
 #define HIBERNATE_TEXT					\
 	. = ALIGN(SZ_4K);				\
-	VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+	__hibernate_exit_text_start = .;		\
 	*(.hibernate_exit.text)				\
-	VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+	__hibernate_exit_text_end = .;
 #else
 #define HIBERNATE_TEXT
 #endif
@@ -60,10 +60,10 @@ jiffies = jiffies_64;
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 #define TRAMP_TEXT					\
 	. = ALIGN(PAGE_SIZE);				\
-	VMLINUX_SYMBOL(__entry_tramp_text_start) = .;	\
+	__entry_tramp_text_start = .;			\
 	*(.entry.tramp.text)				\
 	. = ALIGN(PAGE_SIZE);				\
-	VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
+	__entry_tramp_text_end = .;
 #else
 #define TRAMP_TEXT
 #endif
-- 
cgit v1.2.3


From 92faa7bea3e7592673109e32c75d50f8ce6d5ec6 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 13 Apr 2018 15:44:35 +0100
Subject: arm64: Remove duplicate include

"make includecheck" detected a few duplicated includes in arch/arm64.

This patch removes the double inclusions.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kvm_mmu.h     | 1 -
 arch/arm64/kernel/armv8_deprecated.c | 3 +--
 arch/arm64/kernel/fpsimd.c           | 1 -
 arch/arm64/kernel/ptrace.c           | 2 --
 4 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 082110993647..f74987b76d91 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -72,7 +72,6 @@
 #ifdef __ASSEMBLY__
 
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
 
 /*
  * Convert a kernel VA into a HYP VA.
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 6e47fc3ab549..97d45d5151d4 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/uaccess.h>
 
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
@@ -20,8 +21,6 @@
 #include <asm/system_misc.h>
 #include <asm/traps.h>
 #include <asm/kprobes.h>
-#include <linux/uaccess.h>
-#include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace-events-emulation.h"
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a35364e750..3db8ed530e56 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -31,7 +31,6 @@
 #include <linux/percpu.h>
 #include <linux/prctl.h>
 #include <linux/preempt.h>
-#include <linux/prctl.h>
 #include <linux/ptrace.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7ff81fed46e1..f847285d96f3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1046,8 +1046,6 @@ static const struct user_regset_view user_aarch64_view = {
 };
 
 #ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
 enum compat_regset {
 	REGSET_COMPAT_GPR,
 	REGSET_COMPAT_VFP,
-- 
cgit v1.2.3


From e75bef2a4fe259b779765a85589e92657d26fdc9 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 24 Apr 2018 16:25:47 +0100
Subject: arm64: Select ARCH_HAS_FAST_MULTIPLIER

It is probably safe to assume that all Armv8-A implementations have a
multiplier whose efficiency is comparable to or better than a sequence of
three or so register-dependent arithmetic instructions. Select
ARCH_HAS_FAST_MULTIPLIER to get ever-so-slightly nicer codegen in the
few dusty old corners which care.

In a contrived benchmark calling hweight64() in a loop, this does indeed
turn out to be a small win overall, with no measurable impact on
Cortex-A57 but about 5% performance improvement on Cortex-A53.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..9c850f3b398f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -12,6 +12,7 @@ config ARM64
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
-- 
cgit v1.2.3


From 1cfc63b5ae60fe7e01773f38132f98d8b13a99a0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 30 Apr 2018 13:56:32 +0100
Subject: arm64: cmpwait: Clear event register before arming exclusive monitor

When waiting for a cacheline to change state in cmpwait, we may immediately
wake-up the first time around the outer loop if the event register was
already set (for example, because of the event stream).

Avoid these spurious wakeups by explicitly clearing the event register
before loading the cacheline and setting the exclusive monitor.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cmpxchg.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 4f5fd2a36e6e..3b0938281541 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -204,7 +204,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr,		\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
-	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"		\
+	"	sevl\n"							\
+	"	wfe\n"							\
+	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"			\
 	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
 	"	cbnz	%" #w "[tmp], 1f\n"				\
 	"	wfe\n"							\
-- 
cgit v1.2.3


From d529a18a61f3f497328f096ddf757af928d6105b Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:57:56 -0500
Subject: drivers: base: cacheinfo: move cache_setup_of_node()

In preparation for the next patch, and to aid in
review of that patch, let's move cache_setup_of_node()
further down in the module without any changes.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/base/cacheinfo.c | 80 ++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index edf726267282..09ccef7ddc99 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 }
 
 #ifdef CONFIG_OF
-static int cache_setup_of_node(unsigned int cpu)
-{
-	struct device_node *np;
-	struct cacheinfo *this_leaf;
-	struct device *cpu_dev = get_cpu_device(cpu);
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	unsigned int index = 0;
-
-	/* skip if of_node is already populated */
-	if (this_cpu_ci->info_list->of_node)
-		return 0;
-
-	if (!cpu_dev) {
-		pr_err("No cpu device for CPU %d\n", cpu);
-		return -ENODEV;
-	}
-	np = cpu_dev->of_node;
-	if (!np) {
-		pr_err("Failed to find cpu%d device node\n", cpu);
-		return -ENOENT;
-	}
-
-	while (index < cache_leaves(cpu)) {
-		this_leaf = this_cpu_ci->info_list + index;
-		if (this_leaf->level != 1)
-			np = of_find_next_cache_node(np);
-		else
-			np = of_node_get(np);/* cpu node itself */
-		if (!np)
-			break;
-		this_leaf->of_node = np;
-		index++;
-	}
-
-	if (index != cache_leaves(cpu)) /* not all OF nodes populated */
-		return -ENOENT;
-
-	return 0;
-}
-
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
@@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu)
 		cache_associativity(this_leaf);
 	}
 }
+
+static int cache_setup_of_node(unsigned int cpu)
+{
+	struct device_node *np;
+	struct cacheinfo *this_leaf;
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	unsigned int index = 0;
+
+	/* skip if of_node is already populated */
+	if (this_cpu_ci->info_list->of_node)
+		return 0;
+
+	if (!cpu_dev) {
+		pr_err("No cpu device for CPU %d\n", cpu);
+		return -ENODEV;
+	}
+	np = cpu_dev->of_node;
+	if (!np) {
+		pr_err("Failed to find cpu%d device node\n", cpu);
+		return -ENOENT;
+	}
+
+	while (index < cache_leaves(cpu)) {
+		this_leaf = this_cpu_ci->info_list + index;
+		if (this_leaf->level != 1)
+			np = of_find_next_cache_node(np);
+		else
+			np = of_node_get(np);/* cpu node itself */
+		if (!np)
+			break;
+		this_leaf->of_node = np;
+		index++;
+	}
+
+	if (index != cache_leaves(cpu)) /* not all OF nodes populated */
+		return -ENOENT;
+
+	return 0;
+}
 #else
 static void cache_of_override_properties(unsigned int cpu) { }
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-- 
cgit v1.2.3


From 2ff075c7dfd4705de12d687daede2dd664386b1c Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:57:57 -0500
Subject: drivers: base: cacheinfo: setup DT cache properties early

The original intent in cacheinfo was that an architecture
specific populate_cache_leaves() would probe the hardware
and then cache_shared_cpu_map_setup() and
cache_override_properties() would provide firmware help to
extend/expand upon what was probed. Arm64 was really
the only architecture that was working this way, and
with the removal of most of the hardware probing logic it
became clear that it was possible to simplify the logic a bit.

This patch combines the walk of the DT nodes with the
code updating the cache size/line_size and nr_sets.
cache_override_properties() (which was DT specific) is
then removed. The result is that cacheinfo.of_node is
no longer used as a temporary place to hold DT references
for future calls that update cache properties. That change
helps to clarify its one remaining use (matching
cacheinfo nodes that represent shared caches) which
will be used by the ACPI/PPTT code in the following patches.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/riscv/kernel/cacheinfo.c |  1 -
 drivers/base/cacheinfo.c      | 65 +++++++++++++++++++------------------------
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 10ed2749e246..0bc86e5f8f3f 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 			 struct device_node *node,
 			 enum cache_type type, unsigned int level)
 {
-	this_leaf->of_node = node;
 	this_leaf->level = level;
 	this_leaf->type = type;
 	/* not a sector cache */
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 09ccef7ddc99..a872523e8951 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type)
 	return type;
 }
 
-static void cache_size(struct cacheinfo *this_leaf)
+static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
 {
 	const char *propname;
 	const __be32 *cache_size;
@@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf)
 	ct_idx = get_cacheinfo_idx(this_leaf->type);
 	propname = cache_type_info[ct_idx].size_prop;
 
-	cache_size = of_get_property(this_leaf->of_node, propname, NULL);
+	cache_size = of_get_property(np, propname, NULL);
 	if (cache_size)
 		this_leaf->size = of_read_number(cache_size, 1);
 }
 
 /* not cache_line_size() because that's a macro in include/linux/cache.h */
-static void cache_get_line_size(struct cacheinfo *this_leaf)
+static void cache_get_line_size(struct cacheinfo *this_leaf,
+				struct device_node *np)
 {
 	const __be32 *line_size;
 	int i, lim, ct_idx;
@@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
 		const char *propname;
 
 		propname = cache_type_info[ct_idx].line_size_props[i];
-		line_size = of_get_property(this_leaf->of_node, propname, NULL);
+		line_size = of_get_property(np, propname, NULL);
 		if (line_size)
 			break;
 	}
@@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
 		this_leaf->coherency_line_size = of_read_number(line_size, 1);
 }
 
-static void cache_nr_sets(struct cacheinfo *this_leaf)
+static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
 {
 	const char *propname;
 	const __be32 *nr_sets;
@@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf)
 	ct_idx = get_cacheinfo_idx(this_leaf->type);
 	propname = cache_type_info[ct_idx].nr_sets_prop;
 
-	nr_sets = of_get_property(this_leaf->of_node, propname, NULL);
+	nr_sets = of_get_property(np, propname, NULL);
 	if (nr_sets)
 		this_leaf->number_of_sets = of_read_number(nr_sets, 1);
 }
@@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo *this_leaf)
 		this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
-static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+static bool cache_node_is_unified(struct cacheinfo *this_leaf,
+				  struct device_node *np)
 {
-	return of_property_read_bool(this_leaf->of_node, "cache-unified");
+	return of_property_read_bool(np, "cache-unified");
 }
 
-static void cache_of_override_properties(unsigned int cpu)
+static void cache_of_set_props(struct cacheinfo *this_leaf,
+			       struct device_node *np)
 {
-	int index;
-	struct cacheinfo *this_leaf;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-
-	for (index = 0; index < cache_leaves(cpu); index++) {
-		this_leaf = this_cpu_ci->info_list + index;
-		/*
-		 * init_cache_level must setup the cache level correctly
-		 * overriding the architecturally specified levels, so
-		 * if type is NONE at this stage, it should be unified
-		 */
-		if (this_leaf->type == CACHE_TYPE_NOCACHE &&
-		    cache_node_is_unified(this_leaf))
-			this_leaf->type = CACHE_TYPE_UNIFIED;
-		cache_size(this_leaf);
-		cache_get_line_size(this_leaf);
-		cache_nr_sets(this_leaf);
-		cache_associativity(this_leaf);
-	}
+	/*
+	 * init_cache_level must setup the cache level correctly
+	 * overriding the architecturally specified levels, so
+	 * if type is NONE at this stage, it should be unified
+	 */
+	if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+	    cache_node_is_unified(this_leaf, np))
+		this_leaf->type = CACHE_TYPE_UNIFIED;
+	cache_size(this_leaf, np);
+	cache_get_line_size(this_leaf, np);
+	cache_nr_sets(this_leaf, np);
+	cache_associativity(this_leaf);
 }
 
 static int cache_setup_of_node(unsigned int cpu)
@@ -193,6 +189,7 @@ static int cache_setup_of_node(unsigned int cpu)
 			np = of_node_get(np);/* cpu node itself */
 		if (!np)
 			break;
+		cache_of_set_props(this_leaf, np);
 		this_leaf->of_node = np;
 		index++;
 	}
@@ -203,7 +200,6 @@ static int cache_setup_of_node(unsigned int cpu)
 	return 0;
 }
 #else
-static void cache_of_override_properties(unsigned int cpu) { }
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
@@ -286,12 +282,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
 	}
 }
 
-static void cache_override_properties(unsigned int cpu)
-{
-	if (of_have_populated_dt())
-		return cache_of_override_properties(cpu);
-}
-
 static void free_cache_attributes(unsigned int cpu)
 {
 	if (!per_cpu_cacheinfo(cpu))
@@ -325,6 +315,10 @@ static int detect_cache_attributes(unsigned int cpu)
 	if (per_cpu_cacheinfo(cpu) == NULL)
 		return -ENOMEM;
 
+	/*
+	 * populate_cache_leaves() may completely setup the cache leaves and
+	 * shared_cpu_map or it may leave it partially setup.
+	 */
 	ret = populate_cache_leaves(cpu);
 	if (ret)
 		goto free_ci;
@@ -338,7 +332,6 @@ static int detect_cache_attributes(unsigned int cpu)
 		goto free_ci;
 	}
 
-	cache_override_properties(cpu);
 	return 0;
 
 free_ci:
-- 
cgit v1.2.3


From 9b97387c5c4260ffcdf3b913bdef0d98cb2d4a74 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:57:58 -0500
Subject: cacheinfo: rename of_node to fw_token

Rename and change the type of of_node to indicate
it is a generic pointer which is generally only used
for comparison purposes. In a later patch we will put
an ACPI/PPTT token pointer in fw_token so that
the code which builds the shared cpu masks can be reused.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/base/cacheinfo.c  | 16 +++++++++-------
 include/linux/cacheinfo.h |  8 +++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index a872523e8951..597aacb233fc 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -35,7 +35,7 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
-	return sib_leaf->of_node == this_leaf->of_node;
+	return sib_leaf->fw_token == this_leaf->fw_token;
 }
 
 /* OF properties to query for a given cache type */
@@ -167,9 +167,10 @@ static int cache_setup_of_node(unsigned int cpu)
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	unsigned int index = 0;
 
-	/* skip if of_node is already populated */
-	if (this_cpu_ci->info_list->of_node)
+	/* skip if fw_token is already populated */
+	if (this_cpu_ci->info_list->fw_token) {
 		return 0;
+	}
 
 	if (!cpu_dev) {
 		pr_err("No cpu device for CPU %d\n", cpu);
@@ -190,7 +191,7 @@ static int cache_setup_of_node(unsigned int cpu)
 		if (!np)
 			break;
 		cache_of_set_props(this_leaf, np);
-		this_leaf->of_node = np;
+		this_leaf->fw_token = np;
 		index++;
 	}
 
@@ -278,7 +279,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
 			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
 			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
 		}
-		of_node_put(this_leaf->of_node);
+		of_node_put(this_leaf->fw_token);
 	}
 }
 
@@ -323,8 +324,9 @@ static int detect_cache_attributes(unsigned int cpu)
 	if (ret)
 		goto free_ci;
 	/*
-	 * For systems using DT for cache hierarchy, of_node and shared_cpu_map
-	 * will be set up here only if they are not populated already
+	 * For systems using DT for cache hierarchy, fw_token
+	 * and shared_cpu_map will be set up here only if they are
+	 * not populated already
 	 */
 	ret = cache_shared_cpu_map_setup(cpu);
 	if (ret) {
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 3d9805297cda..0c6f658054d2 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -34,9 +34,8 @@ enum cache_type {
  * @shared_cpu_map: logical cpumask representing all the cpus sharing
  *	this cache node
  * @attributes: bitfield representing various cache attributes
- * @of_node: if devicetree is used, this represents either the cpu node in
- *	case there's no explicit cache node or the cache node itself in the
- *	device tree
+ * @fw_token: Unique value used to determine if different cacheinfo
+ *	structures represent a single hardware cache instance.
  * @disable_sysfs: indicates whether this node is visible to the user via
  *	sysfs or not
  * @priv: pointer to any private data structure specific to particular
@@ -65,8 +64,7 @@ struct cacheinfo {
 #define CACHE_ALLOCATE_POLICY_MASK	\
 	(CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE)
 #define CACHE_ID		BIT(4)
-
-	struct device_node *of_node;
+	void *fw_token;
 	bool disable_sysfs;
 	void *priv;
 };
-- 
cgit v1.2.3


From 30d87bfacbee396646975a00959764a7c49510ec Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:57:59 -0500
Subject: arm64/acpi: Create arch specific cpu to acpi id helper

It's helpful to be able to look up the acpi_processor_id associated
with a logical cpu. Provide an arm64 helper to do this.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/acpi.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 32f465a80e4e..0db62a4cbce2 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void)
 }
 
 struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+{
+	return	acpi_cpu_get_madt_gicc(cpu)->uid;
+}
 
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
 void __init acpi_init_cpus(void);
-- 
cgit v1.2.3


From 2bd00bcd73e5edd5769e2a5f24c59a517582d862 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:58:00 -0500
Subject: ACPI/PPTT: Add Processor Properties Topology Table parsing

ACPI 6.2 adds a new table, which describes how processing units
are related to each other in a tree-like fashion. Caches are
also sprinkled throughout the tree and describe the properties
of the caches in relation to other caches and processing units.

Add the code to parse the cache hierarchy and report the total
number of levels of cache for a given core using
acpi_find_last_cache_level() as well as fill out the individual
core's cache information with cache_setup_acpi() once the
cpu_cacheinfo structure has been populated by the arch-specific
code.

An additional patch later in the set adds the ability to report
peers in the topology using find_acpi_cpu_topology()
to report a unique ID for each processing unit at a given level
in the tree. These unique IDs can then be used to match related
processing units which exist as threads, within a given
package, etc.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/acpi/pptt.c  | 655 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h |   4 +
 2 files changed, 659 insertions(+)
 create mode 100644 drivers/acpi/pptt.c

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
new file mode 100644
index 000000000000..e5ea1974d1e3
--- /dev/null
+++ b/drivers/acpi/pptt.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pptt.c - parsing of Processor Properties Topology Table (PPTT)
+ *
+ * Copyright (C) 2018, ARM
+ *
+ * This file implements parsing of the Processor Properties Topology Table
+ * which is optionally used to describe the processor and cache topology.
+ * Due to the relative pointers used throughout the table, this doesn't
+ * leverage the existing subtable parsing in the kernel.
+ *
+ * The PPTT structure is an inverted tree, with each node potentially
+ * holding one or two inverted tree data structures describing
+ * the caches available at that level. Each cache structure optionally
+ * contains properties describing the cache at a given level which can be
+ * used to override hardware probed values.
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <acpi/processor.h>
+
+static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
+							u32 pptt_ref)
+{
+	struct acpi_subtable_header *subtable;
+
+	/*
+	 * References smaller than a subtable header (this covers reference
+	 * 0) and headers running past the end of the table are rejected.
+	 */
+	if (pptt_ref < sizeof(*subtable) ||
+	    pptt_ref + sizeof(*subtable) > table_hdr->length)
+		return NULL;
+
+	subtable = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+	/* a zero length entry, or one overrunning the table, is invalid */
+	if (!subtable->length ||
+	    pptt_ref + subtable->length > table_hdr->length)
+		return NULL;
+
+	return subtable;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header *table_hdr,
+						   u32 pptt_ref)
+{
+	return (void *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_hdr,
+						u32 pptt_ref)
+{
+	return (void *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr,
+							   struct acpi_pptt_processor *node,
+							   int resource)
+{
+	u32 *refs;
+
+	if (resource >= node->number_of_priv_resources)
+		return NULL;
+
+	/* the private resource references start right after the node */
+	refs = ACPI_ADD_PTR(u32, node, sizeof(*node));
+
+	return fetch_pptt_subtable(table_hdr, refs[resource]);
+}
+
+static inline bool acpi_pptt_match_type(int table_type, int type)
+{
+	return (table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type ||
+	       (table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type) != 0;
+}
+
+/**
+ * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @local_level: passed res reflects this cache level
+ * @res: cache resource in the PPTT we want to walk
+ * @found: returns a pointer to the requested level if found
+ * @level: the requested cache level
+ * @type: the requested cache type
+ *
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, counting how many levels are found
+ * as well as checking the cache type (icache, dcache, unified). If a
+ * level & type match, then we set found, and continue the search.
+ * Once the entire cache branch has been walked return its max
+ * depth.
+ *
+ * Return: The cache structure and the level we terminated with.
+ */
+static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
+				int local_level,
+				struct acpi_subtable_header *res,
+				struct acpi_pptt_cache **found,
+				int level, int type)
+{
+	struct acpi_pptt_cache *cache;
+
+	if (res->type != ACPI_PPTT_TYPE_CACHE)
+		return 0;
+
+	for (cache = (struct acpi_pptt_cache *)res; cache;
+	     cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache)) {
+		local_level++;
+
+		if (local_level != level ||
+		    !(cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) ||
+		    !acpi_pptt_match_type(cache->attributes, type))
+			continue;
+
+		if (*found != NULL && cache != *found)
+			pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
+
+		pr_debug("Found cache @ level %d\n", level);
+		/*
+		 * Record the match but keep walking so that the full
+		 * depth of this branch is counted and any duplicate
+		 * cache node is reported.
+		 */
+		*found = cache;
+	}
+	return local_level;
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr,
+						     struct acpi_pptt_processor *cpu_node,
+						     int *starting_level, int level,
+						     int type)
+{
+	struct acpi_pptt_cache *match = NULL;
+	struct acpi_subtable_header *res;
+	int deepest = *starting_level;
+	int depth, idx;
+
+	/* visit every private resource referenced by the processor node */
+	for (idx = 0; ; idx++) {
+		res = acpi_get_pptt_resource(table_hdr, cpu_node, idx);
+		if (!res)
+			break;
+
+		depth = acpi_pptt_walk_cache(table_hdr, *starting_level,
+					     res, &match, level, type);
+		/*
+		 * Resources of one node may differ in depth; the
+		 * largest depth seen is the one that is kept.
+		 */
+		if (depth > deepest)
+			deepest = depth;
+	}
+	if (deepest > *starting_level)
+		*starting_level = deepest;
+
+	return match;
+}
+
+/**
+ * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @cpu_node: processor node we wish to count caches for
+ *
+ * Given a processor node containing a processing unit, walk into it and count
+ * how many levels exist solely for it, and then walk up each level until we hit
+ * the root node (ignore the package level because it may be possible to have
+ * caches that exist across packages). Count the number of cache levels that
+ * exist at each level on the way up.
+ *
+ * Return: Total number of levels found.
+ */
+static int acpi_count_levels(struct acpi_table_header *table_hdr,
+			     struct acpi_pptt_processor *cpu_node)
+{
+	int depth = 0;
+
+	/* level 0 can never match, so this walk only counts levels */
+	do {
+		acpi_find_cache_level(table_hdr, cpu_node, &depth, 0, 0);
+		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+	} while (cpu_node != NULL);
+	return depth;
+}
+
+/**
+ * acpi_pptt_leaf_node() - Given a processor node, determine if it's a leaf
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @node: passed node is checked to see if it's a leaf
+ *
+ * Determine if the *node parameter is a leaf node by iterating the
+ * PPTT table, looking for processor nodes whose parent references it.
+ *
+ * Return: 0 if we find a node referencing the passed node (or table error),
+ * or 1 if we don't.
+ */
+static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
+			       struct acpi_pptt_processor *node)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	u32 node_entry;
+	struct acpi_pptt_processor *cpu_node;
+	u32 proc_sz;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	node_entry = ACPI_PTR_DIFF(node, table_hdr);
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+	proc_sz = sizeof(struct acpi_pptt_processor);
+
+	/* scan while a complete processor node still fits in the table */
+	while ((unsigned long)entry + proc_sz <= table_end) {
+		cpu_node = (struct acpi_pptt_processor *)entry;
+		if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+		    cpu_node->parent == node_entry)
+			return 0;
+		if (entry->length == 0)
+			return 0;
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+	}
+	return 1;
+}
+
+/**
+ * acpi_find_processor_node() - Given a PPTT table find the requested processor
+ * @table_hdr:  Pointer to the head of the PPTT table
+ * @acpi_cpu_id: cpu we are searching for
+ *
+ * Find the subtable entry describing the provided processor.
+ * This is done by iterating the PPTT table looking for processor nodes
+ * which have an acpi_processor_id that matches the acpi_cpu_id parameter
+ * passed into the function. If we find a node that matches this criteria
+ * we verify that it's a leaf node in the topology rather than depending
+ * on the valid flag, which doesn't need to be set for leaf nodes.
+ *
+ * Return: NULL, or the processor's acpi_pptt_processor*
+ */
+static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_header *table_hdr,
+							    u32 acpi_cpu_id)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	struct acpi_pptt_processor *cpu_node;
+	u32 proc_sz;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+	proc_sz = sizeof(struct acpi_pptt_processor);
+
+	/* scan while a complete processor node still fits in the table */
+	while ((unsigned long)entry + proc_sz <= table_end) {
+		cpu_node = (struct acpi_pptt_processor *)entry;
+
+		if (entry->length == 0) {
+			pr_warn("Invalid zero length subtable\n");
+			break;
+		}
+		if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+		    acpi_cpu_id == cpu_node->acpi_processor_id &&
+		    acpi_pptt_leaf_node(table_hdr, cpu_node)) {
+			return (struct acpi_pptt_processor *)entry;
+		}
+
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+	}
+
+	return NULL;
+}
+
+static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
+				  u32 acpi_cpu_id)
+{
+	struct acpi_pptt_processor *leaf;
+
+	leaf = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+	/* a processor missing from the table has no levels to report */
+	if (!leaf)
+		return 0;
+
+	return acpi_count_levels(table_hdr, leaf);
+}
+
+static u8 acpi_cache_type(enum cache_type type)
+{
+	if (type == CACHE_TYPE_DATA) {
+		pr_debug("Looking for data cache\n");
+		return ACPI_PPTT_CACHE_TYPE_DATA;
+	}
+
+	if (type == CACHE_TYPE_INST) {
+		pr_debug("Looking for instruction cache\n");
+		return ACPI_PPTT_CACHE_TYPE_INSTR;
+	}
+
+	/*
+	 * Every other type is looked up as a unified cache. It is
+	 * important that ACPI_PPTT_CACHE_TYPE_UNIFIED contains the
+	 * bit pattern that will match both ACPI unified bit patterns
+	 * because we use it later to match both cases.
+	 */
+	pr_debug("Looking for unified cache\n");
+	return ACPI_PPTT_CACHE_TYPE_UNIFIED;
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
+						    u32 acpi_cpu_id,
+						    enum cache_type type,
+						    unsigned int level,
+						    struct acpi_pptt_processor **node)
+{
+	struct acpi_pptt_processor *cur;
+	struct acpi_pptt_cache *cache = NULL;
+	u8 acpi_type = acpi_cache_type(type);
+	int depth = 0;
+
+	pr_debug("Looking for CPU %d's level %d cache type %d\n",
+		 acpi_cpu_id, level, acpi_type);
+
+	/* climb from the leaf towards the root until the cache is found */
+	for (cur = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+	     cur && !cache;
+	     cur = fetch_pptt_node(table_hdr, cur->parent)) {
+		cache = acpi_find_cache_level(table_hdr, cur,
+					      &depth, level, acpi_type);
+		*node = cur;
+	}
+
+	return cache;
+}
+
+/* total number of attributes checked by the properties code */
+#define PPTT_CHECKED_ATTRIBUTES 4
+
+/**
+ * update_cache_properties() - Update cacheinfo for the given processor
+ * @this_leaf: Kernel cache info structure being updated
+ * @found_cache: The PPTT node describing this cache instance
+ * @cpu_node: Processor node stored in the leaf's fw_token as a unique reference
+ *
+ * The ACPI spec implies that the fields in the cache structures are used to
+ * extend and correct the information probed from the hardware. Let's only
+ * set fields that we determine are VALID.
+ *
+ * Return: nothing. Side effect of updating the global cacheinfo
+ */
+static void update_cache_properties(struct cacheinfo *this_leaf,
+				    struct acpi_pptt_cache *found_cache,
+				    struct acpi_pptt_processor *cpu_node)
+{
+	int valid_flags = 0;
+
+	this_leaf->fw_token = cpu_node;
+	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
+		this_leaf->size = found_cache->size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
+		this_leaf->coherency_line_size = found_cache->line_size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
+		this_leaf->number_of_sets = found_cache->number_of_sets;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
+		this_leaf->ways_of_associativity = found_cache->associativity;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
+		case ACPI_PPTT_CACHE_POLICY_WT:
+			this_leaf->attributes = CACHE_WRITE_THROUGH;
+			break;
+		case ACPI_PPTT_CACHE_POLICY_WB:
+			this_leaf->attributes = CACHE_WRITE_BACK;
+			break;
+		}
+	}
+	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
+		case ACPI_PPTT_CACHE_READ_ALLOCATE:
+			this_leaf->attributes |= CACHE_READ_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
+			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_RW_ALLOCATE:
+		case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
+			this_leaf->attributes |=
+				CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
+			break;
+		}
+	}
+	/*
+	 * Only when size, line size, number of sets and associativity were
+	 * all valid is a NOCACHE leaf promoted to a unified cache.
+	 */
+	if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+	    valid_flags == PPTT_CHECKED_ATTRIBUTES)
+		this_leaf->type = CACHE_TYPE_UNIFIED;
+}
+
+static void cache_setup_acpi_cpu(struct acpi_table_header *table,
+				 unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct acpi_pptt_processor *node = NULL;
+	struct acpi_pptt_cache *cache;
+	struct cacheinfo *leaf;
+	unsigned int i;
+
+	/* look every leaf up in the PPTT; unmatched leaves stay untouched */
+	for (i = 0; i < get_cpu_cacheinfo(cpu)->num_leaves; i++) {
+		leaf = &this_cpu_ci->info_list[i];
+		cache = acpi_find_cache_node(table, acpi_cpu_id,
+					     leaf->type,
+					     leaf->level,
+					     &node);
+		pr_debug("found = %p %p\n", cache, node);
+
+		if (!cache)
+			continue;
+
+		update_cache_properties(leaf, cache, node);
+	}
+}
+
+/* Passing level values greater than this will result in search termination */
+#define PPTT_ABORT_PACKAGE 0xFF
+
+static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
+								  struct acpi_pptt_processor *cpu,
+								  int level, int flag)
+{
+	struct acpi_pptt_processor *parent;
+
+	for (; cpu && level; level--) {
+		/* stop at the first node carrying the requested flag */
+		if (cpu->flags & flag)
+			return cpu;
+		pr_debug("level %d\n", level);
+		parent = fetch_pptt_node(table_hdr, cpu->parent);
+		if (!parent)
+			return cpu;
+		cpu = parent;
+	}
+	return cpu;
+}
+
+/**
+ * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature
+ * @table: Pointer to the head of the PPTT table
+ * @cpu: Kernel logical cpu number
+ * @level: A level that terminates the search
+ * @flag: A flag which terminates the search
+ *
+ * Get a unique value given a cpu, and a topology level, that can be
+ * matched to determine which cpus share common topological features
+ * at that level.
+ *
+ * Return: Unique value, or -ENOENT if unable to locate cpu
+ */
+static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
+				     unsigned int cpu, int level, int flag)
+{
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct acpi_pptt_processor *cpu_node;
+
+	cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+	if (!cpu_node) {
+		pr_warn_once("PPTT table found, but unable to locate core %d (%d)\n",
+			    cpu, acpi_cpu_id);
+		return -ENOENT;
+	}
+
+	cpu_node = acpi_find_processor_package_id(table, cpu_node, level, flag);
+	/* only level 0 is reported through the node's acpi_processor_id */
+	if (level == 0)
+		return cpu_node->acpi_processor_id;
+	return ACPI_PTR_DIFF(cpu_node, table);
+}
+
+static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
+{
+	struct acpi_table_header *pptt;
+	acpi_status status;
+	int tag;
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, cpu topology may be inaccurate\n");
+		return -ENOENT;
+	}
+	tag = topology_get_acpi_cpu_tag(pptt, cpu, level, flag);
+	pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n",
+		 cpu, level, tag);
+	acpi_put_table(pptt);
+
+	return tag;
+}
+
+/**
+ * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
+ * @cpu: Kernel logical cpu number
+ *
+ * Given a logical cpu number, returns the number of levels of cache represented
+ * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
+ * indicating we didn't find any cache levels.
+ *
+ * Return: Cache levels visible to this core.
+ */
+int acpi_find_last_cache_level(unsigned int cpu)
+{
+	struct acpi_table_header *pptt;
+	acpi_status status;
+	int levels = 0;
+	u32 acpi_cpu_id;
+
+	pr_debug("Cache Setup find last level cpu=%d\n", cpu);
+
+	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt);
+	if (!ACPI_FAILURE(status)) {
+		levels = acpi_find_cache_levels(pptt, acpi_cpu_id);
+		acpi_put_table(pptt);
+	} else {
+		pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+	}
+	pr_debug("Cache Setup find last level level=%d\n", levels);
+
+	return levels;
+}
+
+/**
+ * cache_setup_acpi() - Override CPU cache topology with data from the PPTT
+ * @cpu: Kernel logical cpu number
+ *
+ * Updates the global cache info provided by cpu_get_cacheinfo()
+ * when there are valid properties in the acpi_pptt_cache nodes. A
+ * successful parse may not result in any updates if none of the
+ * cache levels have any valid flags set.  Further, a unique value is
+ * associated with each known CPU cache entry. This unique value
+ * can be used to determine whether caches are shared between cpus.
+ *
+ * Return: -ENOENT on failure to find table, or 0 on success
+ */
+int cache_setup_acpi(unsigned int cpu)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+
+	pr_debug("Cache Setup ACPI cpu %d\n", cpu);
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+		return -ENOENT;
+	}
+
+	cache_setup_acpi_cpu(table, cpu);
+	acpi_put_table(table);
+
+	return 0;
+}
+
+/**
+ * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu
+ * @cpu: Kernel logical cpu number
+ * @level: The topological level for which we would like a unique ID
+ *
+ * Determine a topology unique ID for each thread/core/cluster/mc_grouping
+ * /socket/etc. This ID can then be used to group peers, which will have
+ * matching IDs.
+ *
+ * No flag terminates this search: it ends when either the requested level
+ * is found or we reach a root node. Levels beyond the termination point
+ * will return the same unique ID. The unique ID for level 0 is the ACPI
+ * processor ID. All other levels use the offset of the node found within
+ * the PPTT to uniquely identify a topological feature.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_topology(unsigned int cpu, int level)
+{
+	return find_acpi_cpu_topology_tag(cpu, level, 0);
+}
+
+/**
+ * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
+ * @cpu: Kernel logical cpu number
+ * @level: The cache level for which we would like a unique ID
+ *
+ * Determine a unique ID for each unified cache in the system
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or no matching cache is found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
+{
+	struct acpi_table_header *table;
+	struct acpi_pptt_cache *found_cache;
+	acpi_status status;
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct acpi_pptt_processor *cpu_node = NULL;
+	int ret = -ENOENT;
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, topology may be inaccurate\n");
+		return -ENOENT;
+	}
+
+	found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+					   CACHE_TYPE_UNIFIED,
+					   level,
+					   &cpu_node);
+	if (found_cache)
+		ret = ACPI_PTR_DIFF(cpu_node, table);
+
+	acpi_put_table(table);
+
+	return ret;
+}
+
+
+/**
+ * find_acpi_cpu_topology_package() - Determine a unique cpu package value
+ * @cpu: Kernel logical cpu number
+ *
+ * Determine a topology unique package ID for the given cpu.
+ * This ID can then be used to group peers, which will have matching ids.
+ *
+ * The search terminates when a level with the PHYSICAL_PACKAGE flag set is
+ * found, a root node is reached, or PPTT_ABORT_PACKAGE levels were walked.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents the package for this cpu.
+ */
+int find_acpi_cpu_topology_package(unsigned int cpu)
+{
+	return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
+					  ACPI_PPTT_PHYSICAL_PACKAGE);
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 15bfb15c2fa5..032e12a2fdc2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1297,4 +1297,8 @@ static inline int lpit_read_residency_count_address(u64 *address)
 }
 #endif
 
+int find_acpi_cpu_topology(unsigned int cpu, int level);
+int find_acpi_cpu_topology_package(unsigned int cpu);
+int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
+
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3


From 0ce82232232a2f76128e9bfcc6e8b662e110a671 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:58:01 -0500
Subject: ACPI: Enable PPTT support on ARM64

Now that we have a PPTT parser, in preparation for its use
on arm64, let's build it.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig    | 1 +
 drivers/acpi/Kconfig  | 3 +++
 drivers/acpi/Makefile | 1 +
 3 files changed, 5 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9c850f3b398f..4d98774cf3c7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,6 +7,7 @@ config ARM64
 	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
 	select ACPI_MCFG if ACPI
 	select ACPI_SPCR_TABLE if ACPI
+	select ACPI_PPTT if ACPI
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 516d7b36d6fb..b533eeb6139d 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -547,6 +547,9 @@ config ACPI_CONFIGFS
 
 if ARM64
 source "drivers/acpi/arm64/Kconfig"
+
+config ACPI_PPTT
+	bool
 endif
 
 config TPS68470_PMIC_OPREGION
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 48e202752754..6d59aa109a91 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_ACPI_BGRT)		+= bgrt.o
 obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc_acpi.o
 obj-$(CONFIG_ACPI_SPCR_TABLE)	+= spcr.o
 obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o
+obj-$(CONFIG_ACPI_PPTT) 	+= pptt.o
 
 # processor has its own "processor." module_param namespace
 processor-y			:= processor_driver.o
-- 
cgit v1.2.3


From 582b468bdc6d9c287a432a63225cf7922e985e15 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:58:02 -0500
Subject: drivers: base cacheinfo: Add support for ACPI based firmware tables

Call ACPI cache parsing routines from base cacheinfo code if ACPI
is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level
so that individual architectures can enable ACPI topology parsing.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/base/cacheinfo.c  | 14 ++++++++++----
 include/linux/cacheinfo.h | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 597aacb233fc..2880e2ab01f5 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
 	/*
-	 * For non-DT systems, assume unique level 1 cache, system-wide
+	 * For non-DT/ACPI systems, assume unique level 1 caches, system-wide
 	 * shared caches for all other levels. This will be used only if
 	 * arch specific code has not populated shared_cpu_map
 	 */
@@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 }
 #endif
 
+int __weak cache_setup_acpi(unsigned int cpu)
+{
+	return -ENOTSUPP;
+}
+
 static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
 	if (of_have_populated_dt())
 		ret = cache_setup_of_node(cpu);
 	else if (!acpi_disabled)
-		/* No cache property/hierarchy support yet in ACPI */
-		ret = -ENOTSUPP;
+		ret = cache_setup_acpi(cpu);
+
 	if (ret)
 		return ret;
 
@@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
 			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
 			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
 		}
-		of_node_put(this_leaf->fw_token);
+		if (of_have_populated_dt())
+			of_node_put(this_leaf->fw_token);
 	}
 }
 
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 0c6f658054d2..89397e30e269 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -97,6 +97,23 @@ int func(unsigned int cpu)					\
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
+int cache_setup_acpi(unsigned int cpu);
+#ifndef CONFIG_ACPI
+/*
+ * acpi_find_last_cache_level is only called on ACPI enabled
+ * platforms using the PPTT for topology. This means that if
+ * the platform supports other firmware configuration methods
+ * we need to stub out the call when ACPI is disabled.
+ * ACPI enabled platforms not using PPTT won't be making calls
+ * to this function so we need not worry about them.
+ */
+static inline int acpi_find_last_cache_level(unsigned int cpu)
+{
+	return 0;
+}
+#else
+int acpi_find_last_cache_level(unsigned int cpu);
+#endif
 
 const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
 
-- 
cgit v1.2.3


From 8571890e1513bc6768495b6541fb8064e046a61c Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:58:03 -0500
Subject: arm64: Add support for ACPI based firmware tables

The /sys cache entries should support ACPI/PPTT generated cache
topology information.  For arm64, if ACPI is enabled, determine
the max number of cache levels and populate them using the PPTT
table if one is available.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cacheinfo.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 380f2e2fbed5..0bf0a835122f 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/cacheinfo.h>
 #include <linux/of.h>
 
@@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 
 static int __init_cache_level(unsigned int cpu)
 {
-	unsigned int ctype, level, leaves, of_level;
+	unsigned int ctype, level, leaves, fw_level;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
 	for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu)
 		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
 	}
 
-	of_level = of_find_last_cache_level(cpu);
-	if (level < of_level) {
+	if (acpi_disabled)
+		fw_level = of_find_last_cache_level(cpu);
+	else
+		fw_level = acpi_find_last_cache_level(cpu);
+
+	if (level < fw_level) {
 		/*
 		 * some external caches not specified in CLIDR_EL1
 		 * the information may be available in the device tree
 		 * only unified external caches are considered here
 		 */
-		leaves += (of_level - level);
-		level = of_level;
+		leaves += (fw_level - level);
+		level = fw_level;
 	}
 
 	this_cpu_ci->num_levels = level;
-- 
cgit v1.2.3


From 868abc07680c2c8b7f85ae883f9f1b90bf4ef4bf Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Fri, 11 May 2018 18:58:04 -0500
Subject: arm64: topology: rename cluster_id

The cluster concept isn't architecturally defined for arm64.
Let's match the name of the arm64 topology field to the kernel macro
that uses it.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Vijaya Kumar K <vkilari@codeaurora.org>
Tested-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Tested-by: Tomasz Nowicki <Tomasz.Nowicki@cavium.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/topology.h |  4 ++--
 arch/arm64/kernel/topology.c      | 26 +++++++++++++-------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index c4f2d50491eb..6b10459e6905 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -7,14 +7,14 @@
 struct cpu_topology {
 	int thread_id;
 	int core_id;
-	int cluster_id;
+	int package_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
 };
 
 extern struct cpu_topology cpu_topology[NR_CPUS];
 
-#define topology_physical_package_id(cpu)	(cpu_topology[cpu].cluster_id)
+#define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
 #define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
 #define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 21868530018e..dc18b1e53194 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -47,7 +47,7 @@ static int __init get_cpu_for_node(struct device_node *node)
 	return cpu;
 }
 
-static int __init parse_core(struct device_node *core, int cluster_id,
+static int __init parse_core(struct device_node *core, int package_id,
 			     int core_id)
 {
 	char name[10];
@@ -63,7 +63,7 @@ static int __init parse_core(struct device_node *core, int cluster_id,
 			leaf = false;
 			cpu = get_cpu_for_node(t);
 			if (cpu >= 0) {
-				cpu_topology[cpu].cluster_id = cluster_id;
+				cpu_topology[cpu].package_id = package_id;
 				cpu_topology[cpu].core_id = core_id;
 				cpu_topology[cpu].thread_id = i;
 			} else {
@@ -85,7 +85,7 @@ static int __init parse_core(struct device_node *core, int cluster_id,
 			return -EINVAL;
 		}
 
-		cpu_topology[cpu].cluster_id = cluster_id;
+		cpu_topology[cpu].package_id = package_id;
 		cpu_topology[cpu].core_id = core_id;
 	} else if (leaf) {
 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -101,7 +101,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 	bool leaf = true;
 	bool has_cores = false;
 	struct device_node *c;
-	static int cluster_id __initdata;
+	static int package_id __initdata;
 	int core_id = 0;
 	int i, ret;
 
@@ -140,7 +140,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 			}
 
 			if (leaf) {
-				ret = parse_core(c, cluster_id, core_id++);
+				ret = parse_core(c, package_id, core_id++);
 			} else {
 				pr_err("%pOF: Non-leaf cluster with core %s\n",
 				       cluster, name);
@@ -158,7 +158,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
 		pr_warn("%pOF: empty cluster\n", cluster);
 
 	if (leaf)
-		cluster_id++;
+		package_id++;
 
 	return 0;
 }
@@ -194,7 +194,7 @@ static int __init parse_dt_topology(void)
 	 * only mark cores described in the DT as possible.
 	 */
 	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].cluster_id == -1)
+		if (cpu_topology[cpu].package_id == -1)
 			ret = -EINVAL;
 
 out_map:
@@ -224,7 +224,7 @@ static void update_siblings_masks(unsigned int cpuid)
 	for_each_possible_cpu(cpu) {
 		cpu_topo = &cpu_topology[cpu];
 
-		if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+		if (cpuid_topo->package_id != cpu_topo->package_id)
 			continue;
 
 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -245,7 +245,7 @@ void store_cpu_topology(unsigned int cpuid)
 	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
 	u64 mpidr;
 
-	if (cpuid_topo->cluster_id != -1)
+	if (cpuid_topo->package_id != -1)
 		goto topology_populated;
 
 	mpidr = read_cpuid_mpidr();
@@ -259,19 +259,19 @@ void store_cpu_topology(unsigned int cpuid)
 		/* Multiprocessor system : Multi-threads per core */
 		cpuid_topo->thread_id  = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
+		cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
 					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
 	} else {
 		/* Multiprocessor system : Single-thread per core */
 		cpuid_topo->thread_id  = -1;
 		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
+		cpuid_topo->package_id = MPIDR_AFFIN