diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 14:11:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 14:11:08 -0700 |
commit | 145ff1ec090dce9beb5a9590b5dc288e7bb2e65d (patch) | |
tree | 3e10a7c59553e56c1ea5f0aa71a2c3c9d6b7982b /arch | |
parent | 8c4e1c027ae63c67c523d695e4e8565ff78af1ba (diff) | |
parent | 0e4cd9f2654915be8d09a1bd1b405ce5426e64c4 (diff) | |
download | linux-145ff1ec090dce9beb5a9590b5dc288e7bb2e65d.tar.gz linux-145ff1ec090dce9beb5a9590b5dc288e7bb2e65d.tar.bz2 linux-145ff1ec090dce9beb5a9590b5dc288e7bb2e65d.zip |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 and cross-arch updates from Catalin Marinas:
"Here's a slightly wider-spread set of updates for 5.9.
Going outside the usual arch/arm64/ area is the removal of
read_barrier_depends() series from Will and the MSI/IOMMU ID
translation series from Lorenzo.
The notable arm64 updates include ARMv8.4 TLBI range operations and
translation level hint, time namespace support, and perf.
Summary:
- Removal of the tremendously unpopular read_barrier_depends()
barrier, which is a NOP on all architectures apart from Alpha, in
favour of allowing architectures to override READ_ONCE() and do
whatever dance they need to do to ensure address dependencies
provide LOAD -> LOAD/STORE ordering.
This work also offers a potential solution if compilers are shown
to convert LOAD -> LOAD address dependencies into control
dependencies (e.g. under LTO), as weakly ordered architectures will
effectively be able to upgrade READ_ONCE() to smp_load_acquire().
The latter case is not used yet, but will be discussed further at
LPC.
- Make the MSI/IOMMU input/output ID translation PCI agnostic,
augment the MSI/IOMMU ACPI/OF ID mapping APIs to accept an input ID
bus-specific parameter and apply the resulting changes to the
device ID space provided by the Freescale FSL bus.
- arm64 support for TLBI range operations and translation table level
hints (part of the ARMv8.4 architecture version).
- Time namespace support for arm64.
- Export the virtual and physical address sizes in vmcoreinfo for
makedumpfile and crash utilities.
- CPU feature handling cleanups and checks for programmer errors
(overlapping bit-fields).
- ACPI updates for arm64: disallow AML accesses to EFI code regions
and kernel memory.
- perf updates for arm64.
- Miscellaneous fixes and cleanups, most notably PLT counting
optimisation for module loading, recordmcount fix to ignore
relocations other than R_AARCH64_CALL26, CMA areas reserved for
gigantic pages on 16K and 64K configurations.
- Trivial typos, duplicate words"
Link: http://lkml.kernel.org/r/20200710165203.31284-1-will@kernel.org
Link: http://lkml.kernel.org/r/20200619082013.13661-1-lorenzo.pieralisi@arm.com
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (82 commits)
arm64: use IRQ_STACK_SIZE instead of THREAD_SIZE for irq stack
arm64/mm: save memory access in check_and_switch_context() fast switch path
arm64: sigcontext.h: delete duplicated word
arm64: ptrace.h: delete duplicated word
arm64: pgtable-hwdef.h: delete duplicated words
bus: fsl-mc: Add ACPI support for fsl-mc
bus/fsl-mc: Refactor the MSI domain creation in the DPRC driver
of/irq: Make of_msi_map_rid() PCI bus agnostic
of/irq: make of_msi_map_get_device_domain() bus agnostic
dt-bindings: arm: fsl: Add msi-map device-tree binding for fsl-mc bus
of/device: Add input id to of_dma_configure()
of/iommu: Make of_map_rid() PCI agnostic
ACPI/IORT: Add an input ID to acpi_dma_configure()
ACPI/IORT: Remove useless PCI bus walk
ACPI/IORT: Make iort_msi_map_rid() PCI agnostic
ACPI/IORT: Make iort_get_device_domain IRQ domain agnostic
ACPI/IORT: Make iort_match_node_callback walk the ACPI namespace for NC
arm64: enable time namespace support
arm64/vdso: Restrict splitting VVAR VMA
arm64/vdso: Handle faults on timens page
...
Diffstat (limited to 'arch')
50 files changed, 1016 insertions, 324 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 2144530d1428..2f8f7e54792f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -16,10 +16,10 @@ /* * To ensure dependency ordering is preserved for the _relaxed and - * _release atomics, an smp_read_barrier_depends() is unconditionally - * inserted into the _relaxed variants, which are used to build the - * barriered versions. Avoid redundant back-to-back fences in the - * _acquire and _fence versions. + * _release atomics, an smp_mb() is unconditionally inserted into the + * _relaxed variants, which are used to build the barriered versions. + * Avoid redundant back-to-back fences in the _acquire and _fence + * versions. */ #define __atomic_acquire_fence() #define __atomic_post_full_fence() @@ -70,7 +70,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -88,7 +88,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -123,7 +123,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -141,7 +141,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h index 92ec486a4f9e..c56bfffc9918 100644 --- a/arch/alpha/include/asm/barrier.h +++ b/arch/alpha/include/asm/barrier.h @@ -2,64 +2,15 @@ #ifndef __BARRIER_H #define __BARRIER_H -#include <asm/compiler.h> - #define mb() __asm__ __volatile__("mb": : :"memory") #define rmb() __asm__ __volatile__("mb": : :"memory") #define wmb() __asm__ __volatile__("wmb": : :"memory") -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ -#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") +#define __smp_load_acquire(p) \ +({ \ + compiletime_assert_atomic_type(*p); \ + __READ_ONCE(*p); \ +}) #ifdef CONFIG_SMP #define __ASM_SMP_MB "\tmb\n" diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 162c17b2631f..660b14ce1317 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -277,9 +277,9 @@ extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; retur extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } /* - * The smp_read_barrier_depends() in the following functions are required to - * order the load of *dir (the pointer in the top level page table) with any - * subsequent load of the returned pmd_t *ret (ret is data dependent on *dir). + * The smp_rmb() in the following functions are required to order the load of + * *dir (the pointer in the top level page table) with any subsequent load of + * the returned pmd_t *ret (ret is data dependent on *dir). * * If this ordering is not enforced, the CPU might load an older value of * *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for @@ -293,7 +293,7 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) { pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); - smp_read_barrier_depends(); /* see above */ + smp_rmb(); /* see above */ return ret; } #define pmd_offset pmd_offset @@ -303,7 +303,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) { pte_t *ret = (pte_t *) pmd_page_vaddr(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); - smp_read_barrier_depends(); /* see above */ + smp_rmb(); /* see above */ return ret; } #define pte_offset_kernel pte_offset_kernel diff --git a/arch/alpha/include/asm/rwonce.h b/arch/alpha/include/asm/rwonce.h new file mode 100644 index 000000000000..35542bcf92b3 --- /dev/null +++ b/arch/alpha/include/asm/rwonce.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Google LLC. + */ +#ifndef __ASM_RWONCE_H +#define __ASM_RWONCE_H + +#ifdef CONFIG_SMP + +#include <asm/barrier.h> + +/* + * Alpha is apparently daft enough to reorder address-dependent loads + * on some CPU implementations. Knock some common sense into it with + * a memory barrier in READ_ONCE(). + * + * For the curious, more information about this unusual reordering is + * available in chapter 15 of the "perfbook": + * + * https://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html + * + */ +#define __READ_ONCE(x) \ +({ \ + __unqual_scalar_typeof(x) __x = \ + (*(volatile typeof(__x) *)(&(x))); \ + mb(); \ + (typeof(x))__x; \ +}) + +#endif /* CONFIG_SMP */ + +#include <asm-generic/rwonce.h> + +#endif /* __ASM_RWONCE_H */ diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 36dc18553ed8..1b207cf07697 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/errno.h> #include <asm/unistd.h> #include <asm/vdso/cp15.h> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 66dc41fd49f2..73aee7290cdf 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -118,6 +118,7 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -1327,6 +1328,8 @@ config SWP_EMULATION ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that they are always undefined. Say Y here to enable software emulation of these instructions for userspace using LDXR/STXR. + This feature can be controlled at runtime with the abi.swp + sysctl which is disabled by default. In some older versions of glibc [<=2.8] SWP is used during futex trylock() operations with the assumption that the code will not @@ -1353,7 +1356,8 @@ config CP15_BARRIER_EMULATION Say Y here to enable software emulation of these instructions for AArch32 userspace code. When this option is enabled, CP15 barrier usage is traced which can help - identify software that needs updating. + identify software that needs updating. This feature can be + controlled at runtime with the abi.cp15_barrier sysctl. If unsure, say Y @@ -1364,7 +1368,8 @@ config SETEND_EMULATION AArch32 EL0, and is deprecated in ARMv8. Say Y here to enable software emulation of the instruction - for AArch32 userspace code. + for AArch32 userspace code. This feature can be controlled + at runtime with the abi.setend sysctl. Note: All the cpus on the system must have mixed endian support at EL0 for this feature to be enabled. If a new CPU - which doesn't support mixed @@ -1596,6 +1601,20 @@ config ARM64_AMU_EXTN correctly reflect reality. Most commonly, the value read will be 0, indicating that the counter is not enabled. +config AS_HAS_ARMV8_4 + def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a) + +config ARM64_TLB_RANGE + bool "Enable support for tlbi range feature" + default y + depends on AS_HAS_ARMV8_4 + help + ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a + range of input addresses. + + The feature introduces new assembly instructions, and they were + support when binutils >= 2.30. + endmenu menu "ARMv8.5 architectural features" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 70f5905954dd..55bc8546d9c7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -82,11 +82,18 @@ endif # compiler to generate them and consequently to break the single image contract # we pass it only to the assembler. This option is utilized only in case of non # integrated assemblers. +ifneq ($(CONFIG_AS_HAS_ARMV8_4), y) branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a endif +endif KBUILD_CFLAGS += $(branch-prot-flags-y) +ifeq ($(CONFIG_AS_HAS_ARMV8_4), y) +# make sure to pass the newest target architecture to -march. +KBUILD_CFLAGS += -Wa,-march=armv8.4-a +endif + ifeq ($(CONFIG_SHADOW_CALL_STACK), y) KBUILD_CFLAGS += -ffixed-x18 endif diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2ca7ba69c318..39273b5c14be 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -66,6 +66,7 @@ CONFIG_SCHED_SMT=y CONFIG_NUMA=y CONFIG_SECCOMP=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index a45366c3909b..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -47,20 +47,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); /* ACPI table mapping after acpi_permanent_mmap is set */ -static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, - acpi_size size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (memblock_is_map_memory(phys)) - return (void __iomem *)__phys_to_virt(phys); - - /* - * We should still honor the memory's attribute here because - * crash dump kernel possibly excludes some ACPI (reclaim) - * regions from memblock list. - */ - return __ioremap(phys, size, __acpi_get_mem_attribute(phys)); -} +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap typedef u64 phys_cpuid_t; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index d7b3bb0cb180..07b643a70710 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -62,7 +62,9 @@ #define ARM64_HAS_GENERIC_AUTH 52 #define ARM64_HAS_32BIT_EL1 53 #define ARM64_BTI 54 +#define ARM64_HAS_ARMv8_4_TTL 55 +#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_NCAPS 55 +#define ARM64_NCAPS 57 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7c3d1ff091d..89b4f0142c28 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -692,6 +692,12 @@ static inline bool system_supports_bti(void) return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI); } +static inline bool system_supports_tlb_range(void) +{ + return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) && + cpus_have_const_cap(ARM64_HAS_TLB_RANGE); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 @@ -774,6 +780,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) } u32 get_kvm_ipa_limit(void); +void dump_cpu_features(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 94ba0c5bced2..5abf91e3494c 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -49,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); #define set_huge_swap_pte_at set_huge_swap_pte_at +void __init arm64_hugetlb_cma_reserve(void); + #include <asm-generic/hugetlb.h> #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index d683bcbf1e7c..22f73fe09030 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -95,6 +95,7 @@ #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) +/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 3bf626f6fe0c..329fb15f6bac 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -8,7 +8,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H -#include <linux/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/sparsemem.h> /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a1871bb32bb1..afa722504bfd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -10,11 +10,8 @@ #ifndef __ASM_MEMORY_H #define __ASM_MEMORY_H -#include <linux/compiler.h> #include <linux/const.h> #include <linux/sizes.h> -#include <linux/types.h> -#include <asm/bug.h> #include <asm/page-def.h> /* @@ -157,11 +154,15 @@ #endif #ifndef __ASSEMBLY__ -extern u64 vabits_actual; -#define PAGE_END (_PAGE_END(vabits_actual)) #include <linux/bitops.h> +#include <linux/compiler.h> #include <linux/mmdebug.h> +#include <linux/types.h> +#include <asm/bug.h> + +extern u64 vabits_actual; +#define PAGE_END (_PAGE_END(vabits_actual)) extern s64 physvirt_offset; extern s64 memstart_addr; @@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x) __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) +void dump_mem_limit(void); #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b0bd9b55594c..f2d7537d6f83 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) * take CPU migration into account. */ #define destroy_context(mm) do { } while(0) -void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); +void check_and_switch_context(struct mm_struct *mm); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) @@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void __switch_mm(struct mm_struct *next) { - unsigned int cpu = smp_processor_id(); - /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next) return; } - check_and_switch_context(next, cpu); + check_and_switch_context(next); } |