diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-30 08:52:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-30 08:52:28 -0700 |
| commit | 112e7e21519422b6f2bb0fa8061f5685e9757170 (patch) | |
| tree | f381b6b0032b7d5e63f56e06743545237834e440 /arch/loongarch/include | |
| parent | e55e5df193d247a38a5e1ac65a5316a0adcc22fa (diff) | |
| parent | 5ee35c769663cb1c5f26e12cad84904dc3002de8 (diff) | |
| download | linux-112e7e21519422b6f2bb0fa8061f5685e9757170.tar.gz linux-112e7e21519422b6f2bb0fa8061f5685e9757170.tar.bz2 linux-112e7e21519422b6f2bb0fa8061f5685e9757170.zip | |
Merge tag 'loongarch-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch updates from Huacai Chen:
- preliminary ClangBuiltLinux enablement
- add support to clone a time namespace
- add vector extensions support
- add SMT (Simultaneous Multi-Threading) support
- support dbar with different hints
- introduce hardware page table walker
- add jump-label implementation
- add rethook and uprobes support
- some bug fixes and other small changes
* tag 'loongarch-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (28 commits)
LoongArch: Remove five DIE_* definitions in kdebug.h
LoongArch: Add uprobes support
LoongArch: Use larch_insn_gen_break() for kprobes
LoongArch: Add larch_insn_gen_break() to generate break insns
LoongArch: Check for AMO instructions in insns_not_supported()
LoongArch: Move three functions from kprobes.c to inst.c
LoongArch: Replace kretprobe with rethook
LoongArch: Add jump-label implementation
LoongArch: Select HAVE_DEBUG_KMEMLEAK to support kmemleak
LoongArch: Export some arch-specific pm interfaces
LoongArch: Introduce hardware page table walker
LoongArch: Support dbar with different hints
LoongArch: Add SMT (Simultaneous Multi-Threading) support
LoongArch: Add vector extensions support
LoongArch: Add support to clone a time namespace
Makefile: Add loongarch target flag for Clang compilation
LoongArch: Mark Clang LTO as working
LoongArch: Include KBUILD_CPPFLAGS in CHECKFLAGS invocation
LoongArch: vDSO: Use CLANG_FLAGS instead of filtering out '--target='
LoongArch: Tweak CFLAGS for Clang compatibility
...
Diffstat (limited to 'arch/loongarch/include')
29 files changed, 972 insertions, 184 deletions
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 77ad8e6f0906..6b222f227342 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 976a810352c6..8de6c4b83a61 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -8,11 +8,14 @@ #ifndef _ASM_LOONGARCH_ACPI_H #define _ASM_LOONGARCH_ACPI_H +#include <asm/suspend.h> + #ifdef CONFIG_ACPI extern int acpi_strict; extern int acpi_disabled; extern int acpi_pci_disabled; extern int acpi_noirq; +extern int pptt_enabled; #define acpi_os_ioremap acpi_os_ioremap void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); @@ -30,6 +33,14 @@ static inline bool acpi_has_cpu_in_madt(void) } extern struct list_head acpi_wakeup_device_list; +extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; + +extern int __init parse_acpi_topology(void); + +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_core_pic[cpu_logical_map(cpu)].processor_id; +} #endif /* !CONFIG_ACPI */ @@ -37,12 +48,10 @@ extern struct list_head acpi_wakeup_device_list; extern int loongarch_acpi_suspend(void); extern int (*acpi_suspend_lowlevel)(void); -extern void loongarch_suspend_enter(void); static inline unsigned long acpi_get_wakeup_address(void) { #ifdef CONFIG_SUSPEND - extern void loongarch_wakeup_start(void); return (unsigned long)loongarch_wakeup_start; #endif return 0UL; diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c51a1b43acb4..79e1d53fea89 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -270,6 +270,399 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vst $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vst $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vst $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vst $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vst $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vst $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vst $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vst $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vst $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vst $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vst $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vst $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + vst $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vst $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vst $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vst $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vst $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vst $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vst $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vst $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vst $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vst $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vst $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vst $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vst $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vst $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vst $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vst $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vst $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vst $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vst $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vst $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vld $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vld $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vld $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vld $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vld $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vld $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vld $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vld $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vld $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vld $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vld $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vld $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + vld $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vld $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vld $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vld $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vld $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vld $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vld $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vld $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vld $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vld $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vld $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vld $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vld $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vld $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vld $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vld $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vld $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vld $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vld $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vld $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + vpickve2gr.d \tmp, \vd, 1 + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + ld.d \tmp, \base, (\off+8) + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvst $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvst $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvst $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvst $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + xvst $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvst $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvst $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvst $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvst $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvst $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvst $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvst $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvst $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvst $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvst $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvst $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvst $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvst $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvst $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvst $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvst $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvst $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvst $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvst $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvst $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvst $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvst $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvst $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvst $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvst $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvst $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvst $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvld $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvld $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvld $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvld $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + xvld $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvld $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvld $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvld $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvld $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvld $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvld $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvld $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvld $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvld $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvld $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvld $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvld $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvld $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvld $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvld $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvld $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvld $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvld $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvld $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvld $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvld $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvld $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvld $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvld $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvld $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvld $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvld $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp0 tmp1 off + vld \tmp0, \base, (\off+16) + xvpermi.q \xd, \tmp1, 0x2 + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + /* Save $vr31 ($xr31 lower bits) with xvpickve2gr */ + xvpickve2gr.d $r17, $xr31, 0 + xvpickve2gr.d $r18, $xr31, 1 + lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */ + xvinsgr2vr.d $xr31, $r17, 0 + xvinsgr2vr.d $xr31, $r18, 1 + .endm + + .macro lasx_init_upper xd tmp + xvinsgr2vr.d \xd, \tmp, 2 + xvinsgr2vr.d \xd, \tmp, 3 + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro not dst src nor \dst, \src, zero .endm diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h index cda977675854..4b663f197706 100644 --- a/arch/loongarch/include/asm/barrier.h +++ b/arch/loongarch/include/asm/barrier.h @@ -5,27 +5,56 @@ #ifndef __ASM_BARRIER_H #define __ASM_BARRIER_H -#define __sync() __asm__ __volatile__("dbar 0" : : : "memory") +/* + * Hint encoding: + * + * Bit4: ordering or completion (0: completion, 1: ordering) + * Bit3: barrier for previous read (0: true, 1: false) + * Bit2: barrier for previous write (0: true, 1: false) + * Bit1: barrier for succeeding read (0: true, 1: false) + * Bit0: barrier for succeeding write (0: true, 1: false) + * + * Hint 0x700: barrier for "read after read" from the same address + */ + +#define DBAR(hint) __asm__ __volatile__("dbar %0 " : : "I"(hint) : "memory") + +#define crwrw 0b00000 +#define cr_r_ 0b00101 +#define c_w_w 0b01010 -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#define fast_iob() __sync() -#define wbflush() __sync() +#define orwrw 0b10000 +#define or_r_ 0b10101 +#define o_w_w 0b11010 -#define wmb() fast_wmb() -#define rmb() fast_rmb() -#define mb() fast_mb() -#define iob() fast_iob() +#define orw_w 0b10010 +#define or_rw 0b10100 -#define __smp_mb() __asm__ __volatile__("dbar 0" : : : "memory") -#define __smp_rmb() __asm__ __volatile__("dbar 0" : : : "memory") -#define __smp_wmb() __asm__ __volatile__("dbar 0" : : : "memory") +#define c_sync() DBAR(crwrw) +#define c_rsync() DBAR(cr_r_) +#define c_wsync() DBAR(c_w_w) + +#define o_sync() DBAR(orwrw) +#define o_rsync() DBAR(or_r_) +#define o_wsync() DBAR(o_w_w) + +#define ldacq_mb() DBAR(or_rw) +#define strel_mb() DBAR(orw_w) + +#define mb() c_sync() +#define rmb() c_rsync() +#define wmb() c_wsync() +#define iob() c_sync() +#define wbflush() c_sync() + +#define __smp_mb() o_sync() +#define __smp_rmb() o_rsync() +#define __smp_wmb() o_wsync() #ifdef CONFIG_SMP -#define __WEAK_LLSC_MB " dbar 0 \n" +#define __WEAK_LLSC_MB " dbar 0x700 \n" #else -#define __WEAK_LLSC_MB " \n" +#define __WEAK_LLSC_MB " \n" #endif #define __smp_mb__before_atomic() barrier() @@ -59,68 +88,19 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, return mask; } -#define __smp_load_acquire(p) \ -({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - unsigned long __tmp = 0; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - *(__u8 *)__u.__c = *(volatile __u8 *)p; \ - __smp_mb(); \ - break; \ - case 2: \ - *(__u16 *)__u.__c = *(volatile __u16 *)p; \ - __smp_mb(); \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amor_db.w %[val], %[tmp], %[mem] \n" \ - : [val] "=&r" (*(__u32 *)__u.__c) \ - : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amor_db.d %[val], %[tmp], %[mem] \n" \ - : [val] "=&r" (*(__u64 *)__u.__c) \ - : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - } \ - (typeof(*p))__u.__val; \ +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + ldacq_mb(); \ + ___p1; \ }) -#define __smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - unsigned long __tmp; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - __smp_mb(); \ - *(volatile __u8 *)p = *(__u8 *)__u.__c; \ - break; \ - case 2: \ - __smp_mb(); \ - *(volatile __u16 *)p = *(__u16 *)__u.__c; \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amswap_db.w %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u32 *)__u.__c) \ - : ); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amswap_db.d %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u64 *)__u.__c) \ - : ); \ - break; \ - } \ +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + strel_mb(); \ + WRITE_ONCE(*p, v); \ } while (0) #define __smp_store_mb(p, v) \ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index f6177f133477..2eafe6a6aca8 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -64,6 +64,6 @@ #define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) - +#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index cd73a6f57fe3..900589cb159d 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -54,6 +54,7 @@ struct cpuinfo_loongarch { struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ + int global_id; /* physical global thread number */ int vabits; /* Virtual Address size in bits */ int pabits; /* Physical Address size in bits */ unsigned int ksave_mask; /* Usable KSave mask. */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 88773d849e33..48b9f7168bcc 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -98,6 +98,7 @@ enum cpu_type_enum { #define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -125,5 +126,6 @@ enum cpu_type_enum { #define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index b6be527831dd..e56610ae8592 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -40,6 +40,7 @@ #define fs6 $f30 #define fs7 $f31 +#ifndef CONFIG_AS_HAS_FCSR_CLASS /* * Current binutils expects *GPRs* at FCSR position for the FCSR * operation instructions, so define aliases for those used. @@ -48,5 +49,11 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 +#else +#define fcsr0 $fcsr0 +#define fcsr1 $fcsr1 +#define fcsr2 $fcsr2 +#define fcsr3 $fcsr3 +#endif #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 192f8e35d912..e4193d637f66 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/inclu |
