summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/powerpc/DAWR-POWER9.txt32
-rw-r--r--arch/powerpc/include/asm/cpuidle.h19
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h8
-rw-r--r--arch/powerpc/include/asm/opal-api.h7
-rw-r--r--arch/powerpc/include/asm/opal.h7
-rw-r--r--arch/powerpc/include/asm/paca.h40
-rw-r--r--arch/powerpc/include/asm/processor.h9
-rw-r--r--arch/powerpc/include/asm/reg.h8
-rw-r--r--arch/powerpc/include/asm/xive.h14
-rw-r--r--arch/powerpc/kernel/asm-offsets.c18
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S23
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c62
-rw-r--r--arch/powerpc/kernel/idle_book3s.S1099
-rw-r--r--arch/powerpc/kernel/process.c9
-rw-r--r--arch/powerpc/kernel/ptrace.c3
-rw-r--r--arch/powerpc/kernel/setup-common.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S141
-rw-r--r--arch/powerpc/platforms/powernv/idle.c902
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c3
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/sysdev/xive/native.c99
-rw-r--r--arch/powerpc/xmon/xmon.c24
23 files changed, 1259 insertions, 1277 deletions
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt
index 2feaa6619658..bdec03650941 100644
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ b/Documentation/powerpc/DAWR-POWER9.txt
@@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be
trapped in GDB. The watchpoint is remembered, so if the guest is
migrated back to the POWER8 host, it will start working again.
+Force enabling the DAWR
+=============================
+Kernels (since ~v5.2) have an option to force enable the DAWR via:
+
+ echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR. This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+ tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+Any errors/failures/skips mean something is wrong.
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 43e5f31fe64d..9844b3ded187 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -27,10 +27,11 @@
* the THREAD_WINKLE_BITS are set, which indicate which threads have not
* yet woken from the winkle state.
*/
-#define PNV_CORE_IDLE_LOCK_BIT 0x10000000
+#define NR_PNV_CORE_IDLE_LOCK_BIT 28
+#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
+#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16
#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000
-#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000
#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00
@@ -68,16 +69,6 @@
#define ERR_DEEP_STATE_ESL_MISMATCH -2
#ifndef __ASSEMBLY__
-/* Additional SPRs that need to be saved/restored during stop */
-struct stop_sprs {
- u64 pid;
- u64 ldbar;
- u64 fscr;
- u64 hfscr;
- u64 mmcr1;
- u64 mmcr2;
- u64 mmcra;
-};
#define PNV_IDLE_NAME_LEN 16
struct pnv_idle_states_t {
@@ -92,10 +83,6 @@ struct pnv_idle_states_t {
extern struct pnv_idle_states_t *pnv_idle_states;
extern int nr_pnv_idle_states;
-extern u32 pnv_fastsleep_workaround_at_entry[];
-extern u32 pnv_fastsleep_workaround_at_exit[];
-
-extern u64 pnv_first_deep_stop_state;
unsigned long pnv_cpu_offline(unsigned int cpu);
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c90b..0fe8c1e46bbc 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);
+extern int set_dawr(struct arch_hw_breakpoint *brk);
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+ return dawr_force_enable;
+}
+
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
struct pt_regs *regs) { }
+static inline bool dawr_enabled(void) { return false; }
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..e1d118ac61dc 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -186,8 +186,8 @@
#define OPAL_XIVE_FREE_IRQ 140
#define OPAL_XIVE_SYNC 141
#define OPAL_XIVE_DUMP 142
-#define OPAL_XIVE_RESERVED3 143
-#define OPAL_XIVE_RESERVED4 144
+#define OPAL_XIVE_GET_QUEUE_STATE 143
+#define OPAL_XIVE_SET_QUEUE_STATE 144
#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
@@ -210,7 +210,8 @@
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
#define OPAL_NX_COPROC_INIT 167
-#define OPAL_LAST 167
+#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_LAST 170
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..4e978d4dea5c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -279,6 +279,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+ __be32 *out_qtoggle,
+ __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+ uint32_t qtoggle,
+ uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
uint64_t desc, uint16_t pe_number);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e843bc5d1a0f..245d11a71784 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -173,7 +173,6 @@ struct paca_struct {
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
@@ -183,23 +182,28 @@ struct paca_struct {
#endif
#ifdef CONFIG_PPC_POWERNV
- /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
- u32 *core_idle_state_ptr;
- u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
- /* Mask to indicate thread id in core */
- u8 thread_mask;
- /* Mask to denote subcore sibling threads */
- u8 subcore_sibling_mask;
- /* Flag to request this thread not to stop */
- atomic_t dont_stop;
- /* The PSSCR value that the kernel requested before going to stop */
- u64 requested_psscr;
-
- /*
- * Save area for additional SPRs that need to be
- * saved/restored during cpuidle stop.
- */
- struct stop_sprs stop_sprs;
+ /* PowerNV idle fields */
+ /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+ unsigned long idle_state;
+ union {
+ /* P7/P8 specific fields */
+ struct {
+ /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ u8 thread_idle_state;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+ };
+
+ /* P9 specific fields */
+ struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
+#endif
+ };
+ };
#endif
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 540949b397d4..706ac5df546f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -414,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
}
#endif
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+
extern void power7_idle_type(unsigned long type);
-extern unsigned long power9_idle_stop(unsigned long psscr_val);
-extern unsigned long power9_offline_stop(unsigned long psscr_val);
extern void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c5b2aff0ce8e..10caa145f98b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -168,6 +168,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT 60
#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
@@ -758,10 +759,9 @@
#define SRR1_WAKERESET 0x00100000 /* System reset */
#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
-#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
- * may not be recoverable */
-#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
-#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
+#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */
+#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */
+#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */
#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3c704f5dd3ae..b579a943407b 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
+extern void xive_native_sync_queue(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
+extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+
#else
static inline bool xive_enabled(void) { return false; }
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 60b82198de7c..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -271,7 +271,6 @@ int main(void)
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
- OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -773,23 +772,6 @@ int main(void)
OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
#endif
-#ifdef CONFIG_PPC_POWERNV
- OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
- OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
- OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
- OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
- OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
- OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
-#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
- STOP_SPR(STOP_PID, pid);
- STOP_SPR(STOP_LDBAR, ldbar);
- STOP_SPR(STOP_FSCR, fscr);
- STOP_SPR(STOP_HFSCR, hfscr);
- STOP_SPR(STOP_MMCR1, mmcr1);
- STOP_SPR(STOP_MMCR2, mmcr2);
- STOP_SPR(STOP_MMCRA, mmcra);
-#endif
-
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bedd89438827..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -121,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100)
mfspr r10,SPRN_SRR1 ; \
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
- cmpwi cr3,r10,2 ; \
+ cmpwi cr1,r10,2 ; \
+ mfspr r3,SPRN_SRR1 ; \
+ bltlr cr1 ; /* no state loss, return to idle caller */ \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
KVMTEST_PR(n) ; \
@@ -145,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
- mfspr r12,SPRN_SRR1
- b pnv_powersave_wakeup
+ /*
+ * This must be a direct branch (without linker branch stub) because
+ * we can not use TOC at this point as r2 may not be restored yet.
+ */
+ b idle_return_gpr_loss
#endif
/*
@@ -429,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
+ ld r4,_LINK(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
sth r11,PACA_IN_MCE(r13)
- /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
- /* Recoverability could be improved by reducing the use of SRR1. */
- li r11,0
- mtmsrd r11,1
-
- b pnv_powersave_wakeup_mce
+ mtlr r4
+ rlwinm r10,r3,47-31,30,31
+ cmpwi cr1,r10,2
+ bltlr cr1 /* no state loss, return to idle caller */
+ b idle_return_gpr_loss
#endif
/*
* Handle machine check early in real mode. We come here with
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a6773119..da307dd93ee3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/hvcall.h>
#include <linux/uaccess.h>
/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((attr->bp_addr >> 9) !=
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if they hypervisor won't allow us to write DAWR */
+ if ((!dawr_force_enable) &&
+ (firmware_has_feature(FW_FEATURE_LPAR)) &&
+ (set_dawr(&null_brk) != H_SUCCESS))
+ return -1;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ dawr_force_enable = false;
+
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ dawr_force_enable = false;
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 4a860d3b9229..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,995 +1,188 @@
/*
- * This file contains idle entry/exit functions for POWER7,
- * POWER8 and POWER9 CPUs.
+ * Copyright 2018, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
*/
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/exception-64s.h>
-#include <asm/book3s/64/mmu-hash.h>
-#include <asm/mmu.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
-/*
- * Use unused space in the interrupt stack to save and restore
- * registers for winkle support.
- */
-#define _MMCR0 GPR0
-#define _SDR1 GPR3
-#define _PTCR GPR3
-#define _RPR GPR4
-#define _SPURR GPR5
-#define _PURR GPR6
-#define _TSCR GPR7
-#define _DSCR GPR8
-#define _AMOR GPR9
-#define _WORT GPR10
-#define _WORC GPR11
-#define _LPCR GPR12
-
-#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
- .text
-
-/*
- * Used by threads before entering deep idle states. Saves SPRs
- * in interrupt stack frame
- */
-save_sprs_to_stack:
- /*
- * Note all register i.e per-core, per-subcore or per-thread is saved
- * here since any thread in the core might wake up first
- */
-BEGIN_FTR_SECTION
- /*
- * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
- * SDR1 here
- */
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
- mfspr r3,SPRN_LPCR
- std r3,_LPCR(r1)
-FTR_SECTION_ELSE
- mfspr r3,SPRN_SDR1
- std r3,_SDR1(r1)
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
- mfspr r3,SPRN_RPR
- std r3,_RPR(r1)
- mfspr r3,SPRN_SPURR
- std r3,_SPURR(r1)
- mfspr r3,SPRN_PURR
- std r3,_PURR(r1)
- mfspr r3,SPRN_TSCR
- std r3,_TSCR(r1)
- mfspr r3,SPRN_DSCR
- std r3,_DSCR(r1)
- mfspr r3,SPRN_AMOR
- std r3,_AMOR(r1)
- mfspr r3,SPRN_WORT
- std r3,_WORT(r1)
- mfspr r3,SPRN_WORC
- std r3,_WORC(r1)
/*
- * On POWER9, there are idle states such as stop4, invoked via cpuidle,
- * that lose hypervisor resources. In such cases, we need to save
- * additional SPRs before entering those idle states so that they can
- * be restored to their older values on wakeup from the idle state.
+ * Desired PSSCR in r3
*
- * On POWER8, the only such deep idle state is winkle which is used
- * only in the context of CPU-Hotplug, where these additional SPRs are
- * reinitiazed to a sane value. Hence there is no need to save/restore
- * these SPRs.
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
*/
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
-power9_save_additional_sprs:
- mfspr r3, SPRN_PID
- mfspr r4, SPRN_LDBAR
- std r3, STOP_PID(r13)
- std r4, STOP_LDBAR(r13)
-
- mfspr r3, SPRN_FSCR
- mfspr r4, SPRN_HFSCR
- std r3, STOP_FSCR(r13)
- std r4, STOP_HFSCR(r13)
-
- mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR0
- std r3, STOP_MMCRA(r13)
- std r4, _MMCR0(r1)
-
- mfspr r3, SPRN_MMCR1
- mfspr r4, SPRN_MMCR2
- std r3, STOP_MMCR1(r13)
- std r4, STOP_MMCR2(r13)
- blr
-
-power9_restore_additional_sprs:
- ld r3,_LPCR(r1)
- ld r4, STOP_PID(r13)
- mtspr SPRN_LPCR,r3
- mtspr SPRN_PID, r4
-
- ld r3, STOP_LDBAR(r13)
- ld r4, STOP_FSCR(r13)
- mtspr SPRN_LDBAR, r3
- mtspr SPRN_FSCR, r4
-
- ld r3, STOP_HFSCR(r13)
- ld r4, STOP_MMCRA(r13)
- mtspr SPRN_HFSCR, r3
- mtspr SPRN_MMCRA, r4
-
- ld r3, _MMCR0(r1)
- ld r4, STOP_MMCR1(r13)
- mtspr SPRN_MMCR0, r3
- mtspr SPRN_MMCR1, r4
-
- ld r3, STOP_MMCR2(r13)
- ld r4, PACA_SPRG_VDSO(r13)
- mtspr SPRN_MMCR2, r3
- mtspr SPRN_SPRG3, r4
+_GLOBAL(isa300_idle_stop_noloss)
+ mtspr SPRN_PSSCR,r3
+ PPC_STOP
+ li r3,0
blr
/*
- * Used by threads when the lock bit of core_idle_state is set.
- * Threads will spin in HMT_LOW until the lock bit is cleared.
- * r14 - pointer to core_idle_state
- * r15 - used to load contents of core_idle_state
- * r9 - used as a temporary variable
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alteratively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
*/
-
-core_idle_lock_held:
- HMT_LOW
-3: lwz r15,0(r14)
- andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne 3b
- HMT_MEDIUM
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne- core_idle_lock_held
- blr
-
-/* Reuse some unused pt_regs slots for AMR/IAMR/UAMOR/UAMOR */
-#define PNV_POWERSAVE_AMR _TRAP
-#define PNV_POWERSAVE_IAMR _DAR
-#define PNV_POWERSAVE_UAMOR _DSISR
-#define PNV_POWERSAVE_AMOR RESULT
+_GLOBAL(isa300_idle_stop_mayloss)
+ mtspr SPRN_PSSCR,r3
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ /* 168 bytes */
+ PPC_STOP
+ b . /* catch bugs */
/*
- * Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested PSSCR value in POWER9
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this after calling
+ * to return to the idle sleep function caller with r3 as the return code.
*
- * Address of idle handler to branch to in realmode in r4
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
*/
-pnv_powersave_common:
- /* Use r3 to pass state nap/sleep/winkle */
- /* NAP is a state loss, we create a regs frame on the
- * stack, fill it up with the state we care about and
- * stick a pointer to it in PACAR1. We really only
- * need to save PC, some CR bits and the NV GPRs,
- * but for now an interrupt frame will do.
- */
- mtctr r4
-
- mflr r0
- std r0,16(r1)
- stdu r1,-INT_FRAME_SIZE(r1)
- std r0,_LINK(r1)
- std r0,_NIP(r1)
-
- /* We haven't lost state ... yet */
- li r0,0
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* Continue saving state */
- SAVE_GPR(2, r1)
- SAVE_NVGPRS(r1)
-
-BEGIN_FTR_SECTION
- mfspr r4, SPRN_AMR
- mfspr r5, SPRN_IAMR
- mfspr r6, SPRN_UAMOR
- std r4, PNV_POWERSAVE_AMR(r1)
- std r5, PNV_POWERSAVE_IAMR(r1)
- std r6, PNV_POWERSAVE_UAMOR(r1)
-BEGIN_FTR_SECTION_NESTED(42)
- mfspr r7, SPRN_AMOR
- std r7, PNV_POWERSAVE_AMOR(r1)
-END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-
- mfcr r5
- std r5,_CCR(r1)
- std r1,PACAR1(r13)
-
-BEGIN_FTR_SECTION
- /*
- * POWER9 does not require real mode to stop, and presently does not
- * set hwthread_state for KVM (threads don't share MMU context), so
- * we can remain in virtual mode for this.
- */
- bctr
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- /*
- * POWER8
- * Go to real mode to do the nap, as required by the architecture.
- * Also, we need to be in real mode before setting hwthread_state,
- * because as soon as we do that, another thread can switch
- * the MMU context to the guest.
- */
- LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- mtmsrd r7,0
- bctr
+_GLOBAL(idle_return_gpr_loss)
+ ld r1,PACAR1(r13)
+ ld r4,-8*19(r1)
+ ld r5,-8*20(r1)
+ mtlr r4
+ mtcr r5
+ /*
+ * KVM nap requires r2 to be saved, rather than just restoring it
+ * from PACATOC. This could be avoided for that less common case
+ * if KVM saved its r2.
+ */
+ ld r2,-8*0(r1)
+ ld r14,-8*1(r1)
+ ld r15,-8*2(r1)
+ ld r16,-8*3(r1)
+ ld r17,-8*4(r1)
+ ld r18,-8*5(r1)
+ ld r19,-8*6(r1)
+ ld r20,-8*7(r1)
+ ld r21,-8*8(r1)
+ ld r22,-8*9(r1)
+ ld r23,-8*10(r1)
+ ld r24,-8*11(r1)
+ ld r25,-8*12(r1)
+ ld r26,-8*13(r1)
+ ld r27,-8*14(r1)
+ ld r28,-8*15(r1)
+ ld r29,-8*16(r1)
+ ld r30,-8*17(r1)
+ ld r31,-8*18(r1)
+ blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ *
+ * The 0(r1) slot is used to save r2 in isa206, so use that here.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
+ std r2,0(r1); \
ptesync; \
- ld r0,0(r1); \
-236: cmpd cr0,r0,r0; \
+ ld r2,0(r1); \
+236: cmpd cr0,r2,r2; \
bne 236b; \
- IDLE_INST;
-
-
- .globl pnv_enter_arch207_idle_mode
-pnv_enter_arch207_idle_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /******************************************************/
- /* N O T E W E L L ! ! ! N O T E W E L L */
- /* The following store to HSTATE_HWTHREAD_STATE(r13) */
- /* MUST occur in real mode, i.e. with the MMU off, */
- /* and the MMU must stay off until we clear this flag */