From b23c83ad2c638420ec0608a9de354507c41bec29 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:41 -0700 Subject: x86/reboot: VMCLEAR active VMCSes before emergency reboot VMCLEAR active VMCSes before any emergency reboot, not just if the kernel may kexec into a new kernel after a crash. Per Intel's SDM, the VMX architecture doesn't require the CPU to flush the VMCS cache on INIT. If an emergency reboot doesn't RESET CPUs, cached VMCSes could theoretically be kept and only be written back to memory after the new kernel is booted, i.e. could effectively corrupt memory after reboot. Opportunistically remove the setting of the global pointer to NULL to make checkpatch happy. Cc: Andrew Cooper Link: https://lore.kernel.org/r/20230721201859.2307736-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/crash.c | 31 ------------------------------- arch/x86/kernel/reboot.c | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cdd92ab43cda..54cd959cb316 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -48,38 +48,12 @@ struct crash_memmap_data { unsigned int type; }; -/* - * This is used to VMCLEAR all VMCSs loaded on the - * processor. And when loading kvm_intel module, the - * callback function pointer will be assigned. - * - * protected by rcu. 
- */ -crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; -EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); - -static inline void cpu_crash_vmclear_loaded_vmcss(void) -{ - crash_vmclear_fn *do_vmclear_operation = NULL; - - rcu_read_lock(); - do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); - if (do_vmclear_operation) - do_vmclear_operation(); - rcu_read_unlock(); -} - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { crash_save_cpu(regs, cpu); - /* - * VMCLEAR VMCSs loaded on all cpus if needed. - */ - cpu_crash_vmclear_loaded_vmcss(); - /* * Disable Intel PT to stop its logging */ @@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); - /* - * VMCLEAR VMCSs loaded on this cpu if needed. - */ - cpu_crash_vmclear_loaded_vmcss(); - cpu_emergency_disable_virtualization(); /* diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3adbe97015c1..3fa4c6717a1d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -787,6 +787,26 @@ void machine_crash_shutdown(struct pt_regs *regs) } #endif +/* + * This is used to VMCLEAR all VMCSs loaded on the + * processor. And when loading kvm_intel module, the + * callback function pointer will be assigned. + * + * protected by rcu. + */ +crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; +EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); + +static inline void cpu_crash_vmclear_loaded_vmcss(void) +{ + crash_vmclear_fn *do_vmclear_operation = NULL; + + rcu_read_lock(); + do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); + if (do_vmclear_operation) + do_vmclear_operation(); + rcu_read_unlock(); +} /* This is the CPU performing the emergency shutdown work. 
*/ int crashing_cpu = -1; @@ -798,6 +818,8 @@ int crashing_cpu = -1; */ void cpu_emergency_disable_virtualization(void) { + cpu_crash_vmclear_loaded_vmcss(); + cpu_emergency_vmxoff(); cpu_emergency_svm_disable(); } -- cgit v1.2.3 From 5e408396c60cd0f0b53a43713016b6d6af8d69e0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:42 -0700 Subject: x86/reboot: Harden virtualization hooks for emergency reboot Provide dedicated helpers to (un)register virt hooks used during an emergency crash/reboot, and WARN if there is an attempt to overwrite the registered callback, or an attempt to do an unpaired unregister. Opportunistically use rcu_assign_pointer() instead of RCU_INIT_POINTER(), mainly so that the set/unset paths are more symmetrical, but also because any performance gains from using RCU_INIT_POINTER() are meaningless for this code. Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3fa4c6717a1d..62ccedeb5e2b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -794,17 +794,35 @@ void machine_crash_shutdown(struct pt_regs *regs) * * protected by rcu. 
*/ -crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; -EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); +static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; + +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, callback); +} +EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); + +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); static inline void cpu_crash_vmclear_loaded_vmcss(void) { - crash_vmclear_fn *do_vmclear_operation = NULL; + cpu_emergency_virt_cb *callback; rcu_read_lock(); - do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); - if (do_vmclear_operation) - do_vmclear_operation(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); rcu_read_unlock(); } -- cgit v1.2.3 From 119b5cb4ffd0166f3e98e9ee042f5046f7744f28 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:43 -0700 Subject: x86/reboot: KVM: Handle VMXOFF in KVM's reboot callback Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't possibly be post-VMXON. 
Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 62ccedeb5e2b..d2d0f2672a64 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs) } #endif -/* - * This is used to VMCLEAR all VMCSs loaded on the - * processor. And when loading kvm_intel module, the - * callback function pointer will be assigned. - * - * protected by rcu. - */ +/* RCU-protected callback to disable virtualization prior to reboot. */ static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) @@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) } EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); -static inline void cpu_crash_vmclear_loaded_vmcss(void) -{ - cpu_emergency_virt_cb *callback; - - rcu_read_lock(); - callback = rcu_dereference(cpu_emergency_virt_callback); - if (callback) - callback(); - rcu_read_unlock(); -} - /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; @@ -836,9 +819,15 @@ int crashing_cpu = -1; */ void cpu_emergency_disable_virtualization(void) { - cpu_crash_vmclear_loaded_vmcss(); + cpu_emergency_virt_cb *callback; + + rcu_read_lock(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); + rcu_read_unlock(); - cpu_emergency_vmxoff(); + /* KVM_AMD doesn't yet utilize the common callback. 
*/ cpu_emergency_svm_disable(); } -- cgit v1.2.3 From baeb4de7ad12b700a91f2a7be8e1c0389a5c8fd4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:44 -0700 Subject: x86/reboot: KVM: Disable SVM during reboot via virt/KVM reboot callback Use the virt callback to disable SVM (and set GIF=1) during an emergency instead of blindly attempting to disable SVM. Like the VMX case, if a hypervisor, i.e. KVM, isn't loaded/active, SVM can't be in use. Acked-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d2d0f2672a64..48ad2d1ff83d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -826,9 +826,6 @@ void cpu_emergency_disable_virtualization(void) if (callback) callback(); rcu_read_unlock(); - - /* KVM_AMD doesn't yet utilize the common callback. */ - cpu_emergency_svm_disable(); } #if defined(CONFIG_SMP) -- cgit v1.2.3 From ad93c1a7c0102c93e92bf0c06412a1f588e015ab Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:45 -0700 Subject: x86/reboot: Assert that IRQs are disabled when turning off virtualization Assert that IRQs are disabled when turning off virtualization in an emergency. KVM enables hardware via on_each_cpu(), i.e. could re-enable hardware if a pending IPI were delivered after disabling virtualization. Remove a misleading comment from emergency_reboot_disable_virtualization() about "just" needing to guarantee the CPU is stable (see above). 
Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 48ad2d1ff83d..4cad7183b89e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -532,7 +532,6 @@ static inline void nmi_shootdown_cpus_on_restart(void); static void emergency_reboot_disable_virtualization(void) { - /* Just make sure we won't change CPUs while doing this */ local_irq_disable(); /* @@ -821,6 +820,13 @@ void cpu_emergency_disable_virtualization(void) { cpu_emergency_virt_cb *callback; + /* + * IRQs must be disabled as KVM enables virtualization in hardware via + * function call IPIs, i.e. IRQs need to be disabled to guarantee + * virtualization stays disabled. + */ + lockdep_assert_irqs_disabled(); + rcu_read_lock(); callback = rcu_dereference(cpu_emergency_virt_callback); if (callback) -- cgit v1.2.3 From edc8deb087d884bac2f7013f0c23af73042b23a7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:46 -0700 Subject: x86/reboot: Hoist "disable virt" helpers above "emergency reboot" path Move the various "disable virtualization" helpers above the emergency reboot path so that emergency_reboot_disable_virtualization() can be stubbed out in a future patch if neither KVM_INTEL nor KVM_AMD is enabled, i.e. if there is no in-tree user of CPU virtualization. No functional change intended. 
Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 90 ++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4cad7183b89e..85cb2dfcb67b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -530,6 +530,51 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); +/* RCU-protected callback to disable virtualization prior to reboot. */ +static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; + +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, callback); +} +EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); + +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); + +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_virt_cb *callback; + + /* + * IRQs must be disabled as KVM enables virtualization in hardware via + * function call IPIs, i.e. IRQs need to be disabled to guarantee + * virtualization stays disabled. 
+ */ + lockdep_assert_irqs_disabled(); + + rcu_read_lock(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); + rcu_read_unlock(); +} + static void emergency_reboot_disable_virtualization(void) { local_irq_disable(); @@ -786,54 +831,9 @@ void machine_crash_shutdown(struct pt_regs *regs) } #endif -/* RCU-protected callback to disable virtualization prior to reboot. */ -static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; - -void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) -{ - if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) - return; - - rcu_assign_pointer(cpu_emergency_virt_callback, callback); -} -EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); - -void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) -{ - if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) - return; - - rcu_assign_pointer(cpu_emergency_virt_callback, NULL); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); - /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; -/* - * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during - * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if - * GIF=0, i.e. if the crash occurred between CLGI and STGI. - */ -void cpu_emergency_disable_virtualization(void) -{ - cpu_emergency_virt_cb *callback; - - /* - * IRQs must be disabled as KVM enables virtualization in hardware via - * function call IPIs, i.e. IRQs need to be disabled to guarantee - * virtualization stays disabled. 
- */ - lockdep_assert_irqs_disabled(); - - rcu_read_lock(); - callback = rcu_dereference(cpu_emergency_virt_callback); - if (callback) - callback(); - rcu_read_unlock(); -} - #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; -- cgit v1.2.3 From 59765db5fc82726b32876b794667e2c6936a98ab Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:47 -0700 Subject: x86/reboot: Disable virtualization during reboot iff callback is registered Attempt to disable virtualization during an emergency reboot if and only if there is a registered virt callback, i.e. iff a hypervisor (KVM) is active. If there's no active hypervisor, then the CPU can't be operating with VMX or SVM enabled (barring an egregious bug). Checking for a valid callback instead of simply for SVM or VMX support can also eliminate spurious NMIs by avoiding the unnecessary call to nmi_shootdown_cpus_on_restart(). Note, IRQs are disabled, which prevents KVM from coming along and enabling virtualization after the fact. Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 85cb2dfcb67b..98e5db3fd7f4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -589,7 +588,7 @@ static void emergency_reboot_disable_virtualization(void) * Do the NMI shootdown even if virtualization is off on _this_ CPU, as * other CPUs may have virtualization enabled. */ - if (cpu_has_vmx() || cpu_has_svm(NULL)) { + if (rcu_access_pointer(cpu_emergency_virt_callback)) { /* Safely force _this_ CPU out of VMX/SVM operation. 
*/ cpu_emergency_disable_virtualization(); -- cgit v1.2.3 From 261cd5ed934e6923187cf1c9eaa6cb63f2b81212 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 13:18:48 -0700 Subject: x86/reboot: Expose VMCS crash hooks if and only if KVM_{INTEL,AMD} is enabled Expose the crash/reboot hooks used by KVM to disable virtualization in hardware and unblock INIT only if there's a potential in-tree user, i.e. either KVM_INTEL or KVM_AMD is enabled. Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20230721201859.2307736-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kernel/reboot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 98e5db3fd7f4..830425e6d38e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -529,6 +529,7 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); +#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) /* RCU-protected callback to disable virtualization prior to reboot. */ static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; @@ -596,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void) nmi_shootdown_cpus_on_restart(); } } - +#else +static void emergency_reboot_disable_virtualization(void) { } +#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ void __attribute__((weak)) mach_reboot_fixups(void) { -- cgit v1.2.3