summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/head64.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-14 10:21:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-14 10:21:34 -0700
commitda9803dfd3955bd2f9909d55e23f188ad76dbe58 (patch)
tree9e4ea42559f82bf46340d91fa3312ea555686cf6 /arch/x86/kernel/head64.c
parent6873139ed078bfe0341d4cbb69e5af1b323bf532 (diff)
parent0ddfb1cf3b6b07c97cff16ea69931d986f9622ee (diff)
downloadlinux-da9803dfd3955bd2f9909d55e23f188ad76dbe58.tar.gz
linux-da9803dfd3955bd2f9909d55e23f188ad76dbe58.tar.bz2
linux-da9803dfd3955bd2f9909d55e23f188ad76dbe58.zip
Merge tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 SEV-ES support from Borislav Petkov: "SEV-ES enhances the current guest memory encryption support called SEV by also encrypting the guest register state, making the registers inaccessible to the hypervisor by en-/decrypting them on world switches. Thus, it adds additional protection to Linux guests against exfiltration, control flow and rollback attacks. With SEV-ES, the guest is in full control of what registers the hypervisor can access. This is provided by a guest-host exchange mechanism based on a new exception vector called VMM Communication Exception (#VC), a new instruction called VMGEXIT and a shared Guest-Host Communication Block which is a decrypted page shared between the guest and the hypervisor. Intercepts to the hypervisor become #VC exceptions in an SEV-ES guest so in order for that exception mechanism to work, the early x86 init code needed to be made able to handle exceptions, which, in itself, brings a bunch of very nice cleanups and improvements to the early boot code like an early page fault handler, allowing for on-demand building of the identity mapping. With that, !KASLR configurations do not use the EFI page table anymore but switch to a kernel-controlled one. The main part of this series adds the support for that new exchange mechanism. The goal has been to keep this as much as possibly separate from the core x86 code by concentrating the machinery in two SEV-ES-specific files: arch/x86/kernel/sev-es-shared.c arch/x86/kernel/sev-es.c Other interaction with core x86 code has been kept at minimum and behind static keys to minimize the performance impact on !SEV-ES setups. Work by Joerg Roedel and Thomas Lendacky and others" * tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (73 commits) x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer x86/sev-es: Check required CPU features for SEV-ES x86/efi: Add GHCB mappings when SEV-ES is active x86/sev-es: Handle NMI State x86/sev-es: Support CPU offline/online x86/head/64: Don't call verify_cpu() on starting APs x86/smpboot: Load TSS and getcpu GDT entry before loading IDT x86/realmode: Setup AP jump table x86/realmode: Add SEV-ES specific trampoline entry point x86/vmware: Add VMware-specific handling for VMMCALL under SEV-ES x86/kvm: Add KVM-specific VMMCALL handling under SEV-ES x86/paravirt: Allow hypervisor-specific VMMCALL handling under SEV-ES x86/sev-es: Handle #DB Events x86/sev-es: Handle #AC Events x86/sev-es: Handle VMMCALL Events x86/sev-es: Handle MWAIT/MWAITX Events x86/sev-es: Handle MONITOR/MONITORX Events x86/sev-es: Handle INVD Events x86/sev-es: Handle RDPMC Events x86/sev-es: Handle RDTSC(P) Events ...
Diffstat (limited to 'arch/x86/kernel/head64.c')
-rw-r--r--arch/x86/kernel/head64.c122
1 files changed, 118 insertions, 4 deletions
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index cbb71c1b574f..4199f25c0063 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -36,6 +36,11 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
#include <asm/fixmap.h>
+#include <asm/realmode.h>
+#include <asm/desc.h>
+#include <asm/extable.h>
+#include <asm/trapnr.h>
+#include <asm/sev-es.h>
/*
* Manage page tables very early on.
@@ -61,6 +66,24 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
+/*
+ * GDT used on the boot CPU before switching to virtual addresses.
+ */
+static struct desc_struct startup_gdt[GDT_ENTRIES] = {
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+};
+
+/*
+ * Address needs to be set at runtime because it references the startup_gdt
+ * while the kernel still uses a direct mapping.
+ */
+static struct desc_ptr startup_gdt_descr = {
+ .size = sizeof(startup_gdt),
+ .address = 0,
+};
+
#define __head __section(.head.text)
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
@@ -297,7 +320,7 @@ static void __init reset_early_page_tables(void)
}
/* Create a new PMD entry */
-int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
+bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
@@ -307,7 +330,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
- return -1;
+ return false;
again:
pgd_p = &early_top_pgt[pgd_index(address)].pgd;
@@ -364,10 +387,10 @@ again:
}
pmd_p[pmd_index(address)] = pmd;
- return 0;
+ return true;
}
-int __init early_make_pgtable(unsigned long address)
+static bool __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pmdval_t pmd;
@@ -377,6 +400,19 @@ int __init early_make_pgtable(unsigned long address)
return __early_make_pgtable(address, pmd);
}
+void __init do_early_exception(struct pt_regs *regs, int trapnr)
+{
+ if (trapnr == X86_TRAP_PF &&
+ early_make_pgtable(native_read_cr2()))
+ return;
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) &&
+ trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
+ return;
+
+ early_fixup_exception(regs, trapnr);
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -489,3 +525,81 @@ void __init x86_64_start_reservations(char *real_mode_data)
start_kernel();
}
+
+/*
+ * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
+ * used until the idt_table takes over. On the boot CPU this happens in
+ * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
+ * this happens in the functions called from head_64.S.
+ *
+ * The idt_table can't be used that early because all the code modifying it is
+ * in idt.c and can be instrumented by tracing or KASAN, which both don't work
+ * during early CPU bringup. Also the idt_table has the runtime vectors
+ * configured which require certain CPU state to be setup already (like TSS),
+ * which also hasn't happened yet in early CPU bringup.
+ */
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
+
+static struct desc_ptr bringup_idt_descr = {
+ .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1,
+ .address = 0, /* Set at runtime */
+};
+
+static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
+{
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ struct idt_data data;
+ gate_desc desc;
+
+ init_idt_data(&data, n, handler);
+ idt_init_desc(&desc, &data);
+ native_write_idt_entry(idt, n, &desc);
+#endif
+}
+
+/* This runs while still in the direct mapping */
+static void startup_64_load_idt(unsigned long physbase)
+{
+ struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
+ gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
+
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+ void *handler;
+
+ /* VMM Communication Exception */
+ handler = fixup_pointer(vc_no_ghcb, physbase);
+ set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
+ }
+
+ desc->address = (unsigned long)idt;
+ native_load_idt(desc);
+}
+
+/* This is used when running on kernel addresses */
+void early_setup_idt(void)
+{
+ /* VMM Communication Exception */
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+
+ bringup_idt_descr.address = (unsigned long)bringup_idt_table;
+ native_load_idt(&bringup_idt_descr);
+}
+
+/*
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
+ */
+void __head startup_64_setup_env(unsigned long physbase)
+{
+ /* Load GDT */
+ startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
+ native_load_gdt(&startup_gdt_descr);
+
+ /* New GDT is live - reload data segment registers */
+ asm volatile("movl %%eax, %%ds\n"
+ "movl %%eax, %%ss\n"
+ "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
+
+ startup_64_load_idt(physbase);
+}