From dc4e0021b00b5a4ecba56fae509217776592b0aa Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Nov 2019 18:27:16 +0100 Subject: x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area There are three problems with the current layout of the doublefault stack and TSS. First, the TSS is only cacheline-aligned, which is not enough -- if the hardware portion of the TSS (struct x86_hw_tss) crosses a page boundary, horrible things happen [0]. Second, the stack and TSS are global, so simultaneous double faults on different CPUs will cause massive corruption. Third, the whole mechanism won't work if user CR3 is loaded, resulting in a triple fault [1]. Let the doublefault stack and TSS share a page (which prevents the TSS from spanning a page boundary), make it percpu, and move it into cpu_entry_area. Teach the stack dump code about the doublefault stack. [0] Real hardware will read past the end of the page onto the next *physical* page if a task switch happens. Virtual machines may have any number of bugs, and I would consider it reasonable for a VM to summarily kill the guest if it tries to task-switch to a page-spanning TSS. [1] Real hardware triple faults. At least some VMs seem to hang. I'm not sure what's going on. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 58 +++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/doublefault_32.c') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 61c707ca8a09..4eecfe4825ed 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -10,10 +10,6 @@ #include #include -#define DOUBLEFAULT_STACKSIZE (1024) -static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) static void doublefault_fn(void) @@ -21,6 +17,8 @@ static void doublefault_fn(void) struct desc_ptr gdt_desc = {0, 0}; unsigned long gdt, tss; + BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE); + native_store_gdt(&gdt_desc); gdt = gdt_desc.address; @@ -48,24 +46,46 @@ static void doublefault_fn(void) cpu_relax(); } -struct x86_hw_tss doublefault_tss __cacheline_aligned = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, +DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = { + .tss = { + /* + * No sp0 or ss0 -- we never run CPL != 0 with this TSS + * active. sp is filled in later. + */ + .ldt = 0, .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, + .ip = (unsigned long) doublefault_fn, + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .es = __USER_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, + .ds = __USER_DS, + .fs = __KERNEL_PERCPU, #ifndef CONFIG_X86_32_LAZY_GS - .gs = __KERNEL_STACK_CANARY, + .gs = __KERNEL_STACK_CANARY, #endif - .__cr3 = __pa_nodebug(swapper_pg_dir), + .__cr3 = __pa_nodebug(swapper_pg_dir), + }, }; + +void doublefault_init_cpu_tss(void) +{ + unsigned int cpu = smp_processor_id(); + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); + + /* + * The linker isn't smart enough to initialize percpu variables that + * point to other places in percpu space. + */ + this_cpu_write(doublefault_stack.tss.sp, + (unsigned long)&cea->doublefault_stack.stack + + sizeof(doublefault_stack.stack)); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, + &get_cpu_entry_area(cpu)->doublefault_stack.tss); + +} -- cgit v1.2.3