diff options
Diffstat (limited to 'arch/x86')
| -rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 4 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 40 | ||||
| -rw-r--r-- | arch/x86/include/asm/svm.h | 6 | ||||
| -rw-r--r-- | arch/x86/include/asm/vmx.h | 22 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 34 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.h | 186 | ||||
| -rw-r--r-- | arch/x86/kvm/emulate.c | 42 | ||||
| -rw-r--r-- | arch/x86/kvm/hyperv.c | 8 | ||||
| -rw-r--r-- | arch/x86/kvm/kvm_cache_regs.h | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.h | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu.c | 267 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu.h | 23 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu_audit.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/mtrr.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 6 | ||||
| -rw-r--r-- | arch/x86/kvm/svm.c | 139 | ||||
| -rw-r--r-- | arch/x86/kvm/trace.h | 11 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx.c | 620 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 213 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 54 |
22 files changed, 1068 insertions, 621 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 42bbbf0f173d..2519c6c801c9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,7 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -219,8 +219,8 @@ struct x86_emulate_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 369e41c23f07..8844eee290b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,15 +79,14 @@ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL #define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ - | X86_CR4_PKE)) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ + | X86_CR4_SMAP | X86_CR4_PKE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -204,7 +203,6 @@ enum { #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_USER_MASK | \ PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) @@ -317,15 +315,17 @@ struct kvm_pio_request { int size; }; +#define PT64_ROOT_MAX_LEVEL 5 + struct rsvd_bits_validate { - u64 rsvd_bits_mask[2][4]; + u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; }; /* - * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level - * 32-bit). The kvm_mmu structure abstracts the details of the current mmu - * mode. + * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, + * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the + * current mmu mode. */ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); @@ -548,8 +548,8 @@ struct kvm_vcpu_arch { struct kvm_queued_exception { bool pending; + bool injected; bool has_error_code; - bool reinject; u8 nr; u32 error_code; u8 nested_apf; @@ -687,8 +687,12 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available (AMD only) */ + /* GPA available */ bool gpa_available; + gpa_t gpa_val; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { @@ -979,7 +983,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); - int (*get_tdp_level)(void); + int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); @@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -static inline u64 get_canonical(u64 la) -{ - return ((int64_t)la << 16) >> 16; -} - -static inline bool is_noncanonical_address(u64 la) -{ -#ifdef CONFIG_X86_64 - return get_canonical(la) != la; -#else - return false; -#endif -} - #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IRQ_SHIFT 8 #define V_IRQ_MASK (1 << V_IRQ_SHIFT) +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) @@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,6 +72,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_RDRAND 0x00000800 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_RDSEED 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 @@ -114,6 +115,10 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +/* VMFUNC functions */ +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_EPTP_ENTRIES 512 + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -187,6 +192,8 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, POSTED_INTR_DESC_ADDR = 0x00002016, POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CONTROL = 0x00002018, + VM_FUNCTION_CONTROL_HIGH = 0x00002019, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -197,6 +204,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, @@ -444,6 +453,7 @@ enum vmcs_field { #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) +#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) #define VMX_EPTP_UC_BIT (1ull << 8) #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) @@ -459,12 +469,14 @@ enum vmcs_field { #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ -#define VMX_EPT_DEFAULT_GAW 3 -#define VMX_EPT_MAX_GAW 0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 -#define VMX_EPT_GAW_EPTP_SHIFT 3 -#define VMX_EPT_AD_ENABLE_BIT (1ull << 6) -#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPTP_PWL_MASK 0x38ull +#define VMX_EPTP_PWL_4 0x18ull +#define VMX_EPTP_PWL_5 0x20ull +#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +#define VMX_EPTP_MT_MASK 0x7ull +#define VMX_EPTP_MT_WB 0x6ull +#define VMX_EPTP_MT_UC 0x0ull #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19adbb418443..0099e10eb045 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); /* - * The existing code assumes virtual address is 48-bit in the canonical - * address checks; exit if it is ever changed. + * The existing code assumes virtual address is 48-bit or 57-bit in the + * canonical address checks; exit if it is ever changed. */ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && ((best->eax & 0xff00) >> 8) != 48 && - ((best->eax & 0xff00) >> 8) != 0) - return -EINVAL; + if (best) { + int vaddr_bits = (best->eax & 0xff00) >> 8; + + if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) + return -EINVAL; + } /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_mmu_reset_context(vcpu); kvm_pmu_refresh(vcpu); return 0; @@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); + F(AVX512VBMI) | F(LA57) | F(PKU) | + 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = @@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); } -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; struct kvm_cpuid_entry2 *best; + bool entry_found = true; best = kvm_find_cpuid_entry(vcpu, function, index); - if (!best) + if (!best) { + entry_found = false; + if (!check_limit) + goto out; + best = check_cpuid_limit(vcpu, function, index); + } +out: if (best) { *eax = best->eax; *ebx = best->ebx; @@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) *edx = best->edx; } else *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); + return entry_found; } EXPORT_SYMBOL_GPL(kvm_cpuid); @@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) eax = kvm_register_read(vcpu, VCPU_REGS_RAX); ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); + kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); kvm_register_write(vcpu, VCPU_REGS_RAX, eax); kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da6728383052..1ea3c0e1e3a9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,6 +3,7 @@ #include "x86.h" #include <asm/cpu.h> +#include <asm/processor.h> int kvm_update_cpuid(struct kvm_vcpu *vcpu); bool kvm_mpx_supported(void); @@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); @@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) return vcpu->arch.maxphyaddr; } -static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - if (!static_cpu_has(X86_FEATURE_XSAVE)) - return false; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_XSAVE)); -} - -static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->edx & bit(X86_FEATURE_MTRR)); -} - -static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST)); -} +struct cpuid_reg { + u32 function; + u32 index; + int reg; +}; -static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMEP)); -} +static const struct cpuid_reg reverse_cpuid[] = { + [CPUID_1_EDX] = { 1, 0, CPUID_EDX}, + [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX}, + [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, + [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, + [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, + [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, + [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, + [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, + [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, + [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, + [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, + [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, + [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, +}; -static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) +static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMAP)); -} + unsigned x86_leaf = x86_feature / 32; -static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + BUILD_BUG_ON(!__builtin_constant_p(x86_leaf)); + BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); + BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); + return reverse_cpuid[x86_leaf]; } -static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu) +static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ecx & bit(X86_FEATURE_PKU)); -} + struct kvm_cpuid_entry2 *entry; + const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); -static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index); + if (!entry) + return NULL; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_LM)); + switch (cpuid.reg) { + case CPUID_EAX: + return &entry->eax; + case CPUID_EBX: + return &entry->ebx; + case CPUID_ECX: + return &entry->ecx; + case CPUID_EDX: + return &entry->edx; + default: + BUILD_BUG(); + return NULL; + } } -static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) +static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->ecx & bit(X86_FEATURE_OSVW)); -} + if (x86_feature == X86_FEATURE_XSAVE && + !static_cpu_has(X86_FEATURE_XSAVE)) + return false; -static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (!reg) + return false; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_PCID)); + return *reg & bit(x86_feature); } -static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) +static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_X2APIC)); + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (reg) + *reg &= ~bit(x86_feature); } static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) @@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; } -static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_GBPAGES)); -} - -static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_RTM)); -} - -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); -} - -static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_RDTSCP)); -} - -/* - * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 - */ -#define BIT_NRIPS 3 - -static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); - - /* - * NRIPS is a scattered cpuid feature, so we can't use - * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit - * position 8, not 3). - */ - return best && (best->edx & bit(BIT_NRIPS)); -} -#undef BIT_NRIPS - static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb0055953fbc..16bf6655aa85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -28,6 +28,7 @@ #include "x86.h" #include "tss.h" +#include "mmu.h" /* * Operand types @@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, ulong la; u32 lim; u16 sel; + u8 va_bits; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: *linear = la; - if (is_noncanonical_address(la)) + va_bits = ctxt_virt_addr_bits(ctxt); + if (get_canonical(la, va_bits) != la) goto bad; - *max_size = min_t(u64, ~0u, (1ull << 48) - la); + *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); if (size > *max_size) goto bad; break; @@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; - if (is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32))) + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32), ctxt)) return emulate_gp(ctxt, 0); } load: @@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) eax = 0x80000001; ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); } @@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) u32 eax, ebx, ecx, edx; eax = ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; @@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) eax = 0x00000000; ecx = 0x00000000; - ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); /* * Intel ("GenuineIntel") * remark: Intel CPUs only support "syscall" in 64bit @@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (is_noncanonical_address(rcx) || - is_noncanonical_address(rdx)) + if (emul_is_noncanonical_address(rcx, ctxt) || + emul_is_noncanonical_address(rdx, ctxt)) return emulate_gp(ctxt, 0); break; } @@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) /* * Check MOVBE is set in the guest-visible CPUID leaf. */ - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(ecx & FFL(MOVBE))) return emulate_ud(ctxt); @@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - is_noncanonical_address(desc_ptr.address)) + emul_is_noncanonical_address(desc_ptr.address, ctxt)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); @@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) eax = reg_read(ctxt, VCPU_REGS_RAX); ecx = reg_read(ctxt, VCPU_REGS_RCX); - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); *reg_write(ctxt, VCPU_REGS_RAX) = eax; *reg_write(ctxt, VCPU_REGS_RBX) = ebx; *reg_write(ctxt, VCPU_REGS_RCX) = ecx; @@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) { u32 eax = 1, ebx, ecx = 0, edx; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(edx & FFL(FXSR))) return emulate_ud(ctxt); @@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) u64 rsvd = 0; ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; + if (efer & EFER_LMA) { + u64 maxphyaddr; + u32 eax = 0x80000008; + + if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, + NULL, false)) + maxphyaddr = eax & 0xff; + else + maxphyaddr = 36; + rsvd = rsvd_bits(maxphyaddr, 62); + } if (new_val & rsvd) return emulate_gp(ctxt, 0); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 337b6d2730fa..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), pdata); } + case HV_X64_MSR_TSC_FREQUENCY: + data = (u64)vcpu->arch.virtual_tsc_khz * 1000; + break; + case HV_X64_MSR_APIC_FREQUENCY: + data = APIC_BUS_FREQUENCY; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, true); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e1e89ee4af75..9add410f195f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -4,7 +4,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 589dcc117086..aaf10b6f5380 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -54,8 +54,6 @@ #define PRIu64 "u" #define PRIo64 "o" -#define APIC_BUS_CYCLE_NS 1 - /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ #define apic_debug(fmt, arg...) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 29caa2c3dff9..215721e1426a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,6 +12,9 @@ #define KVM_APIC_SHORT_MASK 0xc0000 #define KVM_APIC_DEST_MASK 0x800 +#define APIC_BUS_CYCLE_NS 1 +#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 04d750813c9d..eca30c1eb1d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, } struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; - unsigned int idx[PT64_ROOT_LEVEL]; + struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; + unsigned int idx[PT64_ROOT_MAX_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ @@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, iterator->shadow_addr = vcpu->arch.mmu.root_hpa; iterator->level = vcpu->arch.mmu.shadow_root_level; - if (iterator->level == PT64_ROOT_LEVEL && - vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && + if (iterator->level == PT64_ROOT_4LEVEL && + vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu.direct_map) --iterator->level; @@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, sp = list_last_entry(&kvm->arch.active_mmu_pages, struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - - return true; + return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } /* @@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static void make_mmu_pages_available(struct kvm_vcpu *vcpu); +static int make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && - (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && + (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || vcpu->arch.mmu.direct_map)) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; unsigned i; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); + if(make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } + sp = kvm_mmu_get_page(vcpu, 0, 0, + vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); @@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ - if (vcpu->arch.mmu.root_level == PT64_ROO |
