diff options
87 files changed, 4043 insertions, 2393 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 092ee9fbaf2b..053f613fc9a9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; @@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 No flags are specified so far, the corresponding field must be set to zero. @@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) @@ -3331,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if it is still asserted. Vector is the LAPIC interrupt vector for which the EOI was received. + struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; + }; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; +Indicates that the VCPU exits into userspace to process some tasks +related to Hyper-V emulation. +Valid values for 'type' are: + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about +Hyper-V SynIC state change. Notification is used to remap SynIC +event/message pages and to enable/disable SynIC messages/events processing +in userspace. + /* Fix the size of the union. */ char padding[256]; }; @@ -3685,3 +3713,16 @@ available, means that that the kernel has an implementation of the H_RANDOM hypercall backed by a hardware random-number generator. If present, the kernel H_RANDOM handler can be enabled for guest use with the KVM_CAP_PPC_ENABLE_HCALL capability. + +8.2 KVM_CAP_HYPERV_SYNIC + +Architectures: x86 +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). + +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 2d09d1ed86d0..f083a168eb35 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible Allows userspace to query the actual limit and set a new limit for the maximum guest memory size. The limit will be rounded up to 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by -the number of page table levels. +the number of page table levels. In the case that there is no limit we will set +the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). 2. GROUP: KVM_S390_VM_CPU_MODEL Architectures: s390 diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 3a4d681c3e98..daf9c0f742d2 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -203,10 +203,10 @@ Shadow pages contain the following information: page cannot be destroyed. See role.invalid. parent_ptes: The reverse mapping for the pte/ptes pointing at this page's spt. If - parent_ptes bit 0 is zero, only one spte points at this pages and + parent_ptes bit 0 is zero, only one spte points at this page and parent_ptes points at this single spte, otherwise, there exists multiple sptes pointing at this page and (parent_ptes & ~0x1) points at a data - structure with a list of parent_ptes. + structure with a list of parent sptes. unsync: If true, then the translations in this page may not match the guest's translation. This is equivalent to the state of the tlb when a pte is diff --git a/MAINTAINERS b/MAINTAINERS index 020a3be31b18..e99a7b328c7f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6089,6 +6089,7 @@ M: Marc Zyngier <marc.zyngier@arm.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu W: http://systems.cs.columbia.edu/projects/kvm-arm +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git S: Supported F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index dc641ddf0784..e22089fb44dc 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_ARM_H__ #define __ARM_KVM_ARM_H__ +#include <linux/const.h> #include <linux/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -132,10 +133,9 @@ * space. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) -#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) +#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -162,17 +162,17 @@ #define VTTBR_X (5 - KVM_T0SZ) #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT _AC(48, ULL) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) -#define HSR_EC (0x3fU << HSR_EC_SHIFT) -#define HSR_IL (1U << 25) +#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) +#define HSR_IL (_AC(1, UL) << 25) #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) -#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) @@ -180,9 +180,9 @@ #define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) -#define HSR_CV (1U << HSR_CV_SHIFT) +#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) #define HSR_COND_SHIFT (20) -#define HSR_COND (0xfU << HSR_COND_SHIFT) +#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) @@ -210,13 +210,13 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) -#define HSR_WFI_IS_WFE (1U << 0) +#define HSR_WFI_IS_WFE (_AC(1, UL) << 0) -#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) -#define HSR_DABT_S1PTW (1U << 7) -#define HSR_DABT_CM (1U << 8) -#define HSR_DABT_EA (1U << 9) +#define HSR_DABT_S1PTW (_AC(1, UL) << 7) +#define HSR_DABT_CM (_AC(1, UL) << 8) +#define HSR_DABT_EA (_AC(1, UL) << 9) #define kvm_arm_exception_type \ {0, "RESET" }, \ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6692982c9b57..f9f27792d8ed 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,6 +150,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 405aa1883307..9203c21b4673 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -279,6 +279,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *merged_hyp_pgd, unsigned long hyp_idmap_start) { } +static inline unsigned int kvm_get_vmid_bits(void) +{ + return 8; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd299de08..dda1959f0dde 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -44,6 +44,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_psci.h> +#include <asm/sections.h> #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -58,9 +59,12 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u8 kvm_next_vmid; +static u32 kvm_next_vmid; +static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -132,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + kvm->arch.max_vcpus = vgic_present ? + kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; return ret; out_free_stage2_pgd: @@ -172,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: + r = vgic_present; + break; case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -433,11 +440,12 @@ static void update_vttbr(struct kvm *kvm) kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); @@ -603,6 +611,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; /* * Back from guest *************************************************************/ @@ -913,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -927,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + if (!vgic_present) + return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { @@ -1067,6 +1080,12 @@ static int init_hyp_mode(void) goto out_free_mappings; } + err = create_hyp_mappings(__start_rodata, __end_rodata); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_free_mappings; + } + /* * Map the Hyp stack pages */ @@ -1111,8 +1130,17 @@ static int init_hyp_mode(void) * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); - if (err) + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + break; + default: goto out_free_context; + } /* * Init HYP architected timer support @@ -1127,6 +1155,10 @@ static int init_hyp_mode(void) kvm_perf_init(); + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_info("Hyp mode initialized successfully\n"); return 0; diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d6c005283678..dc99159857b4 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) return vbar; } +/* + * Switch to an exception mode, updating both CPSR and SPSR. Follow + * the logic described in AArch32.EnterMode() from the ARMv8 ARM. + */ +static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; + + *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; + + switch (mode) { + case FIQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_F_BIT; + /* Fall through */ + case ABT_MODE: + case IRQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_A_BIT; + /* Fall through */ + default: + *vcpu_cpsr(vcpu) |= PSR_I_BIT; + } + + *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); + + if (sctlr & SCTLR_TE) + *vcpu_cpsr(vcpu) |= PSR_T_BIT; + if (sctlr & SCTLR_EE) + *vcpu_cpsr(vcpu) |= PSR_E_BIT; + + /* Note: These now point to the mode banked copies */ + *vcpu_spsr(vcpu) = cpsr; +} + /** * kvm_inject_undefined - inject an undefi |
