From 35025735a79eaa894c43837b94fd33c9d6b122df Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:18 +0000 Subject: KVM: x86/xen: Support direct injection of event channel events This adds a KVM_XEN_HVM_EVTCHN_SEND ioctl which allows direct injection of events given an explicit { vcpu, port, priority } in precisely the same form that those fields are given in the IRQ routing table. Userspace is currently able to inject 2-level events purely by setting the bits in the shared_info and vcpu_info, but FIFO event channels are harder to deal with; we will need the kernel to take sole ownership of delivery when we support those. A patch advertising this feature with a new bit in the KVM_CAP_XEN_HVM ioctl will be added in a subsequent patch. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-9-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 91a6fe4e02c0..49cd2e9e0f6a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1699,6 +1699,9 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_HVM_EVTCHN_SEND _IOW(KVMIO, 0xd0, struct kvm_irq_routing_xen_evtchn) + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -- cgit v1.2.3 From 2fd6df2f2b47d4301b1ee0fe9d627d1c061a5988 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:19 +0000 Subject: KVM: x86/xen: intercept EVTCHNOP_send from guests Userspace registers a sending @port to either deliver to an @eventfd or directly back to a local event channel port. After binding events the guest or host may wish to bind those events to a particular vcpu. This is usually done for unbound and and interdomain events. Update requests are handled via the KVM_XEN_EVTCHN_UPDATE flag. Unregistered ports are handled by the emulator. Co-developed-by: Ankur Arora Co-developed-By: David Woodhouse Signed-off-by: Joao Martins Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-10-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 49cd2e9e0f6a..623ed2cb228f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1686,6 +1686,32 @@ struct kvm_xen_hvm_attr { struct { __u64 gfn; } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u64 pad[8]; } u; }; @@ -1694,6 +1720,8 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From 942c2490c23f2800ad8143f5eb84a79b859aa743 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:21 +0000 Subject: KVM: x86/xen: Add KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID In order to intercept hypercalls such as VCPUOP_set_singleshot_timer, we need to be aware of the Xen CPU numbering. This looks a lot like the Hyper-V handling of vpidx, for obvious reasons. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-12-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 623ed2cb228f..4b65e9f0a4d9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1747,6 +1747,7 @@ struct kvm_xen_vcpu_attr { __u64 time_blocked; __u64 time_offline; } runstate; + __u32 vcpu_id; } u; }; @@ -1757,6 +1758,8 @@ struct kvm_xen_vcpu_attr { #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 536395260582be7443b0b35b0bbb89ffe3947f62 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:22 +0000 Subject: KVM: x86/xen: handle PV timers oneshot mode If the guest has offloaded the timer virq, handle the following hypercalls for programming the timer: VCPUOP_set_singleshot_timer VCPUOP_stop_singleshot_timer set_timer_op(timestamp_ns) The event channel corresponding to the timer virq is then used to inject events once timer deadlines are met. For now we back the PV timer with hrtimer. [ dwmw2: Add save/restore, 32-bit compat mode, immediate delivery, don't check timer in kvm_vcpu_has_event() ] Signed-off-by: Joao Martins Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-13-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b65e9f0a4d9..cb223e425223 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1748,6 +1748,11 @@ struct kvm_xen_vcpu_attr { __u64 time_offline; } runstate; __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; } u; }; @@ -1760,6 +1765,7 @@ struct kvm_xen_vcpu_attr { #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 28d1629f751c4a5f9437fbaa0ee4ed81d1a8e587 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:23 +0000 Subject: KVM: x86/xen: Kernel acceleration for XENVER_version Turns out this is a fast path for PV guests because they use it to trigger the event channel upcall. So letting it bounce all the way up to userspace is not great. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-14-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cb223e425223..4dda3896ed71 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1711,7 +1711,7 @@ struct kvm_xen_hvm_attr { __u32 padding[4]; } deliver; } evtchn; - + __u32 xen_version; __u64 pad[8]; } u; }; @@ -1722,6 +1722,7 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From fde0451be8fb3208d4d146b8602d99ee8139e515 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:24 +0000 Subject: KVM: x86/xen: Support per-vCPU event channel upcall via local APIC Windows uses a per-vCPU vector, and it's delivered via the local APIC basically like an MSI (with associated EOI) unlike the traditional guest-wide vector which is just magically asserted by Xen (and in the KVM case by kvm_xen_has_interrupt() / kvm_cpu_get_extint()). Now that the kernel is able to raise event channel events for itself, being able to do so for Windows guests is also going to be useful. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-15-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4dda3896ed71..a9ba690c4f37 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1754,6 +1754,7 @@ struct kvm_xen_vcpu_attr { __u32 priority; __u64 expires_ns; } timer; + __u8 vector; } u; }; @@ -1767,6 +1768,7 @@ struct kvm_xen_vcpu_attr { /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 #define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 661a20fab7d156cf6b9a407c946a1e558a633151 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:25 +0000 Subject: KVM: x86/xen: Advertise and document KVM_XEN_HVM_CONFIG_EVTCHN_SEND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the end of the patch series adding this batch of event channel acceleration features, finally add the feature bit which advertises them and document it all. For SCHEDOP_poll we need to wake a polling vCPU when a given port is triggered, even when it's masked — and we want to implement that in the kernel, for efficiency. So we want the kernel to know that it has sole ownership of event channel delivery. Thus, we allow userspace to make the 'promise' by setting the corresponding feature bit in its KVM_XEN_HVM_CONFIG call. As we implement SCHEDOP_poll bypass later, we will do so only if that promise has been made by userspace. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-16-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9ba690c4f37..ee5cc9e2a837 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1232,6 +1232,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) struct kvm_xen_hvm_config { __u32 flags; -- cgit v1.2.3 From ffbb61d09fc56c85e28b110494f3788d0ed4d1f8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 25 Feb 2022 14:53:02 +0000 Subject: KVM: x86: Accept KVM_[GS]ET_TSC_KHZ as a VM ioctl. This sets the default TSC frequency for subsequently created vCPUs. Signed-off-by: David Woodhouse Message-Id: <20220225145304.36166-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ee5cc9e2a837..8616af85dc5d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1144,6 +1144,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +#define KVM_CAP_VM_TSC_CONTROL 214 #ifdef KVM_CAP_IRQ_ROUTING @@ -1471,7 +1472,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) -/* Available with KVM_CAP_TSC_CONTROL */ +/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with +* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* Available with KVM_CAP_PCI_2_3 */ -- cgit v1.2.3 From c24a950ec7d60c4da91dc3f273295c7f438b531e Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 7 Apr 2022 14:02:33 -0700 Subject: KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES If an SEV-ES guest requests termination, exit to userspace with KVM_EXIT_SYSTEM_EVENT and a dedicated SEV_TERM type instead of -EINVAL so that userspace can take appropriate action. See AMD's GHCB spec section '4.1.13 Termination Request' for more details. Suggested-by: Sean Christopherson Suggested-by: Paolo Bonzini Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Gonda Reported-by: kernel test robot Message-Id: <20220407210233.782250-1-pgonda@google.com> [Add documentatino. - Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8616af85dc5d..dd1d8167e71f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,8 +444,11 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_SEV_TERM 4 +#define KVM_SYSTEM_EVENT_NDATA_VALID (1u << 31) __u32 type; - __u64 flags; + __u32 ndata; + __u64 data[16]; } system_event; /* KVM_EXIT_S390_STSI */ struct { -- cgit v1.2.3 From 7b33a09d036ffd9a04506122840629c7e870cf08 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:40 +0000 Subject: KVM: arm64: Add support for userspace to suspend a vCPU Introduce a new MP state, KVM_MP_STATE_SUSPENDED, which indicates a vCPU is in a suspended state. In the suspended state the vCPU will block until a wakeup event (pending interrupt) is recognized. Add a new system event type, KVM_SYSTEM_EVENT_WAKEUP, to indicate to userspace that KVM has recognized one such wakeup event. It is the responsibility of userspace to then make the vCPU runnable, or leave it suspended until the next wakeup event. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-7-oupton@google.com --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a184d260c7f..7f72fb7b05f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,6 +444,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; union { @@ -646,6 +647,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state; -- cgit v1.2.3 From bfbab44568779e1682bc6f63688bb9c965f0e74a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:41 +0000 Subject: KVM: arm64: Implement PSCI SYSTEM_SUSPEND ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND" describes a PSCI call that allows software to request that a system be placed in the deepest possible low-power state. Effectively, software can use this to suspend itself to RAM. Unfortunately, there really is no good way to implement a system-wide PSCI call in KVM. Any precondition checks done in the kernel will need to be repeated by userspace since there is no good way to protect a critical section that spans an exit to userspace. SYSTEM_RESET and SYSTEM_OFF are equally plagued by this issue, although no users have seemingly cared for the relatively long time these calls have been supported. The solution is to just make the whole implementation userspace's problem. Introduce a new system event, KVM_SYSTEM_EVENT_SUSPEND, that indicates to userspace a calling vCPU has invoked PSCI SYSTEM_SUSPEND. Additionally, add a CAP to get buy-in from userspace for this new exit type. Only advertise the SYSTEM_SUSPEND PSCI call if userspace has opted in. If a vCPU calls SYSTEM_SUSPEND, punt straight to userspace. Provide explicit documentation of userspace's responsibilites for the exit and point to the PSCI specification to describe the actual PSCI call. Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-8-oupton@google.com --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f72fb7b05f2..32c56384fd08 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,6 +445,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 +#define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; union { @@ -1154,6 +1155,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DISABLE_QUIRKS2 213 /* #define KVM_CAP_VM_TSC_CONTROL 214 */ #define KVM_CAP_SYSTEM_EVENT_DATA 215 +#define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3