diff options
| author | Sean Christopherson <seanjc@google.com> | 2025-10-13 15:30:09 -0400 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-10-29 13:59:54 +0100 |
| commit | a908eca437789589dd4624da428614c1275064dc (patch) | |
| tree | 1f0b916eacb12005224ed52d8964efda1419afff | |
| parent | a5901a0dfb5964525990106706ae8b98db098226 (diff) | |
| download | linux-a908eca437789589dd4624da428614c1275064dc.tar.gz linux-a908eca437789589dd4624da428614c1275064dc.tar.bz2 linux-a908eca437789589dd4624da428614c1275064dc.zip | |
KVM: x86: Don't (re)check L1 intercepts when completing userspace I/O
[ Upstream commit e750f85391286a4c8100275516973324b621a269 ]
When completing emulation of instruction that generated a userspace exit
for I/O, don't recheck L1 intercepts as KVM has already finished that
phase of instruction execution, i.e. has already committed to allowing L2
to perform I/O. If L1 (or host userspace) modifies the I/O permission
bitmaps during the exit to userspace, KVM will treat the access as being
intercepted despite already having emulated the I/O access.
Pivot on EMULTYPE_NO_DECODE to detect that KVM is completing emulation.
Of the three users of EMULTYPE_NO_DECODE, only complete_emulated_io() (the
intended "recipient") can reach the code in question. gp_interception()'s
use is mutually exclusive with is_guest_mode(), and
complete_emulated_insn_gp() unconditionally pairs EMULTYPE_NO_DECODE with
EMULTYPE_SKIP.
The bad behavior was detected by a syzkaller program that toggles port I/O
interception during the userspace I/O exit, ultimately resulting in a WARN
on vcpu->arch.pio.count being non-zero due to KVM no completing emulation
of the I/O instruction.
WARNING: CPU: 23 PID: 1083 at arch/x86/kvm/x86.c:8039 emulator_pio_in_out+0x154/0x170 [kvm]
Modules linked in: kvm_intel kvm irqbypass
CPU: 23 UID: 1000 PID: 1083 Comm: repro Not tainted 6.16.0-rc5-c1610d2d66b1-next-vm #74 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:emulator_pio_in_out+0x154/0x170 [kvm]
PKRU: 55555554
Call Trace:
<TASK>
kvm_fast_pio+0xd6/0x1d0 [kvm]
vmx_handle_exit+0x149/0x610 [kvm_intel]
kvm_arch_vcpu_ioctl_run+0xda8/0x1ac0 [kvm]
kvm_vcpu_ioctl+0x244/0x8c0 [kvm]
__x64_sys_ioctl+0x8a/0xd0
do_syscall_64+0x5d/0xc60
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
Reported-by: syzbot+cc2032ba16cc2018ca25@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68790db4.a00a0220.3af5df.0020.GAE@google.com
Fixes: 8a76d7f25f8f ("KVM: x86: Add x86 callback for intercept check")
Cc: stable@vger.kernel.org
Cc: Jim Mattson <jmattson@google.com>
Link: https://lore.kernel.org/r/20250715190638.1899116-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ is_guest_mode() was open coded ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| -rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/emulate.c | 10 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 9 |
3 files changed, 13 insertions, 8 deletions
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 77cf6c11f66b..f8e8799a8757 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -448,7 +448,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_RESTART 1 #define EMULATION_INTERCEPTED 2 void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1a9fa2903852..90c495b87070 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5605,12 +5605,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.end = 0; } -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) { const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; - unsigned emul_flags; ctxt->mem_read.pos = 0; @@ -5625,7 +5624,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - emul_flags = ctxt->ops->get_hflags(ctxt); if (unlikely(ctxt->d & (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || @@ -5659,7 +5657,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(ctxt, &ctxt->dst); } - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { + if (unlikely(check_intercepts) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5688,7 +5686,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5742,7 +5740,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8eb62dbb3a18..9c53bc25aa36 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6855,7 +6855,14 @@ restart: /* Save the faulting GPA (cr2) in the address field */ ctxt->exception.address = cr2_or_gpa; - r = x86_emulate_insn(ctxt); + /* + * Check L1's instruction intercepts when emulating instructions for + * L2, unless KVM is re-emulating a previously decoded instruction, + * e.g. to complete userspace I/O, in which case KVM has already + * checked the intercepts. + */ + r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) && + !(emulation_type & EMULTYPE_NO_DECODE)); if (r == EMULATION_INTERCEPTED) return 1; |
