summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/mmu.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-16 16:15:14 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-16 16:15:14 +0200
commit79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (patch)
tree0507ef82aa3c7766b7b19163a0351882b7d7c5b5 /arch/arm64/kvm/mmu.c
parentcb3f09f9afe5286c0aed7a1c5cc71495de166efb (diff)
parentc862dcd199759d4a45e65dab47b03e3e8a144e3a (diff)
downloadlinux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.tar.gz
linux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.tar.bz2
linux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "RISCV: - Use common KVM implementation of MMU memory caches - SBI v0.2 support for Guest - Initial KVM selftests support - Fix to avoid spurious virtual interrupts after clearing hideleg CSR - Update email address for Anup and Atish ARM: - Simplification of the 'vcpu first run' by integrating it into KVM's 'pid change' flow - Refactoring of the FP and SVE state tracking, also leading to a simpler state and less shared data between EL1 and EL2 in the nVHE case - Tidy up the header file usage for the nvhe hyp object - New HYP unsharing mechanism, finally allowing pages to be unmapped from the Stage-1 EL2 page-tables - Various pKVM cleanups around refcounting and sharing - A couple of vgic fixes for bugs that would trigger once the vcpu xarray rework is merged, but not sooner - Add minimal support for ARMv8.7's PMU extension - Rework kvm_pgtable initialisation ahead of the NV work - New selftest for IRQ injection - Teach selftests about the lack of default IPA space and page sizes - Expand sysreg selftest to deal with Pointer Authentication - The usual bunch of cleanups and doc update s390: - fix sigp sense/start/stop/inconsistency - cleanups x86: - Clean up some function prototypes more - improved gfn_to_pfn_cache with proper invalidation, used by Xen emulation - add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery - completely remove potential TOC/TOU races in nested SVM consistency checks - update some PMCs on emulated instructions - Intel AMX support (joint work between Thomas and Intel) - large MMU cleanups - module parameter to disable PMU virtualization - cleanup register cache - first part of halt handling cleanups - Hyper-V enlightened MSR bitmap support for nested hypervisors Generic: - clean up Makefiles - introduce CONFIG_HAVE_KVM_DIRTY_RING - optimize memslot lookup using a tree - optimize vCPU array usage by converting to xarray" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (268 commits) x86/fpu: Fix inline prefix warnings selftest: kvm: Add amx selftest selftest: kvm: Move struct kvm_x86_state to header selftest: kvm: Reorder vcpu_load_state steps for AMX kvm: x86: Disable interception for IA32_XFD on demand x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state() kvm: selftests: Add support for KVM_CAP_XSAVE2 kvm: x86: Add support for getting/setting expanded xstate buffer x86/fpu: Add uabi_size to guest_fpu kvm: x86: Add CPUID support for Intel AMX kvm: x86: Add XCR0 support for Intel AMX kvm: x86: Disable RDMSR interception of IA32_XFD_ERR kvm: x86: Emulate IA32_XFD_ERR for guest kvm: x86: Intercept #NM for saving IA32_XFD_ERR x86/fpu: Prepare xfd_err in struct fpu_guest kvm: x86: Add emulation for IA32_XFD x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2 x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM x86/fpu: Add guest support to xfd_enable_feature() ...
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r--arch/arm64/kvm/mmu.c177
1 files changed, 151 insertions, 26 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..bc2aba953299 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -239,6 +239,9 @@ void free_hyp_pgds(void)
static bool kvm_host_owns_hyp_mappings(void)
{
+ if (is_kernel_in_hyp_mode())
+ return false;
+
if (static_branch_likely(&kvm_protected_mode_initialized))
return false;
@@ -281,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
-static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+struct hyp_shared_pfn {
+ u64 pfn;
+ int count;
+ struct rb_node node;
+};
+
+static DEFINE_MUTEX(hyp_shared_pfns_lock);
+static struct rb_root hyp_shared_pfns = RB_ROOT;
+
+static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
+ struct rb_node **parent)
{
- phys_addr_t addr;
+ struct hyp_shared_pfn *this;
+
+ *node = &hyp_shared_pfns.rb_node;
+ *parent = NULL;
+ while (**node) {
+ this = container_of(**node, struct hyp_shared_pfn, node);
+ *parent = **node;
+ if (this->pfn < pfn)
+ *node = &((**node)->rb_left);
+ else if (this->pfn > pfn)
+ *node = &((**node)->rb_right);
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static int share_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (this) {
+ this->count++;
+ goto unlock;
+ }
+
+ this = kzalloc(sizeof(*this), GFP_KERNEL);
+ if (!this) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ this->pfn = pfn;
+ this->count = 1;
+ rb_link_node(&this->node, parent, node);
+ rb_insert_color(&this->node, &hyp_shared_pfns);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+static int unshare_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (WARN_ON(!this)) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ this->count--;
+ if (this->count)
+ goto unlock;
+
+ rb_erase(&this->node, &hyp_shared_pfns);
+ kfree(this);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+int kvm_share_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
int ret;
- for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
- __phys_to_pfn(addr));
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
+ /*
+ * The share hcall maps things in the 'fixed-offset' region of the hyp
+ * VA space, so we can only share physically contiguous data-structures
+ * for now.
+ */
+ if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
+ return -EINVAL;
+
+ if (kvm_host_owns_hyp_mappings())
+ return create_hyp_mappings(from, to, PAGE_HYP);
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
@@ -296,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
+void kvm_unshare_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
+
+ if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
+ return;
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ WARN_ON(unshare_pfn_hyp(pfn));
+ }
+}
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -316,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
- if (!kvm_host_owns_hyp_mappings()) {
- if (WARN_ON(prot != PAGE_HYP))
- return -EPERM;
- return pkvm_share_hyp(kvm_kaddr_to_phys(from),
- kvm_kaddr_to_phys(to));
- }
+ if (!kvm_host_owns_hyp_mappings())
+ return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -407,6 +525,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
unsigned long addr;
int ret;
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
*kaddr = ioremap(phys_addr, size);
if (!*kaddr)
return -ENOMEM;
@@ -516,7 +637,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
if (!pgt)
return -ENOMEM;
- err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
+ mmu->arch = &kvm->arch;
+ err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -529,7 +651,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
- mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
@@ -595,14 +716,14 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -650,6 +771,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
size += offset_in_page(guest_ipa);
guest_ipa &= PAGE_MASK;
@@ -1463,7 +1587,6 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -1473,25 +1596,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
- kvm_mmu_wp_memory_region(kvm, mem->slot);
+ kvm_mmu_wp_memory_region(kvm, new->id);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
+ hva_t hva, reg_end;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1624,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ reg_end = hva + (new->npages << PAGE_SHIFT);
+
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1661,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
break;
}