// SPDX-License-Identifier: GPL-2.0
/*
* A memslot-related performance benchmark.
*
* Copyright (C) 2021 Oracle and/or its affiliates.
*
* Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
*/
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#include <linux/compiler.h>
#include <linux/sizes.h>
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
/*
 * Guest physical memory layout used by the tests, as derived from the
 * definitions below:
 *
 *   [MEM_GPA, MEM_GPA + MEM_EXTRA_SIZE)            64 KiB "aux"/"sync" area
 *   [MEM_TEST_GPA, MEM_TEST_GPA + MEM_TEST_SIZE)   512 MiB test area
 *
 * MEM_AUX_GPA and MEM_SYNC_GPA are both aliases of MEM_GPA (256 MiB), and
 * MEM_SIZE covers the two areas together.
 *
 * NOTE(review): the sync area presumably holds struct sync_area; confirm
 * against the code that sets up the guest memory.
 */
#define MEM_EXTRA_SIZE SZ_64K
#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE)
#define MEM_GPA SZ_256M
#define MEM_AUX_GPA MEM_GPA
#define MEM_SYNC_GPA MEM_AUX_GPA
#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE)
#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
/*
 * 32 MiB is the max size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page, up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 *
 * MEM_SIZE_MAP additionally includes the MEM_EXTRA_SIZE area, whereas
 * MEM_TEST_MAP_SIZE is the 32 MiB test portion alone.
 */
#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE)
#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
/*
 * 128 MiB is the min size that fills 32k slots with at least one page in
 * each, while at the same time getting 100+ iterations in such a test.
 *
 * The 2 MiB chunk size matches a typical huge page.
 */
#define MEM_TEST_UNMAP_SIZE SZ_128M
#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M
/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * We have different number of memory slots, excluding the reserved
 * memory slot 0, on various architectures and configurations. The
 * memory size in this test is calculated by picking the maximal
 * last memory slot's memory size, with alignment to the largest
 * supported page size (64KB). In this way, the selected memory
 * size for this test is compatible with test_memslot_move_prepare().
 *
 * architecture   slots    memory-per-slot    memory-on-last-slot
 * --------------------------------------------------------------
 * x86-4KB        32763    16KB               160KB
 * arm64-4KB      32766    16KB               112KB
 * arm64-16KB     32766    16KB               112KB
 * arm64-64KB     8192     64KB               128KB
 *
 * The largest last-slot size in the table (160KB), rounded up to a
 * multiple of 64KB, is 192KB, i.e. 3 * SZ_64K.
 *
 * MEM_TEST_MOVE_GPA_DEST is the first guest physical address past the
 * [MEM_GPA, MEM_GPA + MEM_SIZE) region.
 */
#define MEM_TEST_MOVE_SIZE (3 * SZ_64K)
#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
/* The move test region must fit inside the test area. */
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
"invalid move test region size");
/*
 * Two distinct 64-bit marker values.
 * NOTE(review): their users are outside the visible part of the file;
 * presumably these are the patterns written to and checked in the test
 * memory. Confirm against the guest/host test code.
 */
#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
/*
 * Host-side state of the VM under test.
 *
 * NOTE(review): the memslot bookkeeping fields (nslots, npages,
 * pages_per_slot, hva_slots) are not used in the visible part of the file;
 * the comments on them are inferred from their names and need confirming.
 */
struct vm_data {
/* The VM and its single vCPU; vcpu_worker() runs this vCPU. */
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
/* NOTE(review): presumably the thread executing vcpu_worker(). */
pthread_t vcpu_thread;
/* NOTE(review): presumably the number of test memslots in use. */
uint32_t nslots;
/* NOTE(review): presumably the total page count and pages per memslot. */
uint64_t npages;
uint64_t pages_per_slot;
/* NOTE(review): presumably one host virtual address per memslot. */
void **hva_slots;
/* check_mmio_access() asserts this is set whenever an MMIO exit occurs. */
bool mmio_ok;
/* Inclusive guest physical address range in which MMIO exits are accepted. */
uint64_t mmio_gpa_min;
uint64_t mmio_gpa_max;
};
/*
 * Synchronization state built from atomic flags.
 *
 * NOTE(review): none of these fields is used in the visible part of the
 * file. Presumably this structure lives at MEM_SYNC_GPA and is shared
 * between the host and the guest; confirm against the setup code.
 */
struct sync_area {
/* NOTE(review): presumably the guest's page size in bytes. */
uint32_t guest_page_size;
/* NOTE(review): presumably start / exit / per-iteration sync signalling. */
atomic_bool start_flag;
atomic_bool exit_flag;
atomic_bool sync_flag;
/* NOTE(review): presumably the area accessed during the move test. */
void *move_area_ptr;
};
/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
/* A value of 2 means the atomic type is always lock-free. */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
/* Posted by vcpu_worker() on a guest sync ucall; waited on by wait_for_vcpu(). */
static sem_t vcpu_ready;
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * enables content verification in the map/unmap tests. Confirm.
 */
static bool map_unmap_verify;
#ifdef __x86_64__
/*
 * Only present on x86-64.
 * NOTE(review): presumably requests disabling a KVM slot-zapping quirk;
 * confirm against the code that consumes it.
 */
static bool disable_slot_zap_quirk;
#endif
/* When set, pr_info_v() forwards its arguments to pr_info(). */
static bool verbose;
/*
 * Verbose-only variant of pr_info(): the arguments are forwarded to
 * pr_info() when the file-scope 'verbose' flag is set, and the call is
 * a no-op otherwise. Expands to a single statement.
 */
#define pr_info_v(...)				\
	do {					\
		if (!verbose)			\
			break;			\
		pr_info(__VA_ARGS__);		\
	} while (0)
/*
 * Validate an MMIO exit of the test vCPU.
 *
 * Asserts that MMIO exits are currently expected (data->mmio_ok), that the
 * access is an 8-byte write, and that its physical address lies within
 * [data->mmio_gpa_min, data->mmio_gpa_max], both bounds inclusive.
 *
 * This function does not look at run->exit_reason; the visible caller,
 * vcpu_worker(), only invokes it for KVM_EXIT_MMIO.
 *
 * @data: VM state holding the MMIO expectations
 * @run:  kvm_run structure describing the exit
 */
static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
/* Any MMIO exit is a failure unless the test has announced it. */
TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
/* Only 8-byte writes are accepted. */
TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
TEST_ASSERT(run->mmio.len == 8,
"Unexpected exit mmio size = %u", run->mmio.len);
/* The faulting address must fall inside the expected window. */
TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
run->mmio.phys_addr <= data->mmio_gpa_max,
"Unexpected exit mmio address = 0x%llx",
run->mmio.phys_addr);
}
/*
 * Thread entry point that runs the test vCPU until the guest is done.
 *
 * After every vcpu_run() the pending ucall decides what happens next:
 *   UCALL_SYNC  - args[1] must be 0; vcpu_ready is posted and the vCPU
 *                 is resumed.
 *   UCALL_NONE  - an MMIO exit is validated by check_mmio_access() and
 *                 the vCPU is resumed; any other exit ends the thread.
 *   UCALL_ABORT - the guest assertion is reported via
 *                 REPORT_GUEST_ASSERT().
 *   UCALL_DONE  - ends the thread.
 * Any other ucall fails the test.
 *
 * @__data: the struct vm_data of the VM whose vCPU is to be run
 *
 * Return: always NULL.
 */
static void *vcpu_worker(void *__data)
{
	struct vm_data *vmd = __data;
	struct kvm_vcpu *vcpu = vmd->vcpu;
	struct kvm_run *run_state = vcpu->run;
	struct ucall uc;

	for (;;) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_DONE:
			return NULL;
		case UCALL_NONE:
			/* No ucall pending: only an MMIO exit keeps us going. */
			if (run_state->exit_reason != KVM_EXIT_MMIO)
				return NULL;
			check_mmio_access(vmd, run_state);
			break;
		case UCALL_SYNC:
			TEST_ASSERT(uc.args[1] == 0,
				    "Unexpected sync ucall, got %lx",
				    (ulong)uc.args[1]);
			/* Let wait_for_vcpu() know the guest reached the sync point. */
			sem_post(&vcpu_ready);
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}
}
/*
 * Wait for the vCPU thread to post vcpu_ready, for at most two seconds.
 *
 * sem_timedwait() takes an absolute deadline, so the current
 * CLOCK_REALTIME time is read first and the timeout added to it.
 * A failure of either call, which includes the wait timing out,
 * trips a test assertion.
 */
static void wait_for_vcpu(void)
{
	enum { VCPU_READY_TIMEOUT_SEC = 2 };
	struct timespec ts;

	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
		    "clock_gettime() failed: %d", errno);

	ts.tv_sec = ts.tv_sec + VCPU_READY_TIMEOUT_SEC;

	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
		    "sem_timedwait() failed: %d", errno);
}
static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
uint64_t gpage, pgoffs;
uint32_t slot, slotoffs;
void