// SPDX-License-Identifier: GPL-2.0
/*
* Core of Xen paravirt_ops implementation.
*
* This file contains the xen_paravirt_ops structure itself, and the
* implementations for:
* - privileged instructions
* - interrupt flags
* - segment operations
* - booting and setup
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/kstrtox.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
#include <linux/reboot.h>
#include <linux/virtio_anchor.h>
#include <linux/stackprotector.h>
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/memory.h>
#include <xen/interface/nmi.h>
#include <xen/interface/xen-mca.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/hypervisor.h>
#include <asm/mach_traps.h>
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
#include <asm/acpi.h>
#include <acpi/pdc_intel.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#endif
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
#include "pmu.h"
#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
struct tls_descs {
struct desc_struct desc[3];
};
/*
* Updating the 3 TLS descriptors in the GDT on every task switch is
* surprisingly expensive so we avoid updating them if they haven't
* changed. Since Xen writes different descriptors than the one
* passed in the update_descriptor hypercall we keep shadow copies to
* compare against.
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
static int __init parse_xen_msr_safe(char *str)
{
if (str)
return kstrtobool(str, &xen_msr_safe);
return -EINVAL;
}
early_param("xen_msr_safe", parse_xen_msr_safe);
/* Get MTRR settings from Xen and put them into mtrr_state. */
static void __init xen_set_mtrr_data(void)
{
#ifdef CONFIG_MTRR
struct xen_platform_op op = {
.cmd = XENPF_read_memtype,
.interface_version = XENPF_INTERFACE_VERSION,
};
unsigned int reg;
unsigned long mask;
uint32_t eax, width;
static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata;
/* Get physical address width (only 64-bit cpus supported). */
width = 36;
eax = cpuid_eax(0x80000000);
if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
eax = cpuid_eax(0x80000008);
width = eax & 0xff;
}
for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) {
op.u.read_memtype.reg = reg;
if (HYPERVISOR_platform_op(&op))
break;
/*
* Only called in dom0, which has all RAM PFNs mapped at
* RAM MFNs, and all PCI space etc. is identity mapped.
* This means we can treat MFN == PFN regarding MTRR settings.
*/
var[reg].base_lo = op.u.read_memtype.type;
var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT;
var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT);
mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1);
mask &= (1UL << width) - 1;
if (mask)
mask |= MTRR_PHYSMASK_V;
var[reg].mask_lo = mask;
var[reg].mask_hi = mask >> 32;
}
/* Only overwrite MTRR state if any MTRR could be got from Xen. */
if (reg)
mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
#endif
}
static void __init xen_pv_init_platform(void)
{
/* PV guests can't operate virtio devices without grants. */
if (IS_ENABLED(CONFIG_XEN_VIRTIO))
virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
xen_vcpu_info_reset(0);
/* pvclock is in shared info area */
xen_init_time_ops();
if (xen_initial_domain())
xen_set_mtrr_data();
else
mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
/* Setup shared vcpu info for non-smp configurations */
xen_setup_vcpu_info_placement();
#endif
}
static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
unsigned maskebx = ~0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
switch (*ax) {
case CPUID_MWAIT_LEAF:
/* Synthesize the values.. */
*ax = 0;
*bx = 0;
*cx = cpuid_leaf5_ecx_val;
*dx = cpuid_leaf5_edx_val;
return;
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
break;
}
asm(XEN_EMULATE_PREFIX "cpuid"
: "=a" (*ax),
"=b" (*bx),
"=c" (*cx),
"=d" (*dx)
: "0" (*ax), "2" (*cx));
*bx &= maskebx;
}
static bool __init xen_check_mwait(void)
{
#ifdef CONFIG_ACPI
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.u.set_pminfo.id = -1,
.u.set_pminfo.type = XEN_PM_PDC,
};
uint32_t buf[3];
unsigned int ax, bx, cx, dx;
unsigned int mwait_mask;
/* We need to determine whether it is OK to expose the MWAIT
* capability to the kernel to harvest deeper than C3 states from ACPI
* _CST using the processor_harvest_xen.c module. For this to work, we
* need to gather the MWAIT_LEAF values (which the cstate.c code
* checks against). The hypervisor won't expose the MWAIT flag because
* it would break
|