diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 14:47:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 14:47:04 -0700 |
| commit | a5b3d8660b049779880c790549ff3fef02f6922c (patch) | |
| tree | df07a0fd239a926a8713d22325497ac46bebd745 /drivers/hv | |
| parent | dce3ab4c57e662ae019c22e7c2f2aa887617beae (diff) | |
| parent | 628cc040b3a2980df6032766e8ef0688e981ab95 (diff) | |
| download | linux-a5b3d8660b049779880c790549ff3fef02f6922c.tar.gz linux-a5b3d8660b049779880c790549ff3fef02f6922c.tar.bz2 linux-a5b3d8660b049779880c790549ff3fef02f6922c.zip | |
Merge tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:
- Add support for running as the root partition in Hyper-V (Microsoft
Hypervisor) by exposing /dev/mshv (Nuno and various people)
- Add support for CPU offlining in Hyper-V (Hamza Mahfooz)
- Misc fixes and cleanups (Roman Kisel, Tianyu Lan, Wei Liu, Michael
Kelley, Thorsten Blum)
* tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
x86/hyperv: fix an indentation issue in mshyperv.h
x86/hyperv: Add comments about hv_vpset and var size hypercall input args
Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
hyperv: Add definitions for root partition driver to hv headers
x86: hyperv: Add mshv_handler() irq handler and setup function
Drivers: hv: Introduce per-cpu event ring tail
Drivers: hv: Export some functions for use by root partition module
acpi: numa: Export node_to_pxm()
hyperv: Introduce hv_recommend_using_aeoi()
arm64/hyperv: Add some missing functions to arm64
x86/mshyperv: Add support for extended Hyper-V features
hyperv: Log hypercall status codes as strings
x86/hyperv: Fix check of return value from snp_set_vmsa()
x86/hyperv: Add VTL mode callback for restarting the system
x86/hyperv: Add VTL mode emergency restart callback
hyperv: Remove unused union and structs
hyperv: Add CONFIG_MSHV_ROOT to gate root partition support
hyperv: Change hv_root_partition into a function
hyperv: Convert hypercall statuses to linux error codes
drivers/hv: add CPU offlining support
...
Diffstat (limited to 'drivers/hv')
| -rw-r--r-- | drivers/hv/Kconfig | 17 | ||||
| -rw-r--r-- | drivers/hv/Makefile | 4 | ||||
| -rw-r--r-- | drivers/hv/hv.c | 94 | ||||
| -rw-r--r-- | drivers/hv/hv_common.c | 198 | ||||
| -rw-r--r-- | drivers/hv/hv_proc.c | 195 | ||||
| -rw-r--r-- | drivers/hv/mshv.h | 30 | ||||
| -rw-r--r-- | drivers/hv/mshv_common.c | 161 | ||||
| -rw-r--r-- | drivers/hv/mshv_eventfd.c | 833 | ||||
| -rw-r--r-- | drivers/hv/mshv_eventfd.h | 71 | ||||
| -rw-r--r-- | drivers/hv/mshv_irq.c | 124 | ||||
| -rw-r--r-- | drivers/hv/mshv_portid_table.c | 83 | ||||
| -rw-r--r-- | drivers/hv/mshv_root.h | 311 | ||||
| -rw-r--r-- | drivers/hv/mshv_root_hv_call.c | 849 | ||||
| -rw-r--r-- | drivers/hv/mshv_root_main.c | 2307 | ||||
| -rw-r--r-- | drivers/hv/mshv_synic.c | 665 | ||||
| -rw-r--r-- | drivers/hv/vmbus_drv.c | 54 |
16 files changed, 5927 insertions, 69 deletions
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 862c47b191af..6c1416167bd2 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -55,4 +55,21 @@ config HYPERV_BALLOON help Select this option to enable Hyper-V Balloon driver. +config MSHV_ROOT + tristate "Microsoft Hyper-V root partition support" + depends on HYPERV && (X86_64 || ARM64) + depends on !HYPERV_VTL_MODE + # The hypervisor interface operates on 4k pages. Enforcing it here + # simplifies many assumptions in the root partition code. + # e.g. When withdrawing memory, the hypervisor gives back 4k pages in + # no particular order, making it impossible to reassemble larger pages + depends on PAGE_SIZE_4KB + select EVENTFD + default n + help + Select this option to enable support for booting and running as root + partition on Microsoft Hyper-V. + + If unsure, say N. + endmenu diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index b992c0ed182b..976189c725dc 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_HYPERV) += hv_vmbus.o obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o +obj-$(CONFIG_MSHV_ROOT) += mshv_root.o CFLAGS_hv_trace.o = -I$(src) CFLAGS_hv_balloon.o = -I$(src) @@ -11,6 +12,9 @@ hv_vmbus-y := vmbus_drv.o \ channel_mgmt.o ring_buffer.o hv_trace.o hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o +mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ + mshv_root_hv_call.o mshv_portid_table.o # Code that must be built-in obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o +obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 36d9ba097ff5..308c8f279df8 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -144,7 +144,7 @@ int hv_synic_alloc(void) * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. */ - if (!ms_hyperv.paravisor_present && !hv_root_partition) { + if (!ms_hyperv.paravisor_present && !hv_root_partition()) { hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); if (!hv_cpu->synic_message_page) { @@ -272,7 +272,7 @@ void hv_synic_enable_regs(unsigned int cpu) simp.as_uint64 = hv_get_msr(HV_MSR_SIMP); simp.simp_enabled = 1; - if (ms_hyperv.paravisor_present || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition()) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -291,7 +291,7 @@ void hv_synic_enable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP); siefp.siefp_enabled = 1; - if (ms_hyperv.paravisor_present || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition()) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -313,17 +313,7 @@ void hv_synic_enable_regs(unsigned int cpu) shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; - - /* - * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), - * it doesn't provide a recommendation flag and AEOI must be disabled. - */ -#ifdef HV_DEPRECATING_AEOI_RECOMMENDED - shared_sint.auto_eoi = - !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); -#else - shared_sint.auto_eoi = 0; -#endif + shared_sint.auto_eoi = hv_recommend_using_aeoi(); hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); /* Enable the global synic bit */ @@ -367,7 +357,7 @@ void hv_synic_disable_regs(unsigned int cpu) * addresses. */ simp.simp_enabled = 0; - if (ms_hyperv.paravisor_present || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition()) { iounmap(hv_cpu->synic_message_page); hv_cpu->synic_message_page = NULL; } else { @@ -379,7 +369,7 @@ void hv_synic_disable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP); siefp.siefp_enabled = 0; - if (ms_hyperv.paravisor_present || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition()) { iounmap(hv_cpu->synic_event_page); hv_cpu->synic_event_page = NULL; } else { @@ -433,13 +423,47 @@ retry: return pending; } +static int hv_pick_new_cpu(struct vmbus_channel *channel) +{ + int ret = -EBUSY; + int start; + int cpu; + + lockdep_assert_cpus_held(); + lockdep_assert_held(&vmbus_connection.channel_mutex); + + /* + * We can't assume that the relevant interrupts will be sent before + * the cpu is offlined on older versions of hyperv. + */ + if (vmbus_proto_version < VERSION_WIN10_V5_3) + return -EBUSY; + + start = get_random_u32_below(nr_cpu_ids); + + for_each_cpu_wrap(cpu, cpu_online_mask, start) { + if (channel->target_cpu == cpu || + channel->target_cpu == VMBUS_CONNECT_CPU) + continue; + + ret = vmbus_channel_set_cpu(channel, cpu); + if (!ret) + break; + } + + if (ret) + ret = vmbus_channel_set_cpu(channel, VMBUS_CONNECT_CPU); + + return ret; +} + /* * hv_synic_cleanup - Cleanup routine for hv_synic_init(). */ int hv_synic_cleanup(unsigned int cpu) { struct vmbus_channel *channel, *sc; - bool channel_found = false; + int ret = 0; if (vmbus_connection.conn_state != CONNECTED) goto always_cleanup; @@ -456,38 +480,34 @@ int hv_synic_cleanup(unsigned int cpu) /* * Search for channels which are bound to the CPU we're about to - * cleanup. In case we find one and vmbus is still connected, we - * fail; this will effectively prevent CPU offlining. - * - * TODO: Re-bind the channels to different CPUs. + * cleanup. */ mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->target_cpu == cpu) { - channel_found = true; - break; + ret = hv_pick_new_cpu(channel); + if (ret) { + mutex_unlock(&vmbus_connection.channel_mutex); + return ret; + } } list_for_each_entry(sc, &channel->sc_list, sc_list) { if (sc->target_cpu == cpu) { - channel_found = true; - break; + ret = hv_pick_new_cpu(sc); + if (ret) { + mutex_unlock(&vmbus_connection.channel_mutex); + return ret; + } } } - if (channel_found) - break; } mutex_unlock(&vmbus_connection.channel_mutex); - if (channel_found) - return -EBUSY; - /* - * channel_found == false means that any channels that were previously - * assigned to the CPU have been reassigned elsewhere with a call of - * vmbus_send_modifychannel(). Scan the event flags page looking for - * bits that are set and waiting with a timeout for vmbus_chan_sched() - * to process such bits. If bits are still set after this operation - * and VMBus is connected, fail the CPU offlining operation. + * Scan the event flags page looking for bits that are set and waiting + * with a timeout for vmbus_chan_sched() to process such bits. If bits + * are still set after this operation and VMBus is connected, fail the + * CPU offlining operation. */ if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending()) return -EBUSY; @@ -497,5 +517,5 @@ always_cleanup: hv_synic_disable_regs(cpu); - return 0; + return ret; } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index f2e6f55d6ca6..b3b11be11650 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -31,8 +31,14 @@ #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> +u64 hv_current_partition_id = HV_PARTITION_ID_SELF; +EXPORT_SYMBOL_GPL(hv_current_partition_id); + +enum hv_partition_type hv_curr_partition_type; +EXPORT_SYMBOL_GPL(hv_curr_partition_type); + /* - * hv_root_partition, ms_hyperv and hv_nested are defined here with other + * ms_hyperv and hv_nested are defined here with other * Hyper-V specific globals so they are shared across all architectures and are * built only when CONFIG_HYPERV is defined. But on x86, * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not @@ -40,9 +46,6 @@ * here, allowing for an overriding definition in the module containing * ms_hyperv_init_platform(). */ -bool __weak hv_root_partition; -EXPORT_SYMBOL_GPL(hv_root_partition); - bool __weak hv_nested; EXPORT_SYMBOL_GPL(hv_nested); @@ -66,6 +69,16 @@ static void hv_kmsg_dump_unregister(void); static struct ctl_table_header *hv_ctl_table_hdr; /* + * Per-cpu array holding the tail pointer for the SynIC event ring buffer + * for each SINT. + * + * We cannot maintain this in mshv driver because the tail pointer should + * persist even if the mshv driver is unloaded. + */ +u8 * __percpu *hv_synic_eventring_tail; +EXPORT_SYMBOL_GPL(hv_synic_eventring_tail); + +/* * Hyper-V specific initialization and shutdown code that is * common across all architectures. Called from architecture * specific initialization functions. @@ -87,6 +100,9 @@ void __init hv_common_free(void) free_percpu(hyperv_pcpu_input_arg); hyperv_pcpu_input_arg = NULL; + + free_percpu(hv_synic_eventring_tail); + hv_synic_eventring_tail = NULL; } /* @@ -280,7 +296,26 @@ static void hv_kmsg_dump_register(void) static inline bool hv_output_page_exists(void) { - return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); + return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); +} + +void __init hv_get_partition_id(void) +{ + struct hv_output_get_partition_id *output; + unsigned long flags; + u64 status, pt_id; + + local_irq_save(flags); + output = *this_cpu_ptr(hyperv_pcpu_input_arg); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + pt_id = output->partition_id; + local_irq_restore(flags); + + if (hv_result_success(status)) + hv_current_partition_id = pt_id; + else + pr_err("Hyper-V: failed to get partition ID: %#x\n", + hv_result(status)); } int __init hv_common_init(void) @@ -350,6 +385,11 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_output_arg); } + if (hv_root_partition()) { + hv_synic_eventring_tail = alloc_percpu(u8 *); + BUG_ON(!hv_synic_eventring_tail); + } + hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index), GFP_KERNEL); if (!hv_vp_index) { @@ -438,11 +478,12 @@ error: int hv_common_cpu_init(unsigned int cpu) { void **inputarg, **outputarg; + u8 **synic_eventring_tail; u64 msr_vp_index; gfp_t flags; const int pgcount = hv_output_page_exists() ? 2 : 1; void *mem; - int ret; + int ret = 0; /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; @@ -450,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu) inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); /* - * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already - * allocated if this CPU was previously online and then taken offline + * The per-cpu memory is already allocated if this CPU was previously + * online and then taken offline */ if (!*inputarg) { mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); @@ -498,11 +539,21 @@ int hv_common_cpu_init(unsigned int cpu) if (msr_vp_index > hv_max_vp_index) hv_max_vp_index = msr_vp_index; - return 0; + if (hv_root_partition()) { + synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail); + *synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, + sizeof(u8), flags); + /* No need to unwind any of the above on failure here */ + if (unlikely(!*synic_eventring_tail)) + ret = -ENOMEM; + } + + return ret; } int hv_common_cpu_die(unsigned int cpu) { + u8 **synic_eventring_tail; /* * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg @@ -515,6 +566,10 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + return 0; } @@ -572,7 +627,7 @@ EXPORT_SYMBOL_GPL(hv_setup_dma_ops); bool hv_is_hibernation_supported(void) { - return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); + return !hv_root_partition() && acpi_sleep_state_supported(ACPI_STATE_S4); } EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); @@ -625,6 +680,11 @@ void __weak hv_remove_vmbus_handler(void) } EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); +void __weak hv_setup_mshv_handler(void (*handler)(void)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_mshv_handler); + void __weak hv_setup_kexec_handler(void (*handler)(void)) { } @@ -661,3 +721,121 @@ u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2) return HV_STATUS_INVALID_PARAMETER; } EXPORT_SYMBOL_GPL(hv_tdx_hypercall); + +void hv_identify_partition_type(void) +{ + /* Assume guest role */ + hv_curr_partition_type = HV_PARTITION_TYPE_GUEST; + /* + * Check partition creation and cpu management privileges + * + * Hyper-V should never specify running as root and as a Confidential + * VM. But to protect against a compromised/malicious Hyper-V trying + * to exploit root behavior to expose Confidential VM memory, ignore + * the root partition setting if also a Confidential VM. + */ + if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) && + (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && + !(ms_hyperv.priv_high & HV_ISOLATION)) { + pr_info("Hyper-V: running as root partition\n"); + if (IS_ENABLED(CONFIG_MSHV_ROOT)) + hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; + else + pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n"); + } +} + +struct hv_status_info { + char *string; + int errno; + u16 code; +}; + +/* + * Note on the errno mappings: + * A failed hypercall is usually only recoverable (or loggable) near + * the call site where the HV_STATUS_* code is known. So the errno + * it gets converted to is not too useful further up the stack. + * Provide a few mappings that could be useful, and revert to -EIO + * as a fallback. + */ +static const struct hv_status_info hv_status_infos[] = { +#define _STATUS_INFO(status, errno) { #status, (errno), (status) } + _STATUS_INFO(HV_STATUS_SUCCESS, 0), + _STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_PARAMETER, -EINVAL), + _STATUS_INFO(HV_STATUS_ACCESS_DENIED, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE, -EIO), + _STATUS_INFO(HV_STATUS_OPERATION_DENIED, -EIO), + _STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO), + _STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO), + _STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM), + _STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL), + _STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_PORT_ID, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID, -EINVAL), + _STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS, -EIO), + _STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_VP_STATE, -EIO), + _STATUS_INFO(HV_STATUS_NO_RESOURCES, -EIO), + _STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_LP_INDEX, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE, -EINVAL), + _STATUS_INFO(HV_STATUS_INVALID_LP_INDEX, -EIO), + _STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE, -EIO), + _STATUS_INFO(HV_STATUS_OPERATION_FAILED, -EIO), + _STATUS_INFO(HV_STATUS_TIME_OUT, -EIO), + _STATUS_INFO(HV_STATUS_CALL_PENDING, -EIO), + _STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED, -EIO), +#undef _STATUS_INFO +}; + +static inline const struct hv_status_info *find_hv_status_info(u64 hv_status) +{ + int i; + u16 code = hv_result(hv_status); + + for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) { + const struct hv_status_info *info = &hv_status_infos[i]; + + if (info->code == code) + return info; + } + + return NULL; +} + +/* Convert a hypercall result into a linux-friendly error code. */ +int hv_result_to_errno(u64 status) +{ + const struct hv_status_info *info; + + /* hv_do_hypercall() may return U64_MAX, hypercalls aren't possible */ + if (unlikely(status == U64_MAX)) + return -EOPNOTSUPP; + + info = find_hv_status_info(status); + if (info) + return info->errno; + + return -EIO; +} +EXPORT_SYMBOL_GPL(hv_result_to_errno); + +const char *hv_result_to_string(u64 status) +{ + const struct hv_status_info *info; + + if (unlikely(status == U64_MAX)) + return "Hypercall page missing!"; + + info = find_hv_status_info(status); + if (info) + return info->string; + + return "Unknown"; +} +EXPORT_SYMBOL_GPL(hv_result_to_string); diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c new file mode 100644 index 000000000000..7d7ecb6f6137 --- /dev/null +++ b/drivers/hv/hv_proc.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/clockchips.h> +#include <linux/slab.h> +#include <linux/cpuhotplug.h> +#include <linux/minmax.h> +#include <asm/mshyperv.h> + +/* + * See struct hv_deposit_memory. The first u64 is partition ID, the rest + * are GPAs. + */ +#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) + +/* Deposits exact number of pages. Must be called with interrupts enabled. */ +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) +{ + struct page **pages, *page; + int *counts; + int num_allocations; + int i, j, page_count; + int order; + u64 status; + int ret; + u64 base_pfn; + struct hv_deposit_memory *input_page; + unsigned long flags; + + if (num_pages > HV_DEPOSIT_MAX) + return -E2BIG; + if (!num_pages) + return 0; + + /* One buffer for page pointers and counts */ + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + pages = page_address(page); + + counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); + if (!counts) { + free_page((unsigned long)pages); + return -ENOMEM; + } + + /* Allocate all the pages before disabling interrupts */ + i = 0; + + while (num_pages) { + /* Find highest order we can actually allocate */ + order = 31 - __builtin_clz(num_pages); + + while (1) { + pages[i] = alloc_pages_node(node, GFP_KERNEL, order); + if (pages[i]) + break; + if (!order) { + ret = -ENOMEM; + num_allocations = i; + goto err_free_allocations; + } + --order; + } + + split_page(pages[i], order); + counts[i] = 1 << order; + num_pages -= counts[i]; + i++; + } + num_allocations = i; + + local_irq_save(flags); + + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input_page->partition_id = partition_id; + + /* Populate gpa_page_list - these will fit on the input page */ + for (i = 0, page_count = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j, ++page_count) + input_page->gpa_page_list[page_count] = base_pfn + j; + } + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, + page_count, 0, input_page, NULL); + local_irq_restore(flags); + if (!hv_result_success(status)) { + hv_status_err(status, "\n"); + ret = hv_result_to_errno(status); + goto err_free_allocations; + } + + ret = 0; + goto free_buf; + +err_free_allocations: + for (i = 0; i < num_allocations; ++i) { + base_pfn = page_to_pfn(pages[i]); + for (j = 0; j < counts[i]; ++j) + __free_page(pfn_to_page(base_pfn + j)); + } + +free_buf: + free_page((unsigned long)pages); + kfree(counts); + return ret; +} +EXPORT_SYMBOL_GPL(hv_call_deposit_pages); + +int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) +{ + struct hv_input_add_logical_processor *input; + struct hv_output_add_logical_processor *output; + u64 status; + unsigned long flags; + int ret = 0; + + /* + * When adding a logical processor, the hypervisor may return + * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more + * pages and retry. + */ + do { + local_irq_save(flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* We don't do anything with the output right now */ + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + input->lp_index = lp_index; + input->apic_id = apic_id; + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); + status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, + input, output); + local_irq_restore(flags); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { + hv_status_err(status, "cpu %u apic ID: %u\n", + lp_index, apic_id); + ret = hv_result_to_errno(status); + } + break; + } + ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); + } while (!ret); + + return ret; +} + +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) +{ + struct hv_create_vp *input; + u64 status; + unsigned long irq_flags; + int ret = 0; + + /* Root VPs don't seem to need pages deposited */ + if (partition_id != hv_current_partition_id) { + /* The value 90 is empirically determined. It may change. */ + ret = hv_call_deposit_pages(node, partition_id, 90); + if (ret) + return ret; + } + + do { + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input->partition_id = partition_id; + input->vp_index = vp_index; + input->flags = flags; + input->subnode_type = HV_SUBNODE_ANY; + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); + local_irq_restore(irq_flags); + + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_success(status)) { + hv_status_err(status, "vcpu: %u, lp: %u\n", + vp_index, flags); + ret = hv_result_to_errno(status); + } + break; + } + ret = hv_call_deposit_pages(node, partition_id, 1); + + } while (!ret); + + return ret; +} +EXPORT_SYMBOL_GPL(hv_call_create_vp); diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h new file mode 100644 index 000000000000..0340a67acd0a --- /dev/null +++ b/drivers/hv/mshv.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Microsoft Corporation. + */ + +#ifndef _MSHV_H_ +#define _MSHV_H_ + +#include <linux/stddef.h> +#include <linux/string.h> +#include <hyperv/hvhdk.h> + +#define mshv_field_nonzero(STRUCT, MEMBER) \ + memchr_inv(&((STRUCT).MEMBER), \ + 0, sizeof_field(typeof(STRUCT), MEMBER)) + +int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count, + union hv_input_vtl input_vtl, + struct hv_register_assoc *registers); + +int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count, + union hv_input_vtl input_vtl, + struct hv_register_assoc *registers); + +int hv_call_get_partition_property(u64 partition_id, u64 property_code, + u64 *property_value); + +int mshv_do_pre_guest_mode_work(ulong th_flags); + +#endif /* _MSHV_H */ diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c new file mode 100644 index 000000000000..2575e6d7a71f --- /dev/null +++ b/drivers/hv/mshv_common.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. + * + * This file contains functions that will be called from one or more modules. + * If any of these modules are configured to build, this file is built and just + * statically linked in. + * + * Authors: Microsoft Linux virtualization team + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/mshyperv.h> +#include <linux/resume_user_mode.h> + +#include "mshv.h" + +#define HV_GET_REGISTER_BATCH_SIZE \ + (HV_HYP_PAGE_SIZE / sizeof(union hv_register_value)) +#define HV_SET_REGISTER_BATCH_SIZE \ + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers)) \ + / sizeof(struct hv_register_assoc)) + +int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count, + union hv_input_vtl input_vtl, + struct hv_register_assoc *registers) +{ + struct hv_input_get_vp_registers *input_page; + union hv_register_value *output_page; + u16 completed = 0; + unsigned long remaining = count; + int rep_count, i; + u64 status = HV_STATUS_SUCCESS; + unsigned long flags; + + local_irq_save(flags); + + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); + + input_page->partition_id = partition_id; + input_page->vp_index = vp_index; + input_page->input_vtl.as_uint8 = input_vtl.as_uint8; + input_page->rsvd_z8 = 0; + input_page->rsvd_z16 = 0; + + while (remaining) { + rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE); + for (i = 0; i < rep_count; ++i) + input_page->names[i] = registers[i].name; + + status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, rep_count, + 0, input_page, output_page); + if (!hv_result_success(status)) + break; + + completed = hv_repcomp(status); + for (i = 0; i < completed; ++i) + registers[i].value = output_page[i]; + + registers += completed; + remaining -= completed; + } + local_irq_restore(flags); + + return hv_result_to_errno(status); +} +EXPORT_SYMBOL_GPL(hv_call_get_vp_registers); + +int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count, + union hv_input_vtl input_vtl, + struct hv_register_assoc *registers) +{ + struct hv_input_set_vp_registers *input_page; + u16 completed = 0; + unsigned long remaining = count; + int rep_count; + u64 status = HV_STATUS_SUCCESS; + unsigned long flags; + + local_irq_save(flags); + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); + + input_page->partition_id = partition_id; + input_page->vp_index = vp_index; + input_page->input_vtl.as_uint8 = input_vtl.as_uint8; + input_page->rsvd_z8 = 0; + input_page->rsvd_z16 = 0; + + while (remaining) { + rep_count = min(remaining, HV_SET_REGISTER_BATCH_SIZE); + memcpy(input_page->elements, registers, + sizeof(struct hv_register_assoc) * rep_count); + + status = hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, rep_count, + 0, input_page, NULL); + if (!hv_result_success(status)) + break; + + completed = hv_repcomp(status); + registers += completed; + remaining -= completed; + } + + local_irq_restore(flags); + + return hv_result_to_errno(status); +} +EXPORT_SYMBOL_GPL(hv_call_set_vp_registers); + +int hv_call_get_partition_property(u64 partition_id, + u64 property_code, + u64 *property_value) +{ + u64 status; + unsigned long flags; + struct hv_input_get_partition_property *input; + struct hv_output_get_partition_property *output; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = partition_id; + input->property_code = property_code; + status = hv_do_hypercall(HVCALL_GET_PARTITION_PROPERTY, input, output); + + if (!hv_result_success(status)) { + local_irq_restore(flags); + return hv_result_to_errno(status); + } + *property_value = output->property_value; + + local_irq_restore(flags); + + return 0; |
