diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-27 17:17:12 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-27 17:17:12 -0700 |
| commit | da46b58ff884146f6153064f18d276806f3c114c (patch) | |
| tree | cdf15e3e00571771fb337dea79d3cf474efeafc4 /drivers | |
| parent | 8ccd54fe45713cd458015b5b08d6098545e70543 (diff) | |
| parent | a494aef23dfc732945cb42e22246a5c31174e4a5 (diff) | |
| download | linux-da46b58ff884146f6153064f18d276806f3c114c.tar.gz linux-da46b58ff884146f6153064f18d276806f3c114c.tar.bz2 linux-da46b58ff884146f6153064f18d276806f3c114c.zip | |
Merge tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:
- PCI passthrough for Hyper-V confidential VMs (Michael Kelley)
- Hyper-V VTL mode support (Saurabh Sengar)
- Move panic report initialization code earlier (Long Li)
- Various improvements and bug fixes (Dexuan Cui and Michael Kelley)
* tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (22 commits)
PCI: hv: Replace retarget_msi_interrupt_params with hyperv_pcpu_input_arg
Drivers: hv: move panic report code from vmbus to hv early init code
x86/hyperv: VTL support for Hyper-V
Drivers: hv: Kconfig: Add HYPERV_VTL_MODE
x86/hyperv: Make hv_get_nmi_reason public
x86/hyperv: Add VTL specific structs and hypercalls
x86/init: Make get/set_rtc_noop() public
x86/hyperv: Exclude lazy TLB mode CPUs from enlightened TLB flushes
x86/hyperv: Add callback filter to cpumask_to_vpset()
Drivers: hv: vmbus: Remove the per-CPU post_msg_page
clocksource: hyper-v: make sure Invariant-TSC is used if it is available
PCI: hv: Enable PCI pass-thru devices in Confidential VMs
Drivers: hv: Don't remap addresses that are above shared_gpa_boundary
hv_netvsc: Remove second mapping of send and recv buffers
Drivers: hv: vmbus: Remove second way of mapping ring buffers
Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages
swiotlb: Remove bounce buffer remapping for Hyper-V
Driver: VMBus: Add Devicetree support
dt-bindings: bus: Add Hyper-V VMBus
Drivers: hv: vmbus: Convert acpi_device to more generic platform_device
...
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/clocksource/hyperv_timer.c | 21 | ||||
| -rw-r--r-- | drivers/hv/Kconfig | 30 | ||||
| -rw-r--r-- | drivers/hv/channel_mgmt.c | 2 | ||||
| -rw-r--r-- | drivers/hv/connection.c | 113 | ||||
| -rw-r--r-- | drivers/hv/hv.c | 79 | ||||
| -rw-r--r-- | drivers/hv/hv_common.c | 242 | ||||
| -rw-r--r-- | drivers/hv/hyperv_vmbus.h | 6 | ||||
| -rw-r--r-- | drivers/hv/ring_buffer.c | 62 | ||||
| -rw-r--r-- | drivers/hv/vmbus_drv.c | 312 | ||||
| -rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 2 | ||||
| -rw-r--r-- | drivers/net/hyperv/netvsc.c | 48 | ||||
| -rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 280 |
12 files changed, 614 insertions, 583 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index c0cef92b12b8..bcd9042a0c9f 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -49,7 +49,7 @@ static bool direct_mode_enabled; static int stimer0_irq = -1; static int stimer0_message_sint; -static DEFINE_PER_CPU(long, stimer0_evt); +static __maybe_unused DEFINE_PER_CPU(long, stimer0_evt); /* * Common code for stimer0 interrupts coming via Direct Mode or @@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(hv_stimer0_isr); * stimer0 interrupt handler for architectures that support * per-cpu interrupts, which also implies Direct Mode. */ -static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) +static irqreturn_t __maybe_unused hv_stimer0_percpu_isr(int irq, void *dev_id) { hv_stimer0_isr(); return IRQ_HANDLED; @@ -196,6 +196,7 @@ void __weak hv_remove_stimer0_handler(void) { }; +#ifdef CONFIG_ACPI /* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ static int hv_setup_stimer0_irq(void) { @@ -230,6 +231,16 @@ static void hv_remove_stimer0_irq(void) stimer0_irq = -1; } } +#else +static int hv_setup_stimer0_irq(void) +{ + return 0; +} + +static void hv_remove_stimer0_irq(void) +{ +} +#endif /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ int hv_stimer_alloc(bool have_percpu_irqs) @@ -506,9 +517,6 @@ static bool __init hv_init_tsc_clocksource(void) { union hv_reference_tsc_msr tsc_msr; - if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) - return false; - /* * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly * handles frequency and offset changes due to live migration, @@ -525,6 +533,9 @@ static bool __init hv_init_tsc_clocksource(void) hyperv_cs_msr.rating = 250; } + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return false; + hv_read_reference_counter = read_hv_clock_tsc; /* diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 0747a8f1fcee..00242107d62e 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -4,15 +4,39 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ - || (ARM64 && !CPU_BIG_ENDIAN)) + depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ + || (ACPI && ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 - select VMAP_PFN + select OF_EARLY_FLATTREE if OF help Select this option to run Linux as a Hyper-V client operating system. +config HYPERV_VTL_MODE + bool "Enable Linux to boot in VTL context" + depends on X86_64 && HYPERV + default n + help + Virtual Secure Mode (VSM) is a set of hypervisor capabilities and + enlightenments offered to host and guest partitions which enables + the creation and management of new security boundaries within + operating system software. + + VSM achieves and maintains isolation through Virtual Trust Levels + (VTLs). Virtual Trust Levels are hierarchical, with higher levels + being more privileged than lower levels. VTL0 is the least privileged + level, and currently only other level supported is VTL2. + + Select this option to build a Linux kernel to run at a VTL other than + the normal VTL0, which currently is only VTL2. This option + initializes the x86 platform for VTL2, and adds the ability to boot + secondary CPUs directly into 64-bit context as required for VTLs other + than 0. A kernel built with this option must run at VTL2, and will + not run as a normal guest. + + If unsure, say N + config HYPERV_TIMER def_bool HYPERV && X86 diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index cc23b90cae02..007f26d5f1a4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = { { .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false, - .allowed_in_isolated = false, + .allowed_in_isolated = true, }, /* Synthetic Frame Buffer */ diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index da51b50787df..5978e9dbc286 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -104,8 +104,14 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; } - msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0]; - msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1]; + /* + * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always + * bitwise OR it + */ + msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | + ms_hyperv.shared_gpa_boundary; + msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | + ms_hyperv.shared_gpa_boundary; msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); @@ -219,72 +225,27 @@ int vmbus_connect(void) * Setup the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ - vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page(); - vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page(); + vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page(); + vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page(); if ((vmbus_connection.monitor_pages[0] == NULL) || (vmbus_connection.monitor_pages[1] == NULL)) { ret = -ENOMEM; goto cleanup; } - vmbus_connection.monitor_pages_original[0] - = vmbus_connection.monitor_pages[0]; - vmbus_connection.monitor_pages_original[1] - = vmbus_connection.monitor_pages[1]; - vmbus_connection.monitor_pages_pa[0] - = virt_to_phys(vmbus_connection.monitor_pages[0]); - vmbus_connection.monitor_pages_pa[1] - = virt_to_phys(vmbus_connection.monitor_pages[1]); - - if (hv_is_isolation_supported()) { - ret = set_memory_decrypted((unsigned long) - vmbus_connection.monitor_pages[0], - 1); - ret |= set_memory_decrypted((unsigned long) - vmbus_connection.monitor_pages[1], - 1); - if (ret) - goto cleanup; - - /* - * Isolation VM with AMD SNP needs to access monitor page via - * address space above shared gpa boundary. - */ - if (hv_isolation_type_snp()) { - vmbus_connection.monitor_pages_pa[0] += - ms_hyperv.shared_gpa_boundary; - vmbus_connection.monitor_pages_pa[1] += - ms_hyperv.shared_gpa_boundary; - - vmbus_connection.monitor_pages[0] - = memremap(vmbus_connection.monitor_pages_pa[0], - HV_HYP_PAGE_SIZE, - MEMREMAP_WB); - if (!vmbus_connection.monitor_pages[0]) { - ret = -ENOMEM; - goto cleanup; - } - - vmbus_connection.monitor_pages[1] - = memremap(vmbus_connection.monitor_pages_pa[1], - HV_HYP_PAGE_SIZE, - MEMREMAP_WB); - if (!vmbus_connection.monitor_pages[1]) { - ret = -ENOMEM; - goto cleanup; - } - } - - /* - * Set memory host visibility hvcall smears memory - * and so zero monitor pages here. - */ - memset(vmbus_connection.monitor_pages[0], 0x00, - HV_HYP_PAGE_SIZE); - memset(vmbus_connection.monitor_pages[1], 0x00, - HV_HYP_PAGE_SIZE); + ret = set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[0], 1); + ret |= set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[1], 1); + if (ret) + goto cleanup; - } + /* + * Set_memory_decrypted() will change the memory contents if + * decryption occurs, so zero monitor pages here. + */ + memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); + memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); msginfo = kzalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_initiate_contact), @@ -376,31 +337,13 @@ void vmbus_disconnect(void) vmbus_connection.int_page = NULL; } - if (hv_is_isolation_supported()) { - /* - * memunmap() checks input address is ioremap address or not - * inside. It doesn't unmap any thing in the non-SNP CVM and - * so not check CVM type here. - */ - memunmap(vmbus_connection.monitor_pages[0]); - memunmap(vmbus_connection.monitor_pages[1]); - - set_memory_encrypted((unsigned long) - vmbus_connection.monitor_pages_original[0], - 1); - set_memory_encrypted((unsigned long) - vmbus_connection.monitor_pages_original[1], - 1); - } + set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1); + set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1); - hv_free_hyperv_page((unsigned long) - vmbus_connection.monitor_pages_original[0]); - hv_free_hyperv_page((unsigned long) - vmbus_connection.monitor_pages_original[1]); - vmbus_connection.monitor_pages_original[0] = - vmbus_connection.monitor_pages[0] = NULL; - vmbus_connection.monitor_pages_original[1] = - vmbus_connection.monitor_pages[1] = NULL; + hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]); + hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]); + vmbus_connection.monitor_pages[0] = NULL; + vmbus_connection.monitor_pages[1] = NULL; } /* diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 8b0dd8e5244d..de6708dbe0df 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -39,42 +39,6 @@ int hv_init(void) } /* - * Functions for allocating and freeing memory with size and - * alignment HV_HYP_PAGE_SIZE. These functions are needed because - * the guest page size may not be the same as the Hyper-V page - * size. We depend upon kmalloc() aligning power-of-two size - * allocations to the allocation size boundary, so that the - * allocated memory appears to Hyper-V as a page of the size - * it expects. - */ - -void *hv_alloc_hyperv_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); - - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL); - else - return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} - -void *hv_alloc_hyperv_zeroed_page(void) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - else - return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} - -void hv_free_hyperv_page(unsigned long addr) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - free_page(addr); - else - kfree((void *)addr); -} - -/* * hv_post_message - Post a message using the hypervisor message IPC. * * This involves a hypercall. @@ -84,14 +48,15 @@ int hv_post_message(union hv_connection_id connection_id, void *payload, size_t payload_size) { struct hv_input_post_message *aligned_msg; - struct hv_per_cpu_context *hv_cpu; + unsigned long flags; u64 status; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return -EMSGSIZE; - hv_cpu = get_cpu_ptr(hv_context.cpu_context); - aligned_msg = hv_cpu->post_msg_page; + local_irq_save(flags); + + aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); aligned_msg->connectionid = connection_id; aligned_msg->reserved = 0; aligned_msg->message_type = message_type; @@ -106,11 +71,7 @@ int hv_post_message(union hv_connection_id connection_id, status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL); - /* Preemption must remain disabled until after the hypercall - * so some other thread can't get scheduled onto this cpu and - * corrupt the per-cpu post_msg_page - */ - put_cpu_ptr(hv_cpu); + local_irq_restore(flags); return hv_result(status); } @@ -162,12 +123,6 @@ int hv_synic_alloc(void) goto err; } } - - hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->post_msg_page == NULL) { - pr_err("Unable to allocate post msg page\n"); - goto err; - } } return 0; @@ -190,7 +145,6 @@ void hv_synic_free(void) free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); - free_page((unsigned long)hv_cpu->post_msg_page); } kfree(hv_context.hv_numa_map); @@ -217,11 +171,13 @@ void hv_synic_enable_regs(unsigned int cpu) simp.simp_enabled = 1; if (hv_isolation_type_snp() || hv_root_partition) { + /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & + ~ms_hyperv.shared_gpa_boundary; hv_cpu->synic_message_page - = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, - HV_HYP_PAGE_SIZE, MEMREMAP_WB); + = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_message_page) - pr_err("Fail to map syinc message page.\n"); + pr_err("Fail to map synic message page.\n"); } else { simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) >> HV_HYP_PAGE_SHIFT; @@ -234,12 +190,13 @@ void hv_synic_enable_regs(unsigned int cpu) siefp.siefp_enabled = 1; if (hv_isolation_type_snp() || hv_root_partition) { - hv_cpu->synic_event_page = - memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, - HV_HYP_PAGE_SIZE, MEMREMAP_WB); - + /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & + ~ms_hyperv.shared_gpa_boundary; + hv_cpu->synic_event_page + = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_event_page) - pr_err("Fail to map syinc event page.\n"); + pr_err("Fail to map synic event page.\n"); } else { siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) >> HV_HYP_PAGE_SHIFT; @@ -316,7 +273,7 @@ void hv_synic_disable_regs(unsigned int cpu) */ simp.simp_enabled = 0; if (hv_isolation_type_snp() || hv_root_partition) { - memunmap(hv_cpu->synic_message_page); + iounmap(hv_cpu->synic_message_page); hv_cpu->synic_message_page = NULL; } else { simp.base_simp_gpa = 0; @@ -328,7 +285,7 @@ void hv_synic_disable_regs(unsigned int cpu) siefp.siefp_enabled = 0; if (hv_isolation_type_snp() || hv_root_partition) { - memunmap(hv_cpu->synic_event_page); + iounmap(hv_cpu->synic_event_page); hv_cpu->synic_event_page = NULL; } else { siefp.base_siefp_gpa = 0; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 52a6f89ccdbd..64f9ceca887b 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -17,8 +17,11 @@ #include <linux/export.h> #include <linux/bitfield.h> #include <linux/cpumask.h> +#include <linux/sched/task_stack.h> #include <linux/panic_notifier.h> #include <linux/ptrace.h> +#include <linux/kdebug.h> +#include <linux/kmsg_dump.h> #include <linux/slab.h> #include <linux/dma-map-ops.h> #include <asm/hyperv-tlfs.h> @@ -54,6 +57,10 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); void * __percpu *hyperv_pcpu_output_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); +static void hv_kmsg_dump_unregister(void); + +static struct ctl_table_header *hv_ctl_table_hdr; + /* * Hyper-V specific initialization and shutdown code that is * common across all architectures. Called from architecture @@ -62,6 +69,12 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); void __init hv_common_free(void) { + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; + + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + hv_kmsg_dump_unregister(); + kfree(hv_vp_index); hv_vp_index = NULL; @@ -72,10 +85,203 @@ void __init hv_common_free(void) hyperv_pcpu_input_arg = NULL; } +/* + * Functions for allocating and freeing memory with size and + * alignment HV_HYP_PAGE_SIZE. These functions are needed because + * the guest page size may not be the same as the Hyper-V page + * size. We depend upon kmalloc() aligning power-of-two size + * allocations to the allocation size boundary, so that the + * allocated memory appears to Hyper-V as a page of the size + * it expects. + */ + +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL); + else + return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void *hv_alloc_hyperv_zeroed_page(void) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + free_page(addr); + else + kfree((void *)addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + +static void *hv_panic_page; + +/* + * Boolean to control whether to report panic messages over Hyper-V. + * + * It can be set via /proc/sys/kernel/hyperv_record_panic_msg + */ +static int sysctl_record_panic_msg = 1; + +/* + * sysctl option to allow the user to control whether kmsg data should be + * reported to Hyper-V on panic. + */ +static struct ctl_table hv_ctl_table[] = { + { + .procname = "hyperv_record_panic_msg", + .data = &sysctl_record_panic_msg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, + {} +}; + +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args); + +static struct notifier_block hyperv_die_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; + +static struct notifier_block hyperv_panic_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; + +/* + * The following callback works both as die and panic notifier; its + * goal is to provide panic information to the hypervisor unless the + * kmsg dumper is used [see hv_kmsg_dump()], which provides more + * information but isn't always available. + * + * Notice that both the panic/die report notifiers are registered only + * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. + */ +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args) +{ + struct pt_regs *regs; + bool is_die; + + /* Don't notify Hyper-V unless we have a die oops event or panic. */ + if (self == &hyperv_panic_report_block) { + is_die = false; + regs = current_pt_regs(); + } else { /* die event */ + if (val != DIE_OOPS) + return NOTIFY_DONE; + + is_die = true; + regs = ((struct die_args *)args)->regs; + } + + /* + * Hyper-V should be notified only once about a panic/die. If we will + * be calling hv_kmsg_dump() later with kmsg data, don't do the + * notification here. + */ + if (!sysctl_record_panic_msg || !hv_panic_page) + hyperv_report_panic(regs, val, is_die); + + return NOTIFY_DONE; +} + +/* + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg + * buffer and call into Hyper-V to transfer the data. + */ +static void hv_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + struct kmsg_dump_iter iter; + size_t bytes_written; + + /* We are only interested in panics. */ + if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg) + return; + + /* + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. + */ + kmsg_dump_rewind(&iter); + kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, + &bytes_written); + if (!bytes_written) + return; + /* + * P3 to contain the physical address of the panic page & P4 to + * contain the size of the panic data in that page. Rest of the + * registers are no-op when the NOTIFY_MSG flag is set. + */ + hv_set_register(HV_REGISTER_CRASH_P0, 0); + hv_set_register(HV_REGISTER_CRASH_P1, 0); + hv_set_register(HV_REGISTER_CRASH_P2, 0); + hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); + hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); + + /* + * Let Hyper-V know there is crash data available along with + * the panic message. + */ + hv_set_register(HV_REGISTER_CRASH_CTL, + (HV_CRASH_CTL_CRASH_NOTIFY | + HV_CRASH_CTL_CRASH_NOTIFY_MSG)); +} + +static struct kmsg_dumper hv_kmsg_dumper = { + .dump = hv_kmsg_dump, +}; + +static void hv_kmsg_dump_unregister(void) +{ + kmsg_dump_unregister(&hv_kmsg_dumper); + unregister_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_report_block); + + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; +} + +static void hv_kmsg_dump_register(void) +{ + int ret; + + hv_panic_page = hv_alloc_hyperv_zeroed_page(); + if (!hv_panic_page) { + pr_err("Hyper-V: panic message page memory allocation failed\n"); + return; + } + + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) { + pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; + } +} + int __init hv_common_init(void) { int i; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Hyper-V expects to get crash register data or kmsg when * crash enlightment is available and system crashes. Set @@ -84,8 +290,33 @@ int __init hv_common_init(void) * kernel. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + u64 hyperv_crash_ctl; + crash_kexec_post_notifiers = true; pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n"); + + /* + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. + */ + hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table); + if (!hv_ctl_table_hdr) + pr_err("Hyper-V: sysctl table register error"); + + /* + * Register for panic kmsg callback only if the right + * capability is supported by the hypervisor. + */ + hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) + hv_kmsg_dump_register(); + + register_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_report_block); } /* @@ -311,14 +542,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s return HV_STATUS_INVALID_PARAMETER; } EXPORT_SYMBOL_GPL(hv_ghcb_hypercall); - -void __weak *hv_map_memory(void *addr, unsigned long size) -{ - return NULL; -} -EXPORT_SYMBOL_GPL(hv_map_memory); - -void __weak hv_unmap_memory(void *addr) -{ -} -EXPORT_SYMBOL_GPL(hv_unmap_memory); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index dc673edf053c..55f2086841ae 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -122,10 +122,6 @@ enum { struct hv_per_cpu_context { void *synic_message_page; void *synic_event_page; - /* - * buffer to post messages to the host. - */ - void *post_msg_page; /* * Starting with win8, we can take channel interrupts on any CPU; @@ -241,8 +237,6 @@ struct vmbus_connection { * is child->parent notification */ struct hv_monitor_page *monitor_pages[2]; - void *monitor_pages_original[2]; - phys_addr_t monitor_pages_pa[2]; struct list_head chn_msg_list; spinlock_t channelmsg_lock; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 2111e97c3b63..3c9b02471760 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -186,8 +186,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, struct page *pages, u32 page_cnt, u32 max_pkt_size) { struct page **pages_wraparound; - unsigned long *pfns_wraparound; - u64 pfn; int i; BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); @@ -196,50 +194,30 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, * First page holds struct hv_ring_buffer, do wraparound mapping for * the rest. */ - if (hv_isolation_type_snp()) { - pfn = page_to_pfn(pages) + - PFN_DOWN(ms_hyperv.shared_gpa_boundary); + pages_wraparound = kcalloc(page_cnt * 2 - 1, + sizeof(struct page *), + GFP_KERNEL); + if (!pages_wraparound) + return -ENOMEM; - pfns_wraparound = kcalloc(page_cnt * 2 - 1, - sizeof(unsigned long), GFP_KERNEL); - if (!pfns_wraparound) - return -ENOMEM; - - pfns_wraparound[0] = pfn; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1; - - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, - pgprot_decrypted(PAGE_KERNEL)); - kfree(pfns_wraparound); - - if (!ring_info->ring_buffer) - return -ENOMEM; - - /* Zero ring buffer after setting memory host visibility. */ - memset(ring_info->ring_buffer, 0x00, PAGE_SIZE * page_cnt); - } else { - pages_wraparound = kcalloc(page_cnt * 2 - 1, - sizeof(struct page *), - GFP_KERNEL); - if (!pages_wraparound) - return -ENOMEM; - - pages_wraparound[0] = pages; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pages_wraparound[i + 1] = - &pages[i % (page_cnt - 1) + 1]; + pages_wraparound[0] = pages; + for (i = 0; i < 2 * (page_cnt - 1); i++) + pages_wraparound[i + 1] = + &pages[i % (page_cnt - 1) + 1]; - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, - PAGE_KERNEL); + ring_info->ring_buffer = (struct hv_ring_buffer *) + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, + pgprot_decrypted(PAGE_KERNEL)); - kfree(pages_wraparound); - if (!ring_info->ring_buffer) - return -ENOMEM; - } + kfree(pages_wraparound); + if (!ring_info->ring_buffer) + return -ENOMEM; + /* + * Ensure the header page is zero'ed since + * encryption status may have changed. + */ + memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE); ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 734fff86c8dc..1c65a6dfb9fa 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/device.h> +#include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/sysctl.h> #include <linux/slab.h> @@ -19,6 +20,7 @@ #include <linux/completion.h> #include <linux/hyperv.h> #include <linux/kernel_stat.h> +#include <linux/of_address.h> #include <linux/clockchips.h> #include <linux/cpu.h> #include <linux/sched/isolation.h> @@ -28,7 +30,6 @@ #include <linux/panic_notifier.h> #include <linux/ptrace.h> #include <linux/screen_info.h> -#include <linux/kdebug.h> #include <linux/efi.h> #include <linux/random.h> #include <linux/kernel.h> @@ -44,12 +45,10 @@ struct vmbus_dynid { struct hv_vmbus_device_id id; }; -static struct acpi_device *hv_acpi_dev; +static struct device *hv_dev; static int hyperv_cpuhp_online; -static void *hv_panic_page; - static long __percpu *vmbus_evt; /* Values parsed from ACPI DSDT */ @@ -57,18 +56,6 @@ int vmbus_irq; int vmbus_interrupt; /* - * Boolean to control whether to report panic messages over Hyper-V. - * - * It can be set via /proc/sys/kernel/hyperv_record_panic_msg - */ -static int sysctl_record_panic_msg = 1; - -static int hyperv_report_reg(void) -{ - return !sysctl_record_panic_msg || !hv_panic_page; -} - -/* * The panic notifier below is responsible solely for unloading the * vmbus connection, which is necessary in a panic event. * @@ -88,54 +75,6 @@ static struct notifier_block hyperv_panic_vmbus_unload_block = { .priority = INT_MIN + 1, /* almost the latest one to execute */ }; -static int hv_die_panic_notify_crash(struct notifier_block *self, - unsigned long val, void *args); - -static struct notifier_block hyperv_die_report_block = { - .notifier_call = hv_die_panic_notify_crash, -}; -static struct notifier_block hyperv_panic_report_block = { - .notifier_call = hv_die_panic_notify_crash, -}; - -/* - * The following callback works both as die and panic notifier; its - * goal is to provide panic information to the hypervisor unless the - * kmsg dumper is used [see hv_kmsg_dump()], which provides more - * information but isn't always available. - * - * Notice that both the panic/die report notifiers are registered only - * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. - */ -static int hv_die_panic_notify_crash(struct notifier_block *self, - unsigned long val, void *args) -{ - struct pt_regs *regs; - bool is_die; - - /* Don't notify Hyper-V unless we have a die oops event or panic. */ - if (self == &hyperv_panic_report_block) { - is_die = false; - regs = current_pt_regs(); - } else { /* die event */ - if (val != DIE_OOPS) - return NOTIFY_DONE; - - is_die = true; - regs = ((struct die_args *)args)->regs; - } - - /* |
