summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 17:17:12 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 17:17:12 -0700
commitda46b58ff884146f6153064f18d276806f3c114c (patch)
treecdf15e3e00571771fb337dea79d3cf474efeafc4 /drivers
parent8ccd54fe45713cd458015b5b08d6098545e70543 (diff)
parenta494aef23dfc732945cb42e22246a5c31174e4a5 (diff)
downloadlinux-da46b58ff884146f6153064f18d276806f3c114c.tar.gz
linux-da46b58ff884146f6153064f18d276806f3c114c.tar.bz2
linux-da46b58ff884146f6153064f18d276806f3c114c.zip
Merge tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu: - PCI passthrough for Hyper-V confidential VMs (Michael Kelley) - Hyper-V VTL mode support (Saurabh Sengar) - Move panic report initialization code earlier (Long Li) - Various improvements and bug fixes (Dexuan Cui and Michael Kelley) * tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (22 commits) PCI: hv: Replace retarget_msi_interrupt_params with hyperv_pcpu_input_arg Drivers: hv: move panic report code from vmbus to hv early init code x86/hyperv: VTL support for Hyper-V Drivers: hv: Kconfig: Add HYPERV_VTL_MODE x86/hyperv: Make hv_get_nmi_reason public x86/hyperv: Add VTL specific structs and hypercalls x86/init: Make get/set_rtc_noop() public x86/hyperv: Exclude lazy TLB mode CPUs from enlightened TLB flushes x86/hyperv: Add callback filter to cpumask_to_vpset() Drivers: hv: vmbus: Remove the per-CPU post_msg_page clocksource: hyper-v: make sure Invariant-TSC is used if it is available PCI: hv: Enable PCI pass-thru devices in Confidential VMs Drivers: hv: Don't remap addresses that are above shared_gpa_boundary hv_netvsc: Remove second mapping of send and recv buffers Drivers: hv: vmbus: Remove second way of mapping ring buffers Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages swiotlb: Remove bounce buffer remapping for Hyper-V Driver: VMBus: Add Devicetree support dt-bindings: bus: Add Hyper-V VMBus Drivers: hv: vmbus: Convert acpi_device to more generic platform_device ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/clocksource/hyperv_timer.c21
-rw-r--r--drivers/hv/Kconfig30
-rw-r--r--drivers/hv/channel_mgmt.c2
-rw-r--r--drivers/hv/connection.c113
-rw-r--r--drivers/hv/hv.c79
-rw-r--r--drivers/hv/hv_common.c242
-rw-r--r--drivers/hv/hyperv_vmbus.h6
-rw-r--r--drivers/hv/ring_buffer.c62
-rw-r--r--drivers/hv/vmbus_drv.c312
-rw-r--r--drivers/net/hyperv/hyperv_net.h2
-rw-r--r--drivers/net/hyperv/netvsc.c48
-rw-r--r--drivers/pci/controller/pci-hyperv.c280
12 files changed, 614 insertions, 583 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index c0cef92b12b8..bcd9042a0c9f 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -49,7 +49,7 @@ static bool direct_mode_enabled;
static int stimer0_irq = -1;
static int stimer0_message_sint;
-static DEFINE_PER_CPU(long, stimer0_evt);
+static __maybe_unused DEFINE_PER_CPU(long, stimer0_evt);
/*
* Common code for stimer0 interrupts coming via Direct Mode or
@@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(hv_stimer0_isr);
* stimer0 interrupt handler for architectures that support
* per-cpu interrupts, which also implies Direct Mode.
*/
-static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
+static irqreturn_t __maybe_unused hv_stimer0_percpu_isr(int irq, void *dev_id)
{
hv_stimer0_isr();
return IRQ_HANDLED;
@@ -196,6 +196,7 @@ void __weak hv_remove_stimer0_handler(void)
{
};
+#ifdef CONFIG_ACPI
/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
static int hv_setup_stimer0_irq(void)
{
@@ -230,6 +231,16 @@ static void hv_remove_stimer0_irq(void)
stimer0_irq = -1;
}
}
+#else
+static int hv_setup_stimer0_irq(void)
+{
+ return 0;
+}
+
+static void hv_remove_stimer0_irq(void)
+{
+}
+#endif
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(bool have_percpu_irqs)
@@ -506,9 +517,6 @@ static bool __init hv_init_tsc_clocksource(void)
{
union hv_reference_tsc_msr tsc_msr;
- if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
- return false;
-
/*
* If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
* handles frequency and offset changes due to live migration,
@@ -525,6 +533,9 @@ static bool __init hv_init_tsc_clocksource(void)
hyperv_cs_msr.rating = 250;
}
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return false;
+
hv_read_reference_counter = read_hv_clock_tsc;
/*
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 0747a8f1fcee..00242107d62e 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -4,15 +4,39 @@ menu "Microsoft Hyper-V guest support"
config HYPERV
tristate "Microsoft Hyper-V client drivers"
- depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
- || (ARM64 && !CPU_BIG_ENDIAN))
+ depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
+ || (ACPI && ARM64 && !CPU_BIG_ENDIAN)
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
- select VMAP_PFN
+ select OF_EARLY_FLATTREE if OF
help
Select this option to run Linux as a Hyper-V client operating
system.
+config HYPERV_VTL_MODE
+ bool "Enable Linux to boot in VTL context"
+ depends on X86_64 && HYPERV
+ default n
+ help
+ Virtual Secure Mode (VSM) is a set of hypervisor capabilities and
+ enlightenments offered to host and guest partitions which enables
+ the creation and management of new security boundaries within
+ operating system software.
+
+ VSM achieves and maintains isolation through Virtual Trust Levels
+ (VTLs). Virtual Trust Levels are hierarchical, with higher levels
+ being more privileged than lower levels. VTL0 is the least privileged
+ level, and currently only other level supported is VTL2.
+
+ Select this option to build a Linux kernel to run at a VTL other than
+ the normal VTL0, which currently is only VTL2. This option
+ initializes the x86 platform for VTL2, and adds the ability to boot
+ secondary CPUs directly into 64-bit context as required for VTLs other
+ than 0. A kernel built with this option must run at VTL2, and will
+ not run as a normal guest.
+
+ If unsure, say N
+
config HYPERV_TIMER
def_bool HYPERV && X86
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index cc23b90cae02..007f26d5f1a4 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = {
{ .dev_type = HV_PCIE,
HV_PCIE_GUID,
.perf_device = false,
- .allowed_in_isolated = false,
+ .allowed_in_isolated = true,
},
/* Synthetic Frame Buffer */
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index da51b50787df..5978e9dbc286 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -104,8 +104,14 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
}
- msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0];
- msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1];
+ /*
+ * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always
+ * bitwise OR it
+ */
+ msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) |
+ ms_hyperv.shared_gpa_boundary;
+ msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) |
+ ms_hyperv.shared_gpa_boundary;
msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
@@ -219,72 +225,27 @@ int vmbus_connect(void)
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
- vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
+ vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page();
+ vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
goto cleanup;
}
- vmbus_connection.monitor_pages_original[0]
- = vmbus_connection.monitor_pages[0];
- vmbus_connection.monitor_pages_original[1]
- = vmbus_connection.monitor_pages[1];
- vmbus_connection.monitor_pages_pa[0]
- = virt_to_phys(vmbus_connection.monitor_pages[0]);
- vmbus_connection.monitor_pages_pa[1]
- = virt_to_phys(vmbus_connection.monitor_pages[1]);
-
- if (hv_is_isolation_supported()) {
- ret = set_memory_decrypted((unsigned long)
- vmbus_connection.monitor_pages[0],
- 1);
- ret |= set_memory_decrypted((unsigned long)
- vmbus_connection.monitor_pages[1],
- 1);
- if (ret)
- goto cleanup;
-
- /*
- * Isolation VM with AMD SNP needs to access monitor page via
- * address space above shared gpa boundary.
- */
- if (hv_isolation_type_snp()) {
- vmbus_connection.monitor_pages_pa[0] +=
- ms_hyperv.shared_gpa_boundary;
- vmbus_connection.monitor_pages_pa[1] +=
- ms_hyperv.shared_gpa_boundary;
-
- vmbus_connection.monitor_pages[0]
- = memremap(vmbus_connection.monitor_pages_pa[0],
- HV_HYP_PAGE_SIZE,
- MEMREMAP_WB);
- if (!vmbus_connection.monitor_pages[0]) {
- ret = -ENOMEM;
- goto cleanup;
- }
-
- vmbus_connection.monitor_pages[1]
- = memremap(vmbus_connection.monitor_pages_pa[1],
- HV_HYP_PAGE_SIZE,
- MEMREMAP_WB);
- if (!vmbus_connection.monitor_pages[1]) {
- ret = -ENOMEM;
- goto cleanup;
- }
- }
-
- /*
- * Set memory host visibility hvcall smears memory
- * and so zero monitor pages here.
- */
- memset(vmbus_connection.monitor_pages[0], 0x00,
- HV_HYP_PAGE_SIZE);
- memset(vmbus_connection.monitor_pages[1], 0x00,
- HV_HYP_PAGE_SIZE);
+ ret = set_memory_decrypted((unsigned long)
+ vmbus_connection.monitor_pages[0], 1);
+ ret |= set_memory_decrypted((unsigned long)
+ vmbus_connection.monitor_pages[1], 1);
+ if (ret)
+ goto cleanup;
- }
+ /*
+ * Set_memory_decrypted() will change the memory contents if
+ * decryption occurs, so zero monitor pages here.
+ */
+ memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE);
+ memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE);
msginfo = kzalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_initiate_contact),
@@ -376,31 +337,13 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
- if (hv_is_isolation_supported()) {
- /*
- * memunmap() checks input address is ioremap address or not
- * inside. It doesn't unmap any thing in the non-SNP CVM and
- * so not check CVM type here.
- */
- memunmap(vmbus_connection.monitor_pages[0]);
- memunmap(vmbus_connection.monitor_pages[1]);
-
- set_memory_encrypted((unsigned long)
- vmbus_connection.monitor_pages_original[0],
- 1);
- set_memory_encrypted((unsigned long)
- vmbus_connection.monitor_pages_original[1],
- 1);
- }
+ set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
+ set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
- hv_free_hyperv_page((unsigned long)
- vmbus_connection.monitor_pages_original[0]);
- hv_free_hyperv_page((unsigned long)
- vmbus_connection.monitor_pages_original[1]);
- vmbus_connection.monitor_pages_original[0] =
- vmbus_connection.monitor_pages[0] = NULL;
- vmbus_connection.monitor_pages_original[1] =
- vmbus_connection.monitor_pages[1] = NULL;
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[0] = NULL;
+ vmbus_connection.monitor_pages[1] = NULL;
}
/*
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 8b0dd8e5244d..de6708dbe0df 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -39,42 +39,6 @@ int hv_init(void)
}
/*
- * Functions for allocating and freeing memory with size and
- * alignment HV_HYP_PAGE_SIZE. These functions are needed because
- * the guest page size may not be the same as the Hyper-V page
- * size. We depend upon kmalloc() aligning power-of-two size
- * allocations to the allocation size boundary, so that the
- * allocated memory appears to Hyper-V as a page of the size
- * it expects.
- */
-
-void *hv_alloc_hyperv_page(void)
-{
- BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
-
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL);
- else
- return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
-}
-
-void *hv_alloc_hyperv_zeroed_page(void)
-{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- else
- return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
-}
-
-void hv_free_hyperv_page(unsigned long addr)
-{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- free_page(addr);
- else
- kfree((void *)addr);
-}
-
-/*
* hv_post_message - Post a message using the hypervisor message IPC.
*
* This involves a hypercall.
@@ -84,14 +48,15 @@ int hv_post_message(union hv_connection_id connection_id,
void *payload, size_t payload_size)
{
struct hv_input_post_message *aligned_msg;
- struct hv_per_cpu_context *hv_cpu;
+ unsigned long flags;
u64 status;
if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
return -EMSGSIZE;
- hv_cpu = get_cpu_ptr(hv_context.cpu_context);
- aligned_msg = hv_cpu->post_msg_page;
+ local_irq_save(flags);
+
+ aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
aligned_msg->connectionid = connection_id;
aligned_msg->reserved = 0;
aligned_msg->message_type = message_type;
@@ -106,11 +71,7 @@ int hv_post_message(union hv_connection_id connection_id,
status = hv_do_hypercall(HVCALL_POST_MESSAGE,
aligned_msg, NULL);
- /* Preemption must remain disabled until after the hypercall
- * so some other thread can't get scheduled onto this cpu and
- * corrupt the per-cpu post_msg_page
- */
- put_cpu_ptr(hv_cpu);
+ local_irq_restore(flags);
return hv_result(status);
}
@@ -162,12 +123,6 @@ int hv_synic_alloc(void)
goto err;
}
}
-
- hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->post_msg_page == NULL) {
- pr_err("Unable to allocate post msg page\n");
- goto err;
- }
}
return 0;
@@ -190,7 +145,6 @@ void hv_synic_free(void)
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
- free_page((unsigned long)hv_cpu->post_msg_page);
}
kfree(hv_context.hv_numa_map);
@@ -217,11 +171,13 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.simp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
+ /* Mask out vTOM bit. ioremap_cache() maps decrypted */
+ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
+ ~ms_hyperv.shared_gpa_boundary;
hv_cpu->synic_message_page
- = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
- HV_HYP_PAGE_SIZE, MEMREMAP_WB);
+ = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_message_page)
- pr_err("Fail to map syinc message page.\n");
+ pr_err("Fail to map synic message page.\n");
} else {
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> HV_HYP_PAGE_SHIFT;
@@ -234,12 +190,13 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.siefp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
- hv_cpu->synic_event_page =
- memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
- HV_HYP_PAGE_SIZE, MEMREMAP_WB);
-
+ /* Mask out vTOM bit. ioremap_cache() maps decrypted */
+ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
+ ~ms_hyperv.shared_gpa_boundary;
+ hv_cpu->synic_event_page
+ = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_event_page)
- pr_err("Fail to map syinc event page.\n");
+ pr_err("Fail to map synic event page.\n");
} else {
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> HV_HYP_PAGE_SHIFT;
@@ -316,7 +273,7 @@ void hv_synic_disable_regs(unsigned int cpu)
*/
simp.simp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
- memunmap(hv_cpu->synic_message_page);
+ iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
simp.base_simp_gpa = 0;
@@ -328,7 +285,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
- memunmap(hv_cpu->synic_event_page);
+ iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
siefp.base_siefp_gpa = 0;
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 52a6f89ccdbd..64f9ceca887b 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -17,8 +17,11 @@
#include <linux/export.h>
#include <linux/bitfield.h>
#include <linux/cpumask.h>
+#include <linux/sched/task_stack.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
+#include <linux/kdebug.h>
+#include <linux/kmsg_dump.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <asm/hyperv-tlfs.h>
@@ -54,6 +57,10 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
+static void hv_kmsg_dump_unregister(void);
+
+static struct ctl_table_header *hv_ctl_table_hdr;
+
/*
* Hyper-V specific initialization and shutdown code that is
* common across all architectures. Called from architecture
@@ -62,6 +69,12 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
void __init hv_common_free(void)
{
+ unregister_sysctl_table(hv_ctl_table_hdr);
+ hv_ctl_table_hdr = NULL;
+
+ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
+ hv_kmsg_dump_unregister();
+
kfree(hv_vp_index);
hv_vp_index = NULL;
@@ -72,10 +85,203 @@ void __init hv_common_free(void)
hyperv_pcpu_input_arg = NULL;
}
+/*
+ * Functions for allocating and freeing memory with size and
+ * alignment HV_HYP_PAGE_SIZE. These functions are needed because
+ * the guest page size may not be the same as the Hyper-V page
+ * size. We depend upon kmalloc() aligning power-of-two size
+ * allocations to the allocation size boundary, so that the
+ * allocated memory appears to Hyper-V as a page of the size
+ * it expects.
+ */
+
+void *hv_alloc_hyperv_page(void)
+{
+ BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
+
+ if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
+ return (void *)__get_free_page(GFP_KERNEL);
+ else
+ return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
+
+void *hv_alloc_hyperv_zeroed_page(void)
+{
+ if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ else
+ return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
+
+void hv_free_hyperv_page(unsigned long addr)
+{
+ if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
+ free_page(addr);
+ else
+ kfree((void *)addr);
+}
+EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
+
+static void *hv_panic_page;
+
+/*
+ * Boolean to control whether to report panic messages over Hyper-V.
+ *
+ * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
+ */
+static int sysctl_record_panic_msg = 1;
+
+/*
+ * sysctl option to allow the user to control whether kmsg data should be
+ * reported to Hyper-V on panic.
+ */
+static struct ctl_table hv_ctl_table[] = {
+ {
+ .procname = "hyperv_record_panic_msg",
+ .data = &sysctl_record_panic_msg,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {}
+};
+
+static int hv_die_panic_notify_crash(struct notifier_block *self,
+ unsigned long val, void *args);
+
+static struct notifier_block hyperv_die_report_block = {
+ .notifier_call = hv_die_panic_notify_crash,
+};
+
+static struct notifier_block hyperv_panic_report_block = {
+ .notifier_call = hv_die_panic_notify_crash,
+};
+
+/*
+ * The following callback works both as die and panic notifier; its
+ * goal is to provide panic information to the hypervisor unless the
+ * kmsg dumper is used [see hv_kmsg_dump()], which provides more
+ * information but isn't always available.
+ *
+ * Notice that both the panic/die report notifiers are registered only
+ * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
+ */
+static int hv_die_panic_notify_crash(struct notifier_block *self,
+ unsigned long val, void *args)
+{
+ struct pt_regs *regs;
+ bool is_die;
+
+ /* Don't notify Hyper-V unless we have a die oops event or panic. */
+ if (self == &hyperv_panic_report_block) {
+ is_die = false;
+ regs = current_pt_regs();
+ } else { /* die event */
+ if (val != DIE_OOPS)
+ return NOTIFY_DONE;
+
+ is_die = true;
+ regs = ((struct die_args *)args)->regs;
+ }
+
+ /*
+ * Hyper-V should be notified only once about a panic/die. If we will
+ * be calling hv_kmsg_dump() later with kmsg data, don't do the
+ * notification here.
+ */
+ if (!sysctl_record_panic_msg || !hv_panic_page)
+ hyperv_report_panic(regs, val, is_die);
+
+ return NOTIFY_DONE;
+}
+
+/*
+ * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
+ * buffer and call into Hyper-V to transfer the data.
+ */
+static void hv_kmsg_dump(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason)
+{
+ struct kmsg_dump_iter iter;
+ size_t bytes_written;
+
+ /* We are only interested in panics. */
+ if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
+ return;
+
+ /*
+ * Write dump contents to the page. No need to synchronize; panic should
+ * be single-threaded.
+ */
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
+ &bytes_written);
+ if (!bytes_written)
+ return;
+ /*
+ * P3 to contain the physical address of the panic page & P4 to
+ * contain the size of the panic data in that page. Rest of the
+ * registers are no-op when the NOTIFY_MSG flag is set.
+ */
+ hv_set_register(HV_REGISTER_CRASH_P0, 0);
+ hv_set_register(HV_REGISTER_CRASH_P1, 0);
+ hv_set_register(HV_REGISTER_CRASH_P2, 0);
+ hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
+ hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
+
+ /*
+ * Let Hyper-V know there is crash data available along with
+ * the panic message.
+ */
+ hv_set_register(HV_REGISTER_CRASH_CTL,
+ (HV_CRASH_CTL_CRASH_NOTIFY |
+ HV_CRASH_CTL_CRASH_NOTIFY_MSG));
+}
+
+static struct kmsg_dumper hv_kmsg_dumper = {
+ .dump = hv_kmsg_dump,
+};
+
+static void hv_kmsg_dump_unregister(void)
+{
+ kmsg_dump_unregister(&hv_kmsg_dumper);
+ unregister_die_notifier(&hyperv_die_report_block);
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &hyperv_panic_report_block);
+
+ hv_free_hyperv_page((unsigned long)hv_panic_page);
+ hv_panic_page = NULL;
+}
+
+static void hv_kmsg_dump_register(void)
+{
+ int ret;
+
+ hv_panic_page = hv_alloc_hyperv_zeroed_page();
+ if (!hv_panic_page) {
+ pr_err("Hyper-V: panic message page memory allocation failed\n");
+ return;
+ }
+
+ ret = kmsg_dump_register(&hv_kmsg_dumper);
+ if (ret) {
+ pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
+ hv_free_hyperv_page((unsigned long)hv_panic_page);
+ hv_panic_page = NULL;
+ }
+}
+
int __init hv_common_init(void)
{
int i;
+ if (hv_is_isolation_supported())
+ sysctl_record_panic_msg = 0;
+
/*
* Hyper-V expects to get crash register data or kmsg when
* crash enlightment is available and system crashes. Set
@@ -84,8 +290,33 @@ int __init hv_common_init(void)
* kernel.
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
+ u64 hyperv_crash_ctl;
+
crash_kexec_post_notifiers = true;
pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
+
+ /*
+ * Panic message recording (sysctl_record_panic_msg)
+ * is enabled by default in non-isolated guests and
+ * disabled by default in isolated guests; the panic
+ * message recording won't be available in isolated
+ * guests should the following registration fail.
+ */
+ hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
+ if (!hv_ctl_table_hdr)
+ pr_err("Hyper-V: sysctl table register error");
+
+ /*
+ * Register for panic kmsg callback only if the right
+ * capability is supported by the hypervisor.
+ */
+ hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
+ if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
+ hv_kmsg_dump_register();
+
+ register_die_notifier(&hyperv_die_report_block);
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &hyperv_panic_report_block);
}
/*
@@ -311,14 +542,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
-
-void __weak *hv_map_memory(void *addr, unsigned long size)
-{
- return NULL;
-}
-EXPORT_SYMBOL_GPL(hv_map_memory);
-
-void __weak hv_unmap_memory(void *addr)
-{
-}
-EXPORT_SYMBOL_GPL(hv_unmap_memory);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index dc673edf053c..55f2086841ae 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -122,10 +122,6 @@ enum {
struct hv_per_cpu_context {
void *synic_message_page;
void *synic_event_page;
- /*
- * buffer to post messages to the host.
- */
- void *post_msg_page;
/*
* Starting with win8, we can take channel interrupts on any CPU;
@@ -241,8 +237,6 @@ struct vmbus_connection {
* is child->parent notification
*/
struct hv_monitor_page *monitor_pages[2];
- void *monitor_pages_original[2];
- phys_addr_t monitor_pages_pa[2];
struct list_head chn_msg_list;
spinlock_t channelmsg_lock;
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 2111e97c3b63..3c9b02471760 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -186,8 +186,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 page_cnt, u32 max_pkt_size)
{
struct page **pages_wraparound;
- unsigned long *pfns_wraparound;
- u64 pfn;
int i;
BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
@@ -196,50 +194,30 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
* First page holds struct hv_ring_buffer, do wraparound mapping for
* the rest.
*/
- if (hv_isolation_type_snp()) {
- pfn = page_to_pfn(pages) +
- PFN_DOWN(ms_hyperv.shared_gpa_boundary);
+ pages_wraparound = kcalloc(page_cnt * 2 - 1,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pages_wraparound)
+ return -ENOMEM;
- pfns_wraparound = kcalloc(page_cnt * 2 - 1,
- sizeof(unsigned long), GFP_KERNEL);
- if (!pfns_wraparound)
- return -ENOMEM;
-
- pfns_wraparound[0] = pfn;
- for (i = 0; i < 2 * (page_cnt - 1); i++)
- pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1;
-
- ring_info->ring_buffer = (struct hv_ring_buffer *)
- vmap_pfn(pfns_wraparound, page_cnt * 2 - 1,
- pgprot_decrypted(PAGE_KERNEL));
- kfree(pfns_wraparound);
-
- if (!ring_info->ring_buffer)
- return -ENOMEM;
-
- /* Zero ring buffer after setting memory host visibility. */
- memset(ring_info->ring_buffer, 0x00, PAGE_SIZE * page_cnt);
- } else {
- pages_wraparound = kcalloc(page_cnt * 2 - 1,
- sizeof(struct page *),
- GFP_KERNEL);
- if (!pages_wraparound)
- return -ENOMEM;
-
- pages_wraparound[0] = pages;
- for (i = 0; i < 2 * (page_cnt - 1); i++)
- pages_wraparound[i + 1] =
- &pages[i % (page_cnt - 1) + 1];
+ pages_wraparound[0] = pages;
+ for (i = 0; i < 2 * (page_cnt - 1); i++)
+ pages_wraparound[i + 1] =
+ &pages[i % (page_cnt - 1) + 1];
- ring_info->ring_buffer = (struct hv_ring_buffer *)
- vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
- PAGE_KERNEL);
+ ring_info->ring_buffer = (struct hv_ring_buffer *)
+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
+ pgprot_decrypted(PAGE_KERNEL));
- kfree(pages_wraparound);
- if (!ring_info->ring_buffer)
- return -ENOMEM;
- }
+ kfree(pages_wraparound);
+ if (!ring_info->ring_buffer)
+ return -ENOMEM;
+ /*
+ * Ensure the header page is zero'ed since
+ * encryption status may have changed.
+ */
+ memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE);
ring_info->ring_buffer->read_index =
ring_info->ring_buffer->write_index = 0;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 734fff86c8dc..1c65a6dfb9fa 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
@@ -19,6 +20,7 @@
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
+#include <linux/of_address.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
@@ -28,7 +30,6 @@
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
-#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/kernel.h>
@@ -44,12 +45,10 @@ struct vmbus_dynid {
struct hv_vmbus_device_id id;
};
-static struct acpi_device *hv_acpi_dev;
+static struct device *hv_dev;
static int hyperv_cpuhp_online;
-static void *hv_panic_page;
-
static long __percpu *vmbus_evt;
/* Values parsed from ACPI DSDT */
@@ -57,18 +56,6 @@ int vmbus_irq;
int vmbus_interrupt;
/*
- * Boolean to control whether to report panic messages over Hyper-V.
- *
- * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
- */
-static int sysctl_record_panic_msg = 1;
-
-static int hyperv_report_reg(void)
-{
- return !sysctl_record_panic_msg || !hv_panic_page;
-}
-
-/*
* The panic notifier below is responsible solely for unloading the
* vmbus connection, which is necessary in a panic event.
*
@@ -88,54 +75,6 @@ static struct notifier_block hyperv_panic_vmbus_unload_block = {
.priority = INT_MIN + 1, /* almost the latest one to execute */
};
-static int hv_die_panic_notify_crash(struct notifier_block *self,
- unsigned long val, void *args);
-
-static struct notifier_block hyperv_die_report_block = {
- .notifier_call = hv_die_panic_notify_crash,
-};
-static struct notifier_block hyperv_panic_report_block = {
- .notifier_call = hv_die_panic_notify_crash,
-};
-
-/*
- * The following callback works both as die and panic notifier; its
- * goal is to provide panic information to the hypervisor unless the
- * kmsg dumper is used [see hv_kmsg_dump()], which provides more
- * information but isn't always available.
- *
- * Notice that both the panic/die report notifiers are registered only
- * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
- */
-static int hv_die_panic_notify_crash(struct notifier_block *self,
- unsigned long val, void *args)
-{
- struct pt_regs *regs;
- bool is_die;
-
- /* Don't notify Hyper-V unless we have a die oops event or panic. */
- if (self == &hyperv_panic_report_block) {
- is_die = false;
- regs = current_pt_regs();
- } else { /* die event */
- if (val != DIE_OOPS)
- return NOTIFY_DONE;
-
- is_die = true;
- regs = ((struct die_args *)args)->regs;
- }
-
- /*