diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-10 12:29:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-10 12:29:52 -0700 |
commit | bb97be23db2a296c5f8b8b4c40feb0435b068c5e (patch) | |
tree | a9155e90b8cc786ddee3e87f962d9efbb2674c89 | |
parent | b7a7d1c1ec688104fdc922568c26395a756f616d (diff) | |
parent | d05e4c8600c36084ce9de6249bb972c9bdd75b7e (diff) | |
download | linux-bb97be23db2a296c5f8b8b4c40feb0435b068c5e.tar.gz linux-bb97be23db2a296c5f8b8b4c40feb0435b068c5e.tar.bz2 linux-bb97be23db2a296c5f8b8b4c40feb0435b068c5e.zip |
Merge tag 'iommu-updates-v5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
- A big cleanup and optimization patch-set for the Tegra GART driver
- Documentation updates and fixes for the IOMMU-API
- Support for page request in Intel VT-d scalable mode
- Intel VT-d dma_[un]map_resource() support
- Updates to the ATS enabling code for PCI (acked by Bjorn) and Intel
VT-d to align with the latest version of the ATS spec
- Relaxed IRQ source checking in the Intel VT-d driver for some aliased
devices, needed for future devices which send IRQ messages from more
than on request-ID
- IRQ remapping driver for Hyper-V
- Patches to make generic IOVA and IO-Page-Table code usable outside of
the IOMMU code
- Various other small fixes and cleanups
* tag 'iommu-updates-v5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
iommu/vt-d: Get domain ID before clear pasid entry
iommu/vt-d: Fix NULL pointer reference in intel_svm_bind_mm()
iommu/vt-d: Set context field after value initialized
iommu/vt-d: Disable ATS support on untrusted devices
iommu/mediatek: Fix semicolon code style issue
MAINTAINERS: Add Hyper-V IOMMU driver into Hyper-V CORE AND DRIVERS scope
iommu/hyper-v: Add Hyper-V stub IOMMU driver
x86/Hyper-V: Set x2apic destination mode to physical when x2apic is available
PCI/ATS: Add inline to pci_prg_resp_pasid_required()
iommu/vt-d: Check identity map for hot-added devices
iommu: Fix IOMMU debugfs fallout
iommu: Document iommu_ops.is_attach_deferred()
iommu: Document iommu_ops.iotlb_sync_map()
iommu/vt-d: Enable ATS only if the device uses page aligned address.
PCI/ATS: Add pci_ats_page_aligned() interface
iommu/vt-d: Fix PRI/PASID dependency issue.
PCI/ATS: Add pci_prg_resp_pasid_required() interface.
iommu/vt-d: Allow interrupts from the entire bus for aliased devices
iommu/vt-d: Add helper to set an IRTE to verify only the bus number
iommu: Fix flush_tlb_all typo
...
44 files changed, 838 insertions, 611 deletions
diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt deleted file mode 100644 index 099d9362ebc1..000000000000 --- a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt +++ /dev/null @@ -1,14 +0,0 @@ -NVIDIA Tegra 20 GART - -Required properties: -- compatible: "nvidia,tegra20-gart" -- reg: Two pairs of cells specifying the physical address and size of - the memory controller registers and the GART aperture respectively. - -Example: - - gart { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ - }; diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt index 7d60a50a4fa1..e55328237df4 100644 --- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt +++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt @@ -1,26 +1,37 @@ NVIDIA Tegra20 MC(Memory Controller) Required properties: -- compatible : "nvidia,tegra20-mc" -- reg : Should contain 2 register ranges(address and length); see the - example below. Note that the MC registers are interleaved with the - GART registers, and hence must be represented as multiple ranges. +- compatible : "nvidia,tegra20-mc-gart" +- reg : Should contain 2 register ranges: physical base address and length of + the controller's registers and the GART aperture respectively. +- clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names: Must include the following entries: + - mc: the module's clock input - interrupts : Should contain MC General interrupt. - #reset-cells : Should be 1. This cell represents memory client module ID. The assignments may be found in header file <dt-bindings/memory/tegra20-mc.h> or in the TRM documentation. +- #iommu-cells: Should be 0. This cell represents the number of cells in an + IOMMU specifier needed to encode an address. GART supports only a single + address space that is shared by all devices, therefore no additional + information needed for the address encoding. Example: mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; - interrupts = <0 77 0x04>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; + interrupts = <GIC_SPI 77 0x04>; #reset-cells = <1>; + #iommu-cells = <0>; }; video-codec@6001a000 { compatible = "nvidia,tegra20-vde"; ... resets = <&mc TEGRA20_MC_RESET_VDE>; + iommus = <&mc>; }; diff --git a/MAINTAINERS b/MAINTAINERS index b5bffd2d7f8c..08995da6e16a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7170,6 +7170,7 @@ F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c +F: drivers/iommu/hyperv_iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/linux/hyperv.h F: include/uapi/linux/hyperv.h diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index dcad6d6128cf..8c942e60703e 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -616,17 +616,14 @@ }; mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; #reset-cells = <1>; - }; - - iommu@7000f024 { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ + #iommu-cells = <0>; }; memory-controller@7000f400 { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e81a2db42df7..3fa238a137d2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; # endif + + /* + * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, + * set x2apic destination mode to physcial mode when x2apic is available + * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs + * have 8-bit APIC id. + */ +# ifdef CONFIG_X86_X2APIC + if (x2apic_supported()) + x2apic_phys = 1; +# endif + #endif } diff --git a/drivers/Makefile b/drivers/Makefile index bb15b9d0e793..c61cde554340 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -56,7 +56,7 @@ obj-y += tty/ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers -obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ +obj-y += iommu/ # gpu/ comes after char for AGP vs DRM startup and after iommu obj-y += gpu/ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9a25715650e..6f07f3b21816 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -1,3 +1,7 @@ +# The IOVA library may also be used by non-IOMMU_API users +config IOMMU_IOVA + tristate + # IOMMU_API always gets selected by whoever wants it. config IOMMU_API bool @@ -81,9 +85,6 @@ config IOMMU_DEFAULT_PASSTHROUGH If unsure, say N here. -config IOMMU_IOVA - tristate - config OF_IOMMU def_bool y depends on OF && IOMMU_API @@ -282,6 +283,7 @@ config ROCKCHIP_IOMMU config TEGRA_IOMMU_GART bool "Tegra GART IOMMU Support" depends on ARCH_TEGRA_2x_SOC + depends on TEGRA_MC select IOMMU_API help Enables support for remapping discontiguous physical memory @@ -435,4 +437,13 @@ config QCOM_IOMMU help Support for IOMMU on certain Qualcomm SoCs. +config HYPERV_IOMMU + bool "Hyper-V x2APIC IRQ Handling" + depends on HYPERV + select IOMMU_API + default HYPERV + help + Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux + guests to run with x2APIC mode enabled. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index a158a68c8ea8..8c71a15e986b 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o +obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2a7b78bb98b4..6b0760dafb3e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include <linux/ratelimit.h> #include <linux/pci.h> @@ -279,10 +280,10 @@ static u16 get_alias(struct device *dev) return pci_alias; } - pr_info("Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " + pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d " + "for device [%04x:%04x], kernel reported alias " "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device, PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), PCI_FUNC(pci_alias)); @@ -293,9 +294,8 @@ static u16 get_alias(struct device *dev) if (pci_alias == devid && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pr_info("Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); + pci_info(pdev, "Added PCI DMA alias %02x.%d\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias)); } return ivrs_alias; @@ -545,7 +545,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, dev_data = get_dev_data(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { - dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", + pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", domain_id, address, flags); } else if (printk_ratelimit()) { pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", @@ -2258,8 +2258,7 @@ static int amd_iommu_add_device(struct device *dev) ret = iommu_init_device(dev); if (ret) { if (ret != -ENOTSUPP) - pr_err("Failed to initialize device %s - trying to proceed anyway\n", - dev_name(dev)); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_ignore_device(dev); dev->dma_ops = NULL; @@ -2569,6 +2568,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; unsigned long address; u64 dma_mask; + int ret; domain = get_domain(dev); if (IS_ERR(domain)) @@ -2591,7 +2591,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for (j = 0; j < pages; ++j) { unsigned long bus_addr, phys_addr; - int ret; bus_addr = address + s->dma_address + (j << PAGE_SHIFT); phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); @@ -2612,8 +2611,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, return nelems; out_unmap: - pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", - dev_name(dev), npages); + dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n", + npages, ret); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); @@ -2807,7 +2806,7 @@ static int init_reserved_iova_ranges(void) IOVA_PFN(r->start), IOVA_PFN(r->end)); if (!val) { - pr_err("Reserve pci-resource range failed\n"); + pci_err(pdev, "Reserve pci-resource range %pR failed\n", r); return -ENOMEM; } } @@ -3177,8 +3176,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, length, prot, IOMMU_RESV_DIRECT); if (!region) { - pr_err("Out of memory allocating dm-regions for %s\n", - dev_name(dev)); + dev_err(dev, "Out of memory allocating dm-regions\n"); return; } list_add_tail(®ion->list, head); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 66123b911ec8..f773792d77fd 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include <linux/pci.h> #include <linux/acpi.h> @@ -1457,8 +1458,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); - pr_info("Applying erratum 746 workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying erratum 746 workaround\n"); /* Clear the enable writing bit */ pci_write_config_dword(iommu->dev, 0xf0, 0x90); @@ -1488,8 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ iommu_write_l2(iommu, 0x47, value | BIT(0)); - pr_info("Applying ATS write check workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying ATS write check workaround\n"); } /* @@ -1665,6 +1664,7 @@ static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, static void init_iommu_perf_ctr(struct amd_iommu *iommu) { + struct pci_dev *pdev = iommu->dev; u64 val = 0xabcd, val2 = 0; if (!iommu_feature(iommu, FEATURE_PC)) @@ -1676,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { - pr_err("Unable to write to IOMMU perf counter.\n"); + pci_err(pdev, "Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; return; } - pr_info("IOMMU performance counters supported\n"); + pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); iommu->max_banks = (u8) ((val >> 12) & 0x3f); @@ -1840,14 +1840,14 @@ static void print_iommu_info(void) struct amd_iommu *iommu; for_each_iommu(iommu) { + struct pci_dev *pdev = iommu->dev; int i; - pr_info("Found IOMMU at %s cap 0x%hx\n", - dev_name(&iommu->dev->dev), iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx):\n", - iommu->features); + pci_info(pdev, "Extended features (%#llx):\n", + iommu->features); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 23dae9348ace..5d7ef750e4a0 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -370,29 +370,6 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn) return container_of(mn, struct pasid_state, mn); } -static void __mn_flush_page(struct mmu_notifier *mn, - unsigned long address) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); -} - -static int mn_clear_flush_young(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - for (; start < end; start += PAGE_SIZE) - __mn_flush_page(mn, start); - - return 0; -} - static void mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -430,7 +407,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops iommu_mn = { .release = mn_release, - .clear_flush_young = mn_clear_flush_young, .invalidate_range = mn_invalidate_range, }; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 0d284029dc73..d3880010c6cf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -18,6 +18,7 @@ #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/init.h> @@ -32,8 +33,6 @@ #include <linux/amba/bus.h> -#include "io-pgtable.h" - /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index af18a7e7f917..045d93884164 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -39,6 +39,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/init.h> @@ -56,7 +57,6 @@ #include <linux/amba/bus.h> #include <linux/fsl/mc.h> -#include "io-pgtable.h" #include "arm-smmu-regs.h" #define ARM_MMU500_ACTLR_CPRE (1 << 1) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d19f3d6b43c1..77aabe637a60 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -289,7 +289,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long order, base_pfn, end_pfn; + unsigned long order, base_pfn; int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) @@ -298,7 +298,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Use the smallest supported page size for IOVA granularity */ order = __ffs(domain->pgsize_bitmap); base_pfn = max_t(unsigned long, 1, base >> order); - end_pfn = (base + size - 1) >> order; /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c new file mode 100644 index 000000000000..a386b83e0e34 --- /dev/null +++ b/drivers/iommu/hyperv-iommu.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V stub IOMMU driver. + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author : Lan Tianyu <Tianyu.Lan@microsoft.com> + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/iommu.h> +#include <linux/module.h> + +#include <asm/apic.h> +#include <asm/cpu.h> +#include <asm/hw_irq.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> +#include <asm/hypervisor.h> + +#include "irq_remapping.h" + +#ifdef CONFIG_IRQ_REMAP + +/* + * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt + * Redirection Table. Hyper-V exposes one single IO-APIC and so define + * 24 IO APIC remmapping entries. + */ +#define IOAPIC_REMAPPING_ENTRY 24 + +static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; +static struct irq_domain *ioapic_ir_domain; + +static int hyperv_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + struct IO_APIC_route_entry *entry; + int ret; + + /* Return error If new irq affinity is out of ioapic_max_cpumask. */ + if (!cpumask_subset(mask, &ioapic_max_cpumask)) + return -EINVAL; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + entry = data->chip_data; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + send_cleanup_vector(cfg); + + return 0; +} + +static struct irq_chip hyperv_ir_chip = { + .name = "HYPERV-IR", + .irq_ack = apic_ack_irq, + .irq_set_affinity = hyperv_ir_set_affinity, +}; + +static int hyperv_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct irq_desc *desc; + int ret = 0; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) { + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -EINVAL; + } + + irq_data->chip = &hyperv_ir_chip; + + /* + * If there is interrupt remapping function of IOMMU, setting irq + * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't + * support interrupt remapping function, setting irq affinity of IO-APIC + * interrupts still needs to change IO-APIC registers. But ioapic_ + * configure_entry() will ignore value of cfg->vector and cfg-> + * dest_apicid when IO-APIC's parent irq domain is not the vector + * domain.(See ioapic_configure_entry()) In order to setting vector + * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved + * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_ + * affinity() set vector and dest_apicid directly into IO-APIC entry. + */ + irq_data->chip_data = info->ioapic_entry; + + /* + * Hypver-V IO APIC irq affinity should be in the scope of + * ioapic_max_cpumask because no irq remapping support. + */ + desc = irq_data_to_desc(irq_data); + cpumask_copy(desc->irq_common_data.affinity, &ioapic_max_cpumask); + + return 0; +} + +static void hyperv_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static int hyperv_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve) +{ + struct irq_cfg *cfg = irqd_cfg(irq_data); + struct IO_APIC_route_entry *entry = irq_data->chip_data; + + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + + return 0; +} + +static struct irq_domain_ops hyperv_ir_domain_ops = { + .alloc = hyperv_irq_remapping_alloc, + .free = hyperv_irq_remapping_free, + .activate = hyperv_irq_remapping_activate, +}; + +static int __init hyperv_prepare_irq_remapping(void) +{ + struct fwnode_handle *fn; + int i; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + !x2apic_supported()) + return -ENODEV; + + fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); + if (!fn) + return -ENOMEM; + + ioapic_ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, IOAPIC_REMAPPING_ENTRY, fn, + &hyperv_ir_domain_ops, NU |