diff options
28 files changed, 2316 insertions, 1659 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index 947863acc2d4..7b94c88cf2ee 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -1,6 +1,6 @@ * ARM SMMUv3 Architecture Implementation -The SMMUv3 architecture is a significant deparature from previous +The SMMUv3 architecture is a significant departure from previous revisions, replacing the MMIO register interface with in-memory command and event queues and adding support for the ATS and PRI components of the PCIe specification. diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt index cd1b1cd7b5c4..53c20cae309f 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt @@ -1,7 +1,9 @@ * Mediatek IOMMU Architecture Implementation - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U) which -uses the ARM Short-Descriptor translation table format for address translation. + Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and +this M4U have two generations of HW architecture. Generation one uses flat +pagetable, and only supports 4K size page mapping. Generation two uses the +ARM Short-Descriptor translation table format for address translation. About the M4U Hardware Block Diagram, please check below: @@ -36,7 +38,9 @@ in each larb. Take a example, There are many ports like MC, PP, VLD in the video decode local arbiter, all these ports are according to the video HW. Required properties: -- compatible : must be "mediatek,mt8173-m4u". +- compatible : must be one of the following string: + "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. + "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. - reg : m4u register base and size. - interrupts : the interrupt of m4u. - clocks : must contain one entry for each clock-names. @@ -46,7 +50,8 @@ Required properties: according to the local arbiter index, like larb0, larb1, larb2... - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as defined in - dt-binding/memory/mt8173-larb-port.h. + dt-binding/memory/mt2701-larb-port.h for mt2701 and + dt-binding/memory/mt8173-larb-port.h for mt8173 Example: iommu: iommu@10205000 { diff --git a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt new file mode 100644 index 000000000000..20236385f26e --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt @@ -0,0 +1,64 @@ +* QCOM IOMMU + +The MSM IOMMU is an implementation compatible with the ARM VMSA short +descriptor page tables. It provides address translation for bus masters outside +of the CPU, each connected to the IOMMU through a port called micro-TLB. + +Required Properties: + + - compatible: Must contain "qcom,apq8064-iommu". + - reg: Base address and size of the IOMMU registers. + - interrupts: Specifiers for the MMU fault interrupts. For instances that + support secure mode two interrupts must be specified, for non-secure and + secure mode, in that order. For instances that don't support secure mode a + single interrupt must be specified. + - #iommu-cells: The number of cells needed to specify the stream id. This + is always 1. + - qcom,ncb: The total number of context banks in the IOMMU. + - clocks : List of clocks to be used during SMMU register access. See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for information about the format. For each clock specified + here, there must be a corresponding entry in clock-names + (see below). + + - clock-names : List of clock names corresponding to the clocks specified in + the "clocks" property (above). + Should be "smmu_pclk" for specifying the interface clock + required for iommu's register accesses. + Should be "smmu_clk" for specifying the functional clock + required by iommu for bus accesses. + +Each bus master connected to an IOMMU must reference the IOMMU in its device +node with the following property: + + - iommus: A reference to the IOMMU in multiple cells. The first cell is a + phandle to the IOMMU and the second cell is the stream id. + A single master device can be connected to more than one iommu + and multiple contexts in each of the iommu. So multiple entries + are required to list all the iommus and the stream ids that the + master is connected to. + +Example: mdp iommu and its bus master + + mdp_port0: iommu@7500000 { + compatible = "qcom,apq8064-iommu"; + #iommu-cells = <1>; + clock-names = + "smmu_pclk", + "smmu_clk"; + clocks = + <&mmcc SMMU_AHB_CLK>, + <&mmcc MDP_AXI_CLK>; + reg = <0x07500000 0x100000>; + interrupts = + <GIC_SPI 63 0>, + <GIC_SPI 64 0>; + qcom,ncb = <2>; + }; + + mdp: qcom,mdp@5100000 { + compatible = "qcom,mdp"; + ... + iommus = <&mdp_port0 0 + &mdp_port0 2>; + }; diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt index 06a83ceebba7..aa614b2d7cab 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt @@ -2,16 +2,31 @@ SMI (Smart Multimedia Interface) Common The hardware block diagram please check bindings/iommu/mediatek,iommu.txt +Mediatek SMI have two generations of HW architecture, mt8173 uses the second +generation of SMI HW while mt2701 uses the first generation HW of SMI. + +There's slight differences between the two SMI, for generation 2, the +register which control the iommu port is at each larb's register base. But +for generation 1, the register is at smi ao base(smi always on register +base). Besides that, the smi async clock should be prepared and enabled for +SMI generation 1 to transform the smi clock into emi clock domain, but that is +not needed for SMI generation 2. + Required properties: -- compatible : must be "mediatek,mt8173-smi-common" +- compatible : must be one of : + "mediatek,mt2701-smi-common" + "mediatek,mt8173-smi-common" - reg : the register and size of the SMI block. - power-domains : a phandle to the power domain of this local arbiter. - clocks : Must contain an entry for each entry in clock-names. -- clock-names : must contain 2 entries, as follows: +- clock-names : must contain 3 entries for generation 1 smi HW and 2 entries + for generation 2 smi HW as follows: - "apb" : Advanced Peripheral Bus clock, It's the clock for setting the register. - "smi" : It's the clock for transfer data and command. - They may be the same if both source clocks are the same. + They may be the same if both source clocks are the same. + - "async" : asynchronous clock, it help transform the smi clock into the emi + clock domain, this clock is only needed by generation 1 smi HW. Example: smi_common: smi@14022000 { diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt index 55ff3b7e0bb9..21277a56e94c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt @@ -3,7 +3,9 @@ SMI (Smart Multimedia Interface) Local Arbiter The hardware block diagram please check bindings/iommu/mediatek,iommu.txt Required properties: -- compatible : must be "mediatek,mt8173-smi-larb" +- compatible : must be one of : + "mediatek,mt8173-smi-larb" + "mediatek,mt2701-smi-larb" - reg : the register and size of this local arbiter. - mediatek,smi : a phandle to the smi_common node. - power-domains : a phandle to the power domain of this local arbiter. diff --git a/MAINTAINERS b/MAINTAINERS index 8c20323d1277..2dfccbcc3d70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6144,6 +6144,7 @@ M: Joerg Roedel <joro@8bytes.org> L: iommu@lists.linux-foundation.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git S: Maintained +F: Documentation/devicetree/bindings/iommu/ F: drivers/iommu/ IP MASQUERADING diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ad0860383cb3..d432ca828472 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -89,8 +89,8 @@ config MSM_IOMMU bool "MSM IOMMU Support" depends on ARM depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST - depends on BROKEN select IOMMU_API + select IOMMU_IO_PGTABLE_ARMV7S help Support for the IOMMUs found on certain Qualcomm SOCs. These IOMMUs allow virtualization of the address space used by most @@ -111,6 +111,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API + select IOMMU_IOVA depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in @@ -343,4 +344,22 @@ config MTK_IOMMU If unsure, say N here. +config MTK_IOMMU_V1 + bool "MTK IOMMU Version 1 (M4U gen1) Support" + depends on ARM + depends on ARCH_MEDIATEK || COMPILE_TEST + select ARM_DMA_USE_IOMMU + select IOMMU_API + select MEMORY + select MTK_SMI + select COMMON_CLK_MT2701_MMSYS + select COMMON_CLK_MT2701_IMGSYS + select COMMON_CLK_MT2701_VDECSYS + help + Support for the M4U on certain Mediatek SoCs. M4U generation 1 HW is + Multimedia Memory Managememt Unit. This option enables remapping of + DMA memory accesses for the multimedia subsystem. + + if unsure, say N here. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6edb31bf8c6..195f7b997d8e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o -obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o +obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o @@ -18,6 +18,7 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o +obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 634f636393d5..33c177ba93be 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -21,6 +21,7 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/amba/bus.h> +#include <linux/platform_device.h> #include <linux/pci-ats.h> #include <linux/bitmap.h> #include <linux/slab.h> @@ -38,6 +39,7 @@ #include <linux/dma-contiguous.h> #include <linux/irqdomain.h> #include <linux/percpu.h> +#include <linux/iova.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> @@ -56,6 +58,17 @@ #define LOOP_TIMEOUT 100000 +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) + +/* Reserved IOVA ranges */ +#define MSI_RANGE_START (0xfee00000) +#define MSI_RANGE_END (0xfeefffff) +#define HT_RANGE_START (0xfd00000000ULL) +#define HT_RANGE_END (0xffffffffffULL) + /* * This bitmap is used to advertise the page sizes our hardware support * to the IOMMU core, which will then use this information to split @@ -76,6 +89,25 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; + struct dma_ops_domain *dma_dom; +}; + +struct flush_queue { + spinlock_t lock; + unsigned next; + struct flush_queue_entry *entries; +}; + +DEFINE_PER_CPU(struct flush_queue, flush_queue); + +static atomic_t queue_timer_on; +static struct timer_list queue_timer; + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -121,44 +153,19 @@ static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); /* - * For dynamic growth the aperture size is split into ranges of 128MB of - * DMA address space each. This struct represents one such range. - */ -struct aperture_range { - - spinlock_t bitmap_lock; - - /* address allocation bitmap */ - unsigned long *bitmap; - unsigned long offset; - unsigned long next_bit; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 *pte_pages[64]; -}; - -/* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; - /* size of the aperture for the mappings */ - unsigned long aperture_size; - - /* aperture index we start searching for free addresses */ - u32 __percpu *next_index; - - /* address space relevant data */ - struct aperture_range *aperture[APERTURE_MAX_RANGES]; + /* IOVA RB-Tree */ + struct iova_domain iovad; }; +static struct iova_domain reserved_iova_ranges; +static struct lock_class_key reserved_rbtree_key; + /**************************************************************************** * * Helper functions @@ -224,6 +231,12 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain) +{ + BUG_ON(domain->flags != PD_DMA_OPS_MASK); + return container_of(domain, struct dma_ops_domain, domain); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -391,43 +404,6 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) } /* - * This function actually applies the mapping to the page table of the - * dma_ops domain. - */ -static void alloc_unity_mapping(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e) -{ - u64 addr; - - for (addr = e->address_start; addr < e->address_end; - addr += PAGE_SIZE) { - if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, - dma_dom->aperture[0]->bitmap); - } -} - -/* - * Inits the unity mappings required for a specific device - */ -static void init_unity_mappings_for_device(struct device *dev, - struct dma_ops_domain *dma_dom) -{ - struct unity_map_entry *e; - int devid; - - devid = get_device_id(dev); - if (devid < 0) - return; - - list_for_each_entry(e, &amd_iommu_unity_map, list) { - if (!(devid >= e->devid_start && devid <= e->devid_end)) - continue; - alloc_unity_mapping(dma_dom, e); - } -} - -/* * This function checks if the driver got a valid device from the caller to * avoid dereferencing invalid pointers. */ @@ -454,22 +430,12 @@ static bool check_device(struct device *dev) static void init_iommu_group(struct device *dev) { - struct dma_ops_domain *dma_domain; - struct iommu_domain *domain; struct iommu_group *group; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return; - domain = iommu_group_default_domain(group); - if (!domain) - goto out; - - dma_domain = to_pdomain(domain)->priv; - - init_unity_mappings_for_device(dev, dma_domain); -out: iommu_group_put(group); } @@ -1220,7 +1186,7 @@ static void domain_flush_complete(struct protection_domain *domain) int i; for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) + if (domain && !domain->dev_iommu[i]) continue; /* @@ -1397,8 +1363,9 @@ static u64 *fetch_pte(struct protection_domain *domain, static int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, + unsigned long page_size, int prot, - unsigned long page_size) + gfp_t gfp) { u64 __pte, *pte; int i, count; @@ -1410,7 +1377,7 @@ static int iommu_map_page(struct protection_domain *dom, return -EINVAL; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); if (!pte) return -ENOMEM; @@ -1474,320 +1441,37 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /**************************************************************************** * * The next functions belong to the address allocator for the dma_ops - * interface functions. They work like the allocators in the other IOMMU - * drivers. Its basically a bitmap which marks the allocated pages in - * the aperture. Maybe it could be enhanced in the future to a more - * efficient allocator. + * interface functions. * ****************************************************************************/ -/* - * The address allocator core functions. - * - * called with domain->lock held - */ -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) +static unsigned long dma_ops_alloc_iova(struct device *dev, + struct dma_ops_domain *dma_dom, + unsigned int pages, u64 dma_mask) { - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} + unsigned long pfn = 0; -/* - * This function is used to add a new aperture range to an existing - * aperture in case of dma_ops domain allocation or address allocation - * failure. - */ -static int alloc_new_range(struct dma_ops_domain *dma_dom, - bool populate, gfp_t gfp) -{ - int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - unsigned long i, old_size, pte_pgsize; - struct aperture_range *range; - struct amd_iommu *iommu; - unsigned long flags; + pages = __roundup_pow_of_two(pages); -#ifdef CONFIG_IOMMU_STRESS - populate = false; -#endif + if (dma_mask > DMA_BIT_MASK(32)) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(DMA_BIT_MASK(32))); - if (index >= APERTURE_MAX_RANGES) - return -ENOMEM; - - range = kzalloc(sizeof(struct aperture_range), gfp); - if (!range) - return -ENOMEM; - - range->bitmap = (void *)get_zeroed_page(gfp); - if (!range->bitmap) - goto out_free; - - range->offset = dma_dom->aperture_size; - - spin_lock_init(&range->bitmap_lock); - - if (populate) { - unsigned long address = dma_dom->aperture_size; - int i, num_ptes = APERTURE_RANGE_PAGES / 512; - u64 *pte, *pte_page; - - for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, - &pte_page, gfp); - if (!pte) - goto out_free; - - range->pte_pages[i] = pte_page; - - address += APERTURE_RANGE_SIZE / 64; - } - } + if (!pfn) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); - spin_lock_irqsave(&dma_dom->domain.lock, flags); - - /* First take the bitmap_lock and then publish the range */ - spin_lock(&range->bitmap_lock); - - old_size = dma_dom->aperture_size; - dma_dom->aperture[index] = range; - dma_dom->aperture_size += APERTURE_RANGE_SIZE; - - /* Reserve address range used for MSI messages */ - if (old_size < MSI_ADDR_BASE_LO && - dma_dom->aperture_size > MSI_ADDR_BASE_LO) { - unsigned long spage; - int pages; - - pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE); - spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT; - - dma_ops_reserve_addresses(dma_dom, spage, pages); - } - - /* Initialize the exclusion range if necessary */ - for_each_iommu(iommu) { - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset - && iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - startpage = iommu->exclusion_start >> PAGE_SHIFT; - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } - } - - /* - * Check for areas already mapped as present in the new aperture - * range and mark those pages as reserved in the allocator. Such - * mappings may already exist as a result of requested unity - * mappings for devices. - */ - for (i = dma_dom->aperture[index]->offset; - i < dma_dom->aperture_size; - i += pte_pgsize) { - u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize); - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - continue; - - dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, - pte_pgsize >> 12); - } - - update_domain(&dma_dom->domain); - - spin_unlock(&range->bitmap_lock); - - spin_unlock_irqrestore(&dma_dom->domain.lock, flags); - - return 0; - -out_free: - update_domain(&dma_dom->domain); - - free_page((unsigned long)range->bitmap); - - kfree(range); - - return -ENOMEM; + return (pfn << PAGE_SHIFT); } -static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, - struct aperture_range *range, - unsigned long pages, - unsigned long dma_mask, - unsigned long boundary_size, - unsigned long align_mask, - bool trylock) +static void dma_ops_free_iova(struct dma_ops_domain *dma_dom, + unsigned long address, + unsigned int pages) { - unsigned long offset, limit, flags; - dma_addr_t address; - bool flush = false; - - offset = range->offset >> PAGE_SHIFT; - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, - dma_mask >> PAGE_SHIFT); - - if (trylock) { - if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) - return -1; - } else { - spin_lock_irqsave(&range->bitmap_lock, flags); - } - - address = iommu_area_alloc(range->bitmap, limit, range->next_bit, - pages, offset, boundary_size, align_mask); - if (address == -1) { - /* Nothing found, retry one time */ - address = iommu_area_alloc(range->bitmap, limit, - 0, pages, offset, boundary_size, - align_mask); - flush = true; - } - - if (address != -1) - range->next_bit = address + pages; - - spin_unlock_irqrestore(&range->bitmap_lock, flags); - - if (flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - return address; -} - -static unsigned long dma_ops_area_alloc(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) -{ - unsigned long boundary_size, mask; - unsigned long address = -1; - bool first = true; - u32 start, i; - - preempt_disable(); - - mask = dma_get_seg_boundary(dev); - -again: - start = this_cpu_read(*dom->next_index); - - /* Sanity check - is it really necessary? */ - if (unlikely(start > APERTURE_MAX_RANGES)) { - start = 0; - this_cpu_write(*dom->next_index, 0); - } - - boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : - 1UL << (BITS_PER_LONG - PAGE_SHIFT); - - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - struct aperture_range *range; - int index; - - index = (start + i) % APERTURE_MAX_RANGES; - - range = dom->aperture[index]; - - if (!range || range->offset >= dma_mask) - continue; - - address = dma_ops_aperture_alloc(dom, range, pages, - dma_mask, boundary_size, - align_mask, first); - if (address != -1) { - address = range->offset + (address << PAGE_SHIFT); - this_cpu_write(*dom->next_index, index); - break; - } - } - - if (address == -1 && first) { - first = false; - goto again; - } - - preempt_enable(); - - return address; -} - -static unsigned long dma_ops_alloc_addresses(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) -{ - unsigned long address = -1; - - while (address == -1) { - address = dma_ops_area_alloc(dev, dom, pages, - align_mask, dma_mask); - - if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) - break; - } - - if (unlikely(address == -1)) - address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - - return address; -} - -/* - * The address free function. - * - * called with domain->lock held - */ -static void dma_ops_free_addresses(struct dma_ops_domain *dom, - unsigned long address, - unsigned int pages) -{ - unsigned i = address >> APERTURE_RANGE_SHIFT; - struct aperture_range *range = dom->aperture[i]; - unsigned long flags; - - BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - -#ifdef CONFIG_IOMMU_STRESS - if (i < 4) - return; -#endif - - if (amd_iommu_unmap_flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; - - spin_lock_irqsave(&range->bitmap_lock, flags); - if (address + pages > range->next_bit) - range->next_bit = address + pages; - bitmap_clear(range->bitmap, address, pages); - spin_unlock_irqrestore(&range->bitmap_lock, flags); + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + free_iova_fast(&dma_dom->iovad, address, pages); } /**************************************************************************** @@ -1961,44 +1645,18 @@ static void free_gcr3_table(struct protection_domain *domain) */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { - int i; - if (!dom) return; - free_percpu(dom->next_index); - del_domain_from_list(&dom->domain); - free_pagetable(&dom->domain); + put_iova_domain(&dom->iovad); - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - if (!dom->aperture[i]) - continue; - free_page((unsigned long)dom->aperture[i]->bitmap); - kfree(dom->aperture[i]); - } + free_pagetable(&dom->domain); kfree(dom); } -static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, - int max_apertures) -{ - int ret, i, apertures; - - apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - ret = 0; - - for (i = apertures; i < max_apertures; ++i) { - ret = alloc_new_range(dma_dom, false, GFP_KERNEL); - if (ret) - break; - } - |
