summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt2
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.txt13
-rw-r--r--Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt64
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt21
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt4
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/iommu/Kconfig21
-rw-r--r--drivers/iommu/Makefile3
-rw-r--r--drivers/iommu/amd_iommu.c1032
-rw-r--r--drivers/iommu/amd_iommu_types.h1
-rw-r--r--drivers/iommu/amd_iommu_v2.c2
-rw-r--r--drivers/iommu/arm-smmu-v3.c2
-rw-r--r--drivers/iommu/arm-smmu.c28
-rw-r--r--drivers/iommu/dmar.c21
-rw-r--r--drivers/iommu/exynos-iommu.c107
-rw-r--r--drivers/iommu/intel-iommu.c6
-rw-r--r--drivers/iommu/io-pgtable-arm.c2
-rw-r--r--drivers/iommu/iommu.c32
-rw-r--r--drivers/iommu/msm_iommu.c870
-rw-r--r--drivers/iommu/msm_iommu.h73
-rw-r--r--drivers/iommu/msm_iommu_dev.c381
-rw-r--r--drivers/iommu/mtk_iommu.c49
-rw-r--r--drivers/iommu/mtk_iommu.h77
-rw-r--r--drivers/iommu/mtk_iommu_v1.c727
-rw-r--r--drivers/iommu/rockchip-iommu.c181
-rw-r--r--drivers/memory/mtk-smi.c167
-rw-r--r--include/dt-bindings/memory/mt2701-larb-port.h85
-rw-r--r--include/linux/iommu.h3
28 files changed, 2316 insertions, 1659 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index 947863acc2d4..7b94c88cf2ee 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -1,6 +1,6 @@
* ARM SMMUv3 Architecture Implementation
-The SMMUv3 architecture is a significant deparature from previous
+The SMMUv3 architecture is a significant departure from previous
revisions, replacing the MMIO register interface with in-memory command
and event queues and adding support for the ATS and PRI components of
the PCIe specification.
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
index cd1b1cd7b5c4..53c20cae309f 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
@@ -1,7 +1,9 @@
* Mediatek IOMMU Architecture Implementation
- Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U) which
-uses the ARM Short-Descriptor translation table format for address translation.
+ Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and
+this M4U has two generations of HW architecture. Generation one uses a flat
+pagetable, and only supports 4K size page mapping. Generation two uses the
+ARM Short-Descriptor translation table format for address translation.
About the M4U Hardware Block Diagram, please check below:
@@ -36,7 +38,9 @@ in each larb. Take a example, There are many ports like MC, PP, VLD in the
video decode local arbiter, all these ports are according to the video HW.
Required properties:
-- compatible : must be "mediatek,mt8173-m4u".
+- compatible : must be one of the following strings:
+ "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
+ "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
- reg : m4u register base and size.
- interrupts : the interrupt of m4u.
- clocks : must contain one entry for each clock-names.
@@ -46,7 +50,8 @@ Required properties:
according to the local arbiter index, like larb0, larb1, larb2...
- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
Specifies the mtk_m4u_id as defined in
- dt-binding/memory/mt8173-larb-port.h.
+ dt-binding/memory/mt2701-larb-port.h for mt2701 and
+ dt-binding/memory/mt8173-larb-port.h for mt8173
Example:
iommu: iommu@10205000 {
diff --git a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt
new file mode 100644
index 000000000000..20236385f26e
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt
@@ -0,0 +1,64 @@
+* QCOM IOMMU
+
+The MSM IOMMU is an implementation compatible with the ARM VMSA short
+descriptor page tables. It provides address translation for bus masters outside
+of the CPU, each connected to the IOMMU through a port called micro-TLB.
+
+Required Properties:
+
+ - compatible: Must contain "qcom,apq8064-iommu".
+ - reg: Base address and size of the IOMMU registers.
+ - interrupts: Specifiers for the MMU fault interrupts. For instances that
+ support secure mode two interrupts must be specified, for non-secure and
+ secure mode, in that order. For instances that don't support secure mode a
+ single interrupt must be specified.
+ - #iommu-cells: The number of cells needed to specify the stream id. This
+ is always 1.
+ - qcom,ncb: The total number of context banks in the IOMMU.
+ - clocks : List of clocks to be used during SMMU register access. See
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+ for information about the format. For each clock specified
+ here, there must be a corresponding entry in clock-names
+ (see below).
+
+ - clock-names : List of clock names corresponding to the clocks specified in
+ the "clocks" property (above).
+ Should be "smmu_pclk" for specifying the interface clock
+ required for iommu's register accesses.
+ Should be "smmu_clk" for specifying the functional clock
+ required by iommu for bus accesses.
+
+Each bus master connected to an IOMMU must reference the IOMMU in its device
+node with the following property:
+
+ - iommus: A reference to the IOMMU in multiple cells. The first cell is a
+ phandle to the IOMMU and the second cell is the stream id.
+ A single master device can be connected to more than one iommu
+ and multiple contexts in each of the iommus. So multiple entries
+ are required to list all the iommus and the stream ids that the
+ master is connected to.
+
+Example: mdp iommu and its bus master
+
+ mdp_port0: iommu@7500000 {
+ compatible = "qcom,apq8064-iommu";
+ #iommu-cells = <1>;
+ clock-names =
+ "smmu_pclk",
+ "smmu_clk";
+ clocks =
+ <&mmcc SMMU_AHB_CLK>,
+ <&mmcc MDP_AXI_CLK>;
+ reg = <0x07500000 0x100000>;
+ interrupts =
+ <GIC_SPI 63 0>,
+ <GIC_SPI 64 0>;
+ qcom,ncb = <2>;
+ };
+
+ mdp: qcom,mdp@5100000 {
+ compatible = "qcom,mdp";
+ ...
+ iommus = <&mdp_port0 0
+ &mdp_port0 2>;
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
index 06a83ceebba7..aa614b2d7cab 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
@@ -2,16 +2,31 @@ SMI (Smart Multimedia Interface) Common
The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
+Mediatek SMI has two generations of HW architecture: mt8173 uses the second
+generation of SMI HW while mt2701 uses the first generation HW of SMI.
+
+There are slight differences between the two SMI generations. For generation 2,
+the register which controls the iommu port is at each larb's register base. But
+for generation 1, the register is at smi ao base (smi always on register
+base). Besides that, the smi async clock should be prepared and enabled for
+SMI generation 1 to transform the smi clock into the emi clock domain, but that
+is not needed for SMI generation 2.
+
Required properties:
-- compatible : must be "mediatek,mt8173-smi-common"
+- compatible : must be one of :
+ "mediatek,mt2701-smi-common"
+ "mediatek,mt8173-smi-common"
- reg : the register and size of the SMI block.
- power-domains : a phandle to the power domain of this local arbiter.
- clocks : Must contain an entry for each entry in clock-names.
-- clock-names : must contain 2 entries, as follows:
+- clock-names : must contain 3 entries for generation 1 smi HW and 2 entries
+ for generation 2 smi HW as follows:
- "apb" : Advanced Peripheral Bus clock, It's the clock for setting
the register.
- "smi" : It's the clock for transfer data and command.
- They may be the same if both source clocks are the same.
+ They may be the same if both source clocks are the same.
+ - "async" : asynchronous clock, it helps transform the smi clock into the emi
+ clock domain; this clock is only needed by generation 1 smi HW.
Example:
smi_common: smi@14022000 {
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
index 55ff3b7e0bb9..21277a56e94c 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
@@ -3,7 +3,9 @@ SMI (Smart Multimedia Interface) Local Arbiter
The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
Required properties:
-- compatible : must be "mediatek,mt8173-smi-larb"
+- compatible : must be one of :
+ "mediatek,mt8173-smi-larb"
+ "mediatek,mt2701-smi-larb"
- reg : the register and size of this local arbiter.
- mediatek,smi : a phandle to the smi_common node.
- power-domains : a phandle to the power domain of this local arbiter.
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c20323d1277..2dfccbcc3d70 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6144,6 +6144,7 @@ M: Joerg Roedel <joro@8bytes.org>
L: iommu@lists.linux-foundation.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
S: Maintained
+F: Documentation/devicetree/bindings/iommu/
F: drivers/iommu/
IP MASQUERADING
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index ad0860383cb3..d432ca828472 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -89,8 +89,8 @@ config MSM_IOMMU
bool "MSM IOMMU Support"
depends on ARM
depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST
- depends on BROKEN
select IOMMU_API
+ select IOMMU_IO_PGTABLE_ARMV7S
help
Support for the IOMMUs found on certain Qualcomm SOCs.
These IOMMUs allow virtualization of the address space used by most
@@ -111,6 +111,7 @@ config AMD_IOMMU
select PCI_PRI
select PCI_PASID
select IOMMU_API
+ select IOMMU_IOVA
depends on X86_64 && PCI && ACPI
---help---
With this option you can enable support for AMD IOMMU hardware in
@@ -343,4 +344,22 @@ config MTK_IOMMU
If unsure, say N here.
+config MTK_IOMMU_V1
+ bool "MTK IOMMU Version 1 (M4U gen1) Support"
+ depends on ARM
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ select ARM_DMA_USE_IOMMU
+ select IOMMU_API
+ select MEMORY
+ select MTK_SMI
+ select COMMON_CLK_MT2701_MMSYS
+ select COMMON_CLK_MT2701_IMGSYS
+ select COMMON_CLK_MT2701_VDECSYS
+ help
+ Support for the M4U on certain Mediatek SoCs. M4U generation 1 HW is
+ Multimedia Memory Management Unit. This option enables remapping of
+ DMA memory accesses for the multimedia subsystem.
+
+ If unsure, say N here.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6edb31bf8c6..195f7b997d8e 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
-obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
+obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
@@ -18,6 +18,7 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
+obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 634f636393d5..33c177ba93be 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -21,6 +21,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/amba/bus.h>
+#include <linux/platform_device.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
@@ -38,6 +39,7 @@
#include <linux/dma-contiguous.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
+#include <linux/iova.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -56,6 +58,17 @@
#define LOOP_TIMEOUT 100000
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN (1)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
+
+/* Reserved IOVA ranges */
+#define MSI_RANGE_START (0xfee00000)
+#define MSI_RANGE_END (0xfeefffff)
+#define HT_RANGE_START (0xfd00000000ULL)
+#define HT_RANGE_END (0xffffffffffULL)
+
/*
* This bitmap is used to advertise the page sizes our hardware support
* to the IOMMU core, which will then use this information to split
@@ -76,6 +89,25 @@ LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+ unsigned long iova_pfn;
+ unsigned long pages;
+ struct dma_ops_domain *dma_dom;
+};
+
+struct flush_queue {
+ spinlock_t lock;
+ unsigned next;
+ struct flush_queue_entry *entries;
+};
+
+DEFINE_PER_CPU(struct flush_queue, flush_queue);
+
+static atomic_t queue_timer_on;
+static struct timer_list queue_timer;
+
/*
* Domain for untranslated devices - only allocated
* if iommu=pt passed on kernel cmd line.
@@ -121,44 +153,19 @@ static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
/*
- * For dynamic growth the aperture size is split into ranges of 128MB of
- * DMA address space each. This struct represents one such range.
- */
-struct aperture_range {
-
- spinlock_t bitmap_lock;
-
- /* address allocation bitmap */
- unsigned long *bitmap;
- unsigned long offset;
- unsigned long next_bit;
-
- /*
- * Array of PTE pages for the aperture. In this array we save all the
- * leaf pages of the domain page table used for the aperture. This way
- * we don't need to walk the page table to find a specific PTE. We can
- * just calculate its address in constant time.
- */
- u64 *pte_pages[64];
-};
-
-/*
* Data container for a dma_ops specific protection domain
*/
struct dma_ops_domain {
/* generic protection domain information */
struct protection_domain domain;
- /* size of the aperture for the mappings */
- unsigned long aperture_size;
-
- /* aperture index we start searching for free addresses */
- u32 __percpu *next_index;
-
- /* address space relevant data */
- struct aperture_range *aperture[APERTURE_MAX_RANGES];
+ /* IOVA RB-Tree */
+ struct iova_domain iovad;
};
+static struct iova_domain reserved_iova_ranges;
+static struct lock_class_key reserved_rbtree_key;
+
/****************************************************************************
*
* Helper functions
@@ -224,6 +231,12 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
+static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain)
+{
+ BUG_ON(domain->flags != PD_DMA_OPS_MASK);
+ return container_of(domain, struct dma_ops_domain, domain);
+}
+
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -391,43 +404,6 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
}
/*
- * This function actually applies the mapping to the page table of the
- * dma_ops domain.
- */
-static void alloc_unity_mapping(struct dma_ops_domain *dma_dom,
- struct unity_map_entry *e)
-{
- u64 addr;
-
- for (addr = e->address_start; addr < e->address_end;
- addr += PAGE_SIZE) {
- if (addr < dma_dom->aperture_size)
- __set_bit(addr >> PAGE_SHIFT,
- dma_dom->aperture[0]->bitmap);
- }
-}
-
-/*
- * Inits the unity mappings required for a specific device
- */
-static void init_unity_mappings_for_device(struct device *dev,
- struct dma_ops_domain *dma_dom)
-{
- struct unity_map_entry *e;
- int devid;
-
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
- list_for_each_entry(e, &amd_iommu_unity_map, list) {
- if (!(devid >= e->devid_start && devid <= e->devid_end))
- continue;
- alloc_unity_mapping(dma_dom, e);
- }
-}
-
-/*
* This function checks if the driver got a valid device from the caller to
* avoid dereferencing invalid pointers.
*/
@@ -454,22 +430,12 @@ static bool check_device(struct device *dev)
static void init_iommu_group(struct device *dev)
{
- struct dma_ops_domain *dma_domain;
- struct iommu_domain *domain;
struct iommu_group *group;
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
return;
- domain = iommu_group_default_domain(group);
- if (!domain)
- goto out;
-
- dma_domain = to_pdomain(domain)->priv;
-
- init_unity_mappings_for_device(dev, dma_domain);
-out:
iommu_group_put(group);
}
@@ -1220,7 +1186,7 @@ static void domain_flush_complete(struct protection_domain *domain)
int i;
for (i = 0; i < amd_iommus_present; ++i) {
- if (!domain->dev_iommu[i])
+ if (domain && !domain->dev_iommu[i])
continue;
/*
@@ -1397,8 +1363,9 @@ static u64 *fetch_pte(struct protection_domain *domain,
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
+ unsigned long page_size,
int prot,
- unsigned long page_size)
+ gfp_t gfp)
{
u64 __pte, *pte;
int i, count;
@@ -1410,7 +1377,7 @@ static int iommu_map_page(struct protection_domain *dom,
return -EINVAL;
count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
+ pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
if (!pte)
return -ENOMEM;
@@ -1474,320 +1441,37 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
/****************************************************************************
*
* The next functions belong to the address allocator for the dma_ops
- * interface functions. They work like the allocators in the other IOMMU
- * drivers. Its basically a bitmap which marks the allocated pages in
- * the aperture. Maybe it could be enhanced in the future to a more
- * efficient allocator.
+ * interface functions.
*
****************************************************************************/
-/*
- * The address allocator core functions.
- *
- * called with domain->lock held
- */
-/*
- * Used to reserve address ranges in the aperture (e.g. for exclusion
- * ranges.
- */
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
- unsigned long start_page,
- unsigned int pages)
+static unsigned long dma_ops_alloc_iova(struct device *dev,
+ struct dma_ops_domain *dma_dom,
+ unsigned int pages, u64 dma_mask)
{
- unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
-
- if (start_page + pages > last_page)
- pages = last_page - start_page;
-
- for (i = start_page; i < start_page + pages; ++i) {
- int index = i / APERTURE_RANGE_PAGES;
- int page = i % APERTURE_RANGE_PAGES;
- __set_bit(page, dom->aperture[index]->bitmap);
- }
-}
+ unsigned long pfn = 0;
-/*
- * This function is used to add a new aperture range to an existing
- * aperture in case of dma_ops domain allocation or address allocation
- * failure.
- */
-static int alloc_new_range(struct dma_ops_domain *dma_dom,
- bool populate, gfp_t gfp)
-{
- int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
- unsigned long i, old_size, pte_pgsize;
- struct aperture_range *range;
- struct amd_iommu *iommu;
- unsigned long flags;
+ pages = __roundup_pow_of_two(pages);
-#ifdef CONFIG_IOMMU_STRESS
- populate = false;
-#endif
+ if (dma_mask > DMA_BIT_MASK(32))
+ pfn = alloc_iova_fast(&dma_dom->iovad, pages,
+ IOVA_PFN(DMA_BIT_MASK(32)));
- if (index >= APERTURE_MAX_RANGES)
- return -ENOMEM;
-
- range = kzalloc(sizeof(struct aperture_range), gfp);
- if (!range)
- return -ENOMEM;
-
- range->bitmap = (void *)get_zeroed_page(gfp);
- if (!range->bitmap)
- goto out_free;
-
- range->offset = dma_dom->aperture_size;
-
- spin_lock_init(&range->bitmap_lock);
-
- if (populate) {
- unsigned long address = dma_dom->aperture_size;
- int i, num_ptes = APERTURE_RANGE_PAGES / 512;
- u64 *pte, *pte_page;
-
- for (i = 0; i < num_ptes; ++i) {
- pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
- &pte_page, gfp);
- if (!pte)
- goto out_free;
-
- range->pte_pages[i] = pte_page;
-
- address += APERTURE_RANGE_SIZE / 64;
- }
- }
+ if (!pfn)
+ pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask));
- spin_lock_irqsave(&dma_dom->domain.lock, flags);
-
- /* First take the bitmap_lock and then publish the range */
- spin_lock(&range->bitmap_lock);
-
- old_size = dma_dom->aperture_size;
- dma_dom->aperture[index] = range;
- dma_dom->aperture_size += APERTURE_RANGE_SIZE;
-
- /* Reserve address range used for MSI messages */
- if (old_size < MSI_ADDR_BASE_LO &&
- dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
- unsigned long spage;
- int pages;
-
- pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
- spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
-
- dma_ops_reserve_addresses(dma_dom, spage, pages);
- }
-
- /* Initialize the exclusion range if necessary */
- for_each_iommu(iommu) {
- if (iommu->exclusion_start &&
- iommu->exclusion_start >= dma_dom->aperture[index]->offset
- && iommu->exclusion_start < dma_dom->aperture_size) {
- unsigned long startpage;
- int pages = iommu_num_pages(iommu->exclusion_start,
- iommu->exclusion_length,
- PAGE_SIZE);
- startpage = iommu->exclusion_start >> PAGE_SHIFT;
- dma_ops_reserve_addresses(dma_dom, startpage, pages);
- }
- }
-
- /*
- * Check for areas already mapped as present in the new aperture
- * range and mark those pages as reserved in the allocator. Such
- * mappings may already exist as a result of requested unity
- * mappings for devices.
- */
- for (i = dma_dom->aperture[index]->offset;
- i < dma_dom->aperture_size;
- i += pte_pgsize) {
- u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize);
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- continue;
-
- dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT,
- pte_pgsize >> 12);
- }
-
- update_domain(&dma_dom->domain);
-
- spin_unlock(&range->bitmap_lock);
-
- spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
-
- return 0;
-
-out_free:
- update_domain(&dma_dom->domain);
-
- free_page((unsigned long)range->bitmap);
-
- kfree(range);
-
- return -ENOMEM;
+ return (pfn << PAGE_SHIFT);
}
-static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
- struct aperture_range *range,
- unsigned long pages,
- unsigned long dma_mask,
- unsigned long boundary_size,
- unsigned long align_mask,
- bool trylock)
+static void dma_ops_free_iova(struct dma_ops_domain *dma_dom,
+ unsigned long address,
+ unsigned int pages)
{
- unsigned long offset, limit, flags;
- dma_addr_t address;
- bool flush = false;
-
- offset = range->offset >> PAGE_SHIFT;
- limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
- dma_mask >> PAGE_SHIFT);
-
- if (trylock) {
- if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
- return -1;
- } else {
- spin_lock_irqsave(&range->bitmap_lock, flags);
- }
-
- address = iommu_area_alloc(range->bitmap, limit, range->next_bit,
- pages, offset, boundary_size, align_mask);
- if (address == -1) {
- /* Nothing found, retry one time */
- address = iommu_area_alloc(range->bitmap, limit,
- 0, pages, offset, boundary_size,
- align_mask);
- flush = true;
- }
-
- if (address != -1)
- range->next_bit = address + pages;
-
- spin_unlock_irqrestore(&range->bitmap_lock, flags);
-
- if (flush) {
- domain_flush_tlb(&dom->domain);
- domain_flush_complete(&dom->domain);
- }
-
- return address;
-}
-
-static unsigned long dma_ops_area_alloc(struct device *dev,
- struct dma_ops_domain *dom,
- unsigned int pages,
- unsigned long align_mask,
- u64 dma_mask)
-{
- unsigned long boundary_size, mask;
- unsigned long address = -1;
- bool first = true;
- u32 start, i;
-
- preempt_disable();
-
- mask = dma_get_seg_boundary(dev);
-
-again:
- start = this_cpu_read(*dom->next_index);
-
- /* Sanity check - is it really necessary? */
- if (unlikely(start > APERTURE_MAX_RANGES)) {
- start = 0;
- this_cpu_write(*dom->next_index, 0);
- }
-
- boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
- 1UL << (BITS_PER_LONG - PAGE_SHIFT);
-
- for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
- struct aperture_range *range;
- int index;
-
- index = (start + i) % APERTURE_MAX_RANGES;
-
- range = dom->aperture[index];
-
- if (!range || range->offset >= dma_mask)
- continue;
-
- address = dma_ops_aperture_alloc(dom, range, pages,
- dma_mask, boundary_size,
- align_mask, first);
- if (address != -1) {
- address = range->offset + (address << PAGE_SHIFT);
- this_cpu_write(*dom->next_index, index);
- break;
- }
- }
-
- if (address == -1 && first) {
- first = false;
- goto again;
- }
-
- preempt_enable();
-
- return address;
-}
-
-static unsigned long dma_ops_alloc_addresses(struct device *dev,
- struct dma_ops_domain *dom,
- unsigned int pages,
- unsigned long align_mask,
- u64 dma_mask)
-{
- unsigned long address = -1;
-
- while (address == -1) {
- address = dma_ops_area_alloc(dev, dom, pages,
- align_mask, dma_mask);
-
- if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
- break;
- }
-
- if (unlikely(address == -1))
- address = DMA_ERROR_CODE;
-
- WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
-
- return address;
-}
-
-/*
- * The address free function.
- *
- * called with domain->lock held
- */
-static void dma_ops_free_addresses(struct dma_ops_domain *dom,
- unsigned long address,
- unsigned int pages)
-{
- unsigned i = address >> APERTURE_RANGE_SHIFT;
- struct aperture_range *range = dom->aperture[i];
- unsigned long flags;
-
- BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
-
-#ifdef CONFIG_IOMMU_STRESS
- if (i < 4)
- return;
-#endif
-
- if (amd_iommu_unmap_flush) {
- domain_flush_tlb(&dom->domain);
- domain_flush_complete(&dom->domain);
- }
-
- address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&range->bitmap_lock, flags);
- if (address + pages > range->next_bit)
- range->next_bit = address + pages;
- bitmap_clear(range->bitmap, address, pages);
- spin_unlock_irqrestore(&range->bitmap_lock, flags);
+ pages = __roundup_pow_of_two(pages);
+ address >>= PAGE_SHIFT;
+ free_iova_fast(&dma_dom->iovad, address, pages);
}
/****************************************************************************
@@ -1961,44 +1645,18 @@ static void free_gcr3_table(struct protection_domain *domain)
*/
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
- int i;
-
if (!dom)
return;
- free_percpu(dom->next_index);
-
del_domain_from_list(&dom->domain);
- free_pagetable(&dom->domain);
+ put_iova_domain(&dom->iovad);
- for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
- if (!dom->aperture[i])
- continue;
- free_page((unsigned long)dom->aperture[i]->bitmap);
- kfree(dom->aperture[i]);
- }
+ free_pagetable(&dom->domain);
kfree(dom);
}
-static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
- int max_apertures)
-{
- int ret, i, apertures;
-
- apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
- ret = 0;
-
- for (i = apertures; i < max_apertures; ++i) {
- ret = alloc_new_range(dma_dom, false, GFP_KERNEL);
- if (ret)
- break;
- }
-