From b8f80bffd51f4ef029051d6898d9c2e3e5637dc3 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 14 Mar 2014 14:00:56 +0100 Subject: iommu/ipmmu-vmsa: Cleanup failures of ARM mapping creation or attachment The ARM IOMMU mapping needs to be released when attaching the device fails. Add arm_iommu_release_mapping() to the error code path. This is safe to call with a NULL mapping, so no specific check is needed. Cleanup is also missing when failing to create a mapping. Jump to the error code path in that case instead of returning immediately. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 748693192c20..034293ca4b9a 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1090,7 +1090,8 @@ static int ipmmu_add_device(struct device *dev) SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); - return PTR_ERR(mapping); + ret = PTR_ERR(mapping); + goto error; } mmu->mapping = mapping; @@ -1106,6 +1107,7 @@ static int ipmmu_add_device(struct device *dev) return 0; error: + arm_iommu_release_mapping(mmu->mapping); kfree(dev->archdata.iommu); dev->archdata.iommu = NULL; iommu_group_remove_device(dev); -- cgit v1.2.3 From 22463cab3f96baf6b568200a35c7648438eea7ff Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 15:34:54 +0200 Subject: iommu/ipmmu-vmsa: Flush P[UM]D entry before freeing the child page table When clearing PUD or PMD entries the child page table (if any) is freed and the PUD or PMD entry is then cleared. This result in a small race condition window during which a free page table could be accessed by the IPMMU. Fix it by clearing and flushing the PUD or PMD entry before freeing the child page table. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 034293ca4b9a..a32d237c0f22 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -678,30 +678,33 @@ done: static void ipmmu_clear_pud(struct ipmmu_vmsa_device *mmu, pud_t *pud) { - /* Free the page table. */ pgtable_t table = pud_pgtable(*pud); - __free_page(table); /* Clear the PUD. */ *pud = __pud(0); ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); + + /* Free the page table. */ + __free_page(table); } static void ipmmu_clear_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud, pmd_t *pmd) { + pmd_t pmdval = *pmd; unsigned int i; - /* Free the page table. */ - if (pmd_table(*pmd)) { - pgtable_t table = pmd_pgtable(*pmd); - __free_page(table); - } - /* Clear the PMD. */ *pmd = __pmd(0); ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + /* Free the page table. */ + if (pmd_table(pmdval)) { + pgtable_t table = pmd_pgtable(pmdval); + + __free_page(table); + } + /* Check whether the PUD is still needed. */ pmd = pmd_offset(pud, 0); for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { -- cgit v1.2.3 From 9eca0a5875403027e10d68dd162df9790d42839e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 15:34:54 +0200 Subject: iommu/ipmmu-vmsa: Invalidate TLB after unmapping The TLB must be invalidated after unmapping memory to remove stale TLB entries. this was supposed to be performed already, but a bug in the driver prevented the TLB invalidate function from being called. Fix it. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a32d237c0f22..3a61103ef108 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -785,7 +785,6 @@ static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, pud_t *pud; pmd_t *pmd; pte_t *pte; - int ret = 0; if (!pgd) return -EINVAL; @@ -847,8 +846,7 @@ static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, done: spin_unlock_irqrestore(&domain->lock, flags); - if (ret) - ipmmu_tlb_invalidate(domain); + ipmmu_tlb_invalidate(domain); return 0; } -- cgit v1.2.3 From 4a93f21d87e769477c0cb2b98d7e7911b8d37c03 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 17 Mar 2014 01:02:46 +0100 Subject: iommu/ipmmu-vmsa: Add device tree bindings documentation Signed-off-by: Laurent Pinchart Acked-by: Geert Uytterhoeven --- .../bindings/iommu/renesas,ipmmu-vmsa.txt | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt new file mode 100644 index 000000000000..cd29083e16ec --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -0,0 +1,41 @@ +* Renesas VMSA-Compatible IOMMU + +The IPMMU is an IOMMU implementation compatible with the ARM VMSA page tables. +It provides address translation for bus masters outside of the CPU, each +connected to the IPMMU through a port called micro-TLB. + + +Required Properties: + + - compatible: Must contain "renesas,ipmmu-vmsa". + - reg: Base address and size of the IPMMU registers. + - interrupts: Specifiers for the MMU fault interrupts. For instances that + support secure mode two interrupts must be specified, for non-secure and + secure mode, in that order. For instances that don't support secure mode a + single interrupt must be specified. + + - #iommu-cells: Must be 1. + +Each bus master connected to an IPMMU must reference the IPMMU in its device +node with the following property: + + - iommus: A reference to the IPMMU in two cells. The first cell is a phandle + to the IPMMU and the second cell the number of the micro-TLB that the + device is connected to. + + +Example: R8A7791 IPMMU-MX and VSP1-D0 bus master + + ipmmu_mx: mmu@fe951000 { + compatible = "renasas,ipmmu-vmsa"; + reg = <0 0xfe951000 0 0x1000>; + interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, + <0 221 IRQ_TYPE_LEVEL_HIGH>; + #iommu-cells = <1>; + }; + + vsp1@fe928000 { + ... + iommus = <&ipmmu_mx 13>; + ... + }; -- cgit v1.2.3 From 275f5053c7786285d2f20d2dd12908f834c47ad8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 17 Mar 2014 01:02:46 +0100 Subject: iommu/ipmmu-vmsa: Add device tree support Make platform data optional when the device is instantiated from DT and look up the micro-TLB number in the bus master DT node. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 55 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3a61103ef108..368c852d9ee7 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ static LIST_HEAD(ipmmu_devices); * Registers Definition */ +#define IM_NS_ALIAS_OFFSET 0x800 + #define IM_CTX_SIZE 0x40 #define IMCTR 0x0000 @@ -1002,16 +1005,33 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) { - const struct ipmmu_vmsa_master *master = mmu->pdata->masters; - const char *devname = dev_name(dev); - unsigned int i; + struct of_phandle_args args; + int ret; + + if (mmu->pdata) { + const struct ipmmu_vmsa_master *master = mmu->pdata->masters; + const char *devname = dev_name(dev); + unsigned int i; - for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) - return master->utlb; + for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { + if (strcmp(master->name, devname) == 0) + return master->utlb; + } + + return -1; } - return -1; + ret = of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", 0, &args); + if (ret < 0) + return -1; + + of_node_put(args.np); + + if (args.np != mmu->dev->of_node || args.args_count != 1) + return -1; + + return args.args[0]; } static int ipmmu_add_device(struct device *dev) @@ -1157,7 +1177,7 @@ static int ipmmu_probe(struct platform_device *pdev) int irq; int ret; - if (!pdev->dev.platform_data) { + if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { dev_err(&pdev->dev, "missing platform data\n"); return -EINVAL; } @@ -1178,6 +1198,20 @@ static int ipmmu_probe(struct platform_device *pdev) if (IS_ERR(mmu->base)) return PTR_ERR(mmu->base); + /* + * The IPMMU has two register banks, for secure and non-secure modes. + * The bank mapped at the beginning of the IPMMU address space + * corresponds to the running mode of the CPU. When running in secure + * mode the non-secure register bank is also available at an offset. + * + * Secure mode operation isn't clearly documented and is thus currently + * not implemented in the driver. Furthermore, preliminary tests of + * non-secure operation with the main register bank were not successful. + * Offset the registers base unconditionally to point to the non-secure + * alias space for now. + */ + mmu->base += IM_NS_ALIAS_OFFSET; + irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "no IRQ found\n"); @@ -1223,9 +1257,14 @@ static int ipmmu_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id ipmmu_of_ids[] = { + { .compatible = "renesas,ipmmu-vmsa", }, +}; + static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", + .of_match_table = of_match_ptr(ipmmu_of_ids), }, .probe = ipmmu_probe, .remove = ipmmu_remove, -- cgit v1.2.3 From a166d31ee56e8fc56ce2497d8de9da5359f4ee41 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 01:36:43 +0200 Subject: iommu/ipmmu-vmsa: Support multiple micro TLBs per device Devices such as the system DMA controller are connected to multiple micro TLBs of the same IOMMU. Support this. Selective enabling of micro TLBs based on runtime device usage isn't possible at the moment due to lack of support in the IOMMU and DMA mapping APIs. Support for devices connected to different IOMMUs is also unsupported for the same reason. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 115 +++++++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 368c852d9ee7..5d080cf11ba5 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -47,7 +47,8 @@ struct ipmmu_vmsa_domain { struct ipmmu_vmsa_archdata { struct ipmmu_vmsa_device *mmu; - unsigned int utlb; + unsigned int *utlbs; + unsigned int num_utlbs; }; static DEFINE_SPINLOCK(ipmmu_devices_lock); @@ -900,6 +901,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, struct ipmmu_vmsa_device *mmu = archdata->mmu; struct ipmmu_vmsa_domain *domain = io_domain->priv; unsigned long flags; + unsigned int i; int ret = 0; if (!mmu) { @@ -928,7 +930,8 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, if (ret < 0) return ret; - ipmmu_utlb_enable(domain, archdata->utlb); + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_enable(domain, archdata->utlbs[i]); return 0; } @@ -938,8 +941,10 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain, { struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; struct ipmmu_vmsa_domain *domain = io_domain->priv; + unsigned int i; - ipmmu_utlb_disable(domain, archdata->utlb); + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_disable(domain, archdata->utlbs[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -1003,10 +1008,12 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); } -static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) +static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, + unsigned int **_utlbs) { - struct of_phandle_args args; - int ret; + unsigned int *utlbs; + unsigned int i; + int count; if (mmu->pdata) { const struct ipmmu_vmsa_master *master = mmu->pdata->masters; @@ -1014,32 +1021,64 @@ static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) unsigned int i; for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) - return master->utlb; + if (strcmp(master->name, devname) == 0) { + utlbs = kmalloc(sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + + utlbs[0] = master->utlb; + + *_utlbs = utlbs; + return 1; + } } - return -1; + return -EINVAL; } - ret = of_parse_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells", 0, &args); - if (ret < 0) - return -1; + count = of_count_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells"); + if (count < 0) + return -EINVAL; + + utlbs = kcalloc(count, sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + + for (i = 0; i < count; ++i) { + struct of_phandle_args args; + int ret; + + ret = of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", i, &args); + if (ret < 0) + goto error; + + of_node_put(args.np); + + if (args.np != mmu->dev->of_node || args.args_count != 1) + goto error; + + utlbs[i] = args.args[0]; + } - of_node_put(args.np); + *_utlbs = utlbs; - if (args.np != mmu->dev->of_node || args.args_count != 1) - return -1; + return count; - return args.args[0]; +error: + kfree(utlbs); + return -EINVAL; } static int ipmmu_add_device(struct device *dev) { struct ipmmu_vmsa_archdata *archdata; struct ipmmu_vmsa_device *mmu; - struct iommu_group *group; - int utlb = -1; + struct iommu_group *group = NULL; + unsigned int *utlbs = NULL; + unsigned int i; + int num_utlbs = 0; int ret; if (dev->archdata.iommu) { @@ -1052,8 +1091,8 @@ static int ipmmu_add_device(struct device *dev) spin_lock(&ipmmu_devices_lock); list_for_each_entry(mmu, &ipmmu_devices, list) { - utlb = ipmmu_find_utlb(mmu, dev); - if (utlb >= 0) { + num_utlbs = ipmmu_find_utlbs(mmu, dev, &utlbs); + if (num_utlbs) { /* * TODO Take a reference to the MMU to protect * against device removal. @@ -1064,17 +1103,22 @@ static int ipmmu_add_device(struct device *dev) spin_unlock(&ipmmu_devices_lock); - if (utlb < 0) + if (num_utlbs <= 0) return -ENODEV; - if (utlb >= mmu->num_utlbs) - return -EINVAL; + for (i = 0; i < num_utlbs; ++i) { + if (utlbs[i] >= mmu->num_utlbs) { + ret = -EINVAL; + goto error; + } + } /* Create a device group and add the device to it. */ group = iommu_group_alloc(); if (IS_ERR(group)) { dev_err(dev, "Failed to allocate IOMMU group\n"); - return PTR_ERR(group); + ret = PTR_ERR(group); + goto error; } ret = iommu_group_add_device(group, dev); @@ -1082,7 +1126,8 @@ static int ipmmu_add_device(struct device *dev) if (ret < 0) { dev_err(dev, "Failed to add device to IPMMU group\n"); - return ret; + group = NULL; + goto error; } archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); @@ -1092,7 +1137,8 @@ static int ipmmu_add_device(struct device *dev) } archdata->mmu = mmu; - archdata->utlb = utlb; + archdata->utlbs = utlbs; + archdata->num_utlbs = num_utlbs; dev->archdata.iommu = archdata; /* @@ -1129,17 +1175,28 @@ static int ipmmu_add_device(struct device *dev) error: arm_iommu_release_mapping(mmu->mapping); + kfree(dev->archdata.iommu); + kfree(utlbs); + dev->archdata.iommu = NULL; - iommu_group_remove_device(dev); + + if (!IS_ERR_OR_NULL(group)) + iommu_group_remove_device(dev); + return ret; } static void ipmmu_remove_device(struct device *dev) { + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - kfree(dev->archdata.iommu); + + kfree(archdata->utlbs); + kfree(archdata); + dev->archdata.iommu = NULL; } -- cgit v1.2.3 From 78e1f974dd351ac82d978e3aac2d27422013f914 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2014 02:37:09 +0200 Subject: iommu/ipmmu-vmsa: Remove platform data support No board file instantiates the IPMMU using platform data. Now that we have DT support, get rid of platform data. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 24 ------------------------ include/linux/platform_data/ipmmu-vmsa.h | 24 ------------------------ 2 files changed, 48 deletions(-) delete mode 100644 include/linux/platform_data/ipmmu-vmsa.h diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5d080cf11ba5..791c3daec7c0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +29,6 @@ struct ipmmu_vmsa_device { void __iomem *base; struct list_head list; - const struct ipmmu_vmsa_platform_data *pdata; unsigned int num_utlbs; struct dma_iommu_mapping *mapping; @@ -1015,27 +1013,6 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, unsigned int i; int count; - if (mmu->pdata) { - const struct ipmmu_vmsa_master *master = mmu->pdata->masters; - const char *devname = dev_name(dev); - unsigned int i; - - for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) { - utlbs = kmalloc(sizeof(*utlbs), GFP_KERNEL); - if (!utlbs) - return -ENOMEM; - - utlbs[0] = master->utlb; - - *_utlbs = utlbs; - return 1; - } - } - - return -EINVAL; - } - count = of_count_phandle_with_args(dev->of_node, "iommus", "#iommu-cells"); if (count < 0) @@ -1246,7 +1223,6 @@ static int ipmmu_probe(struct platform_device *pdev) } mmu->dev = &pdev->dev; - mmu->pdata = pdev->dev.platform_data; mmu->num_utlbs = 32; /* Map I/O memory and request IRQ. */ diff --git a/include/linux/platform_data/ipmmu-vmsa.h b/include/linux/platform_data/ipmmu-vmsa.h deleted file mode 100644 index 5275b3ac6d37..000000000000 --- a/include/linux/platform_data/ipmmu-vmsa.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * IPMMU VMSA Platform Data - * - * Copyright (C) 2014 Renesas Electronics Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#ifndef __IPMMU_VMSA_H__ -#define __IPMMU_VMSA_H__ - -struct ipmmu_vmsa_master { - const char *name; - unsigned int utlb; -}; - -struct ipmmu_vmsa_platform_data { - const struct ipmmu_vmsa_master *masters; - unsigned int num_masters; -}; - -#endif /* __IPMMU_VMSA_H__ */ -- cgit v1.2.3 From 114150d8f4fc9e7a003be2bdb0efd31796d241ff Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:13 +0000 Subject: iommu: Allow building iova.c independently In preparation for sharing the IOVA allocator, split it out under its own Kconfig symbol. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 4 ++++ drivers/iommu/Makefile | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 325188eef1c1..a839ca93376b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -13,6 +13,9 @@ menuconfig IOMMU_SUPPORT if IOMMU_SUPPORT +config IOMMU_IOVA + bool + config OF_IOMMU def_bool y depends on OF && IOMMU_API @@ -91,6 +94,7 @@ config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) select IOMMU_API + select IOMMU_IOVA select DMAR_TABLE help DMA remapping (DMAR) devices support enables independent address diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 7b976f294a69..0b1b94ef13ab 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,13 +1,14 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_DMAR_TABLE) += dmar.o -obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o -- cgit v1.2.3 From 85b4545629663486b7f71047ce3b54fa0ad3eb28 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:14 +0000 Subject: iommu: Consolidate IOVA allocator code In order to share the IOVA allocator with other architectures, break the unnecssary dependency on the Intel IOMMU driver and move the remaining IOVA internals to iova.c Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 33 ++------------------------------- drivers/iommu/iova.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/iova.h | 3 +++ 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 40dfbc0444c0..e758d8ed8fb5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -485,7 +485,6 @@ __setup("intel_iommu=", intel_iommu_setup); static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; -static struct kmem_cache *iommu_iova_cache; static inline void *alloc_pgtable_page(int node) { @@ -523,16 +522,6 @@ static inline void free_devinfo_mem(void *vaddr) kmem_cache_free(iommu_devinfo_cache, vaddr); } -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); -} - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iommu_iova_cache, iova); -} - static inline int domain_type_is_vm(struct dmar_domain *domain) { return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; @@ -3427,23 +3416,6 @@ static inline int iommu_devinfo_cache_init(void) return ret; } -static inline int iommu_iova_cache_init(void) -{ - int ret = 0; - - iommu_iova_cache = kmem_cache_create("iommu_iova", - sizeof(struct iova), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_iova_cache) { - printk(KERN_ERR "Couldn't create iova cache\n"); - ret = -ENOMEM; - } - - return ret; -} - static int __init iommu_init_mempool(void) { int ret; @@ -3461,7 +3433,7 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - kmem_cache_destroy(iommu_iova_cache); + iommu_iova_cache_destroy(); return -ENOMEM; } @@ -3470,8 +3442,7 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - kmem_cache_destroy(iommu_iova_cache); - + iommu_iova_cache_destroy(); } static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f6b17e6af2fb..520b8c8ae0c4 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -18,6 +18,41 @@ */ #include +#include + +static struct kmem_cache *iommu_iova_cache; + +int iommu_iova_cache_init(void) +{ + int ret = 0; + + iommu_iova_cache = kmem_cache_create("iommu_iova", + sizeof(struct iova), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_iova_cache) { + pr_err("Couldn't create iova cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +void iommu_iova_cache_destroy(void) +{ + kmem_cache_destroy(iommu_iova_cache); +} + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); +} + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iommu_iova_cache, iova); +} void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) diff --git a/include/linux/iova.h b/include/linux/iova.h index 19e81d5ccb6d..ad0507c61cc7 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -39,6 +39,9 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +int iommu_iova_cache_init(void); +void iommu_iova_cache_destroy(void); + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); -- cgit v1.2.3 From 1b72250076dde4276acecf3a7da722b185703e78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:15 +0000 Subject: iommu: Make IOVA domain low limit flexible To share the IOVA allocator with other architectures, it needs to accommodate more general aperture restrictions; move the lower limit from a compile-time constant to a runtime domain property to allow IOVA domains with different requirements to co-exist. Also reword the slightly unclear description of alloc_iova since we're touching it anyway. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++--- drivers/iommu/iova.c | 10 ++++++---- include/linux/iova.h | 7 +++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e758d8ed8fb5..86f9e82b015b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -71,6 +71,9 @@ __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) @@ -1632,7 +1635,7 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, IOVA_START_PFN, DMA_32BIT_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1690,7 +1693,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) int adjust_width, agaw; unsigned long sagaw; - init_iova_domain(&domain->iovad, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -4313,7 +4316,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 520b8c8ae0c4..a3dbba8caa19 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,11 +55,13 @@ void free_iova_mem(struct iova *iova) } void -init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) +init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit) { spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; + iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } @@ -162,7 +164,7 @@ move_left: if (!curr) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { + if ((iovad->start_pfn + size + pad_size) > limit_pfn) { spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; } @@ -237,8 +239,8 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) * @size: - size of page frames to allocate * @limit_pfn: - max limit address * @size_aligned: - set if size_aligned address range is required - * This function allocates an iova in the range limit_pfn to IOVA_START_PFN - * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned + * This function allocates an iova in the range iovad->start_pfn to limit_pfn, + * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned * flag is set then the allocated address iova->pfn_lo will be naturally * aligned on roundup_power_of_two(size). */ diff --git a/include/linux/iova.h b/include/linux/iova.h index ad0507c61cc7..591b19626b46 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -16,9 +16,6 @@ #include #include -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - /* iova structure */ struct iova { struct rb_node node; @@ -31,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -52,7 +50,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 0fb5fe874c42942e16c450ae05da453e13a1c09e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:16 +0000 Subject: iommu: Make IOVA domain page size explicit Systems may contain heterogeneous IOMMUs supporting differing minimum page sizes, which may also not be common with the CPU page size. Thus it is practical to have an explicit notion of IOVA granularity to simplify handling of mapping and allocation constraints. As an initial step, move the IOVA page granularity from an implicit compile-time constant to a per-domain property so we can make use of it in IOVA domain context at runtime. To keep the abstraction tidy, extend the little API of inline iova_* helpers to parallel some of the equivalent PAGE_* macros. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++--- drivers/iommu/iova.c | 12 ++++++++++-- include/linux/iova.h | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 86f9e82b015b..ae4c1a854e57 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1635,7 +1635,8 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1693,7 +1694,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width) int adjust_width, agaw; unsigned long sagaw; - init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -4316,7 +4318,8 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a3dbba8caa19..9dd8208312c2 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,12 +55,20 @@ void free_iova_mem(struct iova *iova) } void -init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit) +init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit) { + /* + * IOVA granularity will normally be equal to the smallest + * supported IOMMU page size; both *must* be capable of + * representing individual CPU pages exactly. + */ + BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule)); + spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; + iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } diff --git a/include/linux/iova.h b/include/linux/iova.h index 591b19626b46..3920a19d8194 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -37,6 +38,36 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +static inline unsigned long iova_shift(struct iova_domain *iovad) +{ + return __ffs(iovad->granule); +} + +static inline unsigned long iova_mask(struct iova_domain *iovad) +{ + return iovad->granule - 1; +} + +static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova & iova_mask(iovad); +} + +static inline size_t iova_align(struct iova_domain *iovad, size_t size) +{ + return ALIGN(size, iovad->granule); +} + +static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) +{ + return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); +} + +static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova >> iova_shift(iovad); +} + int iommu_iova_cache_init(void); void iommu_iova_cache_destroy(void); @@ -50,8 +81,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 6fd492fd746d9858a41dc85eef44bd627b809109 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 16:47:19 -0700 Subject: iommu: Fix trace_unmap() to report original iova iommu_unmap() calls trace_unmap() with changed iova and original size. trace_unmap() should report original iova instead. Change iommu_unmap() to call trace_unmap() with original iova. Signed-off-by: Shuah Khan Reported-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f7718d73e984..d4c3db5abf25 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1094,6 +1094,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { size_t unmapped_page, unmapped = 0; unsigned int min_pagesz; + unsigned long orig_iova = iova; if (unlikely(domain->ops->unmap == NULL || domain->ops->pgsize_bitmap == 0UL)) @@ -1133,7 +1134,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } - trace_unmap(iova, 0, size); + trace_unmap(orig_iova, 0, size); return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); -- cgit v1.2.3 From db8614d35bb8fc6d032792c801bd5b38ce860f19 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 20:53:17 -0700 Subject: iommu: Change trace unmap api to report unmapped size Currently map and unmap are implemented as events under a common trace class declaration. The common class forces trace_unmap() to require a bogus physical address argument that it doesn't use. Changing unmap to report unmapped size will provide useful information for debugging. Remove common map_unmap trace class and change map and unmap into separate events as opposed to events under the same class to allow for differences in the reporting information. In addition, map and unmap are changed to handle size value as size_t instead of int to match the passed size value and avoid overflow. Signed-off-by: Shuah Khan Suggested-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- include/trace/events/iommu.h | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d4c3db5abf25..3a4fb6274c99 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1134,7 +1134,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } - trace_unmap(orig_iova, 0, size); + trace_unmap(orig_iova, size, unmapped); return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index a8f5c32d174b..2c7befb10f13 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -83,7 +83,7 @@ DEFINE_EVENT(iommu_device_event, detach_device_from_domain, TP_ARGS(dev) ); -DECLARE_EVENT_CLASS(iommu_map_unmap, +TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), @@ -92,7 +92,7 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, TP_STRUCT__entry( __field(u64, iova) __field(u64, paddr) - __field(int, size) + __field(size_t, size) ), TP_fast_assign( @@ -101,26 +101,31 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=0x%x", + TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", __entry->iova, __entry->paddr, __entry->size ) ); -DEFINE_EVENT(iommu_map_unmap, map, +TRACE_EVENT(unmap, - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), - - TP_ARGS(iova, paddr, size) -); + TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size), -DEFINE_EVENT_PRINT(iommu_map_unmap, unmap, + TP_ARGS(iova, size, unmapped_size), - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + __field(size_t, unmapped_size) + ), - TP_ARGS(iova, paddr, size), + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + __entry->unmapped_size = unmapped_size; + ), - TP_printk("IOMMU: iova=0x%016llx size=0x%x", - __entry->iova, __entry->size + TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->size, __entry->unmapped_size ) ); -- cgit v1.2.3 From fdb1d7be7c4d452e9735aeb2b60ae8a2fcf0a514 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Nov 2014 17:16:49 +0000 Subject: iommu: introduce generic page table allocation framework This patch introduces a generic framework for allocating page tables for an IOMMU. There are a number of reasons we want to do this: - It avoids duplication of complex table management code in IOMMU drivers that use the same page table format - It removes any coupling with the CPU table format (and even the architecture!) - It defines an API for IOMMU TLB maintenance Tested-by: Laurent Pinchart Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 8 +++ drivers/iommu/Makefile | 1 + drivers/iommu/io-pgtable.c | 71 +++++++++++++++++++++++++ drivers/iommu/io-pgtable.h | 128 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 drivers/iommu/io-pgtable.c create mode 100644 drivers/iommu/io-pgtable.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 325188eef1c1..3faaa41db8ff 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -13,6 +13,14 @@ menuconfig IOMMU_SUPPORT if IOMMU_SUPPORT +menu "Generic IOMMU Pagetable Support" + +# Selected by the actual pagetable implementations +config IOMMU_IO_PGTABLE + bool + +endmenu + config OF_IOMMU def_bool y depends on OF && IOMMU_API diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 7b976f294a69..701c9516e2ae 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c new file mode 100644 index 000000000000..f664a1ca49cf --- /dev/null +++ b/drivers/iommu/io-pgtable.c @@ -0,0 +1,71 @@ +/* + * Generic page table allocator for IOMMUs. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon + */ + +#include +#include +#include + +#include "io-pgtable.h" + +static const struct io_pgtable_init_fns * +io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = +{ +}; + +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie) +{ + struct io_pgtable *iop; + const struct io_pgtable_init_fns *fns; + + if (fmt >= IO_PGTABLE_NUM_FMTS) + return NULL; + + fns = io_pgtable_init_table[fmt]; + if (!fns) + return NULL; + + iop = fns->alloc(cfg, cookie); + if (!iop) + return NULL; + + iop->fmt = fmt; + iop->cookie = cookie; + iop->cfg = *cfg; + + return &iop->ops; +} + +/* + * It is the IOMMU driver's responsibility to ensure that the page table + * is no longer accessible to the walker by this point. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops) +{ + struct io_pgtable *iop; + + if (!ops) + return; + + iop = container_of(ops, struct io_pgtable, ops); + iop->cfg.tlb->tlb_flush_all(iop->cookie); + io_pgtable_init_table[iop->fmt]->free(iop); +} diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h new file mode 100644 index 000000000000..fdd792c37698 --- /dev/null +++ b/drivers/iommu/io-pgtable.h @@ -0,0 +1,128 @@ +#ifndef __IO_PGTABLE_H +#define __IO_PGTABLE_H + +/* + * Public API for use by IOMMU drivers + */ +enum io_pgtable_fmt { + IO_PGTABLE_NUM_FMTS, +}; + +/** + * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * + * @tlb_flush_all: Synchronously invalidate the entire TLB context. + * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. + * @tlb_sync: Ensure any queue TLB invalidation has taken effect. + * @flush_pgtable: Ensure page table updates are visible to the IOMMU. + * + * Note that these can all be called in atomic context and must therefore + * not block. + */ +struct iommu_gather_ops { + void (*tlb_flush_all)(void *cookie); + void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, + void *cookie); + void (*tlb_sync)(void *cookie); + void (*flush_pgtable)(void *ptr, size_t size, void *cookie); +}; + +/** + * struct io_pgtable_cfg - Configuration data for a set of page tables. + * + * @quirks: A bitmap of hardware quirks that require some special + * action by the low-level page table allocator. + * @pgsize_bitmap: A bitmap of page sizes supported by this set of page + * tables. + * @ias: Input address (iova) size, in bits. + * @oas: Output address (paddr) size, in bits. + * @tlb: TLB management callbacks for this set of tables. + */ +struct io_pgtable_cfg { + int quirks; /* IO_PGTABLE_QUIRK_* */ + unsigned long pgsize_bitmap; + unsigned int ias; + unsigned int oas; + const struct iommu_gather_ops *tlb; + + /* Low-level data specific to the table format */ + union { + }; +}; + +/** + * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. + * + * @map: Map a physically contiguous memory region. + * @unmap: Unmap a physically contiguous memory region. + * @iova_to_phys: Translate iova to physical address. + * + * These functions map directly onto the iommu_ops member functions with + * the same names. + */ +struct io_pgtable_ops { + int (*map)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, + unsigned long iova); +}; + +/** + * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU. + * + * @fmt: The page table format. + * @cfg: The page table configuration. This will be modified to represent + * the configuration actually provided by the allocator (e.g. the + * pgsize_bitmap may be restricted). + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * the callback routines in cfg->tlb. + */ +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie); + +/** + * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller + * *must* ensure that the page table is no longer + * live, but the TLB can be dirty. + * + * @ops: The ops returned from alloc_io_pgtable_ops. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops); + + +/* + * Internal structures for page table allocator implementations. + */ + +/** + * struct io_pgtable - Internal structure describing a set of page tables. + * + * @fmt: The page table format. + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * any callback routines. + * @cfg: A copy of the page table configuration. + * @ops: The page table operations in use for this set of page tables. + */ +struct io_pgtable { + enum io_pgtable_fmt fmt; + void *cookie; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops ops; +}; + +/** + * struct io_pgtable_init_fns - Alloc/free a set of page tables for a + * particular format. + * + * @alloc: Allocate a set of page tables described by cfg. + * @free: Free the page tables associated with iop. + */ +struct io_pgtable_init_fns { + struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); + void (*free)(struct io_pgtable *iop); +}; + +#endif /* __IO_PGTABLE_H */ -- cgit v1.2.3 From e1d3c0fd701df831169b116cd5c5d6203ac07f70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Nov 2014 17:18:23 +0000 Subject: iommu: add ARM LPAE page table allocator A number of IOMMUs found in ARM SoCs can walk architecture-compatible page tables. This patch adds a generic allocator for Stage-1 and Stage-2 v7/v8 long-descriptor page tables. 4k, 16k and 64k pages are supported, with up to 4-levels of walk to cover a 48-bit address space. Tested-by: Laurent Pinchart Signed-off-by: Will Deacon --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 9 + drivers/iommu/Makefile | 1 + drivers/iommu/io-pgtable-arm.c | 781 +++++++++++++++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 11 + drivers/iommu/io-pgtable.h | 14 + 6 files changed, 817 insertions(+) create mode 100644 drivers/iommu/io-pgtable-arm.c diff --git a/MAINTAINERS b/MAINTAINERS index 3589d67437f8..00b27863384e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1589,6 +1589,7 @@ M: Will Deacon L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/iommu/arm-smmu.c +F: drivers/iommu/io-pgtable-arm.c ARM64 PORT (AARCH64 ARCHITECTURE) M: Catalin Marinas diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3faaa41db8ff..306454fbc52d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -19,6 +19,15 @@ menu "Generic IOMMU Pagetable Support" config IOMMU_IO_PGTABLE bool +config IOMMU_IO_PGTABLE_LPAE + bool "ARMv7/v8 Long Descriptor Format" + select IOMMU_IO_PGTABLE + help + Enable support for the ARM long descriptor pagetable format. + This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page + sizes at both stage-1 and stage-2, as well as address spaces + up to 48-bits in size. + endmenu config OF_IOMMU diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 701c9516e2ae..d6889b487d55 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o +obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c new file mode 100644 index 000000000000..dbe6178a53e9 --- /dev/null +++ b/drivers/iommu/io-pgtable-arm.c @@ -0,0 +1,781 @@ +/* + * CPU-agnostic ARM page table allocator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon + */ + +#define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt + +#include +#include +#include +#include +#include + +#include "io-pgtable.h" + +#define ARM_LPAE_MAX_ADDR_BITS 48 +#define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 +#define ARM_LPAE_MAX_LEVELS 4 + +/* Struct accessors */ +#define io_pgtable_to_data(x) \ + container_of((x), struct arm_lpae_io_pgtable, iop) + +#define io_pgtable_ops_to_pgtable(x) \ + container_of((x), struct io_pgtable, ops) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +/* + * For consistency with the architecture, we always consider + * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0 + */ +#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels) + +/* + * Calculate the right shift amount to get to the portion describing level l + * in a virtual address mapped by the pagetable in d. + */ +#define ARM_LPAE_LVL_SHIFT(l,d) \ + ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ + * (d)->bits_per_level) + (d)->pg_shift) + +#define ARM_LPAE_PAGES_PER_PGD(d) ((d)->pgd_size >> (d)->pg_shift) + +/* + * Calculate the index at level l used to map virtual address a using the + * pagetable in d. + */ +#define ARM_LPAE_PGD_IDX(l,d) \ + ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) + +#define ARM_LPAE_LVL_IDX(a,l,d) \ + (((a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ + ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) + +/* Calculate the block/page mapping size at level l for pagetable in d. */ +#define ARM_LPAE_BLOCK_SIZE(l,d) \ + (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) + +/* Page table bits */ +#define ARM_LPAE_PTE_TYPE_SHIFT 0 +#define ARM_LPAE_PTE_TYPE_MASK 0x3 + +#define ARM_LPAE_PTE_TYPE_BLOCK 1 +#define ARM_LPAE_PTE_TYPE_TABLE 3 +#define ARM_LPAE_PTE_TYPE_PAGE 3 + +#define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) +#define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) +#define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) +#define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) +#define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) + +#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) +/* Ignore the contiguous bit for block splitting */ +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ + ARM_LPAE_PTE_ATTR_HI_MASK) + +/* Stage-1 PTE */ +#define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 +#define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) + +/* Stage-2 PTE */ +#define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) +#define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) +#define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) +#define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) + +/* Register bits */ +#define ARM_32_LPAE_TCR_EAE (1 << 31) +#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) + +#define ARM_LPAE_TCR_TG0_4K (0 << 14) +#define ARM_LPAE_TCR_TG0_64K (1 << 14) +#define ARM_LPAE_TCR_TG0_16K (2 << 14) + +#define ARM_LPAE_TCR_SH0_SHIFT 12 +#define ARM_LPAE_TCR_SH0_MASK 0x3 +#define ARM_LPAE_TCR_SH_NS 0 +#define ARM_LPAE_TCR_SH_OS 2 +#define ARM_LPAE_TCR_SH_IS 3 + +#define ARM_LPAE_TCR_ORGN0_SHIFT 10 +#define ARM_LPAE_TCR_IRGN0_SHIFT 8 +#define ARM_LPAE_TCR_RGN_MASK 0x3 +#define ARM_LPAE_TCR_RGN_NC 0 +#define ARM_LPAE_TCR_RGN_WBWA 1 +#define ARM_LPAE_TCR_RGN_WT 2 +#define ARM_LPAE_TCR_RGN_WB 3 + +#define ARM_LPAE_TCR_SL0_SHIFT 6 +#define ARM_LPAE_TCR_SL0_MASK 0x3 + +#define ARM_LPAE_TCR_T0SZ_SHIFT 0 +#define ARM_LPAE_TCR_SZ_MASK 0xf + +#define ARM_LPAE_TCR_PS_SHIFT 16 +#define ARM_LPAE_TCR_PS_MASK 0x7 + +#define ARM_LPAE_TCR_IPS_SHIFT 32 +#define ARM_LPAE_TCR_IPS_MASK 0x7 + +#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL +#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL +#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL +#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL +#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL +#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL + +#define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) +#define ARM_LPAE_MAIR_ATTR_MASK 0xff +#define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 +#define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_WBRWA 0xff +#define ARM_LPAE_MAIR_ATTR_IDX_NC 0 +#define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 +#define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 + +/* IOPTE accessors */ +#define iopte_deref(pte,d) \ + (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ + & ~((1ULL << (d)->pg_shift) - 1))) + +#define iopte_type(pte,l) \ + (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) + +#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) + +#define iopte_leaf(pte,l) \ + (l == (ARM_LPAE_MAX_LEVELS - 1) ? \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK)) + +#define iopte_to_pfn(pte,d) \ + (((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift) + +#define pfn_to_iopte(pfn,d) \ + (((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) + +struct arm_lpae_io_pgtable { + struct io_pgtable iop; + + int levels; + size_t pgd_size; + unsigned long pg_shift; + unsigned long bits_per_level; + + void *pgd; +}; + +typedef u64 arm_lpae_iopte; + +static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + unsigned long iova, phys_addr_t paddr, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + /* We require an unmap first */ + if (WARN_ON(iopte_leaf(*ptep, lvl))) + return -EEXIST; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie); + return 0; +} + +static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, + phys_addr_t paddr, size_t size, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *cptep, pte; + void *cookie = data->iop.cookie; + size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + /* Find our entry at the current level */ + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + + /* If we can install a leaf entry at this level, then do so */ + if (size == block_size && (size & data->iop.cfg.pgsize_bitmap)) + return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); + + /* We can't allocate tables at the final level */ + if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) + return -EINVAL; + + /* Grab a pointer to the next level */ + pte = *ptep; + if (!pte) { + cptep = alloc_pages_exact(1UL << data->pg_shift, + GFP_ATOMIC | __GFP_ZERO); + if (!cptep) + return -ENOMEM; + + data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift, + cookie); + pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + } else { + cptep = iopte_deref(pte, data); + } + + /* Rinse, repeat */ + return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep); +} + +static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, + int prot) +{ + arm_lpae_iopte pte; + + if (data->iop.fmt == ARM_64_LPAE_S1 || + data->iop.fmt == ARM_32_LPAE_S1) { + pte = ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_nG; + + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) + pte |= ARM_LPAE_PTE_AP_RDONLY; + + if (prot & IOMMU_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + } else { + pte = ARM_LPAE_PTE_HAP_FAULT; + if (prot & IOMMU_READ) + pte |= ARM_LPAE_PTE_HAP_READ; + if (prot & IOMMU_WRITE) + pte |= ARM_LPAE_PTE_HAP_WRITE; + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + else + pte |= ARM_LPAE_PTE_MEMATTR_NC; + } + + if (prot & IOMMU_NOEXEC) + pte |= ARM_LPAE_PTE_XN; + + return pte; +} + +static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int iommu_prot) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + arm_lpae_iopte *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + arm_lpae_iopte prot; + + /* If no access, then nothing to do */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return 0; + + prot = arm_lpae_prot_to_pte(data, iommu_prot); + return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); +} + +static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *start, *end; + unsigned long table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + return; + + if (lvl == ARM_LPAE_START_LVL(data)) + table_size = data->pgd_size; + else + table_size = 1UL << data->pg_shift; + + start = ptep; + end = (void *)ptep + table_size; + + while (ptep != end) { + arm_lpae_iopte pte = *ptep++; + + if (!pte || iopte_leaf(pte, lvl)) + continue; + + __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); + } + + free_pages_exact(start, table_size); +} + +static void arm_lpae_free_pgtable(struct io_pgtable *iop) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); + + __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd); + kfree(data); +} + +static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep, size_t blk_size) +{ + unsigned long blk_start, blk_end; + phys_addr_t blk_paddr; + arm_lpae_iopte table = 0; + void *cookie = data->iop.cookie; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + + blk_start = iova & ~(blk_size - 1); + blk_end = blk_start + blk_size; + blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + + for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { + arm_lpae_iopte *tablep; + + /* Unmap! */ + if (blk_start == iova) + continue; + + /* __arm_lpae_map expects a pointer to the start of the table */ + tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); + if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, + tablep) < 0) { + if (table) { + /* Free the table we allocated */ + tablep = iopte_deref(table, data); + __arm_lpae_free_pgtable(data, lvl + 1, tablep); + } + return 0; /* Bytes unmapped */ + } + } + + *ptep = table; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + iova &= ~(blk_size - 1); + tlb->tlb_add_flush(iova, blk_size, true, cookie); + return size; +} + +static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + void *cookie = data->iop.cookie; + size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = *ptep; + + /* Something went horribly wrong and we ran out of page table */ + if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) + return 0; + + /* If the size matches this level, we're in the right place */ + if (size == blk_size) { + *ptep = 0; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + + if (!iopte_leaf(pte, lvl)) { + /* Also flush any partial walks */ + tlb->tlb_add_flush(iova, size, false, cookie); + tlb->tlb_sync(data->iop.cookie); + ptep = iopte_deref(pte, data); + __arm_lpae_free_pgtable(data, lvl + 1, ptep); + }