diff options
22 files changed, 866 insertions, 286 deletions
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt deleted file mode 100644 index ac949f7fe3d4..000000000000 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ /dev/null @@ -1,105 +0,0 @@ -* Mediatek IOMMU Architecture Implementation - - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and -this M4U have two generations of HW architecture. Generation one uses flat -pagetable, and only supports 4K size page mapping. Generation two uses the -ARM Short-Descriptor translation table format for address translation. - - About the M4U Hardware Block Diagram, please check below: - - EMI (External Memory Interface) - | - m4u (Multimedia Memory Management Unit) - | - +--------+ - | | - gals0-rx gals1-rx (Global Async Local Sync rx) - | | - | | - gals0-tx gals1-tx (Global Async Local Sync tx) - | | Some SoCs may have GALS. - +--------+ - | - SMI Common(Smart Multimedia Interface Common) - | - +----------------+------- - | | - | gals-rx There may be GALS in some larbs. - | | - | | - | gals-tx - | | - SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). - (display) (vdec) - | | - | | - +-----+-----+ +----+----+ - | | | | | | - | | |... | | | ... There are different ports in each larb. - | | | | | | -OVL0 RDMA0 WDMA0 MC PP VLD - - As above, The Multimedia HW will go through SMI and M4U while it -access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain -smi local arbiter and smi common. It will control whether the Multimedia -HW should go though the m4u for translation or bypass it and talk -directly with EMI. And also SMI help control the power domain and clocks for -each local arbiter. - Normally we specify a local arbiter(larb) for each multimedia HW -like display, video decode, and camera. And there are different ports -in each larb. 
Take a example, There are many ports like MC, PP, VLD in the -video decode local arbiter, all these ports are according to the video HW. - In some SoCs, there may be a GALS(Global Async Local Sync) module between -smi-common and m4u, and additional GALS module between smi-larb and -smi-common. GALS can been seen as a "asynchronous fifo" which could help -synchronize for the modules in different clock frequency. - -Required properties: -- compatible : must be one of the following string: - "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. - "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW. - "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW. - "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses - generation one m4u HW. - "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW. - "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. - "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW. -- reg : m4u register base and size. -- interrupts : the interrupt of m4u. -- clocks : must contain one entry for each clock-names. -- clock-names : Only 1 optional clock: - - "bclk": the block clock of m4u. - Here is the list which require this "bclk": - - mt2701, mt2712, mt7623 and mt8173. - Note that m4u use the EMI clock which always has been enabled before kernel - if there is no this "bclk". -- mediatek,larbs : List of phandle to the local arbiters in the current Socs. - Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort - according to the local arbiter index, like larb0, larb1, larb2... -- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. 
- Specifies the mtk_m4u_id as defined in - dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 - dt-binding/memory/mt2712-larb-port.h for mt2712, - dt-binding/memory/mt6779-larb-port.h for mt6779, - dt-binding/memory/mt8167-larb-port.h for mt8167, - dt-binding/memory/mt8173-larb-port.h for mt8173, and - dt-binding/memory/mt8183-larb-port.h for mt8183. - -Example: - iommu: iommu@10205000 { - compatible = "mediatek,mt8173-m4u"; - reg = <0 0x10205000 0 0x1000>; - interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>; - clocks = <&infracfg CLK_INFRA_M4U>; - clock-names = "bclk"; - mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>; - #iommu-cells = <1>; - }; - -Example for a client device: - display { - compatible = "mediatek,mt8173-disp"; - iommus = <&iommu M4U_PORT_DISP_OVL0>, - <&iommu M4U_PORT_DISP_RDMA0>; - ... - }; diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml new file mode 100644 index 000000000000..0f26fe14c8e2 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -0,0 +1,183 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek IOMMU Architecture Implementation + +maintainers: + - Yong Wu <yong.wu@mediatek.com> + +description: |+ + Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and + this M4U have two generations of HW architecture. Generation one uses flat + pagetable, and only supports 4K size page mapping. Generation two uses the + ARM Short-Descriptor translation table format for address translation. 
+ + The M4U hardware block diagram is shown below: + + EMI (External Memory Interface) + | + m4u (Multimedia Memory Management Unit) + | + +--------+ + | | + gals0-rx gals1-rx (Global Async Local Sync rx) + | | + | | + gals0-tx gals1-tx (Global Async Local Sync tx) + | | Some SoCs may have GALS. + +--------+ + | + SMI Common(Smart Multimedia Interface Common) + | + +----------------+------- + | | + | gals-rx There may be GALS in some larbs. + | | + | | + | gals-tx + | | + SMI larb0 SMI larb1 ... SoCs have several SMI local arbiters (larb). + (display) (vdec) + | | + | | + +-----+-----+ +----+----+ + | | | | | | + | | |... | | | ... There are different ports in each larb. + | | | | | | + OVL0 RDMA0 WDMA0 MC PP VLD + + As above, the Multimedia HW will go through SMI and M4U when it + accesses EMI. SMI is a bridge between m4u and the Multimedia HW. It contains + smi local arbiter and smi common. It will control whether the Multimedia + HW should go through the m4u for translation or bypass it and talk + directly with EMI. SMI also helps control the power domain and clocks for + each local arbiter. + + Normally we specify a local arbiter(larb) for each multimedia HW + like display, video decode, and camera. And there are different ports + in each larb. For example, there are many ports like MC, PP, VLD in the + video decode local arbiter, all of which correspond to the video HW. + + In some SoCs, there may be a GALS(Global Async Local Sync) module between + smi-common and m4u, and additional GALS module between smi-larb and + smi-common. GALS can be seen as an "asynchronous fifo" which helps + synchronize modules running at different clock frequencies. 
+ +properties: + compatible: + oneOf: + - enum: + - mediatek,mt2701-m4u # generation one + - mediatek,mt2712-m4u # generation two + - mediatek,mt6779-m4u # generation two + - mediatek,mt8167-m4u # generation two + - mediatek,mt8173-m4u # generation two + - mediatek,mt8183-m4u # generation two + - mediatek,mt8192-m4u # generation two + + - description: mt7623 generation one + items: + - const: mediatek,mt7623-m4u + - const: mediatek,mt2701-m4u + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: bclk is the block clock. + + clock-names: + items: + - const: bclk + + mediatek,larbs: + $ref: /schemas/types.yaml#/definitions/phandle-array + minItems: 1 + maxItems: 32 + description: | + List of phandles to the local arbiters in the current SoC. + Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. The list must be + sorted by local arbiter index, e.g. larb0, larb1, larb2... + + '#iommu-cells': + const: 1 + description: | + This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as + defined in + dt-bindings/memory/mt2701-larb-port.h for mt2701 and mt7623, + dt-bindings/memory/mt2712-larb-port.h for mt2712, + dt-bindings/memory/mt6779-larb-port.h for mt6779, + dt-bindings/memory/mt8167-larb-port.h for mt8167, + dt-bindings/memory/mt8173-larb-port.h for mt8173, + dt-bindings/memory/mt8183-larb-port.h for mt8183, + dt-bindings/memory/mt8192-larb-port.h for mt8192. 
+ + power-domains: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - mediatek,larbs + - '#iommu-cells' + +allOf: + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt2701-m4u + - mediatek,mt2712-m4u + - mediatek,mt8173-m4u + - mediatek,mt8192-m4u + + then: + required: + - clocks + + - if: + properties: + compatible: + enum: + - mediatek,mt8192-m4u + + then: + required: + - power-domains + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/mt8173-clk.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + iommu: iommu@10205000 { + compatible = "mediatek,mt8173-m4u"; + reg = <0x10205000 0x1000>; + interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>; + clocks = <&infracfg CLK_INFRA_M4U>; + clock-names = "bclk"; + mediatek,larbs = <&larb0 &larb1 &larb2 + &larb3 &larb4 &larb5>; + #iommu-cells = <1>; + }; + + - | + #include <dt-bindings/memory/mt8173-larb-port.h> + + /* Example for a client device */ + display { + compatible = "mediatek,mt8173-disp"; + iommus = <&iommu M4U_PORT_DISP_OVL0>, + <&iommu M4U_PORT_DISP_RDMA0>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..35bc20398139 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11182,6 +11182,15 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt F: drivers/i2c/busses/i2c-mt65xx.c +MEDIATEK IOMMU DRIVER +M: Yong Wu <yong.wu@mediatek.com> +L: iommu@lists.linux-foundation.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) +S: Supported +F: Documentation/devicetree/bindings/iommu/mediatek* +F: drivers/iommu/mtk-iommu* +F: include/dt-bindings/memory/mt*-port.h + MEDIATEK JPEG DRIVER M: Rick Chang <rick.chang@mediatek.com> M: Bin Liu <bin.liu@mediatek.com> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index baebaac34a83..8594b4a83043 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ 
-2306,7 +2306,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); arm_smmu_tlb_inv_range_domain(gather->start, - gather->end - gather->start, + gather->end - gather->start + 1, gather->pgsize, true, smmu_domain); } diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 1d92ac948db7..d4004bcf333a 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -44,26 +44,25 @@ /* * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, - * and 12 bits in a page. With some carefully-chosen coefficients we can - * hide the ugly inconsistencies behind these macros and at least let the - * rest of the code pretend to be somewhat sane. + * and 12 bits in a page. + * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2. */ #define ARM_V7S_ADDR_BITS 32 -#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) -#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) +#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8) +#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 
20 : 12) #define ARM_V7S_TABLE_SHIFT 10 -#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) -#define ARM_V7S_TABLE_SIZE(lvl) \ - (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) +#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg)) +#define ARM_V7S_TABLE_SIZE(lvl, cfg) \ + (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) #define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl)) #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) -#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) -#define ARM_V7S_LVL_IDX(addr, lvl) ({ \ +#define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) +#define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ + ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -112,9 +111,10 @@ #define ARM_V7S_TEX_MASK 0x7 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) -/* MediaTek extend the two bits for PA 32bit/33bit */ +/* MediaTek extend the bits below for PA 32bit/33bit/34bit */ #define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9) #define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4) +#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5) /* *well, except for TEX on level 2 large pages, of course :( */ #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 @@ -194,6 +194,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, pte |= ARM_V7S_ATTR_MTK_PA_BIT32; if (paddr & BIT_ULL(33)) pte |= ARM_V7S_ATTR_MTK_PA_BIT33; + if (paddr & BIT_ULL(34)) + pte |= ARM_V7S_ATTR_MTK_PA_BIT34; return pte; } @@ -218,6 +220,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl, paddr |= BIT_ULL(32); if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) paddr |= BIT_ULL(33); + if (pte & ARM_V7S_ATTR_MTK_PA_BIT34) + paddr |= BIT_ULL(34); return paddr; } @@ -234,7 +238,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct 
device *dev = cfg->iommu_dev; phys_addr_t phys; dma_addr_t dma; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); void *table = NULL; if (lvl == 1) @@ -280,7 +284,7 @@ static void __arm_v7s_free_table(void *table, int lvl, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, @@ -424,7 +428,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, arm_v7s_iopte *tblp; size_t sz = ARM_V7S_BLOCK_SIZE(lvl); - tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); + tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg); if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, sz, lvl, tblp) != sz)) return -EINVAL; @@ -477,7 +481,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); /* Find our entry at the current level */ - ptep += ARM_V7S_LVL_IDX(iova, lvl); + ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg); /* If we can install a leaf entry at this level, then do so */ if (num_entries) @@ -519,7 +523,6 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - struct io_pgtable *iop = &data->iop; int ret; if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || @@ -535,12 +538,7 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, * Synchronise all PTE updates for the new mapping before there's * a chance for anything to kick off a table walk for the new iova. 
*/ - if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { - io_pgtable_tlb_flush_walk(iop, iova, size, - ARM_V7S_BLOCK_SIZE(2)); - } else { - wmb(); - } + wmb(); return ret; } @@ -550,7 +548,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); int i; - for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { + for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) { arm_v7s_iopte pte = data->pgd[i]; if (ARM_V7S_PTE_IS_TABLE(pte, 1)) @@ -602,9 +600,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, if (!tablep) return 0; /* Bytes unmapped */ - num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg); num_entries = size >> ARM_V7S_LVL_SHIFT(2); - unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg); pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); if (num_entries > 1) @@ -646,7 +644,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, if (WARN_ON(lvl > 2)) return 0; - idx = ARM_V7S_LVL_IDX(iova, lvl); + idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg); ptep += idx; do { pte[i] = READ_ONCE(ptep[i]); @@ -717,7 +715,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - if (WARN_ON(upper_32_bits(iova))) + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); @@ -732,7 +730,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg); pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl, data); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -751,15 +749,14 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, { struct arm_v7s_io_pgtable *data; - if (cfg->ias > ARM_V7S_ADDR_BITS) + if (cfg->ias > 
(arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) return NULL; - if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) + if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -775,8 +772,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", - ARM_V7S_TABLE_SIZE(2), - ARM_V7S_TABLE_SIZE(2), + ARM_V7S_TABLE_SIZE(2, cfg), + ARM_V7S_TABLE_SIZE(2, cfg), ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ffeebda8d6de..3d099a31ddca 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2426,9 +2426,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); - /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); @@ -2438,18 +2435,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static int _iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; + int ret; + + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); /* honour the caller's gfp: iommu_map_atomic() passes GFP_ATOMIC */ + if (ret == 0 && ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, size); + + return ret; +} + int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { might_sleep(); - return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); } EXPORT_SYMBOL_GPL(iommu_map); int 
iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iommu_map_atomic); @@ -2533,6 +2543,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { + const struct iommu_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; @@ -2563,6 +2574,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, sg = sg_next(sg); } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, mapped); return mapped; out_err: diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 040e85f70861..f0ba6a09b434 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -343,7 +343,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv) spin_lock_init(&priv->pgtlock); priv->cfg = (struct io_pgtable_cfg) { - .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, @@ -490,6 +489,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + struct msm_priv *priv = to_msm_priv(domain); + + __flush_iotlb_range(iova, size, SZ_4K, false, priv); +} + static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t len, struct iommu_iotlb_gather *gather) { @@ -680,6 +687,7 @@ static struct iommu_ops msm_iommu_ops = { * kick starting the other master. 
*/ .iotlb_sync = NULL, + .iotlb_sync_map = msm_iommu_sync_map, .iova_to_phys = msm_iommu_iova_to_phys, .probe_device = msm_iommu_probe_device, .release_device = msm_iommu_release_device, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8e56cec532e7..0ad14a7604b1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -3,10 +3,12 @@ * Copyright (c) 2015-2016 MediaTek Inc. * Author: Yong Wu <yong.wu@mediatek.com> */ +#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/clk.h> #include <linux/component.h> #include <linux/device.h> +#include <linux/dma-direct.h> #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> @@ -20,6 +22,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -88,6 +91,9 @@ #define F_REG_MMU1_FAULT_MASK GENMASK(13, 7) #define REG_MMU0_FAULT_VA 0x13c +#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12) +#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9) +#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6) #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -103,13 +109,6 @@ #define MTK_PROTECT_PA_ALIGN 256 -/* - * Get the local arbiter ID and the portid within the larb arbiter - * from mtk_m4u_id which is defined by MTK_M4U_ID. - */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) -#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) - #define HAS_4GB_MODE BIT(0) /* HW will use the EMI clock if there isn't the "bclk". 
*/ #define HAS_BCLK BIT(1) @@ -119,6 +118,7 @@ #define HAS_SUB_COMM BIT(5) #define WR_THROT_EN BIT(6) #define HAS_LEGACY_IVRP_PADDR BIT(7) +#define IOVA_34_EN BIT(8) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ ((((pdata)->flags) & (_x)) == (_x)) @@ -127,11 +127,19 @@ struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; + struct mtk_iommu_data *data; struct iommu_domain domain; }; static const struct iommu_ops mtk_iommu_ops; +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); + +#define MTK_IOMMU_TLB_ADDR(iova) ({ \ + dma_addr_t _addr = iova; \ + ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\ +}) + /* * In M4U 4GB mode, the physical address is remapped as below: * @@ -160,6 +168,25 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) +struct mtk_iommu_iova_region { + dma_addr_t iova_base; + unsigned long long size; +}; + +static const struct mtk_iommu_iova_region single_domain[] = { + {.iova_base = 0, .size = SZ_4G}, +}; + +static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { + { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */ + #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) + { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ + { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ + { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ + #endif +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. 
@@ -182,33 +209,43 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) return container_of(dom, struct mtk_iommu_domain, domain); } -static void mtk_iommu_tlb_flush_all(void *cookie) +static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; - for_each_m4u(data) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); wmb(); /* Make sure the tlb flush all done */ + + pm_runtime_put(data->dev); } } static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, - size_t granule, void *cookie) + size_t granule, + struct mtk_iommu_data *data) { - struct mtk_iommu_data *data = cookie; + bool has_pm = !!data->dev->pm_domain; unsigned long flags; int ret; u32 tmp; for_each_m4u(data) { + if (has_pm) { + if (pm_runtime_get_if_in_use(data->dev) <= 0) + continue; + } + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), + data->base + REG_MMU_INVLD_START_A); + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1), data->base + REG_MMU_INVLD_END_A); writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); @@ -219,36 +256,24 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, if (ret) { dev_warn(data->dev, "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(cookie); + mtk_iommu_tlb_flush_all(data); } /* Clear the CPE status */ writel_relaxed(0, data->base + REG_MMU_CPE_DONE); spin_unlock_irqrestore(&data->tlb_lock, flags); - } -} - -static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) 
-{ - struct mtk_iommu_data *data = cookie; - struct iommu_domain *domain = &data->m4u_dom->domain; - iommu_iotlb_gather_add_page(domain, gather, iova, granule); + if (has_pm) + pm_runtime_put(data->dev); + } } -static const struct iommu_flush_ops mtk_iommu_flush_ops = { - .tlb_flush_all = mtk_iommu_tlb_flush_all, - .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync, - .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, -}; - static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; - u32 int_state, regval, fault_iova, fault_pa; unsigned int fault_larb, fault_port, sub_comm = 0; + u32 int_state, regval, va34_32, pa34_32; + u64 fault_iova, fault_pa; bool layer, write; /* Read error info from registers */ @@ -264,6 +289,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { + va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); + fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; + fault_iova |= (u64)va34_32 << 32; + fault_pa |= (u64)pa34_32 << 32; + } + fault_port = F_MMU_INT_ID_PORT_ID(regval); |
