summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.txt105
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml183
-rw-r--r--MAINTAINERS9
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c2
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c65
-rw-r--r--drivers/iommu/iommu.c23
-rw-r--r--drivers/iommu/msm_iommu.c10
-rw-r--r--drivers/iommu/mtk_iommu.c409
-rw-r--r--drivers/iommu/mtk_iommu.h12
-rw-r--r--drivers/iommu/tegra-gart.c7
-rw-r--r--drivers/memory/mtk-smi.c8
-rw-r--r--include/dt-bindings/memory/mt2701-larb-port.h4
-rw-r--r--include/dt-bindings/memory/mt2712-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt6779-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8167-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8173-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8183-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8192-larb-port.h243
-rw-r--r--include/dt-bindings/memory/mtk-memory-port.h15
-rw-r--r--include/linux/io-pgtable.h17
-rw-r--r--include/linux/iommu.h7
-rw-r--r--include/soc/mediatek/smi.h3
22 files changed, 866 insertions, 286 deletions
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
deleted file mode 100644
index ac949f7fe3d4..000000000000
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-* Mediatek IOMMU Architecture Implementation
-
- Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and
-this M4U have two generations of HW architecture. Generation one uses flat
-pagetable, and only supports 4K size page mapping. Generation two uses the
-ARM Short-Descriptor translation table format for address translation.
-
- About the M4U Hardware Block Diagram, please check below:
-
- EMI (External Memory Interface)
- |
- m4u (Multimedia Memory Management Unit)
- |
- +--------+
- | |
- gals0-rx gals1-rx (Global Async Local Sync rx)
- | |
- | |
- gals0-tx gals1-tx (Global Async Local Sync tx)
- | | Some SoCs may have GALS.
- +--------+
- |
- SMI Common(Smart Multimedia Interface Common)
- |
- +----------------+-------
- | |
- | gals-rx There may be GALS in some larbs.
- | |
- | |
- | gals-tx
- | |
- SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
- (display) (vdec)
- | |
- | |
- +-----+-----+ +----+----+
- | | | | | |
- | | |... | | | ... There are different ports in each larb.
- | | | | | |
-OVL0 RDMA0 WDMA0 MC PP VLD
-
- As above, The Multimedia HW will go through SMI and M4U while it
-access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
-smi local arbiter and smi common. It will control whether the Multimedia
-HW should go though the m4u for translation or bypass it and talk
-directly with EMI. And also SMI help control the power domain and clocks for
-each local arbiter.
- Normally we specify a local arbiter(larb) for each multimedia HW
-like display, video decode, and camera. And there are different ports
-in each larb. Take a example, There are many ports like MC, PP, VLD in the
-video decode local arbiter, all these ports are according to the video HW.
- In some SoCs, there may be a GALS(Global Async Local Sync) module between
-smi-common and m4u, and additional GALS module between smi-larb and
-smi-common. GALS can been seen as a "asynchronous fifo" which could help
-synchronize for the modules in different clock frequency.
-
-Required properties:
-- compatible : must be one of the following string:
- "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
- "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW.
- "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW.
- "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
- generation one m4u HW.
- "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW.
- "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
- "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
-- reg : m4u register base and size.
-- interrupts : the interrupt of m4u.
-- clocks : must contain one entry for each clock-names.
-- clock-names : Only 1 optional clock:
- - "bclk": the block clock of m4u.
- Here is the list which require this "bclk":
- - mt2701, mt2712, mt7623 and mt8173.
- Note that m4u use the EMI clock which always has been enabled before kernel
- if there is no this "bclk".
-- mediatek,larbs : List of phandle to the local arbiters in the current Socs.
- Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
- according to the local arbiter index, like larb0, larb1, larb2...
-- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
- Specifies the mtk_m4u_id as defined in
- dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
- dt-binding/memory/mt2712-larb-port.h for mt2712,
- dt-binding/memory/mt6779-larb-port.h for mt6779,
- dt-binding/memory/mt8167-larb-port.h for mt8167,
- dt-binding/memory/mt8173-larb-port.h for mt8173, and
- dt-binding/memory/mt8183-larb-port.h for mt8183.
-
-Example:
- iommu: iommu@10205000 {
- compatible = "mediatek,mt8173-m4u";
- reg = <0 0x10205000 0 0x1000>;
- interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&infracfg CLK_INFRA_M4U>;
- clock-names = "bclk";
- mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>;
- #iommu-cells = <1>;
- };
-
-Example for a client device:
- display {
- compatible = "mediatek,mt8173-disp";
- iommus = <&iommu M4U_PORT_DISP_OVL0>,
- <&iommu M4U_PORT_DISP_RDMA0>;
- ...
- };
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
new file mode 100644
index 000000000000..0f26fe14c8e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek IOMMU Architecture Implementation
+
+maintainers:
+ - Yong Wu <yong.wu@mediatek.com>
+
+description: |+
+ Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and
+ this M4U have two generations of HW architecture. Generation one uses flat
+ pagetable, and only supports 4K size page mapping. Generation two uses the
+ ARM Short-Descriptor translation table format for address translation.
+
+ About the M4U Hardware Block Diagram, please check below:
+
+ EMI (External Memory Interface)
+ |
+ m4u (Multimedia Memory Management Unit)
+ |
+ +--------+
+ | |
+ gals0-rx gals1-rx (Global Async Local Sync rx)
+ | |
+ | |
+ gals0-tx gals1-tx (Global Async Local Sync tx)
+ | | Some SoCs may have GALS.
+ +--------+
+ |
+ SMI Common(Smart Multimedia Interface Common)
+ |
+ +----------------+-------
+ | |
+ | gals-rx There may be GALS in some larbs.
+ | |
+ | |
+ | gals-tx
+ | |
+ SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
+ (display) (vdec)
+ | |
+ | |
+ +-----+-----+ +----+----+
+ | | | | | |
+ | | |... | | | ... There are different ports in each larb.
+ | | | | | |
+ OVL0 RDMA0 WDMA0 MC PP VLD
+
+ As above, The Multimedia HW will go through SMI and M4U while it
+ access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
+ smi local arbiter and smi common. It will control whether the Multimedia
+ HW should go though the m4u for translation or bypass it and talk
+ directly with EMI. And also SMI help control the power domain and clocks for
+ each local arbiter.
+
+ Normally we specify a local arbiter(larb) for each multimedia HW
+ like display, video decode, and camera. And there are different ports
+ in each larb. Take a example, There are many ports like MC, PP, VLD in the
+ video decode local arbiter, all these ports are according to the video HW.
+
+ In some SoCs, there may be a GALS(Global Async Local Sync) module between
+ smi-common and m4u, and additional GALS module between smi-larb and
+ smi-common. GALS can been seen as a "asynchronous fifo" which could help
+ synchronize for the modules in different clock frequency.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-m4u # generation one
+ - mediatek,mt2712-m4u # generation two
+ - mediatek,mt6779-m4u # generation two
+ - mediatek,mt8167-m4u # generation two
+ - mediatek,mt8173-m4u # generation two
+ - mediatek,mt8183-m4u # generation two
+ - mediatek,mt8192-m4u # generation two
+
+ - description: mt7623 generation one
+ items:
+ - const: mediatek,mt7623-m4u
+ - const: mediatek,mt2701-m4u
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: bclk is the block clock.
+
+ clock-names:
+ items:
+ - const: bclk
+
+ mediatek,larbs:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 32
+ description: |
+ List of phandle to the local arbiters in the current Socs.
+ Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort
+ according to the local arbiter index, like larb0, larb1, larb2...
+
+ '#iommu-cells':
+ const: 1
+ description: |
+ This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as
+ defined in
+ dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623,
+ dt-binding/memory/mt2712-larb-port.h for mt2712,
+ dt-binding/memory/mt6779-larb-port.h for mt6779,
+ dt-binding/memory/mt8167-larb-port.h for mt8167,
+ dt-binding/memory/mt8173-larb-port.h for mt8173,
+ dt-binding/memory/mt8183-larb-port.h for mt8183,
+ dt-binding/memory/mt8192-larb-port.h for mt8192.
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - mediatek,larbs
+ - '#iommu-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt2701-m4u
+ - mediatek,mt2712-m4u
+ - mediatek,mt8173-m4u
+ - mediatek,mt8192-m4u
+
+ then:
+ required:
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - mediatek,mt8192-m4u
+
+ then:
+ required:
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ iommu: iommu@10205000 {
+ compatible = "mediatek,mt8173-m4u";
+ reg = <0x10205000 0x1000>;
+ interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&infracfg CLK_INFRA_M4U>;
+ clock-names = "bclk";
+ mediatek,larbs = <&larb0 &larb1 &larb2
+ &larb3 &larb4 &larb5>;
+ #iommu-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/memory/mt8173-larb-port.h>
+
+ /* Example for a client device */
+ display {
+ compatible = "mediatek,mt8173-disp";
+ iommus = <&iommu M4U_PORT_DISP_OVL0>,
+ <&iommu M4U_PORT_DISP_RDMA0>;
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..35bc20398139 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11182,6 +11182,15 @@ S: Maintained
F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
F: drivers/i2c/busses/i2c-mt65xx.c
+MEDIATEK IOMMU DRIVER
+M: Yong Wu <yong.wu@mediatek.com>
+L: iommu@lists.linux-foundation.org
+L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: Documentation/devicetree/bindings/iommu/mediatek*
+F: drivers/iommu/mtk-iommu*
+F: include/dt-bindings/memory/mt*-port.h
+
MEDIATEK JPEG DRIVER
M: Rick Chang <rick.chang@mediatek.com>
M: Bin Liu <bin.liu@mediatek.com>
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index baebaac34a83..8594b4a83043 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2306,7 +2306,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
arm_smmu_tlb_inv_range_domain(gather->start,
- gather->end - gather->start,
+ gather->end - gather->start + 1,
gather->pgsize, true, smmu_domain);
}
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac948db7..d4004bcf333a 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -44,26 +44,25 @@
/*
* We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
- * and 12 bits in a page. With some carefully-chosen coefficients we can
- * hide the ugly inconsistencies behind these macros and at least let the
- * rest of the code pretend to be somewhat sane.
+ * and 12 bits in a page.
+ * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2.
*/
#define ARM_V7S_ADDR_BITS 32
-#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4)
-#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
+#define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8)
+#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12)
#define ARM_V7S_TABLE_SHIFT 10
-#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl))
-#define ARM_V7S_TABLE_SIZE(lvl) \
- (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))
+#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg))
+#define ARM_V7S_TABLE_SIZE(lvl, cfg) \
+ (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte))
#define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl))
#define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
#define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT))
-#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1)
-#define ARM_V7S_LVL_IDX(addr, lvl) ({ \
+#define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1)
+#define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \
int _l = lvl; \
- ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
+ ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \
})
/*
@@ -112,9 +111,10 @@
#define ARM_V7S_TEX_MASK 0x7
#define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
-/* MediaTek extend the two bits for PA 32bit/33bit */
+/* MediaTek extend the bits below for PA 32bit/33bit/34bit */
#define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9)
#define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4)
+#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5)
/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT 6
@@ -194,6 +194,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
if (paddr & BIT_ULL(33))
pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+ if (paddr & BIT_ULL(34))
+ pte |= ARM_V7S_ATTR_MTK_PA_BIT34;
return pte;
}
@@ -218,6 +220,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
paddr |= BIT_ULL(32);
if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
paddr |= BIT_ULL(33);
+ if (pte & ARM_V7S_ATTR_MTK_PA_BIT34)
+ paddr |= BIT_ULL(34);
return paddr;
}
@@ -234,7 +238,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
struct device *dev = cfg->iommu_dev;
phys_addr_t phys;
dma_addr_t dma;
- size_t size = ARM_V7S_TABLE_SIZE(lvl);
+ size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
void *table = NULL;
if (lvl == 1)
@@ -280,7 +284,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
struct device *dev = cfg->iommu_dev;
- size_t size = ARM_V7S_TABLE_SIZE(lvl);
+ size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
if (!cfg->coherent_walk)
dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
@@ -424,7 +428,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
arm_v7s_iopte *tblp;
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
- tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
+ tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg);
if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
@@ -477,7 +481,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
/* Find our entry at the current level */
- ptep += ARM_V7S_LVL_IDX(iova, lvl);
+ ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg);
/* If we can install a leaf entry at this level, then do so */
if (num_entries)
@@ -519,7 +523,6 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
- struct io_pgtable *iop = &data->iop;
int ret;
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
@@ -535,12 +538,7 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
*/
- if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
- io_pgtable_tlb_flush_walk(iop, iova, size,
- ARM_V7S_BLOCK_SIZE(2));
- } else {
- wmb();
- }
+ wmb();
return ret;
}
@@ -550,7 +548,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
int i;
- for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
+ for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) {
arm_v7s_iopte pte = data->pgd[i];
if (ARM_V7S_PTE_IS_TABLE(pte, 1))
@@ -602,9 +600,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
- num_ptes = ARM_V7S_PTES_PER_LVL(2);
+ num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
num_entries = size >> ARM_V7S_LVL_SHIFT(2);
- unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
+ unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);
pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
if (num_entries > 1)
@@ -646,7 +644,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
if (WARN_ON(lvl > 2))
return 0;
- idx = ARM_V7S_LVL_IDX(iova, lvl);
+ idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg);
ptep += idx;
do {
pte[i] = READ_ONCE(ptep[i]);
@@ -717,7 +715,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
- if (WARN_ON(upper_32_bits(iova)))
+ if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
@@ -732,7 +730,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
u32 mask;
do {
- ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
+ ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg);
pte = READ_ONCE(*ptep);
ptep = iopte_deref(pte, lvl, data);
} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
@@ -751,15 +749,14 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
{
struct arm_v7s_io_pgtable *data;
- if (cfg->ias > ARM_V7S_ADDR_BITS)
+ if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
return NULL;
- if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
+ if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS))
return NULL;
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_TLBI_ON_MAP |
IO_PGTABLE_QUIRK_ARM_MTK_EXT |
IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
@@ -775,8 +772,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
spin_lock_init(&data->split_lock);
data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
- ARM_V7S_TABLE_SIZE(2),
- ARM_V7S_TABLE_SIZE(2),
+ ARM_V7S_TABLE_SIZE(2, cfg),
+ ARM_V7S_TABLE_SIZE(2, cfg),
ARM_V7S_TABLE_SLAB_FLAGS, NULL);
if (!data->l2_tables)
goto out_free_data;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda8d6de..3d099a31ddca 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2426,9 +2426,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
size -= pgsize;
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain);
-
/* unroll mapping in case something went wrong */
if (ret)
iommu_unmap(domain, orig_iova, orig_size - size);
@@ -2438,18 +2435,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ const struct iommu_ops *ops = domain->ops;
+ int ret;
+
+ ret = __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
+ if (ret == 0 && ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain, iova, size);
+
+ return ret;
+}
+
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
might_sleep();
- return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(iommu_map);
int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
- return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
+ return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
}
EXPORT_SYMBOL_GPL(iommu_map_atomic);
@@ -2533,6 +2543,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
+ const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
@@ -2563,6 +2574,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
sg = sg_next(sg);
}
+ if (ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain, iova, mapped);
return mapped;
out_err:
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 040e85f70861..f0ba6a09b434 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -343,7 +343,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
spin_lock_init(&priv->pgtlock);
priv->cfg = (struct io_pgtable_cfg) {
- .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP,
.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
@@ -490,6 +489,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ struct msm_priv *priv = to_msm_priv(domain);
+
+ __flush_iotlb_range(iova, size, SZ_4K, false, priv);
+}
+
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t len, struct iommu_iotlb_gather *gather)
{
@@ -680,6 +687,7 @@ static struct iommu_ops msm_iommu_ops = {
* kick starting the other master.
*/
.iotlb_sync = NULL,
+ .iotlb_sync_map = msm_iommu_sync_map,
.iova_to_phys = msm_iommu_iova_to_phys,
.probe_device = msm_iommu_probe_device,
.release_device = msm_iommu_release_device,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8e56cec532e7..0ad14a7604b1 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -3,10 +3,12 @@
* Copyright (c) 2015-2016 MediaTek Inc.
* Author: Yong Wu <yong.wu@mediatek.com>
*/
+#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/device.h>
+#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
@@ -20,6 +22,7 @@
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -88,6 +91,9 @@
#define F_REG_MMU1_FAULT_MASK GENMASK(13, 7)
#define REG_MMU0_FAULT_VA 0x13c
+#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12)
+#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9)
+#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6)
#define F_MMU_FAULT_VA_WRITE_BIT BIT(1)
#define F_MMU_FAULT_VA_LAYER_BIT BIT(0)
@@ -103,13 +109,6 @@
#define MTK_PROTECT_PA_ALIGN 256
-/*
- * Get the local arbiter ID and the portid within the larb arbiter
- * from mtk_m4u_id which is defined by MTK_M4U_ID.
- */
-#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf)
-#define MTK_M4U_TO_PORT(id) ((id) & 0x1f)
-
#define HAS_4GB_MODE BIT(0)
/* HW will use the EMI clock if there isn't the "bclk". */
#define HAS_BCLK BIT(1)
@@ -119,6 +118,7 @@
#define HAS_SUB_COMM BIT(5)
#define WR_THROT_EN BIT(6)
#define HAS_LEGACY_IVRP_PADDR BIT(7)
+#define IOVA_34_EN BIT(8)
#define MTK_IOMMU_HAS_FLAG(pdata, _x) \
((((pdata)->flags) & (_x)) == (_x))
@@ -127,11 +127,19 @@ struct mtk_iommu_domain {
struct io_pgtable_cfg cfg;
struct io_pgtable_ops *iop;
+ struct mtk_iommu_data *data;
struct iommu_domain domain;
};
static const struct iommu_ops mtk_iommu_ops;
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data);
+
+#define MTK_IOMMU_TLB_ADDR(iova) ({ \
+ dma_addr_t _addr = iova; \
+ ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\
+})
+
/*
* In M4U 4GB mode, the physical address is remapped as below:
*
@@ -160,6 +168,25 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */
#define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list)
+struct mtk_iommu_iova_region {
+ dma_addr_t iova_base;
+ unsigned long long size;
+};
+
+static const struct mtk_iommu_iova_region single_domain[] = {
+ {.iova_base = 0, .size = SZ_4G},
+};
+
+static const struct mtk_iommu_iova_region mt8192_multi_dom[] = {
+ { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */
+ #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+ { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */
+ { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */
+ { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */
+ { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */
+ #endif
+};
+
/*
* There may be 1 or 2 M4U HWs, But we always expect they are in the same domain
* for the performance.
@@ -182,33 +209,43 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
return container_of(dom, struct mtk_iommu_domain, domain);
}
-static void mtk_iommu_tlb_flush_all(void *cookie)
+static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data)
{
- struct mtk_iommu_data *data = cookie;
-
for_each_m4u(data) {
+ if (pm_runtime_get_if_in_use(data->dev) <= 0)
+ continue;
+
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
data->base + data->plat_data->inv_sel_reg);
writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
wmb(); /* Make sure the tlb flush all done */
+
+ pm_runtime_put(data->dev);
}
}
static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+ size_t granule,
+ struct mtk_iommu_data *data)
{
- struct mtk_iommu_data *data = cookie;
+ bool has_pm = !!data->dev->pm_domain;
unsigned long flags;
int ret;
u32 tmp;
for_each_m4u(data) {
+ if (has_pm) {
+ if (pm_runtime_get_if_in_use(data->dev) <= 0)
+ continue;
+ }
+
spin_lock_irqsave(&data->tlb_lock, flags);
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
data->base + data->plat_data->inv_sel_reg);
- writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
- writel_relaxed(iova + size - 1,
+ writel_relaxed(MTK_IOMMU_TLB_ADDR(iova),
+ data->base + REG_MMU_INVLD_START_A);
+ writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1),
data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
data->base + REG_MMU_INVALIDATE);
@@ -219,36 +256,24 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
if (ret) {
dev_warn(data->dev,
"Partial TLB flush timed out, falling back to full flush\n");
- mtk_iommu_tlb_flush_all(cookie);
+ mtk_iommu_tlb_flush_all(data);
}
/* Clear the CPE status */
writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
spin_unlock_irqrestore(&data->tlb_lock, flags);
- }
-}
-
-static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- struct iommu_domain *domain = &data->m4u_dom->domain;
- iommu_iotlb_gather_add_page(domain, gather, iova, granule);
+ if (has_pm)
+ pm_runtime_put(data->dev);
+ }
}
-static const struct iommu_flush_ops mtk_iommu_flush_ops = {
- .tlb_flush_all = mtk_iommu_tlb_flush_all,
- .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync,
- .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
-};
-
static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
{
struct mtk_iommu_data *data = dev_id;
struct mtk_iommu_domain *dom = data->m4u_dom;
- u32 int_state, regval, fault_iova, fault_pa;
unsigned int fault_larb, fault_port, sub_comm = 0;
+ u32 int_state, regval, va34_32, pa34_32;
+ u64 fault_iova, fault_pa;
bool layer, write;
/* Read error info from registers */
@@ -264,6 +289,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
}
layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) {
+ va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova);
+ pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova);
+ fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK;
+ fault_iova |= (u64)va34_32 << 32;
+ fault_pa |= (u64)pa34_32 << 32;
+ }
+
fault_port = F_MMU_INT_ID_PORT_ID(regval);