summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoerg Roedel <jroedel@suse.de>2021-02-02 13:35:40 +0100
committerJoerg Roedel <jroedel@suse.de>2021-02-02 13:35:40 +0100
commitd1e3306ba83a839462d239ab6dff45ec10f8ce2b (patch)
tree9f4a9be46aec7d321a377429aed3bed726390598
parent6ee1d745b7c9fd573fba142a2efdad76a9f1cb04 (diff)
parent7060377ce06f9cd3ed6274c0f2310463feb5baec (diff)
downloadlinux-d1e3306ba83a839462d239ab6dff45ec10f8ce2b.tar.gz
linux-d1e3306ba83a839462d239ab6dff45ec10f8ce2b.tar.bz2
linux-d1e3306ba83a839462d239ab6dff45ec10f8ce2b.zip
Merge tag 'arm-smmu-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
Arm SMMU updates for 5.12 - Support for MT8192 IOMMU from Mediatek - Arm v7s io-pgtable extensions for MT8192 - Removal of TLBI_ON_MAP quirk - New Qualcomm compatible strings - Allow SVA without hardware broadcast TLB maintenance on SMMUv3 - Virtualization Host Extension support for SMMUv3 (SVA) - Allow SMMUv3 PMU (perf) driver to be built independently from IOMMU - Misc cleanups
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml2
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.txt105
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml183
-rw-r--r--MAINTAINERS9
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c10
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c154
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h14
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c5
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c65
-rw-r--r--drivers/iommu/iommu.c23
-rw-r--r--drivers/iommu/msm_iommu.c10
-rw-r--r--drivers/iommu/mtk_iommu.c409
-rw-r--r--drivers/iommu/mtk_iommu.h12
-rw-r--r--drivers/iommu/tegra-gart.c7
-rw-r--r--drivers/memory/mtk-smi.c8
-rw-r--r--drivers/perf/Kconfig2
-rw-r--r--include/dt-bindings/memory/mt2701-larb-port.h4
-rw-r--r--include/dt-bindings/memory/mt2712-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt6779-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8167-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8173-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8183-larb-port.h6
-rw-r--r--include/dt-bindings/memory/mt8192-larb-port.h243
-rw-r--r--include/dt-bindings/memory/mtk-memory-port.h15
-rw-r--r--include/linux/io-pgtable.h17
-rw-r--r--include/linux/iommu.h7
-rw-r--r--include/soc/mediatek/smi.h3
27 files changed, 981 insertions, 356 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 3b63f2ae24db..6ba161dea4d8 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -34,9 +34,11 @@ properties:
items:
- enum:
- qcom,sc7180-smmu-500
+ - qcom,sc8180x-smmu-500
- qcom,sdm845-smmu-500
- qcom,sm8150-smmu-500
- qcom,sm8250-smmu-500
+ - qcom,sm8350-smmu-500
- const: arm,mmu-500
- description: Qcom Adreno GPUs implementing "arm,smmu-v2"
items:
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
deleted file mode 100644
index ac949f7fe3d4..000000000000
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-* Mediatek IOMMU Architecture Implementation
-
- Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and
-this M4U have two generations of HW architecture. Generation one uses flat
-pagetable, and only supports 4K size page mapping. Generation two uses the
-ARM Short-Descriptor translation table format for address translation.
-
- About the M4U Hardware Block Diagram, please check below:
-
- EMI (External Memory Interface)
- |
- m4u (Multimedia Memory Management Unit)
- |
- +--------+
- | |
- gals0-rx gals1-rx (Global Async Local Sync rx)
- | |
- | |
- gals0-tx gals1-tx (Global Async Local Sync tx)
- | | Some SoCs may have GALS.
- +--------+
- |
- SMI Common(Smart Multimedia Interface Common)
- |
- +----------------+-------
- | |
- | gals-rx There may be GALS in some larbs.
- | |
- | |
- | gals-tx
- | |
- SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
- (display) (vdec)
- | |
- | |
- +-----+-----+ +----+----+
- | | | | | |
- | | |... | | | ... There are different ports in each larb.
- | | | | | |
-OVL0 RDMA0 WDMA0 MC PP VLD
-
- As above, The Multimedia HW will go through SMI and M4U while it
-access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
-smi local arbiter and smi common. It will control whether the Multimedia
-HW should go though the m4u for translation or bypass it and talk
-directly with EMI. And also SMI help control the power domain and clocks for
-each local arbiter.
- Normally we specify a local arbiter(larb) for each multimedia HW
-like display, video decode, and camera. And there are different ports
-in each larb. Take a example, There are many ports like MC, PP, VLD in the
-video decode local arbiter, all these ports are according to the video HW.
- In some SoCs, there may be a GALS(Global Async Local Sync) module between
-smi-common and m4u, and additional GALS module between smi-larb and
-smi-common. GALS can been seen as a "asynchronous fifo" which could help
-synchronize for the modules in different clock frequency.
-
-Required properties:
-- compatible : must be one of the following string:
- "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
- "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW.
- "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW.
- "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
- generation one m4u HW.
- "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW.
- "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
- "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
-- reg : m4u register base and size.
-- interrupts : the interrupt of m4u.
-- clocks : must contain one entry for each clock-names.
-- clock-names : Only 1 optional clock:
- - "bclk": the block clock of m4u.
- Here is the list which require this "bclk":
- - mt2701, mt2712, mt7623 and mt8173.
- Note that m4u use the EMI clock which always has been enabled before kernel
- if there is no this "bclk".
-- mediatek,larbs : List of phandle to the local arbiters in the current Socs.
- Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
- according to the local arbiter index, like larb0, larb1, larb2...
-- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
- Specifies the mtk_m4u_id as defined in
- dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
- dt-binding/memory/mt2712-larb-port.h for mt2712,
- dt-binding/memory/mt6779-larb-port.h for mt6779,
- dt-binding/memory/mt8167-larb-port.h for mt8167,
- dt-binding/memory/mt8173-larb-port.h for mt8173, and
- dt-binding/memory/mt8183-larb-port.h for mt8183.
-
-Example:
- iommu: iommu@10205000 {
- compatible = "mediatek,mt8173-m4u";
- reg = <0 0x10205000 0 0x1000>;
- interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&infracfg CLK_INFRA_M4U>;
- clock-names = "bclk";
- mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>;
- #iommu-cells = <1>;
- };
-
-Example for a client device:
- display {
- compatible = "mediatek,mt8173-disp";
- iommus = <&iommu M4U_PORT_DISP_OVL0>,
- <&iommu M4U_PORT_DISP_RDMA0>;
- ...
- };
diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
new file mode 100644
index 000000000000..0f26fe14c8e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek IOMMU Architecture Implementation
+
+maintainers:
+ - Yong Wu <yong.wu@mediatek.com>
+
+description: |+
+ Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and
+ this M4U have two generations of HW architecture. Generation one uses flat
+ pagetable, and only supports 4K size page mapping. Generation two uses the
+ ARM Short-Descriptor translation table format for address translation.
+
+ About the M4U Hardware Block Diagram, please check below:
+
+ EMI (External Memory Interface)
+ |
+ m4u (Multimedia Memory Management Unit)
+ |
+ +--------+
+ | |
+ gals0-rx gals1-rx (Global Async Local Sync rx)
+ | |
+ | |
+ gals0-tx gals1-tx (Global Async Local Sync tx)
+ | | Some SoCs may have GALS.
+ +--------+
+ |
+ SMI Common(Smart Multimedia Interface Common)
+ |
+ +----------------+-------
+ | |
+ | gals-rx There may be GALS in some larbs.
+ | |
+ | |
+ | gals-tx
+ | |
+ SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb).
+ (display) (vdec)
+ | |
+ | |
+ +-----+-----+ +----+----+
+ | | | | | |
+ | | |... | | | ... There are different ports in each larb.
+ | | | | | |
+ OVL0 RDMA0 WDMA0 MC PP VLD
+
+ As above, The Multimedia HW will go through SMI and M4U while it
+ access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
+ smi local arbiter and smi common. It will control whether the Multimedia
+ HW should go though the m4u for translation or bypass it and talk
+ directly with EMI. And also SMI help control the power domain and clocks for
+ each local arbiter.
+
+ Normally we specify a local arbiter(larb) for each multimedia HW
+ like display, video decode, and camera. And there are different ports
+ in each larb. Take a example, There are many ports like MC, PP, VLD in the
+ video decode local arbiter, all these ports are according to the video HW.
+
+ In some SoCs, there may be a GALS(Global Async Local Sync) module between
+ smi-common and m4u, and additional GALS module between smi-larb and
+ smi-common. GALS can been seen as a "asynchronous fifo" which could help
+ synchronize for the modules in different clock frequency.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - mediatek,mt2701-m4u # generation one
+ - mediatek,mt2712-m4u # generation two
+ - mediatek,mt6779-m4u # generation two
+ - mediatek,mt8167-m4u # generation two
+ - mediatek,mt8173-m4u # generation two
+ - mediatek,mt8183-m4u # generation two
+ - mediatek,mt8192-m4u # generation two
+
+ - description: mt7623 generation one
+ items:
+ - const: mediatek,mt7623-m4u
+ - const: mediatek,mt2701-m4u
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: bclk is the block clock.
+
+ clock-names:
+ items:
+ - const: bclk
+
+ mediatek,larbs:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ minItems: 1
+ maxItems: 32
+ description: |
+ List of phandle to the local arbiters in the current Socs.
+ Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort
+ according to the local arbiter index, like larb0, larb1, larb2...
+
+ '#iommu-cells':
+ const: 1
+ description: |
+ This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as
+ defined in
+ dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623,
+ dt-binding/memory/mt2712-larb-port.h for mt2712,
+ dt-binding/memory/mt6779-larb-port.h for mt6779,
+ dt-binding/memory/mt8167-larb-port.h for mt8167,
+ dt-binding/memory/mt8173-larb-port.h for mt8173,
+ dt-binding/memory/mt8183-larb-port.h for mt8183,
+ dt-binding/memory/mt8192-larb-port.h for mt8192.
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - mediatek,larbs
+ - '#iommu-cells'
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mediatek,mt2701-m4u
+ - mediatek,mt2712-m4u
+ - mediatek,mt8173-m4u
+ - mediatek,mt8192-m4u
+
+ then:
+ required:
+ - clocks
+
+ - if:
+ properties:
+ compatible:
+ enum:
+ - mediatek,mt8192-m4u
+
+ then:
+ required:
+ - power-domains
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ iommu: iommu@10205000 {
+ compatible = "mediatek,mt8173-m4u";
+ reg = <0x10205000 0x1000>;
+ interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&infracfg CLK_INFRA_M4U>;
+ clock-names = "bclk";
+ mediatek,larbs = <&larb0 &larb1 &larb2
+ &larb3 &larb4 &larb5>;
+ #iommu-cells = <1>;
+ };
+
+ - |
+ #include <dt-bindings/memory/mt8173-larb-port.h>
+
+ /* Example for a client device */
+ display {
+ compatible = "mediatek,mt8173-disp";
+ iommus = <&iommu M4U_PORT_DISP_OVL0>,
+ <&iommu M4U_PORT_DISP_RDMA0>;
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index 992fe3b0900a..b50eb5320189 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11178,6 +11178,15 @@ S: Maintained
F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
F: drivers/i2c/busses/i2c-mt65xx.c
+MEDIATEK IOMMU DRIVER
+M: Yong Wu <yong.wu@mediatek.com>
+L: iommu@lists.linux-foundation.org
+L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: Documentation/devicetree/bindings/iommu/mediatek*
+F: drivers/iommu/mtk-iommu*
+F: include/dt-bindings/memory/mt*-port.h
+
MEDIATEK JPEG DRIVER
M: Rick Chang <rick.chang@mediatek.com>
M: Bin Liu <bin.liu@mediatek.com>
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index e13b092e6004..bb251cab61f3 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -182,9 +182,13 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
unsigned long start, unsigned long end)
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
+ struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
+ size_t size = end - start + 1;
- arm_smmu_atc_inv_domain(smmu_mn->domain, mm->pasid, start,
- end - start + 1);
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
+ arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
+ PAGE_SIZE, false, smmu_domain);
+ arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size);
}
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -391,7 +395,7 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
unsigned long reg, fld;
unsigned long oas;
unsigned long asid_bits;
- u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY;
+ u32 feat_mask = ARM_SMMU_FEAT_COHERENCY;
if (vabits_actual == 52)
feat_mask |= ARM_SMMU_FEAT_VAX;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8ca7415d785d..8594b4a83043 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -88,15 +88,6 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
-static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
- struct arm_smmu_device *smmu)
-{
- if (offset > SZ_64K)
- return smmu->page1 + offset - SZ_64K;
-
- return smmu->base + offset;
-}
-
static void parse_driver_options(struct arm_smmu_device *smmu)
{
int i = 0;
@@ -272,9 +263,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
break;
case CMDQ_OP_TLBI_NH_VA:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+ fallthrough;
+ case CMDQ_OP_TLBI_EL2_VA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
@@ -296,6 +289,9 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
+ case CMDQ_OP_TLBI_EL2_ASID:
+ cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+ break;
case CMDQ_OP_ATC_INV:
cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
@@ -886,7 +882,8 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_TLBI_NH_ASID,
+ .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
.tlbi.asid = asid,
};
@@ -1269,13 +1266,16 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
}
if (s1_cfg) {
+ u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
+ STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
+
BUG_ON(ste_live);
dst[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
- FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
+ FIELD_PREP(STRTAB_STE_1_STRW, strw));
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
@@ -1667,40 +1667,28 @@ static void arm_smmu_tlb_inv_context(void *cookie)
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
-static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
- size_t granule, bool leaf,
- struct arm_smmu_domain *smmu_domain)
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ size_t granule,
+ struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_device *smmu = smmu_domain->smmu;
- unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
+ unsigned long end = iova + size, num_pages = 0, tg = 0;
size_t inv_range = granule;
struct arm_smmu_cmdq_batch cmds = {};
- struct arm_smmu_cmdq_ent cmd = {
- .tlbi = {
- .leaf = leaf,
- },
- };
if (!size)
return;
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- cmd.opcode = CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
- } else {
- cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
- }
-
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
- cmd.tlbi.tg = (tg - 10) / 2;
+ cmd->tlbi.tg = (tg - 10) / 2;
/* Determine what level the granule is at */
- cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
num_pages = size >> tg;
}
@@ -1718,11 +1706,11 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
/* Determine the power of 2 multiple number of pages */
scale = __ffs(num_pages);
- cmd.tlbi.scale = scale;
+ cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
- cmd.tlbi.num = num - 1;
+ cmd->tlbi.num = num - 1;
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
@@ -1731,17 +1719,54 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
num_pages -= num << scale;
}
- cmd.tlbi.addr = iova;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ cmd->tlbi.addr = iova;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
iova += inv_range;
}
arm_smmu_cmdq_batch_submit(smmu, &cmds);
+}
+
+static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .tlbi = {
+ .leaf = leaf,
+ },
+ };
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
+ cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ }
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
/*
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
- arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
+ arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
+}
+
+void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
+ .tlbi = {
+ .asid = asid,
+ .leaf = leaf,
+ },
+ };
+
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
@@ -1757,7 +1782,7 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
- arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
+ arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
}
static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -2280,8 +2305,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
- gather->pgsize, true, smmu_domain);
+ arm_smmu_tlb_inv_range_domain(gather->start,
+ gather->end - gather->start + 1,
+ gather->pgsize, true, smmu_domain);
}
static phys_addr_t
@@ -2611,6 +2637,7 @@ static struct iommu_ops arm_smmu_ops = {
/* Probing and initialisation functions */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q,
+ void __iomem *page,
unsigned long prod_off,
unsigned long cons_off,
size_t dwords, const char *name)
@@ -2639,8 +2666,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1 << q->llq.max_n_shift, name);
}
- q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
- q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
+ q->prod_reg = page + prod_off;
+ q->cons_reg = page + cons_off;
q->ent_dwords = dwords;
q->q_base = Q_BASE_RWA;
@@ -2684,9 +2711,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
int ret;
/* cmdq */
- ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
- ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
- "cmdq");
+ ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
+ ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
+ CMDQ_ENT_DWORDS, "cmdq");
if (ret)
return ret;
@@ -2695,9 +2722,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
return ret;
/* evtq */
- ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
- ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
- "evtq");
+ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
+ ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
+ EVTQ_ENT_DWORDS, "evtq");
if (ret)
return ret;
@@ -2705,9 +2732,9 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
- return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
- ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
- "priq");
+ return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
+ ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
+ PRIQ_ENT_DWORDS, "priq");
}
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -3060,7 +3087,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
/* CR2 (random crap) */
- reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
+ reg = CR2_PTM | CR2_RECINVSID;
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H)
+ reg |= CR2_E2H;
+
writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
/* Stream table */
@@ -3099,10 +3130,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
- writel_relaxed(smmu->evtq.q.llq.prod,
- arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
- writel_relaxed(smmu->evtq.q.llq.cons,
- arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
+ writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
+ writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
enables |= CR0_EVTQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -3117,9 +3146,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writeq_relaxed(smmu->priq.q.q_base,
smmu->base + ARM_SMMU_PRIQ_BASE);
writel_relaxed(smmu->priq.q.llq.prod,
- arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
+ smmu->page1 + ARM_SMMU_PRIQ_PROD);
writel_relaxed(smmu->priq.q.llq.cons,
- arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
+ smmu->page1 + ARM_SMMU_PRIQ_CONS);
enables |= CR0_PRIQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -3221,8 +3250,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->options |= ARM_SMMU_OPT_MSIPOLL;
}
- if (reg & IDR0_HYP)
+ if (reg & IDR0_HYP) {
smmu->features |= ARM_SMMU_FEAT_HYP;
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ smmu->features |= ARM_SMMU_FEAT_E2H;
+ }
/*
* The coherency feature as set by FW is used in preference to the ID
@@ -3489,11 +3521,7 @@ err_reset_pci_ops: __maybe_unused;
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
resource_size_t size)
{
- struct resource res = {
- .flags = IORESOURCE_MEM,
- .start = start,
- .end = start + size - 1,
- };
+ struct resource res = DEFINE_RES_MEM(start, size);
return devm_ioremap_resource(dev, &res);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 96c2e9565e00..f985817c967a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -139,15 +139,15 @@
#define ARM_SMMU_CMDQ_CONS 0x9c
#define ARM_SMMU_EVTQ_BASE 0xa0
-#define ARM_SMMU_EVTQ_PROD 0x100a8
-#define ARM_SMMU_EVTQ_CONS 0x100ac
+#define ARM_SMMU_EVTQ_PROD 0xa8
+#define ARM_SMMU_EVTQ_CONS 0xac
#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
#define ARM_SMMU_PRIQ_BASE 0xc0
-#define ARM_SMMU_PRIQ_PROD 0x100c8
-#define ARM_SMMU_PRIQ_CONS 0x100cc
+#define ARM_SMMU_PRIQ_PROD 0xc8
+#define ARM_SMMU_PRIQ_CONS 0xcc
#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
@@ -430,6 +430,8 @@ struct arm_smmu_cmdq_ent {
#define CMDQ_OP_TLBI_NH_ASID 0x11
#define CMDQ_OP_TLBI_NH_VA 0x12
#define CMDQ_OP_TLBI_EL2_ALL 0x20
+ #define CMDQ_OP_TLBI_EL2_ASID 0x21
+ #define CMDQ_OP_TLBI_EL2_VA 0x22
#define CMDQ_OP_TLBI_S12_VMALL 0x28
#define CMDQ_OP_TLBI_S2_IPA 0x2a
#define CMDQ_OP_TLBI_NSNH_ALL 0x30
@@ -604,6 +606,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_RANGE_INV (1 << 15)
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
+#define ARM_SMMU_FEAT_E2H (1 << 18)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -694,6 +697,9 @@ extern struct arm_smmu_ctx_desc quiet_cd;
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
+void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain);
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)