diff options
| author | Will Deacon <will@kernel.org> | 2024-07-04 15:13:53 +0100 |
|---|---|---|
| committer | Will Deacon <will@kernel.org> | 2024-07-04 15:13:53 +0100 |
| commit | ac4e52c69f2d20b13f37397f44fa3d59dc4a87f9 (patch) | |
| tree | 12c02420f8200715b0c56be2671c16b58b2bef84 | |
| parent | a4ce399973d21494eab12e7049f38d47c0c4cb30 (diff) | |
| parent | 0b4eeee2876f2b08442eb32081451bf130e01a4c (diff) | |
| download | linux-ac4e52c69f2d20b13f37397f44fa3d59dc4a87f9.tar.gz linux-ac4e52c69f2d20b13f37397f44fa3d59dc4a87f9.tar.bz2 linux-ac4e52c69f2d20b13f37397f44fa3d59dc4a87f9.zip | |
Merge branch 'for-joerg/arm-smmu/updates' into for-joerg/arm-smmu/next
* for-joerg/arm-smmu/updates: (29 commits)
iommu/arm-smmu-qcom: Register the TBU driver in qcom_smmu_impl_init
iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping
iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc
iommu/io-pgtable-arm: Add read_and_clear_dirty() support
iommu/arm-smmu-v3: Add feature detection for HTTU
iommu/arm-smmu-v3: Add support for domain_alloc_user fn
iommu/arm-smmu-qcom: record reason for deferring probe
iommu/arm-smmu: Pretty-print context fault related regs
iommu/arm-smmu-qcom-debug: Do not print for handled faults
iommu/arm-smmu: Add CB prefix to register bitfields
iommu/arm-smmu-v3: add missing MODULE_DESCRIPTION() macro
iommu/arm-smmu-v3: Shrink the strtab l1_desc array
iommu/arm-smmu-v3: Do not zero the strtab twice
iommu/arm-smmu-v3: Allow setting a S1 domain to a PASID
iommu/arm-smmu-v3: Allow a PASID to be set when RID is IDENTITY/BLOCKED
iommu/arm-smmu-v3: Test the STE S1DSS functionality
iommu/arm-smmu-v3: Allow IDENTITY/BLOCKED to be set while PASID is used
iommu/arm-smmu-v3: Put the SVA mmu notifier in the smmu_domain
iommu/arm-smmu-v3: Keep track of arm_smmu_master_domain for SVA
iommu/arm-smmu-v3: Make SVA allocate a normal arm_smmu_domain
...
| -rw-r--r-- | drivers/iommu/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/Makefile | 5 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 431 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 117 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 759 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 63 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 2 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 83 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 42 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 2 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.c | 74 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.h | 71 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 119 | ||||
| -rw-r--r-- | drivers/iommu/iommufd/hw_pagetable.c | 3 | ||||
| -rw-r--r-- | include/linux/io-pgtable.h | 3 |
16 files changed, 1117 insertions, 662 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c04584be3089..a82f10054aec 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -394,6 +394,7 @@ config ARM_SMMU_V3 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 014a997753a8..355173d1441d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o -arm_smmu_v3-objs-y += arm-smmu-v3.o -arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o -arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) +arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e490ffb38015..a7c36654dee5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,103 +13,31 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -struct arm_smmu_mmu_notifier { - struct mmu_notifier mn; - struct arm_smmu_ctx_desc *cd; - bool cleared; - refcount_t refs; - struct list_head list; - struct arm_smmu_domain *domain; -}; - -#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) - -struct arm_smmu_bond { - struct mm_struct *mm; - struct arm_smmu_mmu_notifier *smmu_mn; - struct list_head list; -}; - -#define sva_to_bond(handle) \ - container_of(handle, struct arm_smmu_bond, sva) - static DEFINE_MUTEX(sva_lock); -static void +static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_master *master; + struct arm_smmu_master_domain *master_domain; struct arm_smmu_cd target_cd; unsigned long flags; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; - /* S1 domains only support RID attachment right now */ - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); - arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target_cd); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } -/* - * Check if the CPU ASID is available on the SMMU side. If a private context - * descriptor is using it, try to replace it. - */ -static struct arm_smmu_ctx_desc * -arm_smmu_share_asid(struct mm_struct *mm, u16 asid) -{ - int ret; - u32 new_asid; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_device *smmu; - struct arm_smmu_domain *smmu_domain; - - cd = xa_load(&arm_smmu_asid_xa, asid); - if (!cd) - return NULL; - - if (cd->mm) { - if (WARN_ON(cd->mm != mm)) - return ERR_PTR(-EINVAL); - /* All devices bound to this mm use the same cd struct. */ - refcount_inc(&cd->refs); - return cd; - } - - smmu_domain = container_of(cd, struct arm_smmu_domain, cd); - smmu = smmu_domain->smmu; - - ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - if (ret) - return ERR_PTR(-ENOSPC); - /* - * Race with unmap: TLB invalidations will start targeting the new ASID, - * which isn't assigned yet. We'll do an invalidate-all on the old ASID - * later, so it doesn't matter. - */ - cd->asid = new_asid; - /* - * Update ASID and invalidate CD in all associated masters. There will - * be some overlap between use of both ASIDs, until we invalidate the - * TLB. - */ - arm_smmu_update_s1_domain_cd_entry(smmu_domain); - - /* Invalidate TLB entries previously associated with that context */ - arm_smmu_tlb_inv_asid(smmu, asid); - - xa_erase(&arm_smmu_asid_xa, asid); - return NULL; -} - static u64 page_size_to_cd(void) { static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || @@ -187,69 +115,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); -static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) -{ - u16 asid; - int err = 0; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_ctx_desc *ret = NULL; - - /* Don't free the mm until we release the ASID */ - mmgrab(mm); - - asid = arm64_mm_context_get(mm); - if (!asid) { - err = -ESRCH; - goto out_drop_mm; - } - - cd = kzalloc(sizeof(*cd), GFP_KERNEL); - if (!cd) { - err = -ENOMEM; - goto out_put_context; - } - - refcount_set(&cd->refs, 1); - - mutex_lock(&arm_smmu_asid_lock); - ret = arm_smmu_share_asid(mm, asid); - if (ret) { - mutex_unlock(&arm_smmu_asid_lock); - goto out_free_cd; - } - - err = xa_insert(&arm_smmu_asid_xa, asid, cd, GFP_KERNEL); - mutex_unlock(&arm_smmu_asid_lock); - - if (err) - goto out_free_asid; - - cd->asid = asid; - cd->mm = mm; - - return cd; - -out_free_asid: - arm_smmu_free_asid(cd); -out_free_cd: - kfree(cd); -out_put_context: - arm64_mm_context_put(mm); -out_drop_mm: - mmdrop(mm); - return err < 0 ? ERR_PTR(err) : ret; -} - -static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) -{ - if (arm_smmu_free_asid(cd)) { - /* Unpin ASID */ - arm64_mm_context_put(cd->mm); - mmdrop(cd->mm); - kfree(cd); - } -} - /* * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this * is used as a threshold to replace per-page TLBI commands to issue in the @@ -264,8 +129,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); size_t size; /* @@ -282,62 +147,50 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, size = 0; } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { - if (!size) - arm_smmu_tlb_inv_asid(smmu_domain->smmu, - smmu_mn->cd->asid); - else - arm_smmu_tlb_inv_range_asid(start, size, - smmu_mn->cd->asid, - PAGE_SIZE, false, - smmu_domain); - } + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + else + arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid, + PAGE_SIZE, false, smmu_domain); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain(smmu_domain, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - struct arm_smmu_master *master; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); + struct arm_smmu_master_domain *master_domain; unsigned long flags; - mutex_lock(&sva_lock); - if (smmu_mn->cleared) { - mutex_unlock(&sva_lock); - return; - } - /* * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. */ spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; - cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; - arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + arm_smmu_make_sva_cd(&target, master, NULL, + smmu_domain->cd.asid); + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); - - smmu_mn->cleared = true; - mutex_unlock(&sva_lock); + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) { - kfree(mn_to_smmu(mn)); + kfree(container_of(mn, struct arm_smmu_domain, mmu_notifier)); } static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { @@ -346,127 +199,6 @@ static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { .free_notifier = arm_smmu_mmu_notifier_free, }; -/* Allocate or get existing MMU notifier for this {domain, mm} pair */ -static struct arm_smmu_mmu_notifier * -arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_mmu_notifier *smmu_mn; - - list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { - if (smmu_mn->mn.mm == mm) { - refcount_inc(&smmu_mn->refs); - return smmu_mn; - } - } - - cd = arm_smmu_alloc_shared_cd(mm); - if (IS_ERR(cd)) - return ERR_CAST(cd); - - smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL); - if (!smmu_mn) { - ret = -ENOMEM; - goto err_free_cd; - } - - refcount_set(&smmu_mn->refs, 1); - smmu_mn->cd = cd; - smmu_mn->domain = smmu_domain; - smmu_mn->mn.ops = &arm_smmu_mmu_notifier_ops; - - ret = mmu_notifier_register(&smmu_mn->mn, mm); - if (ret) { - kfree(smmu_mn); - goto err_free_cd; - } - - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); - return smmu_mn; - -err_free_cd: - arm_smmu_free_shared_cd(cd); - return ERR_PTR(ret); -} - -static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) -{ - struct mm_struct *mm = smmu_mn->mn.mm; - struct arm_smmu_ctx_desc *cd = smmu_mn->cd; - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - - if (!refcount_dec_and_test(&smmu_mn->refs)) - return; - - list_del(&smmu_mn->list); - - /* - * If we went through clear(), we've already invalidated, and no - * new TLB entry can have been formed. - */ - if (!smmu_mn->cleared) { - arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, - 0); - } - - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); - arm_smmu_free_shared_cd(cd); -} - -static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr; - struct arm_smmu_bond *bond; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain; - - if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return -ENODEV; - smmu_domain = to_smmu_domain(domain); - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return -ENODEV; - - if (!master || !master->sva_enabled) - return -ENODEV; - - bond = kzalloc(sizeof(*bond), GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond->mm = mm; - - bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); - if (IS_ERR(bond->smmu_mn)) { - ret = PTR_ERR(bond->smmu_mn); - goto err_free_bond; - } - - cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm)); - if (!cdptr) { - ret = -ENOMEM; - goto err_put_notifier; - } - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, pasid, cdptr, &target); - - list_add(&bond->list, &master->bonds); - return 0; - -err_put_notifier: - arm_smmu_mmu_notifier_put(bond->smmu_mn); -err_free_bond: - kfree(bond); - return ret; -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -583,11 +315,6 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master) int arm_smmu_master_disable_sva(struct arm_smmu_master *master) { mutex_lock(&sva_lock); - if (!list_empty(&master->bonds)) { - dev_err(master->dev, "cannot disable SVA, device is bound\n"); - mutex_unlock(&sva_lock); - return -EBUSY; - } arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); @@ -604,51 +331,51 @@ void arm_smmu_sva_notifier_synchronize(void) mmu_notifier_synchronize(); } -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id) -{ - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond = NULL, *t; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - mutex_lock(&sva_lock); - - arm_smmu_clear_cd(master, id); - - list_for_each_entry(t, &master->bonds, list) { - if (t->mm == mm) { - bond = t; - break; - } - } - - if (!WARN_ON(!bond)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { - int ret = 0; - struct mm_struct *mm = domain->mm; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_cd target; + int ret; - if (mm_get_enqcmd_pasid(mm) != id) + /* Prevent arm_smmu_mm_release from being called while we are attaching */ + if (!mmget_not_zero(domain->mm)) return -EINVAL; - mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, id, mm); - mutex_unlock(&sva_lock); + /* + * This does not need the arm_smmu_asid_lock because SVA domains never + * get reassigned + */ + arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid); + ret = arm_smmu_set_pasid(master, smmu_domain, id, &target); + mmput(domain->mm); return ret; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(domain); + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + /* + * Ensure the ASID is empty in the iommu cache before allowing reuse. + */ + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + + /* + * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can + * still be called/running at this point. We allow the ASID to be + * reused, and if there is a race then it just suffers harmless + * unnecessary invalidation. + */ + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + + /* + * Actual free is defered to the SRCU callback + * arm_smmu_mmu_notifier_free() + */ + mmu_notifier_put(&smmu_domain->mmu_notifier); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -656,14 +383,38 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { .free = arm_smmu_sva_domain_free }; -struct iommu_domain *arm_smmu_sva_domain_alloc(void) +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { - struct iommu_domain *domain; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_domain *smmu_domain; + u32 asid; + int ret; + + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); + smmu_domain->domain.type = IOMMU_DOMAIN_SVA; + smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + smmu_domain->smmu = smmu; + + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + goto err_free; + + smmu_domain->cd.asid = asid; + smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops; + ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm); + if (ret) + goto err_asid; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - domain->ops = &arm_smmu_sva_domain_ops; + return &smmu_domain->domain; - return domain; +err_asid: + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 315e487fd990..cceb737a7001 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -144,6 +144,14 @@ static void arm_smmu_v3_test_ste_expect_transition( KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); } +static void arm_smmu_v3_test_ste_expect_non_hitless_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_ste_expect_transition(test, cur, target, + num_syncs_expected, false); +} + static void arm_smmu_v3_test_ste_expect_hitless_transition( struct kunit *test, const struct arm_smmu_ste *cur, const struct arm_smmu_ste *target, unsigned int num_syncs_expected) @@ -155,6 +163,7 @@ static void arm_smmu_v3_test_ste_expect_hitless_transition( static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, + unsigned int s1dss, const dma_addr_t dma_addr) { struct arm_smmu_master master = { @@ -164,7 +173,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master); + arm_smmu_make_cdtable_ste(ste, &master, true, s1dss); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -194,7 +203,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -203,7 +213,8 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -212,7 +223,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(3)); } @@ -221,17 +233,59 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(3)); } +static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + + /* + * Flipping s1dss on a CD table STE only involves changes to the second + * qword of an STE and can be done in a single write. + */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(1)); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &ste, NUM_EXPECTED_SYNCS(1)); +} + +static void +arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void +arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2)); +} + static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, bool ats_enabled) { struct arm_smmu_master master = { .smmu = &smmu, - .ats_enabled = ats_enabled, }; struct io_pgtable io_pgtable = {}; struct arm_smmu_domain smmu_domain = { @@ -247,7 +301,7 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3; io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4; - arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain); + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain, ats_enabled); } static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) @@ -286,6 +340,48 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) NUM_EXPECTED_SYNCS(2)); } +static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste ste_2; + + /* + * Although no flow resembles this in practice, one way to force an STE + * update to be non-hitless is to change its CD table pointer as well as + * s1 dss field in the same update. + */ + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS, + 0x4B4B4b4B4B); + arm_smmu_v3_test_ste_expect_non_hitless_transition( + test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3)); +} + static void arm_smmu_v3_test_cd_expect_transition( struct kunit *test, const struct arm_smmu_cd *cur, const struct arm_smmu_cd *target, unsigned int num_syncs_expected, @@ -439,10 +535,16 @@ static struct kunit_case arm_smmu_v3_test_cases[] = { KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_cdtable), KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_s1dss_change), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_abort), KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_s2), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1), + KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), @@ -465,4 +567,5 @@ static struct kunit_suite arm_smmu_v3_test_module = { kunit_test_suites(&arm_smmu_v3_test_module); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_DESCRIPTION("KUnit tests for arm-smmu-v3 driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab415e107054..a31460f9f3d4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -27,6 +27,7 @@ #include <linux/pci-ats.h> #include <linux/platform_device.h> #include <kunit/visibility.h> +#include <uapi/linux/iommufd.h> #include "arm-smmu-v3.h" #include "../../dma-iommu.h" @@ -36,6 +37,9 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); +static struct iommu_ops arm_smmu_ops; +static struct iommu_dirty_ops arm_smmu_dirty_ops; + enum arm_smmu_msi_index { EVTQ_MSI_INDEX, GERROR_MSI_INDEX, @@ -80,7 +84,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { }; static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu); + struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -991,6 +995,14 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + + /* + * See 13.5 Summary of attribute/permission configuration fields + * for the SHCFG behavior. + */ + if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) == + STRTAB_STE_1_S1DSS_BYPASS) + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* S2 translates */ @@ -1211,8 +1223,8 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; } -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1289,6 +1301,8 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target) { + bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V); + bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V); struct arm_smmu_cd_writer cd_writer = { .writer = { .ops = &arm_smmu_cd_writer_ops, @@ -1297,6 +1311,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, .ssid = ssid, }; + if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) { + if (cur_valid) + master->cd_table.used_ssids--; + else + master->cd_table.used_ssids++; + } + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); } @@ -1331,6 +1352,12 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_0_ASET | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) ); + + /* To enable dirty flag update, set both Access flag and dirty state update */ + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA | + CTXDESC_CD_0_TCR_HD); + target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); @@ -1430,30 +1457,13 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) cd_table->cdtab = NULL; } -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) -{ - bool free; - struct arm_smmu_ctx_desc *old_cd; |
