diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-12 16:15:51 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-12 16:15:51 -0800 |
| commit | 13eaa5bda0df8f5c1c4f2a4fb4a0bc20787dcc68 (patch) | |
| tree | de605f4423dc72b5bba5b2e753d314b17df3b583 | |
| parent | 362f533a2a1098fe95020cb59340023e9b11d062 (diff) | |
| parent | 66dc1b791c5839d64d261c8b40250a33e6da050b (diff) | |
| download | linux-13eaa5bda0df8f5c1c4f2a4fb4a0bc20787dcc68.tar.gz linux-13eaa5bda0df8f5c1c4f2a4fb4a0bc20787dcc68.tar.bz2 linux-13eaa5bda0df8f5c1c4f2a4fb4a0bc20787dcc68.zip | |
Merge tag 'iommu-updates-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
- Identity domain support for virtio-iommu
- Move flush queue code into iommu-dma
- Some fixes for AMD IOMMU suspend/resume support when x2apic is used
- Arm SMMU Updates from Will Deacon:
- Revert evtq and priq back to their former sizes
- Return early on short-descriptor page-table allocation failure
- Fix page fault reporting for Adreno GPU on SMMUv2
- Make SMMUv3 MMU notifier ops 'const'
- Numerous new compatible strings for Qualcomm SMMUv2 implementations
- Various smaller fixes and cleanups
* tag 'iommu-updates-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (38 commits)
iommu/iova: Temporarily include dma-mapping.h from iova.h
iommu: Move flush queue data into iommu_dma_cookie
iommu/iova: Move flush queue code to iommu-dma
iommu/iova: Consolidate flush queue code
iommu/vt-d: Use put_pages_list
iommu/amd: Use put_pages_list
iommu/amd: Simplify pagetable freeing
iommu/iova: Squash flush_cb abstraction
iommu/iova: Squash entry_dtor abstraction
iommu/iova: Fix race between FQ timeout and teardown
iommu/amd: Fix typo in *glues … together* in comment
iommu/vt-d: Remove unused dma_to_mm_pfn function
iommu/vt-d: Drop duplicate check in dma_pte_free_pagetable()
iommu/vt-d: Use bitmap_zalloc() when applicable
iommu/amd: Remove useless irq affinity notifier
iommu/amd: X2apic mode: mask/unmask interrupts on suspend/resume
iommu/amd: X2apic mode: setup the INTX registers on mask/unmask
iommu/amd: X2apic mode: re-enable after resume
iommu/amd: Restore GA log/tail pointer on host resume
iommu/iova: Move fast alloc size roundup into alloc_iova_fast()
...
| -rw-r--r-- | Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 | ||||
| -rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 2 | ||||
| -rw-r--r-- | drivers/iommu/amd/init.c | 109 | ||||
| -rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 110 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 | ||||
| -rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 3 | ||||
| -rw-r--r-- | drivers/iommu/dma-iommu.c | 274 | ||||
| -rw-r--r-- | drivers/iommu/intel/iommu.c | 111 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-arm-v7s.c | 6 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 9 | ||||
| -rw-r--r-- | drivers/iommu/iommu.c | 3 | ||||
| -rw-r--r-- | drivers/iommu/iova.c | 209 | ||||
| -rw-r--r-- | drivers/iommu/virtio-iommu.c | 115 | ||||
| -rw-r--r-- | drivers/vdpa/vdpa_user/iova_domain.c | 8 | ||||
| -rw-r--r-- | include/linux/intel-svm.h | 6 | ||||
| -rw-r--r-- | include/linux/iommu.h | 3 | ||||
| -rw-r--r-- | include/linux/iova.h | 68 | ||||
| -rw-r--r-- | include/trace/events/iommu.h | 10 | ||||
| -rw-r--r-- | include/uapi/linux/virtio_iommu.h | 8 |
20 files changed, 493 insertions, 570 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index f66a3effba73..da5381c8ee11 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -38,10 +38,12 @@ properties: - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sdx55-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 867535eb0ce9..ffc89c4fb120 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -645,8 +645,6 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; #endif - /* IRQ notifier for IntCapXT interrupt */ - struct irq_affinity_notify intcapxt_notify; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1eacd43cb436..dc338acf3338 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -806,16 +806,27 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP u32 status, i; + u64 entry; if (!iommu->ga_log) return -EINVAL; - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - /* Check if already running */ - if (status & (MMIO_STATUS_GALOG_RUN_MASK)) + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK))) return 0; + entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, + &entry, sizeof(entry)); + entry = (iommu_virt_to_phys(iommu->ga_log_tail) & + (BIT_ULL(52)-1)) & ~7ULL; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, + &entry, sizeof(entry)); + writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); @@ -825,7 +836,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) break; } - if (i >= LOOP_TIMEOUT) + if (WARN_ON(i >= LOOP_TIMEOUT)) return -EINVAL; #endif /* CONFIG_IRQ_REMAP */ return 0; @@ -834,8 +845,6 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) static int iommu_init_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - u64 entry; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; @@ -849,16 +858,6 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!iommu->ga_log_tail) goto err_out; - entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, - &entry, sizeof(entry)); - entry = (iommu_virt_to_phys(iommu->ga_log_tail) & - (BIT_ULL(52)-1)) & ~7ULL; - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, - &entry, sizeof(entry)); - writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); - return 0; err_out: free_ga_log(iommu); @@ -1523,7 +1522,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) } /* - * This function clues the initialization function for one IOMMU + * This function glues the initialization function for one IOMMU * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. */ @@ -2016,48 +2015,18 @@ union intcapxt { }; } __attribute__ ((packed)); -/* - * There isn't really any need to mask/unmask at the irqchip level because - * the 64-bit INTCAPXT registers can be updated atomically without tearing - * when the affinity is being updated. - */ -static void intcapxt_unmask_irq(struct irq_data *data) -{ -} - -static void intcapxt_mask_irq(struct irq_data *data) -{ -} static struct irq_chip intcapxt_controller; static int intcapxt_irqdomain_activate(struct irq_domain *domain, struct irq_data *irqd, bool reserve) { - struct amd_iommu *iommu = irqd->chip_data; - struct irq_cfg *cfg = irqd_cfg(irqd); - union intcapxt xt; - - xt.capxt = 0ULL; - xt.dest_mode_logical = apic->dest_mode_logical; - xt.vector = cfg->vector; - xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); - xt.destid_24_31 = cfg->dest_apicid >> 24; - - /** - * Current IOMMU implemtation uses the same IRQ for all - * 3 IOMMU interrupts. - */ - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); return 0; } static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irqd) { - intcapxt_mask_irq(irqd); } @@ -2091,6 +2060,38 @@ static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq irq_domain_free_irqs_top(domain, virq, nr_irqs); } + +static void intcapxt_unmask_irq(struct irq_data *irqd) +{ + struct amd_iommu *iommu = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + union intcapxt xt; + + xt.capxt = 0ULL; + xt.dest_mode_logical = apic->dest_mode_logical; + xt.vector = cfg->vector; + xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); + xt.destid_24_31 = cfg->dest_apicid >> 24; + + /** + * Current IOMMU implementation uses the same IRQ for all + * 3 IOMMU interrupts. + */ + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); +} + +static void intcapxt_mask_irq(struct irq_data *irqd) +{ + struct amd_iommu *iommu = irqd->chip_data; + + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); +} + + static int intcapxt_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) { @@ -2100,8 +2101,12 @@ static int intcapxt_set_affinity(struct irq_data *irqd, ret = parent->chip->irq_set_affinity(parent, mask, force); if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; + return 0; +} - return intcapxt_irqdomain_activate(irqd->domain, irqd, false); +static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) +{ + return on ? -EOPNOTSUPP : 0; } static struct irq_chip intcapxt_controller = { @@ -2111,7 +2116,8 @@ static struct irq_chip intcapxt_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = intcapxt_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .irq_set_wake = intcapxt_set_wake, + .flags = IRQCHIP_MASK_ON_SUSPEND, }; static const struct irq_domain_ops intcapxt_domain_ops = { @@ -2173,7 +2179,6 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) return ret; } - iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); return 0; } @@ -2196,6 +2201,10 @@ static int iommu_init_irq(struct amd_iommu *iommu) iommu->int_enabled = true; enable_faults: + + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 182c93a43efd..b1bf4125b0f7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -74,87 +74,61 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, * ****************************************************************************/ -static void free_page_list(struct page *freelist) +static void free_pt_page(u64 *pt, struct list_head *freelist) { - while (freelist != NULL) { - unsigned long p = (unsigned long)page_address(freelist); + struct page *p = virt_to_page(pt); - freelist = freelist->freelist; - free_page(p); - } + list_add_tail(&p->lru, freelist); } -static struct page *free_pt_page(unsigned long pt, struct page *freelist) +static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) { - struct page *p = virt_to_page((void *)pt); + u64 *p; + int i; - p->freelist = freelist; + for (i = 0; i < 512; ++i) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; - return p; -} + /* Large PTE? */ + if (PM_PTE_LEVEL(pt[i]) == 0 || + PM_PTE_LEVEL(pt[i]) == 7) + continue; -#define DEFINE_FREE_PT_FN(LVL, FN) \ -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ -{ \ - unsigned long p; \ - u64 *pt; \ - int i; \ - \ - pt = (u64 *)__pt; \ - \ - for (i = 0; i < 512; ++i) { \ - /* PTE present? */ \ - if (!IOMMU_PTE_PRESENT(pt[i])) \ - continue; \ - \ - /* Large PTE? */ \ - if (PM_PTE_LEVEL(pt[i]) == 0 || \ - PM_PTE_LEVEL(pt[i]) == 7) \ - continue; \ - \ - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ - freelist = FN(p, freelist); \ - } \ - \ - return free_pt_page((unsigned long)pt, freelist); \ -} + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = IOMMU_PTE_PAGE(pt[i]); + if (lvl > 2) + free_pt_lvl(p, freelist, lvl - 1); + else + free_pt_page(p, freelist); + } -DEFINE_FREE_PT_FN(l2, free_pt_page) -DEFINE_FREE_PT_FN(l3, free_pt_l2) -DEFINE_FREE_PT_FN(l4, free_pt_l3) -DEFINE_FREE_PT_FN(l5, free_pt_l4) -DEFINE_FREE_PT_FN(l6, free_pt_l5) + free_pt_page(pt, freelist); +} -static struct page *free_sub_pt(unsigned long root, int mode, - struct page *freelist) +static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) { switch (mode) { case PAGE_MODE_NONE: case PAGE_MODE_7_LEVEL: break; case PAGE_MODE_1_LEVEL: - freelist = free_pt_page(root, freelist); + free_pt_page(root, freelist); break; case PAGE_MODE_2_LEVEL: - freelist = free_pt_l2(root, freelist); - break; case PAGE_MODE_3_LEVEL: - freelist = free_pt_l3(root, freelist); - break; case PAGE_MODE_4_LEVEL: - freelist = free_pt_l4(root, freelist); - break; case PAGE_MODE_5_LEVEL: - freelist = free_pt_l5(root, freelist); - break; case PAGE_MODE_6_LEVEL: - freelist = free_pt_l6(root, freelist); + free_pt_lvl(root, freelist, mode); break; default: BUG(); } - - return freelist; } void amd_iommu_domain_set_pgtable(struct protection_domain *domain, @@ -362,9 +336,9 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable, return pte; } -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) +static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) { - unsigned long pt; + u64 *pt; int mode; while (cmpxchg64(pte, pteval, 0) != pteval) { @@ -373,12 +347,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) } if (!IOMMU_PTE_PRESENT(pteval)) - return freelist; + return; - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); + pt = IOMMU_PTE_PAGE(pteval); mode = IOMMU_PTE_MODE(pteval); - return free_sub_pt(pt, mode, freelist); + free_sub_pt(pt, mode, freelist); } /* @@ -392,7 +366,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct protection_domain *dom = io_pgtable_ops_to_domain(ops); - struct page *freelist = NULL; + LIST_HEAD(freelist); bool updated = false; u64 __pte, *pte; int ret, i, count; @@ -412,9 +386,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, goto out; for (i = 0; i < count; ++i) - freelist = free_clear_pte(&pte[i], pte[i], freelist); + free_clear_pte(&pte[i], pte[i], &freelist); - if (freelist != NULL) + if (!list_empty(&freelist)) updated = true; if (count > 1) { @@ -449,7 +423,7 @@ out: } /* Everything flushed out, free pages now */ - free_page_list(freelist); + put_pages_list(&freelist); return ret; } @@ -511,8 +485,7 @@ static void v1_free_pgtable(struct io_pgtable *iop) { struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); struct protection_domain *dom; - struct page *freelist = NULL; - unsigned long root; + LIST_HEAD(freelist); if (pgtable->mode == PAGE_MODE_NONE) return; @@ -529,10 +502,9 @@ static void v1_free_pgtable(struct io_pgtable *iop) BUG_ON(pgtable->mode < PAGE_MODE_NONE || pgtable->mode > PAGE_MODE_6_LEVEL); - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); + free_sub_pt(pgtable->root, pgtable->mode, &freelist); - free_page_list(freelist); + put_pages_list(&freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index ee66d1f4cb81..a737ba5f727e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -220,7 +220,7 @@ static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) kfree(mn_to_smmu(mn)); } -static struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { +static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { .invalidate_range = arm_smmu_mm_invalidate_range, .release = arm_smmu_mm_release, .free_notifier = arm_smmu_mmu_notifier_free, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 4cb136f07914..cd48590ada30 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -184,7 +184,6 @@ #else #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) #endif -#define Q_MIN_SZ_SHIFT (PAGE_SHIFT) /* * Stream table. @@ -374,7 +373,7 @@ /* Event queue */ #define EVTQ_ENT_SZ_SHIFT 5 #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) -#define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) +#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) #define EVTQ_0_ID GENMASK_ULL(7, 0) @@ -400,7 +399,7 @@ /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) -#define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) +#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) #define PRIQ_0_SID GENMASK_ULL(31, 0) #define PRIQ_0_SSID GENMASK_ULL(51, 32) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index ca736b065dd0..ba6298c7140e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -51,7 +51,7 @@ static void qcom_adreno_smmu_get_fault_info(const void *cookie, info->fsynr1 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSYNR1); info->far = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_FAR); info->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx)); - info->ttbr0 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0); + info->ttbr0 = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0); info->contextidr = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_CONTEXTIDR); } @@ -415,6 +415,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, { .compatible = "qcom,sm8350-smmu-500" }, + { .compatible = "qcom,sm8450-smmu-500" }, { } }; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index b42e38a0dbe2..d85d54f2b549 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -9,9 +9,12 @@ */ #include <linux/acpi_iort.h> +#include <linux/atomic.h> +#include <linux/crash_dump.h> #include <linux/device.h> -#include <linux/dma-map-ops.h> +#include <linux/dma-direct.h> #include <linux/dma-iommu.h> +#include <linux/dma-map-ops.h> #include <linux/gfp.h> #include <linux/huge_mm.h> #include <linux/iommu.h> @@ -20,11 +23,10 @@ #include <linux/mm.h> #include <linux/mutex.h> #include <linux/pci.h> -#include <linux/swiotlb.h> #include <linux/scatterlist.h> +#include <linux/spinlock.h> +#include <linux/swiotlb.h> #include <linux/vmalloc.h> -#include <linux/crash_dump.h> -#include <linux/dma-direct.h> struct iommu_dma_msi_page { struct list_head list; @@ -41,7 +43,19 @@ struct iommu_dma_cookie { enum iommu_dma_cookie_type type; union { /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct iova_domain iovad; + struct { + struct iova_domain iovad; + + struct iova_fq __percpu *fq; /* Flush queue */ + /* Number of TLB flushes that have been started */ + atomic64_t fq_flush_start_cnt; + /* Number of TLB flushes that have been finished */ + atomic64_t fq_flush_finish_cnt; + /* Timer to regularily empty the flush queues */ + struct timer_list fq_timer; + /* 1 when timer is active, 0 when not */ + atomic_t fq_timer_on; + }; /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ dma_addr_t msi_iova; }; @@ -64,16 +78,203 @@ static int __init iommu_dma_forcedac_setup(char *str) } early_param("iommu.forcedac", iommu_dma_forcedac_setup); -static void iommu_dma_entry_dtor(unsigned long data) +/* Number of entries per flush queue */ +#define IOVA_FQ_SIZE 256 + +/* Timeout (in ms) after which entries are flushed from the queue */ +#define IOVA_FQ_TIMEOUT 10 + +/* Flush queue entry for deferred flushing */ +struct iova_fq_entry { + unsigned long iova_pfn; + unsigned long pages; + struct list_head freelist; + u64 counter; /* Flush counter when this entry was added */ +}; + +/* Per-CPU flush queue structure */ +struct iova_fq { + struct iova_fq_entry entries[IOVA_FQ_SIZE]; + unsigned int head, tail; + spinlock_t lock; +}; + +#define fq_ring_for_each(i, fq) \ + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + +static inline bool fq_full(struct iova_fq *fq) +{ + assert_spin_locked(&fq->lock); + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); +} + +static inline unsigned int fq_ring_add(struct iova_fq *fq) +{ + unsigned int idx = fq->tail; + + assert_spin_locked(&fq->lock); + + fq->tail = (idx + 1) % IOVA_FQ_SIZE; + + return idx; +} + +static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); + unsigned int idx; + + assert_spin_locked(&fq->lock); + + fq_ring_for_each(idx, fq) { + + if (fq->entries[idx].counter >= counter) + break; + + put_pages_list(&fq->entries[idx].freelist); + free_iova_fast(&cookie->iovad, + fq->entries[idx].iova_pfn, + fq->entries[idx].pages); + + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + } +} + +static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) { - struct page *freelist = (struct page *)data; + atomic64_inc(&cookie->fq_flush_start_cnt); + cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); + atomic64_inc(&cookie->fq_flush_finish_cnt); +} + +static void fq_flush_timeout(struct timer_list *t) +{ + struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer); + int cpu; + + atomic_set(&cookie->fq_timer_on, 0); + fq_flush_iotlb(cookie); + + for_each_possible_cpu(cpu) { + unsigned long flags; + struct iova_fq *fq; + + fq = per_cpu_ptr(cookie->fq, cpu); + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free(cookie, fq); + spin_unlock_irqrestore(&fq->lock, flags); + } +} + +static void queue_iova(struct iommu_dma_cookie *cookie, + unsigned long pfn, unsigned long pages, + struct list_head *freelist) +{ + struct iova_fq *fq; + unsigned long flags; + unsigned int idx; + + /* + * Order against the IOMMU driver's pagetable update from unmapping + * @pte, to guarantee that fq_flush_iotlb() observes that if called + * from a different CPU before we release the lock below. Full barrier + * so it also pairs with iommu_dma_init_fq() to avoid seeing partially + * written fq state here. + */ + smp_mb(); - while (freelist) { - unsigned long p = (unsigned long)page_address(freelist); + fq = raw_cpu_ptr(cookie->fq); + spin_lock_irqsave(&fq->lock, flags); + + /* + * First remove all entries from the flush queue that have already been + * flushed out on another CPU. This makes the fq_full() check below less + * likely to be true. + */ + fq_ring_free(cookie, fq); - freelist = freelist->freelist; - free_page(p); + if (fq_full(fq)) { + fq_flush_iotlb(cookie); + fq_ring_free(cookie, fq); } + + idx = fq_ring_add(fq); + + fq->entries[idx].iova_pfn = pfn; + fq->entries[idx].pages = pages; + fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); + list_splice(freelist, &fq->entries[idx].freelist); + + spin_unlock_irqrestore(&fq->lock, flags); + + /* Avoid false sharing as much as possible. */ + if (!atomic_read(&cookie->fq_timer_on) && + !atomic_xchg(&cookie->fq_timer_on, 1)) + mod_timer(&cookie->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); +} + +static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +{ + int cpu, idx; + + if (!cookie->fq) + return; + + del_timer_sync(&cookie->fq_timer); + /* The IOVAs will be torn down separately, so just free our queued pages */ + for_each_possible_cpu(cpu) { + struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); + + fq_ring_for_each(idx, fq) + put_pages_list(&fq->entries[idx].freelist); + } + + free_percpu(cookie->fq); +} + +/* sysfs updates are serialised by the mutex of the group owning @domain */ +int iommu_dma_init_fq(struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_fq __percpu *queue; + int i, cpu; + + if (cookie->fq_domain) + return 0; + + atomic64_set(&cookie->fq_flush_start_cnt, 0); + atomic64_set(&cookie->fq_flush_finish_cnt, 0); + + queue = alloc_percpu(struct iova_fq); + if (!queue) { + pr_warn("iova flush queue initialization failed\n"); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) { + struct iova_fq *fq = per_cpu_ptr(queue, cpu); + + fq->head = 0; + fq->tail = 0; + + spin_lock_init(&fq->lock); + + for (i = 0; i < IOVA_FQ_SIZE; i++) + INIT_LIST_HEAD(&fq->entries[i].freelist); + } + + cookie->fq = queue; + + timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); + atomic_set(&cookie->fq_timer_on, 0); + /* + * Prevent incomplete fq state being observable. Pairs with path from + * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() + */ + smp_wmb(); + WRITE_ONCE(cookie->fq_domain, domain); + return 0; } static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) @@ -156,8 +357,10 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!cookie) return; - if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) + if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { + iommu_dma_free_fq(cookie); put_iova_domain(&cookie->iovad); + } list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { list_del(&msi->list); @@ -294,17 +497,6 @@ static int iova_reserve_iommu_regions(struct device *dev, return ret; } -static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) -{ - struct iommu_dma_cookie *cookie; - struct iommu_domain *domain; - - cookie = container_of(iovad, struct iommu_dma_cookie, iovad); - domain = cookie->fq_domain; - - domain->ops->flush_iotlb_all(domain); -} - static bool dev_is_untrusted(struct device *dev) { return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; @@ -315,30 +507,6 @@ static bool dev_use_swiotlb(struct device *dev) return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev); } -/* sysfs updates are serialised by the mutex of the group owning @domain */ -int iommu_dma_init_fq(struct iommu_domain *domain) -{ - struct iommu_dma_cookie *cookie = domain->iova_cookie; - int ret; - - if (cookie->fq_domain) - return 0; - - ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all, - iommu_dma_entry_dtor); - if (ret) { - pr_warn("iova flush queue initialization failed\n"); - return ret; - } - /* - * Prevent incomplete iovad->fq being observable. Pairs with path from - * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() - */ - smp_wmb(); - WRITE_ONCE(cookie->fq_domain, domain); - return 0; -} - /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -442,14 +610,6 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, shift = iova_shift(iovad); iova_len = size >> shift; - /* - * Freeing non-power-of-two-sized allocations back into the IOVA caches - * will come back to bite us badly, so we have to waste a bit of space - * rounding up anything cacheable to make sure that can't happen. The - * order of the unadjusted size will still match upon freeing. - */ - if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) - iova_len = roundup_pow_of_two(iova_len); dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); @@ -477,9 +637,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; else if (gather && gather->queued) - queue_iova(iovad, iova_pfn(iovad, iova), + queue_iova(cookie, iova_pfn(iovad, iova), size >> iova_shift(iovad), - (unsigned long)gather->freelist); + &gather->freelist); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b6a8f3282411..92fea3fbbb11 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -133,11 +133,6 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to wor |
