diff options
| -rw-r--r-- | Documentation/arm64/silicon-errata.txt | 5 | ||||
| -rw-r--r-- | Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 12 | ||||
| -rw-r--r-- | drivers/acpi/arm64/iort.c | 83 | ||||
| -rw-r--r-- | drivers/iommu/Kconfig | 6 | ||||
| -rw-r--r-- | drivers/iommu/amd_iommu.c | 458 | ||||
| -rw-r--r-- | drivers/iommu/amd_iommu_init.c | 44 | ||||
| -rw-r--r-- | drivers/iommu/amd_iommu_types.h | 3 | ||||
| -rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 229 | ||||
| -rw-r--r-- | drivers/iommu/arm-smmu.c | 64 | ||||
| -rw-r--r-- | drivers/iommu/dma-iommu.c | 2 | ||||
| -rw-r--r-- | drivers/iommu/intel-iommu.c | 26 | ||||
| -rw-r--r-- | drivers/iommu/intel-svm.c | 30 | ||||
| -rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 4 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-arm-v7s.c | 183 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 189 | ||||
| -rw-r--r-- | drivers/iommu/io-pgtable.h | 6 | ||||
| -rw-r--r-- | drivers/iommu/iommu.c | 17 | ||||
| -rw-r--r-- | drivers/iommu/iova.c | 30 | ||||
| -rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 353 | ||||
| -rw-r--r-- | drivers/iommu/omap-iommu.c | 2 | ||||
| -rw-r--r-- | drivers/iommu/s390-iommu.c | 15 | ||||
| -rw-r--r-- | include/linux/intel-svm.h | 20 |
22 files changed, 1233 insertions, 548 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index f5f93dca54b7..66e8ce14d23d 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -61,12 +61,15 @@ stable kernels. | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | -| Cavium | ThunderX SMMUv2 | #27704 | N/A | | Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 | +| Cavium | ThunderX SMMUv2 | #27704 | N/A | +| Cavium | ThunderX2 SMMUv3| #74 | N/A | +| Cavium | ThunderX2 SMMUv3| #126 | N/A | | | | | | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | | | | | | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | +| Hisilicon | Hip0{6,7} | #161010701 | N/A | | | | | | | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index be57550e14e4..c9abbf3e4f68 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -26,6 +26,12 @@ the PCIe specification. * "priq" - PRI Queue not empty * "cmdq-sync" - CMD_SYNC complete * "gerror" - Global Error activated + * "combined" - The combined interrupt is optional, + and should only be provided if the + hardware supports just a single, + combined interrupt line. + If provided, then the combined interrupt + will be used in preference to any others. - #iommu-cells : See the generic IOMMU binding described in devicetree/bindings/pci/pci-iommu.txt @@ -49,6 +55,12 @@ the PCIe specification. - hisilicon,broken-prefetch-cmd : Avoid sending CMD_PREFETCH_* commands to the SMMU. +- cavium,cn9900-broken-page1-regspace + : Replaces all page 1 offsets used for EVTQ_PROD/CONS, + PRIQ_PROD/CONS register access with page 0 offsets. + Set for Cavium ThunderX2 silicon that doesn't support + SMMU page1 register space. + ** Example smmu@2b400000 { diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index d048f72c23f8..a3215ee671c1 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -31,6 +31,11 @@ #define IORT_IOMMU_TYPE ((1 << ACPI_IORT_NODE_SMMU) | \ (1 << ACPI_IORT_NODE_SMMU_V3)) +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 +#endif + struct iort_its_msi_chip { struct list_head list; struct fwnode_handle *fw_node; @@ -819,6 +824,36 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node) return num_res; } +static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu) +{ + /* + * Cavium ThunderX2 implementation doesn't not support unique + * irq line. Use single irq line for all the SMMUv3 interrupts. + */ + if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + return false; + + /* + * ThunderX2 doesn't support MSIs from the SMMU, so we're checking + * SPI numbers here. + */ + return smmu->event_gsiv == smmu->pri_gsiv && + smmu->event_gsiv == smmu->gerr_gsiv && + smmu->event_gsiv == smmu->sync_gsiv; +} + +static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu) +{ + /* + * Override the size, for Cavium ThunderX2 implementation + * which doesn't support the page 1 SMMU register space. + */ + if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + return SZ_64K; + + return SZ_128K; +} + static void __init arm_smmu_v3_init_resources(struct resource *res, struct acpi_iort_node *node) { @@ -829,30 +864,38 @@ static void __init arm_smmu_v3_init_resources(struct resource *res, smmu = (struct acpi_iort_smmu_v3 *)node->node_data; res[num_res].start = smmu->base_address; - res[num_res].end = smmu->base_address + SZ_128K - 1; + res[num_res].end = smmu->base_address + + arm_smmu_v3_resource_size(smmu) - 1; res[num_res].flags = IORESOURCE_MEM; num_res++; + if (arm_smmu_v3_is_combined_irq(smmu)) { + if (smmu->event_gsiv) + acpi_iort_register_irq(smmu->event_gsiv, "combined", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + } else { - if (smmu->event_gsiv) - acpi_iort_register_irq(smmu->event_gsiv, "eventq", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); - - if (smmu->pri_gsiv) - acpi_iort_register_irq(smmu->pri_gsiv, "priq", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); - - if (smmu->gerr_gsiv) - acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); - - if (smmu->sync_gsiv) - acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); + if (smmu->event_gsiv) + acpi_iort_register_irq(smmu->event_gsiv, "eventq", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + + if (smmu->pri_gsiv) + acpi_iort_register_irq(smmu->pri_gsiv, "priq", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + + if (smmu->gerr_gsiv) + acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + + if (smmu->sync_gsiv) + acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + } } static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6ee3a25ae731..f73ff28f77e2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) + depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG config ROCKCHIP_IOMMU bool "Rockchip IOMMU Support" - depends on ARM + depends on ARM || ARM64 depends on ARCH_ROCKCHIP || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM_LPAE + depends on ARM || IOMMU_DMA depends on ARCH_RENESAS || COMPILE_TEST select IOMMU_API select IOMMU_IO_PGTABLE_LPAE diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f16d0f26ee24..688e77576e5a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -91,25 +91,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - struct dma_ops_domain *dma_dom; -}; - -struct flush_queue { - spinlock_t lock; - unsigned next; - struct flush_queue_entry *entries; -}; - -static DEFINE_PER_CPU(struct flush_queue, flush_queue); - -static atomic_t queue_timer_on; -static struct timer_list queue_timer; - /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -140,6 +121,8 @@ struct iommu_dev_data { PPR completions */ u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ }; /* @@ -155,6 +138,20 @@ static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; + u64 counter; /* Flush counter when this entry was added to the queue */ +}; + +struct flush_queue { + struct flush_queue_entry *entries; + unsigned head, tail; + spinlock_t lock; +}; + /* * Data container for a dma_ops specific protection domain */ @@ -164,6 +161,36 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; + + struct flush_queue __percpu *flush_queue; + + /* + * We need two counter here to be race-free wrt. IOTLB flushing and + * adding entries to the flush queue. + * + * The flush_start_cnt is incremented _before_ the IOTLB flush starts. + * New entries added to the flush ring-buffer get their 'counter' value + * from here. This way we can make sure that entries added to the queue + * (or other per-cpu queues of the same domain) while the TLB is about + * to be flushed are not considered to be flushed already. + */ + atomic64_t flush_start_cnt; + + /* + * The flush_finish_cnt is incremented when an IOTLB flush is complete. + * This value is always smaller than flush_start_cnt. The queue_add + * function frees all IOVAs that have a counter value smaller than + * flush_finish_cnt. This makes sure that we only free IOVAs that are + * flushed out of the IOTLB of the domain. + */ + atomic64_t flush_finish_cnt; + + /* + * Timer to make sure we don't keep IOVAs around unflushed + * for too long + */ + struct timer_list flush_timer; + atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -255,6 +282,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) list_add_tail(&dev_data->dev_data_list, &dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + ratelimit_default_init(&dev_data->rs); + return dev_data; } @@ -553,6 +582,29 @@ static void dump_command(unsigned long phys_addr) pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); } +static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, + u64 address, int flags) +{ + struct iommu_dev_data *dev_data = NULL; + struct pci_dev *pdev; + + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (pdev) + dev_data = get_dev_data(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", + domain_id, address, flags); + } else if (printk_ratelimit()) { + pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domain_id, address, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { int type, devid, domid, flags; @@ -577,7 +629,12 @@ retry: goto retry; } - printk(KERN_ERR "AMD-Vi: Event logged ["); + if (type == EVENT_TYPE_IO_FAULT) { + amd_iommu_report_page_fault(devid, domid, address, flags); + return; + } else { + printk(KERN_ERR "AMD-Vi: Event logged ["); + } switch (type) { case EVENT_TYPE_ILL_DEV: @@ -587,12 +644,6 @@ retry: address, flags); dump_dte_entry(devid); break; - case EVENT_TYPE_IO_FAULT: - printk("IO_PAGE_FAULT device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", @@ -850,19 +901,20 @@ static int wait_on_sem(volatile u64 *sem) } static void copy_cmd_to_buffer(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - u32 tail) + struct iommu_cmd *cmd) { u8 *target; - target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + target = iommu->cmd_buf + iommu->cmd_buf_tail; + + iommu->cmd_buf_tail += sizeof(*cmd); + iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); /* Tell the IOMMU about it */ - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } static void build_completion_wait(struct iommu_cmd *cmd, u64 address) @@ -1020,33 +1072,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, struct iommu_cmd *cmd, bool sync) { - u32 left, tail, head, next_tail; + unsigned int count = 0; + u32 left, next_tail; + next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; again: - - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; - left = (head - next_tail) % CMD_BUFFER_SIZE; + left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; if (left <= 0x20) { - struct iommu_cmd sync_cmd; - int ret; - - iommu->cmd_sem = 0; + /* Skip udelay() the first time around */ + if (count++) { + if (count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: Command buffer timeout\n"); + return -EIO; + } - build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); - copy_cmd_to_buffer(iommu, &sync_cmd, tail); + udelay(1); + } - if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) - return ret; + /* Update head and recheck remaining space */ + iommu->cmd_buf_head = readl(iommu->mmio_base + + MMIO_CMD_HEAD_OFFSET); goto again; } - copy_cmd_to_buffer(iommu, cmd, tail); + copy_cmd_to_buffer(iommu, cmd); - /* We need to sync now to make sure all commands are processed */ + /* Do we need to make sure all commands are processed? */ iommu->need_sync = sync; return 0; @@ -1735,6 +1788,180 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + kfree(queue->entries); + } + + free_percpu(dom->flush_queue); + + dom->flush_queue = NULL; +} + +static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + atomic64_set(&dom->flush_start_cnt, 0); + atomic64_set(&dom->flush_finish_cnt, 0); + + dom->flush_queue = alloc_percpu(struct flush_queue); + if (!dom->flush_queue) + return -ENOMEM; + + /* First make sure everything is cleared */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->head = 0; + queue->tail = 0; + queue->entries = NULL; + } + + /* Now start doing the allocation */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), + GFP_KERNEL); + if (!queue->entries) { + dma_ops_domain_free_flush_queue(dom); + return -ENOMEM; + } + + spin_lock_init(&queue->lock); + } + + return 0; +} + +static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) +{ + atomic64_inc(&dom->flush_start_cnt); + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + atomic64_inc(&dom->flush_finish_cnt); +} + +static inline bool queue_ring_full(struct flush_queue *queue) +{ + assert_spin_locked(&queue->lock); + + return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); +} + +#define queue_ring_for_each(i, q) \ + for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) + +static inline unsigned queue_ring_add(struct flush_queue *queue) +{ + unsigned idx = queue->tail; + + assert_spin_locked(&queue->lock); + queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; + + return idx; +} + +static inline void queue_ring_remove_head(struct flush_queue *queue) +{ + assert_spin_locked(&queue->lock); + queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; +} + +static void queue_ring_free_flushed(struct dma_ops_domain *dom, + struct flush_queue *queue) +{ + u64 counter = atomic64_read(&dom->flush_finish_cnt); + int idx; + + queue_ring_for_each(idx, queue) { + /* + * This assumes that counter values in the ring-buffer are + * monotonously rising. + */ + if (queue->entries[idx].counter >= counter) + break; + + free_iova_fast(&dom->iovad, + queue->entries[idx].iova_pfn, + queue->entries[idx].pages); + + queue_ring_remove_head(queue); + } +} + +static void queue_add(struct dma_ops_domain *dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue *queue; + unsigned long flags; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(dom->flush_queue); + spin_lock_irqsave(&queue->lock, flags); + + /* + * First remove the enries from the ring-buffer that are already + * flushed to make the below queue_ring_full() check less likely + */ + queue_ring_free_flushed(dom, queue); + + /* + * When ring-queue is full, flush the entries from the IOTLB so + * that we can free all entries with queue_ring_free_flushed() + * below. + */ + if (queue_ring_full(queue)) { + dma_ops_domain_flush_tlb(dom); + queue_ring_free_flushed(dom, queue); + } + + idx = queue_ring_add(queue); + + queue->entries[idx].iova_pfn = address; + queue->entries[idx].pages = pages; + queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); + + spin_unlock_irqrestore(&queue->lock, flags); + + if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) + mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); + + put_cpu_ptr(dom->flush_queue); +} + +static void queue_flush_timeout(unsigned long data) +{ + struct dma_ops_domain *dom = (struct dma_ops_domain *)data; + int cpu; + + atomic_set(&dom->flush_timer_on, 0); + + dma_ops_domain_flush_tlb(dom); + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + unsigned long flags; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + spin_lock_irqsave(&queue->lock, flags); + queue_ring_free_flushed(dom, queue); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1746,6 +1973,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); + if (timer_pending(&dom->flush_timer)) + del_timer(&dom->flush_timer); + + dma_ops_domain_free_flush_queue(dom); + put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -1784,6 +2016,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + if (dma_ops_domain_alloc_flush_queue(dma_dom)) + goto free_dma_dom; + + setup_timer(&dma_dom->flush_timer, queue_flush_timeout, + (unsigned long)dma_dom); + + atomic_set(&dma_dom->flush_timer_on, 0); + add_domain_to_list(&dma_dom->domain); return dma_dom; @@ -1846,7 +2086,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + + flags &= ~(DTE_FLAG_SA | 0xffffULL); flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; @@ -2227,92 +2468,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -static void __queue_flush(struct flush_queue *queue) -{ - struct protection_domain *domain; - unsigned long flags; - int idx; - - /* First flush TLB of all known domains */ - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_for_each_entry(domain, &amd_iommu_pd_list, list) - domain_flush_tlb(domain); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); - - /* Wait until flushes have completed */ - domain_flush_complete(NULL); - - for (idx = 0; idx < queue->next; ++idx) { - struct flush_queue_entry *entry; - - entry = queue->entries + idx; - - free_iova_fast(&entry->dma_dom->iovad, - entry->iova_pfn, - entry->pages); - - /* Not really necessary, just to make sure we catch any bugs */ - entry->dma_dom = NULL; - } - - queue->next = 0; -} - -static void queue_flush_all(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(&flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - if (queue->next > 0) - __queue_flush(queue); - spin_unlock_irqrestore(&queue->lock, flags); - } -} - -static void queue_flush_timeout(unsigned long unsused) -{ - atomic_set(&queue_timer_on, 0); - queue_flush_all(); -} - -static void queue_add(struct dma_ops_domain *dma_dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue_entry *entry; - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(&flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - if (queue->next == FLUSH_QUEUE_SIZE) - __queue_flush(queue); - - idx = queue->next++; - entry = queue->entries + idx; - - entry->iova_pfn = address; - entry->pages = pages; - entry->dma_dom = dma_dom; - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) - mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(&flush_queue); -} - - /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -2807,7 +2962,7 @@ static int init_reserved_iova_ranges(void) int __init amd_iommu_init_api(void) { - int ret, cpu, err = 0; + int ret, err = 0; ret = iova_cache_get(); if (ret) @@ -2817,18 +2972,6 @@ int __init amd_iommu_init_api(void) if (ret) return ret; - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * - sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) - goto out_put_iova; - - spin_lock_init(&queue->lock); - } - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2840,23 +2983,12 @@ int __init amd_iommu_init_api(void) err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); if (err) return err; - return 0; - -out_put_iova: - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - kfree(queue->entries); - } - return -ENOMEM; + return 0; } int __init amd_iommu_init_dma_ops(void) { - setup_timer(&queue_timer, queue_flush_timeout, 0); - atomic_set(&queue_timer_on, 0); - swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; @@ -3012,12 +3144,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: - /* - * First make sure the domain is no longer referenced from the - * flush queue - */ - queue_flush_all(); - /* Now release the domain */ dma_dom = to_dma_ops_domain(domain); dma_ops_domain_free(dma_dom); @@ -4281,7 +4407,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } -static struct irq_domain_ops amd_ir_domain_ops = { +static const struct irq_domain_ops amd_ir_domain_ops = { .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5a11328f4d98..5cc597b383c7 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -29,6 +29,7 @@ #include <linux/export.h> #include <linux/iommu.h> #include <linux/kmemleak.h> +#include <linux/crash_dump.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -236,6 +237,7 @@ enum iommu_init_state { IOMMU_INITIALIZED, IOMMU_NOT_FOUND, IOMMU_INIT_ERROR, + IOMMU_CMDLINE_DISABLED, }; /* Early ioapic and hpet maps from kernel command line */ @@ -588,6 +590,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu->cmd_buf_head = 0; + iommu->cmd_buf_tail = 0; iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); } @@ -1898,6 +1902,14 @@ static void init_device_table_dma(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + /* + * In kdump kernels in-flight DMA from the old kernel might + * cause IO_PAGE_FAULTs. There are no reports that a kdump + * actually failed because of that, so just disable fault + * reporting in the hardware to get rid of the messages + */ + if (is_kdump_kernel()) + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } @@ -2097,23 +2109,27 @@ static struct syscore_ops amd_iommu_syscore_ops = { .resume = amd_iommu_resume, }; -static void __init free_on_init_error(void) +static void __init free_iommu_resources(void) { kmemleak_free(irq_lookup_table); free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); + irq_lookup_table = NULL; kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); + amd_iommu_rlookup_table = NULL; free_pages((unsigned long)amd_iommu_alias_table, get_order(alias_table_size)); + amd_iommu_alias_table = NULL; free_pages((unsigned long)amd_iommu_dev_table, get_order(dev_table_size)); + amd_iommu_dev_table = NULL; free_iommu_all(); @@ -2183,6 +2199,7 @@ static void __init free_dma_resources(void) { free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); + amd_iommu_pd_alloc_bitmap = NULL; free_unity_maps(); } @@ -2307,6 +2324,9 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* Disable any previously enabled IOMMUs */ + disable_iommus(); + if (amd_iommu_irq_remap) amd_iommu_irq_remap = check_ioapic_information(); @@ -2410,6 +2430,13 @@ static int __init state_next(void) case IOMMU_IVRS_DETECTED: ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; + if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { + pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); + free_dma_resources(); + free_iommu_resources(); + init_state = IOMMU_CMDLINE_DISABLED; + ret = -EINVAL; + } break; case IOMMU_ACPI_FINISHED: early_enable_iommus(); @@ -2438,6 +2465,7 @@ static int __init state_next(void) break; case IOMMU_NOT_FOUND: case IOMMU_INIT_ERROR: + case IOMMU_CMDLINE_DISABLED: /* Error states => do nothing */ ret = -EINVAL; break; @@ -2451,13 +2479,14 @@ static int __init state_next(void) static int __init iommu_go_to_state(enum iommu_init_state state) { - int ret = 0; + int ret = -EINVAL; while (init_state != state) { - ret = state_next(); - if (init_state == IOMMU_NOT_FOUND || - init_state == IOMMU_INIT_ERROR) + if (init_state == IOMMU_NOT_FOUND || + init_state == IOMMU_INIT_ERROR || + init_stat |
