summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel-iommu.c
diff options
context:
space:
mode:
authorJoerg Roedel <jroedel@suse.de>2020-06-09 15:03:03 +0200
committerJoerg Roedel <jroedel@suse.de>2020-06-10 17:46:43 +0200
commit672cf6df9b8a3a6d70a6a5c30397f76fa40d3178 (patch)
treec6af5ef7ab0bf60f3472f1dcd5d96f325f90bc68 /drivers/iommu/intel-iommu.c
parentad8694bac410e5d72ad610dfb146f58bf2fe0365 (diff)
downloadlinux-672cf6df9b8a3a6d70a6a5c30397f76fa40d3178.tar.gz
linux-672cf6df9b8a3a6d70a6a5c30397f76fa40d3178.tar.bz2
linux-672cf6df9b8a3a6d70a6a5c30397f76fa40d3178.zip
iommu/vt-d: Move Intel IOMMU driver into subdirectory
Move all files related to the Intel IOMMU driver into its own subdirectory. Signed-off-by: Joerg Roedel <jroedel@suse.de> Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20200609130303.26974-3-joro@8bytes.org
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r--drivers/iommu/intel-iommu.c6207
1 files changed, 0 insertions, 6207 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
deleted file mode 100644
index 648a785e078a..000000000000
--- a/drivers/iommu/intel-iommu.c
+++ /dev/null
@@ -1,6207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2006-2014 Intel Corporation.
- *
- * Authors: David Woodhouse <dwmw2@infradead.org>,
- * Ashok Raj <ashok.raj@intel.com>,
- * Shaohua Li <shaohua.li@intel.com>,
- * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
- * Fenghua Yu <fenghua.yu@intel.com>
- * Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt) "DMAR: " fmt
-#define dev_fmt(fmt) pr_fmt(fmt)
-
-#include <linux/init.h>
-#include <linux/bitmap.h>
-#include <linux/debugfs.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dmar.h>
-#include <linux/dma-mapping.h>
-#include <linux/mempool.h>
-#include <linux/memory.h>
-#include <linux/cpu.h>
-#include <linux/timer.h>
-#include <linux/io.h>
-#include <linux/iova.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/syscore_ops.h>
-#include <linux/tboot.h>
-#include <linux/dmi.h>
-#include <linux/pci-ats.h>
-#include <linux/memblock.h>
-#include <linux/dma-contiguous.h>
-#include <linux/dma-direct.h>
-#include <linux/crash_dump.h>
-#include <linux/numa.h>
-#include <linux/swiotlb.h>
-#include <asm/irq_remapping.h>
-#include <asm/cacheflush.h>
-#include <asm/iommu.h>
-#include <trace/events/intel_iommu.h>
-
-#include "irq_remapping.h"
-#include "intel-pasid.h"
-
-/* The root table and every context table each occupy one VT-d page. */
-#define ROOT_SIZE		VTD_PAGE_SIZE
-#define CONTEXT_SIZE		VTD_PAGE_SIZE
-
-/*
- * PCI device classification helpers.  The argument is parenthesised so
- * that any pointer-valued expression (for example "p + 1") can be passed,
- * in the same way IS_AZALIA() already allows.
- */
-#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
-#define IS_USB_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_SERIAL_USB)
-#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
-#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
-
-#define IOAPIC_RANGE_START	(0xfee00000)
-#define IOAPIC_RANGE_END	(0xfeefffff)
-#define IOVA_START_ADDR		(0x1000)
-
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
-
-#define MAX_AGAW_WIDTH 64
-#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
-
-/*
- * Largest page frame number / address representable with a guest address
- * width of @gaw bits.  @gaw is parenthesised so that expression arguments
- * (e.g. a conditional) expand with the intended precedence.
- */
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
-
-/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
-   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
-#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
-				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
-#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-/* Convert an address to a page frame number using the MM page size. */
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-
-/* page table handling: each level indexes LEVEL_STRIDE bits of the pfn */
-#define LEVEL_STRIDE (9)
-#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
-
-/*
- * This bitmap is used to advertise the page sizes our hardware supports
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB (every bit from bit 12 up
- * is set in the mask below).
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
- */
-#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
-
-/* Page-table depth for an AGAW index: always two more than the index. */
-static inline int agaw_to_level(int agaw)
-{
-	const int base_levels = 2;
-
-	return base_levels + agaw;
-}
-
-/* Address width in bits for an AGAW index, capped at MAX_AGAW_WIDTH. */
-static inline int agaw_to_width(int agaw)
-{
-	int width = 30 + agaw * LEVEL_STRIDE;
-
-	return min_t(int, width, MAX_AGAW_WIDTH);
-}
-
-/* Inverse of agaw_to_width(): round the bits above 30 up to whole levels. */
-static inline int width_to_agaw(int width)
-{
-	int bits_above_base = width - 30;
-
-	return DIV_ROUND_UP(bits_above_base, LEVEL_STRIDE);
-}
-
-/* Bit position within a pfn at which the index for @level starts. */
-static inline unsigned int level_to_offset_bits(int level)
-{
-	int levels_below = level - 1;
-
-	return levels_below * LEVEL_STRIDE;
-}
-
-/* Index into the page table at @level that covers @pfn. */
-static inline int pfn_level_offset(unsigned long pfn, int level)
-{
-	unsigned int shift = level_to_offset_bits(level);
-
-	return (pfn >> shift) & LEVEL_MASK;
-}
-
-/* Mask that clears the pfn bits below the index field of @level. */
-static inline unsigned long level_mask(int level)
-{
-	unsigned int shift = level_to_offset_bits(level);
-
-	return -1UL << shift;
-}
-
-/* Number of pfns spanned by one entry at @level. */
-static inline unsigned long level_size(int level)
-{
-	unsigned int shift = level_to_offset_bits(level);
-
-	return 1UL << shift;
-}
-
-/* Round @pfn up to the next boundary of an entry at @level. */
-static inline unsigned long align_to_level(unsigned long pfn, int level)
-{
-	unsigned long span = level_size(level);
-	unsigned long rounded = pfn + span - 1;
-
-	return rounded & level_mask(level);
-}
-
-/*
- * Number of VT-d pages covered by a single PTE at page-table level @lvl.
- *
- * The shift count is clamped to MAX_AGAW_PFN_WIDTH.  The shifted constant
- * is 'unsigned long', matching the return type, because shifting a plain
- * 'int' by 31 bits or more is undefined behaviour and the clamp allows
- * counts well beyond that.
- */
-static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
-{
-	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
-}
-
-/*
- * VT-d pages must never be larger than MM pages, otherwise the shift
- * counts used below would be negative and nothing here could work.
- */
-
-/* Convert a VT-d page frame number into an MM page frame number. */
-static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
-{
-	const unsigned int shift = PAGE_SHIFT - VTD_PAGE_SHIFT;
-
-	return dma_pfn >> shift;
-}
-
-/* Convert an MM page frame number into a VT-d page frame number. */
-static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
-{
-	const unsigned int shift = PAGE_SHIFT - VTD_PAGE_SHIFT;
-
-	return mm_pfn << shift;
-}
-
-/* VT-d page frame number of the memory described by @pg. */
-static inline unsigned long page_to_dma_pfn(struct page *pg)
-{
-	unsigned long mm_pfn = page_to_pfn(pg);
-
-	return mm_to_dma_pfn(mm_pfn);
-}
-
-/* VT-d page frame number of the page containing kernel address @p. */
-static inline unsigned long virt_to_dma_pfn(void *p)
-{
-	struct page *pg = virt_to_page(p);
-
-	return page_to_dma_pfn(pg);
-}
-
-/* global iommu list, set NULL for ignored DMAR units; indexed by the IOMMU index used in domain_get_iommu() */
-static struct intel_iommu **g_iommus;
-
-static void __init check_tylersburg_isoch(void);
-static int rwbf_quirk;
-
-/*
- * set to 1 to panic kernel if can't successfully enable VT-d
- * (used when kernel is launched w/ TXT)
- */
-static int force_on = 0;
-int intel_iommu_tboot_noforce; /* set to 1 by the "intel_iommu=tboot_noforce" boot option */
-static int no_platform_optin; /* set to 1 by the "intel_iommu=off" boot option */
-
-#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) /* root entries that fit in one VT-d page */
-
-/*
- * Lower Context Table Pointer (LCTP) of a root entry: the page-aligned
- * address held in the low word, or 0 when its present bit (bit 0) is clear.
- */
-static phys_addr_t root_entry_lctp(struct root_entry *re)
-{
-	return (re->lo & 1) ? (re->lo & VTD_PAGE_MASK) : 0;
-}
-
-/*
- * Upper Context Table Pointer (UCTP) of a root entry: the page-aligned
- * address held in the high word, or 0 when its present bit (bit 0) is clear.
- */
-static phys_addr_t root_entry_uctp(struct root_entry *re)
-{
-	return (re->hi & 1) ? (re->hi & VTD_PAGE_MASK) : 0;
-}
-
-/* Clear bit 11 (PASID enable) in the low word of a context entry. */
-static inline void context_clear_pasid_enable(struct context_entry *context)
-{
-	const u64 pasid_en = 1ULL << 11;
-
-	context->lo &= ~pasid_en;
-}
-
-/* True when bit 11 (PASID enable) of the low word is set. */
-static inline bool context_pasid_enabled(struct context_entry *context)
-{
-	const u64 pasid_en = 1ULL << 11;
-
-	return (context->lo & pasid_en) != 0;
-}
-
-/* Set bit 3 of the high word, which this file uses as the "copied" marker. */
-static inline void context_set_copied(struct context_entry *context)
-{
-	const u64 copied = 1ULL << 3;
-
-	context->hi |= copied;
-}
-
-/* True when the "copied" marker (bit 3 of the high word) is set. */
-static inline bool context_copied(struct context_entry *context)
-{
-	const u64 copied = 1ULL << 3;
-
-	return (context->hi & copied) != 0;
-}
-
-/* Raw present bit (bit 0 of the low word). */
-static inline bool __context_present(struct context_entry *context)
-{
-	return (context->lo & 1) != 0;
-}
-
-/*
- * An entry counts as present when its present bit is set and it is either
- * PASID-enabled or not marked as copied.
- */
-bool context_present(struct context_entry *context)
-{
-	if (!__context_present(context))
-		return false;
-
-	return context_pasid_enabled(context) || !context_copied(context);
-}
-
-/* Set the present bit (bit 0 of the low word). */
-static inline void context_set_present(struct context_entry *context)
-{
-	context->lo |= 1;
-}
-
-/* Clear bit 1 of the low word, leaving every other bit untouched. */
-static inline void context_set_fault_enable(struct context_entry *context)
-{
-	context->lo &= ~(1ULL << 1);
-}
-
-/* Replace bits 3:2 of the low word with the two low bits of @value. */
-static inline void context_set_translation_type(struct context_entry *context,
-						unsigned long value)
-{
-	context->lo &= ~(3ULL << 2);
-	context->lo |= (value & 3) << 2;
-}
-
-/* Replace the page-aligned address field of the low word with @value. */
-static inline void context_set_address_root(struct context_entry *context,
-					    unsigned long value)
-{
-	context->lo &= ~VTD_PAGE_MASK;
-	context->lo |= value & VTD_PAGE_MASK;
-}
-
-/* OR the three low bits of @value into bits 2:0 of the high word. */
-static inline void context_set_address_width(struct context_entry *context,
-					     unsigned long value)
-{
-	context->hi |= value & 7;
-}
-
-/* OR the 16 low bits of @value into bits 23:8 of the high word. */
-static inline void context_set_domain_id(struct context_entry *context,
-					 unsigned long value)
-{
-	context->hi |= (value & 0xffff) << 8;
-}
-
-/* Extract the 16-bit domain id from bits 23:8 of the high word. */
-static inline int context_domain_id(struct context_entry *c)
-{
-	return (c->hi >> 8) & 0xffff;
-}
-
-/* Zero both words of a context entry. */
-static inline void context_clear_entry(struct context_entry *context)
-{
-	context->hi = 0;
-	context->lo = 0;
-}
-
-/*
- * This domain is a static identity-mapping domain.
- * 1. This domain creates a static 1:1 mapping to all usable memory.
- * 2. It maps to each iommu if successful.
- * 3. Each iommu maps to this domain if successful.
- */
-static struct dmar_domain *si_domain;
-static int hw_pass_through = 1;
-
-#define for_each_domain_iommu(idx, domain) \
- for (idx = 0; idx < g_num_of_iommus; idx++) \
- if (domain->iommu_refcnt[idx])
-
-struct dmar_rmrr_unit {
- struct list_head list; /* link in dmar_rmrr_units (walked by for_each_rmrr_units) */
- struct acpi_dmar_header *hdr; /* ACPI header */
- u64 base_address; /* reserved base address */
- u64 end_address; /* reserved end address */
- struct dmar_dev_scope *devices; /* target devices */
- int devices_cnt; /* target device count */
-};
-
-struct dmar_atsr_unit {
- struct list_head list; /* list of ATSR units (presumably linked on dmar_atsr_units; verify) */
- struct acpi_dmar_header *hdr; /* ACPI header */
- struct dmar_dev_scope *devices; /* target devices */
- int devices_cnt; /* target device count */
- u8 include_all:1; /* include all ports */
-};
-
-static LIST_HEAD(dmar_atsr_units);
-static LIST_HEAD(dmar_rmrr_units);
-
-#define for_each_rmrr_units(rmrr) \
- list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-
-/* upper bound for IOMMU indices: for_each_domain_iommu() iterates [0, g_num_of_iommus) */
-static int g_num_of_iommus;
-
-static void domain_exit(struct dmar_domain *domain);
-static void domain_remove_dev_info(struct dmar_domain *domain);
-static void dmar_remove_one_dev_info(struct device *dev);
-static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static int intel_iommu_attach_device(struct iommu_domain *domain,
- struct device *dev);
-static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
- dma_addr_t iova);
-
-#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
-int dmar_disabled = 0;
-#else
-int dmar_disabled = 1;
-#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
-
-#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
-int intel_iommu_sm = 1;
-#else
-int intel_iommu_sm;
-#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
-
-int intel_iommu_enabled = 0;
-EXPORT_SYMBOL_GPL(intel_iommu_enabled);
-
-static int dmar_map_gfx = 1; /* cleared by "intel_iommu=igfx_off" */
-static int dmar_forcedac; /* set by "intel_iommu=forcedac" */
-static int intel_iommu_strict; /* set by "intel_iommu=strict" */
-static int intel_iommu_superpage = 1; /* cleared by "intel_iommu=sp_off" */
-static int iommu_identity_mapping;
-static int intel_no_bounce; /* set by "intel_iommu=nobounce" */
-
-#define IDENTMAP_GFX 2
-#define IDENTMAP_AZALIA 4
-
-int intel_iommu_gfx_mapped;
-EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-
-#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) /* sentinel tested by iommu_dummy() */
-#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) /* sentinel tested by attach_deferred() */
-/*
- * Return the per-device IOMMU data stored in dev->archdata.iommu, or NULL
- * when @dev is NULL or the slot holds one of the two sentinel values
- * (DUMMY_DEVICE_DOMAIN_INFO / DEFER_DEVICE_DOMAIN_INFO).
- */
-struct device_domain_info *get_domain_info(struct device *dev)
-{
-	struct device_domain_info *priv = dev ? dev->archdata.iommu : NULL;
-	bool is_sentinel = priv == DUMMY_DEVICE_DOMAIN_INFO ||
-			   priv == DEFER_DEVICE_DOMAIN_INFO;
-
-	return unlikely(is_sentinel) ? NULL : priv;
-}
-
-DEFINE_SPINLOCK(device_domain_lock); /* held by for_each_device_domain() while it walks device_domain_list */
-static LIST_HEAD(device_domain_list); /* entries are linked through device_domain_info.global */
-
-#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
- to_pci_dev(d)->untrusted)
-
-/*
- * Walk device_domain_list under device_domain_lock and invoke @fn on each
- * element, passing @data through.  The walk stops at the first non-zero
- * return from @fn and that value is returned; otherwise 0 is returned.
- */
-int for_each_device_domain(int (*fn)(struct device_domain_info *info,
-				     void *data), void *data)
-{
-	struct device_domain_info *cur;
-	unsigned long irqflags;
-	int rc = 0;
-
-	spin_lock_irqsave(&device_domain_lock, irqflags);
-	list_for_each_entry(cur, &device_domain_list, global) {
-		rc = fn(cur, data);
-		if (rc)
-			break;
-	}
-	spin_unlock_irqrestore(&device_domain_lock, irqflags);
-
-	return rc;
-}
-
-const struct iommu_ops intel_iommu_ops; /* declared early; the initialised definition is not visible in this part of the file */
-
-/* True when VTD_FLAG_TRANS_PRE_ENABLED is recorded in iommu->flags. */
-static bool translation_pre_enabled(struct intel_iommu *iommu)
-{
-	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED) != 0;
-}
-
-/* Drop the VTD_FLAG_TRANS_PRE_ENABLED marker from iommu->flags. */
-static void clear_translation_pre_enabled(struct intel_iommu *iommu)
-{
-	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
-}
-
-/*
- * Read the global status register and record VTD_FLAG_TRANS_PRE_ENABLED
- * when its DMA_GSTS_TES bit is already set.
- */
-static void init_translation_status(struct intel_iommu *iommu)
-{
-	if (readl(iommu->reg + DMAR_GSTS_REG) & DMA_GSTS_TES)
-		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
-}
-
-/*
- * Parse the "intel_iommu=" boot parameter.
- *
- * @str is a comma-separated list of options; each recognised option
- * (matched by prefix) updates the corresponding tunable and logs a
- * message.  Unrecognised options are skipped silently.
- *
- * Return: -EINVAL when @str is NULL, otherwise 1.  A __setup() handler
- * must return non-zero to mark the parameter as consumed; returning 0
- * makes the boot code treat "intel_iommu=..." as unhandled and hand it
- * on to init.
- */
-static int __init intel_iommu_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	while (*str) {
-		if (!strncmp(str, "on", 2)) {
-			dmar_disabled = 0;
-			pr_info("IOMMU enabled\n");
-		} else if (!strncmp(str, "off", 3)) {
-			dmar_disabled = 1;
-			no_platform_optin = 1;
-			pr_info("IOMMU disabled\n");
-		} else if (!strncmp(str, "igfx_off", 8)) {
-			dmar_map_gfx = 0;
-			pr_info("Disable GFX device mapping\n");
-		} else if (!strncmp(str, "forcedac", 8)) {
-			pr_info("Forcing DAC for PCI devices\n");
-			dmar_forcedac = 1;
-		} else if (!strncmp(str, "strict", 6)) {
-			pr_info("Disable batched IOTLB flush\n");
-			intel_iommu_strict = 1;
-		} else if (!strncmp(str, "sp_off", 6)) {
-			pr_info("Disable supported super page\n");
-			intel_iommu_superpage = 0;
-		} else if (!strncmp(str, "sm_on", 5)) {
-			pr_info("Intel-IOMMU: scalable mode supported\n");
-			intel_iommu_sm = 1;
-		} else if (!strncmp(str, "tboot_noforce", 13)) {
-			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
-			intel_iommu_tboot_noforce = 1;
-		} else if (!strncmp(str, "nobounce", 8)) {
-			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
-			intel_no_bounce = 1;
-		}
-
-		/* Advance to the next option, tolerating repeated commas. */
-		str += strcspn(str, ",");
-		while (*str == ',')
-			str++;
-	}
-
-	return 1;
-}
-__setup("intel_iommu=", intel_iommu_setup);
-
-static struct kmem_cache *iommu_domain_cache; /* backs alloc_domain_mem() / free_domain_mem() */
-static struct kmem_cache *iommu_devinfo_cache; /* backs alloc_devinfo_mem() / free_devinfo_mem() */
-
-/*
- * Look up the domain registered for domain id @did on @iommu.  The table
- * is two-level: the high byte of @did selects a 256-entry array and the
- * low byte selects the slot.  Returns NULL when that array is absent.
- */
-static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
-{
-	int hi = did >> 8;
-	struct dmar_domain **slots = iommu->domains[hi];
-
-	return slots ? slots[did & 0xff] : NULL;
-}
-
-/*
- * Record @domain under domain id @did on @iommu, allocating the 256-entry
- * second-level array on first use.  If that allocation fails a warning is
- * raised and nothing is stored.
- */
-static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
-			     struct dmar_domain *domain)
-{
-	int hi = did >> 8;
-	struct dmar_domain **slots = iommu->domains[hi];
-
-	if (!slots) {
-		slots = kzalloc(256 * sizeof(struct dmar_domain *), GFP_ATOMIC);
-		iommu->domains[hi] = slots;
-	}
-
-	if (!WARN_ON(!slots))
-		slots[did & 0xff] = domain;
-}
-
-/*
- * Allocate one zeroed page on NUMA node @node with GFP_ATOMIC and return
- * its kernel virtual address, or NULL when the allocation fails.
- */
-void *alloc_pgtable_page(int node)
-{
-	struct page *pg = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
-
-	return pg ? page_address(pg) : NULL;
-}
-
-/* Release a page by its kernel virtual address via free_page(). */
-void free_pgtable_page(void *vaddr)
-{
-	unsigned long addr = (unsigned long)vaddr;
-
-	free_page(addr);
-}
-
-/* Take one object from iommu_domain_cache (GFP_ATOMIC). */
-static inline void *alloc_domain_mem(void)
-{
-	void *obj = kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
-
-	return obj;
-}
-
-/* Give an object back to iommu_domain_cache. */
-static void free_domain_mem(void *vaddr)
-{
-	kmem_cache_free(iommu_domain_cache, vaddr);
-}
-
-/* Take one object from iommu_devinfo_cache (GFP_ATOMIC). */
-static inline void *alloc_devinfo_mem(void)
-{
-	void *obj = kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
-
-	return obj;
-}
-
-/* Give an object back to iommu_devinfo_cache. */
-static inline void free_devinfo_mem(void *vaddr)
-{
-	kmem_cache_free(iommu_devinfo_cache, vaddr);
-}
-
-/* Non-zero (the raw flag bit) when DOMAIN_FLAG_STATIC_IDENTITY is set. */
-static inline int domain_type_is_si(struct dmar_domain *domain)
-{
-	int si_bit = domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
-
-	return si_bit;
-}
-
-/* True when DOMAIN_FLAG_USE_FIRST_LEVEL is set in the domain flags. */
-static inline bool domain_use_first_level(struct dmar_domain *domain)
-{
-	return (domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL) != 0;
-}
-
-/*
- * Non-zero when @pfn fits in the address width of @domain.  Widths of
- * BITS_PER_LONG or more accept every pfn (and avoid an oversized shift).
- */
-static inline int domain_pfn_supported(struct dmar_domain *domain,
-				       unsigned long pfn)
-{
-	int pfn_bits = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-
-	return pfn_bits >= BITS_PER_LONG || !(pfn >> pfn_bits);
-}
-
-/*
- * Starting from the AGAW that corresponds to @max_gaw, walk downwards and
- * return the first AGAW whose bit is set in the SAGAW capability field of
- * @iommu.  Returns -1 when none is set.
- */
-static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
-{
-	unsigned long supported = cap_sagaw(iommu->cap);
-	int candidate = width_to_agaw(max_gaw);
-
-	while (candidate >= 0 && !test_bit(candidate, &supported))
-		candidate--;
-
-	return candidate;
-}
-
-/*
- * Largest AGAW supported by @iommu, searching down from MAX_AGAW_WIDTH.
- */
-int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
-{
-	const int widest = MAX_AGAW_WIDTH;
-
-	return __iommu_calculate_agaw(iommu, widest);
-}
-
-/*
- * Pick the AGAW to use for @iommu.
- * "SAGAW" may differ between iommus, so start from the default domain
- * address width and fall back to the nearest smaller AGAW that this
- * iommu supports.
- */
-int iommu_calculate_agaw(struct intel_iommu *iommu)
-{
-	const int preferred = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-
-	return __iommu_calculate_agaw(iommu, preferred);
-}
-
-/*
- * Return the iommu with the lowest index that @domain holds a reference
- * on.  Only meaningful for IOMMU_DOMAIN_DMA domains: any other type
- * triggers a warning and yields NULL, as does a domain with no iommu.
- */
-struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
-{
-	int first;
-
-	/* si_domain and vm domain should not get here. */
-	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
-		return NULL;
-
-	/* Stop at the first index with a non-zero reference count. */
-	for_each_domain_iommu(first, domain)
-		break;
-
-	if (first < 0 || first >= g_num_of_iommus)
-		return NULL;
-
-	return g_iommus[first];
-}
-
-/*
- * Recompute domain->iommu_coherency: 1 only if every iommu the domain is
- * attached to reports coherency in its extended capabilities.  When the
- * domain has no iommu attached, all active iommus are consulted instead.
- */
-static void domain_update_iommu_coherency(struct dmar_domain *domain)
-{
-	struct dmar_drhd_unit *unit;
-	struct intel_iommu *hw;
-	bool attached = false;
-	int idx;
-
-	domain->iommu_coherency = 1;
-
-	for_each_domain_iommu(idx, domain) {
-		attached = true;
-		if (!ecap_coherent(g_iommus[idx]->ecap)) {
-			domain->iommu_coherency = 0;
-			break;
-		}
-	}
-
-	if (!attached) {
-		/* No hardware attached; use lowest common denominator */
-		rcu_read_lock();
-		for_each_active_iommu(hw, unit) {
-			if (!ecap_coherent(hw->ecap)) {
-				domain->iommu_coherency = 0;
-				break;
-			}
-		}
-		rcu_read_unlock();
-	}
-}
-
-/*
- * Return 1 when every active iommu other than @skip reports snoop-control
- * support in its extended capabilities, 0 as soon as one does not.
- */
-static int domain_update_iommu_snooping(struct intel_iommu *skip)
-{
-	struct dmar_drhd_unit *unit;
-	struct intel_iommu *hw;
-	int all_snoop = 1;
-
-	rcu_read_lock();
-	for_each_active_iommu(hw, unit) {
-		if (hw != skip && !ecap_sc_support(hw->ecap)) {
-			all_snoop = 0;
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return all_snoop;
-}
-
-/*
- * Compute the super-page level usable across all active iommus except
- * @skip.  Returns 0 when super pages are disabled globally; otherwise
- * fls() of the capability mask common to the iommus examined.
- */
-static int domain_update_iommu_superpage(struct dmar_domain *domain,
-					 struct intel_iommu *skip)
-{
-	struct dmar_drhd_unit *unit;
-	struct intel_iommu *hw;
-	int common = 0x3;
-
-	if (!intel_iommu_superpage)
-		return 0;
-
-	/* set iommu_superpage to the smallest common denominator */
-	rcu_read_lock();
-	for_each_active_iommu(hw, unit) {
-		if (hw == skip)
-			continue;
-
-		if (domain && domain_use_first_level(domain)) {
-			/* First level: drop to mask 0x1 without 1GP support. */
-			if (!cap_fl1gp_support(hw->cap))
-				common = 0x1;
-		} else {
-			common &= cap_super_page_val(hw->cap);
-		}
-
-		if (!common)
-			break;
-	}
-	rcu_read_unlock();
-
-	return fls(common);
-}
-
-/*
- * Capabilities may differ between iommus; refresh the coherency, snooping
- * and super-page settings cached in @domain.
- */
-static void domain_update_iommu_cap(struct dmar_domain *domain)
-{
-	int snooping, superpage;
-
-	domain_update_iommu_coherency(domain);
-
-	snooping = domain_update_iommu_snooping(NULL);
-	domain->iommu_snooping = snooping;
-
-	superpage = domain_update_iommu_superpage(domain, NULL);
-	domain->iommu_superpage = superpage;
-}
-
-/*
- * Return the context entry for (@bus, @devfn) on @iommu.
- *
- * When scalable mode is supported, devfn values of 0x80 and above use the
- * high half of the root entry, and the index into the table is doubled.
- * If the selected half of the root entry is not present, a context table
- * is allocated and installed when @alloc is non-zero; otherwise, or when
- * the allocation fails, NULL is returned.
- */
-struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
-					 u8 devfn, int alloc)
-{
-	struct root_entry *root = &iommu->root_entry[bus];
-	struct context_entry *table;
-	unsigned long table_phys;
-	u64 *slot = &root->lo;
-
-	if (sm_supported(iommu)) {
-		if (devfn >= 0x80) {
-			devfn -= 0x80;
-			slot = &root->hi;
-		}
-		devfn *= 2;
-	}
-
-	if (*slot & 1) {
-		table = phys_to_virt(*slot & VTD_PAGE_MASK);
-		return &table[devfn];
-	}
-
-	if (!alloc)
-		return NULL;
-
-	table = alloc_pgtable_page(iommu->node);
-	if (!table)
-		return NULL;
-
-	/* Flush the new table before the root entry that points at it. */
-	__iommu_flush_cache(iommu, (void *)table, CONTEXT_SIZE);
-	table_phys = virt_to_phys((void *)table);
-	*slot = table_phys | 1;
-	__iommu_flush_cache(iommu, slot, sizeof(*slot));
-
-	return &table[devfn];
-}
-
-/* Non-zero when @dev carries the DUMMY_DEVICE_DOMAIN_INFO sentinel. */
-static int iommu_dummy(struct device *dev)
-{
-	void *priv = dev->archdata.iommu;
-
-	return priv == DUMMY_DEVICE_DOMAIN_INFO;
-}
-
-/* True when @dev carries the DEFER_DEVICE_DOMAIN_INFO sentinel. */
-static bool attach_deferred(struct device *dev)
-{
-	void *priv = dev->archdata.iommu;
-
-	return priv == DEFER_DEVICE_DOMAIN_INFO;
-}
-
-/**
- * is_downstream_to_pci_bridge - check whether a device sits below a bridge
- * @dev: device that may live in the PCI sub-hierarchy of @bridge
- * @bridge: the candidate PCI-PCI bridge
- *
- * Both devices must be PCI devices and @bridge must have a subordinate
- * bus; @dev is downstream when its bus number lies between the number of
- * that subordinate bus and the end of its bus-number resource.
- *
- * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
- */
-static bool
-is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
-{
-	struct pci_dev *child, *upstream;
-
-	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
-		return false;
-
-	child = to_pci_dev(dev);
-	upstream = to_pci_dev(bridge);
-
-	if (!upstream->subordinate)
-		return false;
-
-	return upstream->subordinate->number <= child->bus->number &&
-	       upstream->subordinate->busn_res.end >= child->bus->number;
-}
-
-/*
- * Find the iommu whose device scope covers @dev and report, through @bus
- * and @devfn, the bus/devfn pair to use for it.  Returns NULL for devices
- * carrying the dummy sentinel and when no active iommu matches.
- */
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
-{
-	struct dmar_drhd_unit *unit = NULL;
-	struct intel_iommu *iommu;
-	struct pci_dev *pdev = NULL;
-	struct device *scope_dev;
-	u16 seg = 0;
-	int idx;
-
-	if (iommu_dummy(dev))
-		return NULL;
-
-	if (dev_is_pci(dev)) {
-		struct pci_dev *physfn;
-
-		pdev = pci_real_dma_dev(to_pci_dev(dev));
-
-		/*
-		 * VFs aren't listed in scope tables; match on the PF
-		 * instead to find the IOMMU.
-		 */
-		physfn = pci_physfn(pdev);
-		dev = &physfn->dev;
-		seg = pci_domain_nr(pdev->bus);
-	} else if (has_acpi_companion(dev)) {
-		dev = &ACPI_COMPANION(dev)->dev;
-	}
-
-	rcu_read_lock();
-	for_each_active_iommu(iommu, unit) {
-		if (pdev && seg != unit->segment)
-			continue;
-
-		for_each_active_dev_scope(unit->devices,
-					  unit->devices_cnt, idx, scope_dev) {
-			if (scope_dev == dev) {
-				/*
-				 * A VF keeps its own BDF#, not that of the
-				 * PF used for the lookup.  Strictly speaking
-				 * this would do for every PCI device; the
-				 * scope-table BDF# is only needed for ACPI
-				 * matches.
-				 */
-				if (pdev && pdev->is_virtfn)
-					goto got_pdev;
-
-				*bus = unit->devices[idx].bus;
-				*devfn = unit->devices[idx].devfn;
-				goto out;
-			}
-
-			if (is_downstream_to_pci_bridge(dev, scope_dev))
-				goto got_pdev;
-		}
-
-		if (pdev && unit->include_all) {
-		got_pdev:
-			*bus = pdev->bus->number;
-			*devfn = pdev->devfn;
-			goto out;
-		}
-	}
-	iommu = NULL;
- out:
-	rcu_read_unlock();
-
-	return iommu;
-}
-
-/*
- * Flush @size bytes at @addr from the CPU cache, but only for domains
- * whose iommu_coherency is zero.
- */
-static void domain_flush_cache(struct dmar_domain *domain,
-			       void *addr, int size)
-{
-	if (domain->iommu_coherency)
-		return;
-
-	clflush_cache_range(addr, size);
-}
-
-/*
- * Report, under iommu->lock, whether a context entry exists for
- * (@bus, @devfn) and is present according to context_present().
- */
-static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
-{
-	struct context_entry *ctx;
-	unsigned long irqflags;
-	int mapped;
-
-	spin_lock_irqsave(&iommu->lock, irqflags);
-	ctx = iommu_context_addr(iommu, bus, devfn, 0);
-	mapped = ctx ? context_present(ctx) : 0;
-	spin_unlock_irqrestore(&iommu->lock, irqflags);
-
-	return mapped;
-}
-
-/*
- * Under iommu->lock, free every context table referenced from the root
- * table (both halves of each root entry when scalable mode is supported),
- * then the root table itself.  Does nothing if there is no root table.
- */
-static void free_context_table(struct intel_iommu *iommu)
-{
-	struct context_entry *ctx;
-	unsigned long irqflags;
-	int bus;
-
-	spin_lock_irqsave(&iommu->lock, irqflags);
-	if (iommu->root_entry) {
-		for (bus = 0; bus < ROOT_ENTRY_NR; bus++) {
-			ctx = iommu_context_addr(iommu, bus, 0, 0);
-			if (ctx)
-				free_pgtable_page(ctx);
-
-			if (sm_supported(iommu)) {
-				/* Upper table, reached through devfn 0x80. */
-				ctx = iommu_context_addr(iommu, bus, 0x80, 0);
-				if (ctx)
-					free_pgtable_page(ctx);
-			}
-		}
-		free_pgtable_page(iommu->root_entry);
-		iommu->root_entry = NULL;
-	}
-	spin_unlock_irqrestore(&iommu->lock, irqflags);
-}
-
-/*
- * Walk the page table of @domain towards @pfn and return the PTE reached.
- *
- * With *@target_level == 0 the walk stops at the first superpage or
- * non-present entry and the level reached is written back through
- * @target_level.  Otherwise the walk stops at *@target_level, allocating
- * intermediate tables as needed.  Returns NULL when @pfn is outside the
- * domain's address width or a table allocation fails.
- */
-static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn, int *target_level)
-{
-	int level = agaw_to_level(domain->agaw);
-	struct dma_pte *table, *pte;
-
-	BUG_ON(!domain->pgd);
-
-	/* Address beyond IOMMU's addressing capabilities. */
-	if (!domain_pfn_supported(domain, pfn))
-		return NULL;
-
-	table = domain->pgd;
-
-	for (;;) {
-		pte = &table[pfn_level_offset(pfn, level)];
-
-		if (!*target_level &&
-		    (dma_pte_superpage(pte) || !dma_pte_present(pte)))
-			break;
-		if (level == *target_level)
-			break;
-
-		if (!dma_pte_present(pte)) {
-			void *new_table = alloc_pgtable_page(domain->nid);
-			uint64_t pteval;
-
-			if (!new_table)
-				return NULL;
-
-			domain_flush_cache(domain, new_table, VTD_PAGE_SIZE);
-			pteval = ((uint64_t)virt_to_dma_pfn(new_table) << VTD_PAGE_SHIFT) |
-				 DMA_PTE_READ | DMA_PTE_WRITE;
-			if (domain_use_first_level(domain))
-				pteval |= DMA_FL_PTE_XD;
-
-			/* Lost the race to install a table: keep the winner's. */
-			if (cmpxchg64(&pte->val, 0ULL, pteval))
-				free_pgtable_page(new_table);
-			else
-				domain_flush_cache(domain, pte, sizeof(*pte));
-		}
-		if (level == 1)
-			break;
-
-		table = phys_to_virt(dma_pte_addr(pte));
-		level--;
-	}
-
-	if (!*target_level)
-		*target_level = level;
-
-	return pte;
-}
-
-/*
- * Return the PTE covering @pfn at @level, walking down from the top level
- * of @domain.  If a superpage is met first, that PTE is returned and its
- * level is stored in *@large_page.  If a non-present entry is met first,
- * its level is stored in *@large_page and NULL is returned.
- */
-static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
-					 unsigned long pfn,
-					 int level, int *large_page)
-{
-	struct dma_pte *table = domain->pgd;
-	struct dma_pte *pte;
-	int cur;
-
-	for (cur = agaw_to_level(domain->agaw); level <= cur; cur--) {
-		pte = &table[pfn_level_offset(pfn, cur)];
-		if (cur == level)
-			return pte;
-
-		if (!dma_pte_present(pte)) {
-			*large_page = cur;
-			return NULL;
-		}
-
-		if (dma_pte_superpage(pte)) {
-			*large_page = cur;
-			return pte;
-		}
-
-		table = phys_to_virt(dma_pte_addr(pte));
-	}
-
-	return NULL;
-}
-
-/*
- * Clear the last-level PTEs covering [@start_pfn, @last_pfn] in @domain.
- * The caller must follow up with a TLB flush.
- *
- * Both pfns must be within the domain's address width and @start_pfn must
- * not exceed @last_pfn; violations hit BUG_ON().
- */
-static void dma_pte_clear_range(struct dmar_domain *domain,
-				unsigned long start_pfn,
-				unsigned long last_pfn)
-{
-	/*
-	 * 'int' to match the 'int *' out-parameter of dma_pfn_level_pte();
-	 * the value is always a small positive level number.
-	 */
-	int large_page;
-	struct dma_pte *first_pte, *pte;
-
-	BUG_ON(!domain_pfn_supported(domain, start_pfn));
-	BUG_ON(!domain_pfn_supported(domain, last_pfn));
-	BUG_ON(start_pfn > last_pfn);
-
-	/* we don't need lock here; nobody else touches the iova range */
-	do {
-		large_page = 1;
-		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
-		if (!pte) {
-			/* Nothing mapped here: skip to the next table boundary. */
-			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
-			continue;
-		}
-		do {
-			dma_clear_pte(pte);
-			start_pfn += lvl_to_nr_pages(large_page);
-			pte++;
-		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
-
-		/* One flush for the whole run of PTEs cleared in this table. */
-		domain_flush_cache(domain, first_pte,
-				   (void *)pte - (void *)first_pte);
-
-	} while (start_pfn && start_pfn <= last_pfn);
-}
-
-/*
- * Recursively free page-table pages below @level that lie entirely within
- * [@start_pfn, @last_pfn].  @pte is the table for @level and @pfn the
- * first pfn that table covers.  Tables at or above @retain_level are kept.
- */
-static void dma_pte_free_level(struct dmar_domain *domain, int level,
-			       int retain_level, struct dma_pte *pte,
-			       unsigned long pfn, unsigned long start_pfn,
-			       unsigned long last_pfn)
-{
-	pfn = max(start_pfn, pfn);
-	pte = &pte[pfn_level_offset(pfn, level)];
-
-	do {
-		/* Only present, non-superpage entries point at a child table. */
-		if (dma_pte_present(pte) && !dma_pte_superpage(pte)) {
-			unsigned long base_pfn = pfn & level_mask(level);
-			struct dma_pte *child = phys_to_virt(dma_pte_addr(pte));
-
-			if (level > 2)
-				dma_pte_free_level(domain, level - 1,
-						   retain_level, child,
-						   base_pfn, start_pfn,
-						   last_pfn);
-
-			/*
-			 * Free the child table if we're below the level we
-			 * want to retain and the range covers all of it.
-			 */
-			if (level < retain_level &&
-			    start_pfn <= base_pfn &&
-			    last_pfn >= base_pfn + level_size(level) - 1) {
-				dma_clear_pte(pte);
-				domain_flush_cache(domain, pte, sizeof(*pte));
-				free_pgtable_page(child);
-			}
-		}
-
-		pfn += level_size(level);
-	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
-}
-
-/*
- * Clear the leaf PTEs in [@start_pfn, @last_pfn] and free the page-table
- * pages below @retain_level that the range fully covers.  When the range
- * spans the whole domain, the top-level table is freed as well.
- */
-static void dma_pte_free_pagetable(struct dmar_domain *domain,
-				   unsigned long start_pfn,
-				   unsigned long last_pfn,
-				   int retain_level)
-{
-	int top_level;
-
-	BUG_ON(!domain_pfn_supported(domain, start_pfn));
-	BUG_ON(!domain_pfn_supported(domain, last_pfn));
-	BUG_ON(start_pfn > last_pfn);
-
-	dma_pte_clear_range(domain, start_pfn, last_pfn);
-
-	/* We don't need lock here; nobody else touches the iova range */
-	top_level = agaw_to_level(domain->agaw);
-	dma_pte_free_level(domain, top_level, retain_level,
-			   domain->pgd, 0, start_pfn, last_pfn);
-
-	/* free pgd */
-	if (start_pfn != 0 || last_pfn != DOMAIN_MAX_PFN(domain->gaw))
-		return;
-
-	free_pgtable_page(domain->pgd);
-	domain->pgd = NULL;
-}
-
-/*
- * When a page at a given level is being unlinked from its parent, it does
- * not need to be modified.  It is enough to chain it, and every table
- * below it, onto a list of pages that can be freed once the IOTLB has
- * been flushed and the hardware page-walk can no longer reach them.
- *
- * @pte is the *parent* PTE pointing at the page to be freed; @level is
- * the level of that page.  Returns the new head of @freelist.
- */
-static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
-					    int level, struct dma_pte *pte,
-					    struct page *freelist)
-{
-	struct page *table_pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
-	struct dma_pte *entry;
-
-	/* Push this table onto the head of the list. */
-	table_pg->freelist = freelist;
-	freelist = table_pg;
-
-	if (level != 1) {
-		entry = page_address(table_pg);
-		do {
-			if (dma_pte_present(entry) && !dma_pte_superpage(entry))
-				freelist = dma_pte_list_pagetables(domain,
-								   level - 1,
-								   entry,
-								   freelist);
-			entry++;
-		} while (!first_pte_in_page(entry));
-	}
-
-	return freelist;
-}
-
-static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
- struct dma_pte *pte, unsigned long pfn,
- unsigned long start_pfn,
- unsigned long last_pfn,
- struct page *freelist)
-{
- struct dma_pte *first_pte = NULL, *last_pte = NULL;
-
- pfn = max(start_pfn, pfn);
- pte = &pte[pfn_level_offset(pfn, level)];
-
- do {
- unsigned long level_pfn;
-
- if (!dma_pte_present(pte))
- goto next;
-
- level_pfn = pfn & level_mask(level);
-
- /* If range covers entire pagetable, free it */
- if (start_pfn <= level_pfn &&
- last_pfn >= level_pfn + level_size(level) - 1) {
- /* These suborbinate page tables are going away entirely. Don't
- bother to clear them; we're just going to *free* them. */
- if (level > 1 && !dma_pte_superpage(pte))
- freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
-
- dma_clear_pte(pte);
- if (!first_pte)
- first_pte = pte;
- last_pte = pte;
- } else if (level > 1) {
- /* Recurse down into a le