author | Joerg Roedel <jroedel@suse.de> | 2020-06-09 15:03:03 +0200 |
---|---|---|
committer | Joerg Roedel <jroedel@suse.de> | 2020-06-10 17:46:43 +0200 |
commit | 672cf6df9b8a3a6d70a6a5c30397f76fa40d3178 (patch) | |
tree | c6af5ef7ab0bf60f3472f1dcd5d96f325f90bc68 /drivers/iommu/intel-iommu.c | |
parent | ad8694bac410e5d72ad610dfb146f58bf2fe0365 (diff) | |
iommu/vt-d: Move Intel IOMMU driver into subdirectory
Move all files related to the Intel IOMMU driver into its own
subdirectory.
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20200609130303.26974-3-joro@8bytes.org
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- | drivers/iommu/intel-iommu.c | 6207 |
1 file changed, 0 insertions, 6207 deletions
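The diffstat records only the deletion half of the move; the same code reappears under the new subdirectory elsewhere in this series. One practical detail such a move has to take care of is the quote-relative includes visible at the top of the deleted file ("irq_remapping.h", "intel-pasid.h"): headers that stay behind in drivers/iommu/ must be reached through an adjusted path, while headers that move along with the driver keep working unchanged. Below is a minimal sketch of what the top of the relocated file could look like, assuming the new location is drivers/iommu/intel/ and that irq_remapping.h stays in the parent directory while intel-pasid.h moves with the driver; the paths shown are illustrative assumptions, not the literal hunks of this patch.

```c
/* Hypothetical drivers/iommu/intel/iommu.c after the move (illustrative only) */

#include <linux/intel-iommu.h>	/* angle-bracket includes resolve as before */

#include "../irq_remapping.h"	/* assumed to remain in drivers/iommu/ */
#include "intel-pasid.h"	/* assumed to move into intel/ alongside the driver */
```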
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
deleted file mode 100644
index 648a785e078a..000000000000
--- a/drivers/iommu/intel-iommu.c
+++ /dev/null
@@ -1,6207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2006-2014 Intel Corporation.
- *
- * Authors: David Woodhouse <dwmw2@infradead.org>,
- *          Ashok Raj <ashok.raj@intel.com>,
- *          Shaohua Li <shaohua.li@intel.com>,
- *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
- *          Fenghua Yu <fenghua.yu@intel.com>
- *          Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt) "DMAR: " fmt
-#define dev_fmt(fmt) pr_fmt(fmt)
-
-#include <linux/init.h>
-#include <linux/bitmap.h>
-#include <linux/debugfs.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dmar.h>
-#include <linux/dma-mapping.h>
-#include <linux/mempool.h>
-#include <linux/memory.h>
-#include <linux/cpu.h>
-#include <linux/timer.h>
-#include <linux/io.h>
-#include <linux/iova.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/syscore_ops.h>
-#include <linux/tboot.h>
-#include <linux/dmi.h>
-#include <linux/pci-ats.h>
-#include <linux/memblock.h>
-#include <linux/dma-contiguous.h>
-#include <linux/dma-direct.h>
-#include <linux/crash_dump.h>
-#include <linux/numa.h>
-#include <linux/swiotlb.h>
-#include <asm/irq_remapping.h>
-#include <asm/cacheflush.h>
-#include <asm/iommu.h>
-#include <trace/events/intel_iommu.h>
-
-#include "irq_remapping.h"
-#include "intel-pasid.h"
-
-#define ROOT_SIZE		VTD_PAGE_SIZE
-#define CONTEXT_SIZE		VTD_PAGE_SIZE
-
-#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
-#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
-#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
-#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
-
-#define IOAPIC_RANGE_START	(0xfee00000)
-#define IOAPIC_RANGE_END	(0xfeefffff)
-#define IOVA_START_ADDR		(0x1000)
-
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
-
-#define MAX_AGAW_WIDTH 64
-#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
-
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
-
-/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
-   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
-#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
-				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
-#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-
-/* page table handling */
-#define LEVEL_STRIDE		(9)
-#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
-
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB.
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
- */
-#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
-
[... the remaining ~6,100 deleted lines of drivers/iommu/intel-iommu.c (page-table and context-table helpers, domain management, the intel_iommu= option parsing, the DMA mapping paths and the iommu_ops implementation) are elided ...]