summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/iommu/Kconfig4
-rw-r--r--drivers/iommu/amd/Kconfig1
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h12
-rw-r--r--drivers/iommu/amd/io_pgtable.c68
-rw-r--r--drivers/iommu/amd/iommu.c147
-rw-r--r--drivers/iommu/intel/Kconfig1
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/iommu.c156
-rw-r--r--drivers/iommu/intel/iommu.h64
-rw-r--r--drivers/iommu/intel/nested.c117
-rw-r--r--drivers/iommu/intel/pasid.c221
-rw-r--r--drivers/iommu/intel/pasid.h6
-rw-r--r--drivers/iommu/iommufd/Makefile1
-rw-r--r--drivers/iommu/iommufd/device.c174
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c304
-rw-r--r--drivers/iommu/iommufd/io_pagetable.c200
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h84
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h39
-rw-r--r--drivers/iommu/iommufd/iova_bitmap.c (renamed from drivers/vfio/iova_bitmap.c)5
-rw-r--r--drivers/iommu/iommufd/main.c17
-rw-r--r--drivers/iommu/iommufd/pages.c2
-rw-r--r--drivers/iommu/iommufd/selftest.c326
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c6
-rw-r--r--drivers/vfio/Makefile3
-rw-r--r--drivers/vfio/pci/mlx5/Kconfig1
-rw-r--r--drivers/vfio/pci/mlx5/main.c1
-rw-r--r--drivers/vfio/pci/pds/Kconfig1
-rw-r--r--drivers/vfio/pci/pds/pci_drv.c1
-rw-r--r--drivers/vfio/vfio_main.c1
-rw-r--r--include/linux/io-pgtable.h4
-rw-r--r--include/linux/iommu.h146
-rw-r--r--include/linux/iova_bitmap.h26
-rw-r--r--include/uapi/linux/iommufd.h180
-rw-r--r--tools/testing/selftests/iommu/iommufd.c379
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c7
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h233
36 files changed, 2722 insertions, 218 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 7f04491ca5f0..ee9e2a2edbf5 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -7,6 +7,10 @@ config IOMMU_IOVA
config IOMMU_API
bool
+config IOMMUFD_DRIVER
+ bool
+ default n
+
menuconfig IOMMU_SUPPORT
bool "IOMMU Hardware Support"
depends on MMU
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 9b5fc3356bf2..8bd4c3b183ec 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_IO_PGTABLE
+ select IOMMUFD_DRIVER if IOMMUFD
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 7dc30c2b56b3..dec4e5c2b66b 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -97,7 +97,9 @@
#define FEATURE_GATS_MASK (3ULL)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
#define FEATURE_GIOSUP BIT_ULL(48)
+#define FEATURE_HASUP BIT_ULL(49)
#define FEATURE_EPHSUP BIT_ULL(50)
+#define FEATURE_HDSUP BIT_ULL(52)
#define FEATURE_SNP BIT_ULL(63)
#define FEATURE_PASID_SHIFT 32
@@ -212,6 +214,7 @@
/* macros and definitions for device table entries */
#define DEV_ENTRY_VALID 0x00
#define DEV_ENTRY_TRANSLATION 0x01
+#define DEV_ENTRY_HAD 0x07
#define DEV_ENTRY_PPR 0x34
#define DEV_ENTRY_IR 0x3d
#define DEV_ENTRY_IW 0x3e
@@ -371,9 +374,15 @@
(1ULL << (12 + (9 * (level))))
/*
+ * The IOPTE dirty bit
+ */
+#define IOMMU_PTE_HD_BIT (6)
+
+/*
* Bit value definition for I/O PTE fields
*/
#define IOMMU_PTE_PR BIT_ULL(0)
+#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT)
#define IOMMU_PTE_U BIT_ULL(59)
#define IOMMU_PTE_FC BIT_ULL(60)
#define IOMMU_PTE_IR BIT_ULL(61)
@@ -384,6 +393,7 @@
*/
#define DTE_FLAG_V BIT_ULL(0)
#define DTE_FLAG_TV BIT_ULL(1)
+#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_GLX_SHIFT (56)
@@ -413,6 +423,7 @@
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
+#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
#define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK))
#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
@@ -563,6 +574,7 @@ struct protection_domain {
int nid; /* Node ID */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
+ bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2892aa1b4dc1..6c0621f6f572 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,6 +486,73 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
return (__pte & ~offset_mask) | (iova & offset_mask);
}
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
+ unsigned long flags)
+{
+ bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
+ bool dirty = false;
+ int i, count;
+
+ /*
+ * 2.2.3.2 Host Dirty Support
+ * When a non-default page size is used , software must OR the
+ * Dirty bits in all of the replicated host PTEs used to map
+ * the page. The IOMMU does not guarantee the Dirty bits are
+ * set in all of the replicated PTEs. Any portion of the page
+ * may have been written even if the Dirty bit is set in only
+ * one of the replicated PTEs.
+ */
+ count = PAGE_SIZE_PTE_COUNT(size);
+ for (i = 0; i < count && test_only; i++) {
+ if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
+ dirty = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < count && !test_only; i++) {
+ if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+ (unsigned long *)&ptep[i])) {
+ dirty = true;
+ }
+ }
+
+ return dirty;
+}
+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long end = iova + size - 1;
+
+ do {
+ unsigned long pgsize = 0;
+ u64 *ptep, pte;
+
+ ptep = fetch_pte(pgtable, iova, &pgsize);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+ pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+ iova += pgsize;
+ continue;
+ }
+
+ /*
+ * Mark the whole IOVA range as dirty even if only one of
+ * the replicated PTEs were marked dirty.
+ */
+ if (pte_test_and_clear_dirty(ptep, pgsize, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+}
+
/*
* ----------------------------------------------------
*/
@@ -527,6 +594,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
pgtable->iop.ops.map_pages = iommu_v1_map_pages;
pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+ pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
return &pgtable->iop;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 95bd7c25ba6f..b399c5741378 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -37,6 +37,7 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/dma.h>
+#include <uapi/linux/iommufd.h>
#include "amd_iommu.h"
#include "../dma-iommu.h"
@@ -65,6 +66,7 @@ LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
const struct iommu_ops amd_iommu_ops;
+const struct iommu_dirty_ops amd_dirty_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
@@ -1610,6 +1612,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
pte_root |= 1ULL << DEV_ENTRY_PPR;
}
+ if (domain->dirty_tracking)
+ pte_root |= DTE_FLAG_HAD;
+
if (domain->flags & PD_IOMMUV2_MASK) {
u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
u64 glx = domain->glx;
@@ -2155,28 +2160,79 @@ static inline u64 dma_max_address(void)
return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
}
-static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
+static bool amd_iommu_hd_support(struct amd_iommu *iommu)
{
+ return iommu && (iommu->features & FEATURE_HDSUP);
+}
+
+static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
+ struct device *dev, u32 flags)
+{
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
struct protection_domain *domain;
+ struct amd_iommu *iommu = NULL;
+
+ if (dev) {
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
+ }
/*
* Since DTE[Mode]=0 is prohibited on SNP-enabled system,
* default to use IOMMU_DOMAIN_DMA[_FQ].
*/
if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
- return NULL;
+ return ERR_PTR(-EINVAL);
+
+ if (dirty_tracking && !amd_iommu_hd_support(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
domain = protection_domain_alloc(type);
if (!domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = dma_max_address();
domain->domain.geometry.force_aperture = true;
+ if (iommu) {
+ domain->domain.type = type;
+ domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
+ domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+
+ if (dirty_tracking)
+ domain->domain.dirty_ops = &amd_dirty_ops;
+ }
+
return &domain->domain;
}
+static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
+{
+ struct iommu_domain *domain;
+
+ domain = do_iommu_domain_alloc(type, NULL, 0);
+ if (IS_ERR(domain))
+ return NULL;
+
+ return domain;
+}
+
+static struct iommu_domain *
+amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+
+{
+ unsigned int type = IOMMU_DOMAIN_UNMANAGED;
+
+ if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return do_iommu_domain_alloc(type, dev, flags);
+}
+
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
@@ -2214,6 +2270,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
dev_data->defer_attach = false;
+ /*
+ * Restrict to devices with compatible IOMMU hardware support
+ * when enforcement of dirty tracking is enabled.
+ */
+ if (dom->dirty_ops && !amd_iommu_hd_support(iommu))
+ return -EINVAL;
+
if (dev_data->domain)
detach_device(dev);
@@ -2332,6 +2395,11 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return true;
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
+ case IOMMU_CAP_DIRTY_TRACKING: {
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+ return amd_iommu_hd_support(iommu);
+ }
default:
break;
}
@@ -2339,6 +2407,73 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return false;
}
+static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+{
+ struct protection_domain *pdomain = to_pdomain(domain);
+ struct dev_table_entry *dev_table;
+ struct iommu_dev_data *dev_data;
+ bool domain_flush = false;
+ struct amd_iommu *iommu;
+ unsigned long flags;
+ u64 pte_root;
+
+ spin_lock_irqsave(&pdomain->lock, flags);
+ if (!(pdomain->dirty_tracking ^ enable)) {
+ spin_unlock_irqrestore(&pdomain->lock, flags);
+ return 0;
+ }
+
+ list_for_each_entry(dev_data, &pdomain->dev_list, list) {
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ continue;
+
+ dev_table = get_dev_table(iommu);
+ pte_root = dev_table[dev_data->devid].data[0];
+
+ pte_root = (enable ? pte_root | DTE_FLAG_HAD :
+ pte_root & ~DTE_FLAG_HAD);
+
+ /* Flush device DTE */
+ dev_table[dev_data->devid].data[0] = pte_root;
+ device_flush_dte(dev_data);
+ domain_flush = true;
+ }
+
+ /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */
+ if (domain_flush) {
+ amd_iommu_domain_flush_tlb_pde(pdomain);
+ amd_iommu_domain_flush_complete(pdomain);
+ }
+ pdomain->dirty_tracking = enable;
+ spin_unlock_irqrestore(&pdomain->lock, flags);
+
+ return 0;
+}
+
+static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct protection_domain *pdomain = to_pdomain(domain);
+ struct io_pgtable_ops *ops = &pdomain->iop.iop.ops;
+ unsigned long lflags;
+
+ if (!ops || !ops->read_and_clear_dirty)
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&pdomain->lock, lflags);
+ if (!pdomain->dirty_tracking && dirty->bitmap) {
+ spin_unlock_irqrestore(&pdomain->lock, lflags);
+ return -EINVAL;
+ }
+ spin_unlock_irqrestore(&pdomain->lock, lflags);
+
+ return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
static void amd_iommu_get_resv_regions(struct device *dev,
struct list_head *head)
{
@@ -2461,9 +2596,15 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
return true;
}
+const struct iommu_dirty_ops amd_dirty_ops = {
+ .set_dirty_tracking = amd_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
+};
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
+ .domain_alloc_user = amd_iommu_domain_alloc_user,
.probe_device = amd_iommu_probe_device,
.release_device = amd_iommu_release_device,
.probe_finalize = amd_iommu_probe_finalize,
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 119d2c57a48e..012cd2541a68 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
+ select IOMMUFD_DRIVER if IOMMUFD
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 7af3b8a4f2a0..5dabf081a779 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3685ba90ec88..d1037280abf7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -282,7 +282,6 @@ static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-static void device_block_translation(struct device *dev);
static void intel_iommu_domain_free(struct iommu_domain *domain);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
@@ -300,6 +299,7 @@ static int iommu_skip_te_disable;
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
+const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@@ -560,7 +560,7 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
}
/* Some capabilities may be different across iommus */
-static void domain_update_iommu_cap(struct dmar_domain *domain)
+void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
@@ -1778,8 +1778,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
return domain;
}
-static int domain_attach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
unsigned long ndomains;
@@ -1828,8 +1827,7 @@ err_unlock:
return ret;
}
-static void domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
@@ -2196,6 +2194,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
+ if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
+ pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
+ return -EINVAL;
+ }
+
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
@@ -3958,7 +3961,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
* all DMA requests without PASID from the device are blocked. If the page
* table has been set, clean up the data structures.
*/
-static void device_block_translation(struct device *dev)
+void device_block_translation(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
@@ -4058,14 +4061,62 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
+static struct iommu_domain *
+intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct intel_iommu *iommu = info->iommu;
+ struct iommu_domain *domain;
+
+ /* Must be NESTING domain */
+ if (parent) {
+ if (!nested_supported(iommu) || flags)
+ return ERR_PTR(-EOPNOTSUPP);
+ return intel_nested_domain_alloc(parent, user_data);
+ }
+
+ if (flags &
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (nested_parent && !nested_supported(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data || (dirty_tracking && !ssads_supported(iommu)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /*
+ * domain_alloc_user op needs to fully initialize a domain before
+ * return, so uses iommu_domain_alloc() here for simple.
+ */
+ domain = iommu_domain_alloc(dev->bus);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ if (nested_parent)
+ to_dmar_domain(domain)->nested_parent = true;
+
+ if (dirty_tracking) {
+ if (to_dmar_domain(domain)->use_first_level) {
+ iommu_domain_free(domain);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ domain->dirty_ops = &intel_dirty_ops;
+ }
+
+ return domain;
+}
+
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
if (domain != &si_domain->domain && domain != &blocking_domain)
domain_exit(to_dmar_domain(domain));
}
-static int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
@@ -4078,6 +4129,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
+ if (domain->dirty_ops && !ssads_supported(iommu))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -4332,6 +4386,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return ecap_sc_support(info->iommu->ecap);
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return ssads_supported(info->iommu);
default:
return false;
}
@@ -4729,6 +4785,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
+ if (domain->dirty_ops)
+ return -EINVAL;
+
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
@@ -4780,6 +4839,7 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
if (!vtd)
return ERR_PTR(-ENOMEM);
+ vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
vtd->cap_reg = iommu->cap;
vtd->ecap_reg = iommu->ecap;
*length = sizeof(*vtd);
@@ -4787,10 +4847,88 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
return vtd;
}
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct device_domain_info *info;
+ int ret;
+
+ spin_lock(&dmar_domain->lock);
+ if (dmar_domain->dirty_tracking == enable)
+ goto out_unlock;
+
+ list_for_each_entry(info, &dmar_domain->devices, link) {
+ ret = intel_pasid_setup_dirty_tracking(info->iommu,
+ info->domain, info->dev,
+ IOMMU_NO_PASID, enable);
+ if (ret)
+ goto err_unwind;
+ }
+
+ dmar_domain->dirty_tracking = enable;
+out_unlock:
+ spin_unlock(&dmar_domain->lock);
+
+ return 0;
+
+err_unwind:
+ list_for_each_entry(info, &dmar_domain->devices, link)
+ intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
+ info->dev, IOMMU_NO_PASID,
+ dmar_domain->dirty_tracking);
+ spin_unlock(&dmar_domain->lock);
+ return ret;
+}
+
+static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long end = iova + size - 1;
+ unsigned long pgsize;
+
+ /*
+ * IOMMUFD core calls into a dirty tracking disabled domain without an
+ * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+ * have occurred when we stopped dirty tracking. This ensures that we
+ * never inherit dirtied bits from a previous cycle.
+ */
+ if (!dmar_domain->dirty_tracking && dirty->bitmap)
+ return -EINVAL;
+
+ do {
+ struct dma_pte *pte;
+ int lvl = 0;
+
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
+ GFP_ATOMIC);
+ pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+ if (!pte || !dma_pte_present(pte)) {
+ iova += pgsize;
+ continue;
+ }
+
+ if (dma_sl_pte_test_and_clear_dirty(pte, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+}
+
+const struct iommu_dirty_ops intel_dirty_ops = {
+ .set_dirty_tracking = intel_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
+};
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
+ .domain_alloc_user = intel_iommu_domain_alloc_user,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 7dac94f62b4e..d796d0d9b114 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+#include <uapi/linux/iommufd.h>
/*
* VT-d hardware uses 4KiB page size regardless of host page size.
@@ -48,6 +49,9 @@
#define DMA_FL_PTE_DIRTY BIT_ULL(6)
#define DMA_FL_PTE_XD BIT_ULL(63)
+#define DMA_SL_PTE_DIRTY_BIT 9
+#define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
#define ADDR_WIDTH_5LEVEL (57)
#define ADDR_WIDTH_4LEVEL (48)
@@ -539,6 +543,10 @@ enum {
#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu) (sm_supported(iommu) && \
ecap_pasid((iommu)->ecap))
+#define ssads_supported(iommu) (sm_supported(iommu) && \
+ ecap_slads((iommu)->ecap))
+#define nested_supported(iommu) (sm_supported(iommu) && \
+ ecap_nest((iommu)->ecap))
struct pasid_entry;
struct pasid_state_entry;
@@ -592,20 +600,45 @@ struct dmar_domain {
* otherwise, goes through the second
* level.
*/
+ u8 dirty_tracking:1; /* Dirty tracking is enabled */
+ u8 nested_parent:1; /* Has other domains nested on it */
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
+ union {
+ /* DMA remapping domain */
+ struct {
+ /* virtual address */
+ struct dma_pte *pgd;
+ /* max guest address width */
+ int gaw;
+ /*
+ * adjusted guest address width:
+ * 0: level 2 30-bit
+ * 1: level 3 39-bit
+ * 2: level 4 48-bit
+ * 3: level 5 57-bit
+ */
+ int agaw;
+ /* maximum mapped address */
+ u64 max_addr;
+ };
+
+ /* Nested user domain */
+ struct {
+ /* parent page table which the user domain is nested on */
+ struct dmar_domain *s2_domain;
+ /* user page table pointer (in GPA) */
+ unsigned long s1_pgtbl;
+ /* page table attributes */
+ struct iommu_hwpt_vtd_s1 s1_cfg;
+ };
+ };
struct iommu_domain domain; /* generic domain data structure for
iommu core */
@@ -781,6 +814,16 @@ static inline bool dma_pte_present(struct dma_pte *pte)
return (pte->val & 3) != 0;
}
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+ unsigned long flags)
+{
+ if (flags & IOMMU_DIRTY_NO_CLEAR)
+ return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+ return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+ (unsigned long *)&pte->val);
+}
+
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
return (pte->val & DMA_PTE_LARGE_PAGE);
@@ -836,12 +879,21 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
+int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+void device_block_translation(struct device *dev);
+int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+void domain_update_iommu_cap(struct dmar_domain *domain);
+
int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);