summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/dma/idxd/init.c2
-rw-r--r--drivers/iommu/io-pgfault.c63
-rw-r--r--drivers/iommu/iommu-priv.h11
-rw-r--r--drivers/iommu/iommu-sva.c42
-rw-r--r--drivers/iommu/iommu.c185
-rw-r--r--drivers/iommu/iommufd/Makefile1
-rw-r--r--drivers/iommu/iommufd/device.c7
-rw-r--r--drivers/iommu/iommufd/fault.c433
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c38
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h80
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h8
-rw-r--r--drivers/iommu/iommufd/main.c6
-rw-r--r--drivers/iommu/iommufd/selftest.c64
-rw-r--r--include/linux/iommu.h41
-rw-r--r--include/uapi/linux/iommufd.h109
-rw-r--r--tools/testing/selftests/iommu/iommufd.c22
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h86
18 files changed, 1083 insertions, 117 deletions
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index a7295943fa22..385c488c9cd1 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd)
* DMA domain is owned by the driver, it should support all valid
* types such as DMA-FQ, identity, etc.
*/
- ret = iommu_attach_device_pasid(domain, dev, pasid);
+ ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
if (ret) {
dev_err(dev, "failed to attach device pasid %d, domain type %d",
pasid, domain->type);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 06d78fcc79fd..cd679c13752e 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group)
}
EXPORT_SYMBOL_GPL(iopf_free_group);
-static struct iommu_domain *get_domain_for_iopf(struct device *dev,
- struct iommu_fault *fault)
-{
- struct iommu_domain *domain;
-
- if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
- domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
- if (IS_ERR(domain))
- domain = NULL;
- } else {
- domain = iommu_get_domain_for_dev(dev);
- }
-
- if (!domain || !domain->iopf_handler) {
- dev_warn_ratelimited(dev,
- "iopf (pasid %d) without domain attached or handler installed\n",
- fault->prm.pasid);
-
- return NULL;
- }
-
- return domain;
-}
-
/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
struct iommu_fault *fault)
@@ -134,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
list_add(&group->pending_node, &iopf_param->faults);
mutex_unlock(&iopf_param->lock);
+ group->fault_count = list_count_nodes(&group->faults);
+
return group;
}
@@ -206,20 +184,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
if (group == &abort_group)
goto err_abort;
- group->domain = get_domain_for_iopf(dev, fault);
- if (!group->domain)
+ if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
+ group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
+ fault->prm.pasid,
+ 0);
+ if (IS_ERR(group->attach_handle)) {
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+
+ if (!ops->user_pasid_table)
+ goto err_abort;
+
+ /*
+ * The iommu driver for this device supports user-
+ * managed PASID table. Therefore page faults for
+ * any PASID should go through the NESTING domain
+ * attached to the device RID.
+ */
+ group->attach_handle =
+ iommu_attach_handle_get(dev->iommu_group,
+ IOMMU_NO_PASID,
+ IOMMU_DOMAIN_NESTED);
+ if (IS_ERR(group->attach_handle))
+ goto err_abort;
+ }
+ } else {
+ group->attach_handle =
+ iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(group->attach_handle))
+ goto err_abort;
+ }
+
+ if (!group->attach_handle->domain->iopf_handler)
goto err_abort;
/*
* On success iopf_handler must call iopf_group_response() and
* iopf_free_group()
*/
- if (group->domain->iopf_handler(group))
+ if (group->attach_handle->domain->iopf_handler(group))
goto err_abort;
return;
err_abort:
+ dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
+ fault->prm.pasid);
iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
if (group == &abort_group)
__iopf_free_group(group);
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index 5f731d994803..c37801c32f33 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -28,4 +28,15 @@ void iommu_device_unregister_bus(struct iommu_device *iommu,
const struct bus_type *bus,
struct notifier_block *nb);
+struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group,
+ ioasid_t pasid,
+ unsigned int type);
+int iommu_attach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group,
+ struct iommu_attach_handle *handle);
+void iommu_detach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group);
+int iommu_replace_group_handle(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 18a35e798b72..69cde094440e 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
}
iommu_mm->pasid = pasid;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
- INIT_LIST_HEAD(&iommu_mm->sva_handles);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
* initialization to iommu_mm fields. If it does, readers may see a
@@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
*/
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
{
+ struct iommu_group *group = dev->iommu_group;
+ struct iommu_attach_handle *attach_handle;
struct iommu_mm_data *iommu_mm;
struct iommu_domain *domain;
struct iommu_sva *handle;
int ret;
+ if (!group)
+ return ERR_PTR(-ENODEV);
+
mutex_lock(&iommu_sva_lock);
/* Allocate mm->pasid if necessary. */
@@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_unlock;
}
- list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) {
- if (handle->dev == dev) {
- refcount_inc(&handle->users);
- mutex_unlock(&iommu_sva_lock);
- return handle;
+ /* A bond already exists, just take a reference`. */
+ attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA);
+ if (!IS_ERR(attach_handle)) {
+ handle = container_of(attach_handle, struct iommu_sva, handle);
+ if (attach_handle->domain->mm != mm) {
+ ret = -EBUSY;
+ goto out_unlock;
}
+ refcount_inc(&handle->users);
+ mutex_unlock(&iommu_sva_lock);
+ return handle;
+ }
+
+ if (PTR_ERR(attach_handle) != -ENOENT) {
+ ret = PTR_ERR(attach_handle);
+ goto out_unlock;
}
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
@@ -99,7 +113,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
/* Search for an existing domain. */
list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
+ &handle->handle);
if (!ret) {
domain->users++;
goto out;
@@ -113,7 +128,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_handle;
}
- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
+ &handle->handle);
if (ret)
goto out_free_domain;
domain->users = 1;
@@ -121,10 +137,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
out:
refcount_set(&handle->users, 1);
- list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
mutex_unlock(&iommu_sva_lock);
handle->dev = dev;
- handle->domain = domain;
return handle;
out_free_domain:
@@ -147,7 +161,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
*/
void iommu_sva_unbind_device(struct iommu_sva *handle)
{
- struct iommu_domain *domain = handle->domain;
+ struct iommu_domain *domain = handle->handle.domain;
struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm;
struct device *dev = handle->dev;
@@ -156,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
mutex_unlock(&iommu_sva_lock);
return;
}
- list_del(&handle->handle_item);
iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
if (--domain->users == 0) {
@@ -170,7 +183,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{
- struct iommu_domain *domain = handle->domain;
+ struct iommu_domain *domain = handle->handle.domain;
return mm_get_enqcmd_pasid(domain->mm);
}
@@ -259,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work)
if (status != IOMMU_PAGE_RESP_SUCCESS)
break;
- status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
+ status = iommu_sva_handle_mm(&iopf->fault,
+ group->attach_handle->domain->mm);
}
iopf_group_response(group, status);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9df7cc75c1bc..8484285fbaa8 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @domain: the iommu domain.
* @dev: the attached device.
* @pasid: the pasid of the device.
+ * @handle: the attach handle.
*
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
- void *curr;
int ret;
if (!domain->ops->set_dev_pasid)
@@ -3382,11 +3383,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
}
- curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
- if (curr) {
- ret = xa_err(curr) ? : -EBUSY;
+ if (handle)
+ handle->domain = domain;
+
+ ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
+ if (ret)
goto out_unlock;
- }
ret = __iommu_set_group_pasid(domain, group, pasid);
if (ret)
@@ -3414,46 +3416,11 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid, domain);
- WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
+ xa_erase(&group->pasid_array, pasid);
mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
-/*
- * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
- * @dev: the queried device
- * @pasid: the pasid of the device
- * @type: matched domain type, 0 for any match
- *
- * This is a variant of iommu_get_domain_for_dev(). It returns the existing
- * domain attached to pasid of a device. Callers must hold a lock around this
- * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
- * type is being manipulated. This API does not internally resolve races with
- * attach/detach.
- *
- * Return: attached domain on success, NULL otherwise.
- */
-struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
- ioasid_t pasid,
- unsigned int type)
-{
- /* Caller must be a probed driver on dev */
- struct iommu_group *group = dev->iommu_group;
- struct iommu_domain *domain;
-
- if (!group)
- return NULL;
-
- xa_lock(&group->pasid_array);
- domain = xa_load(&group->pasid_array, pasid);
- if (type && domain && domain->type != type)
- domain = ERR_PTR(-EBUSY);
- xa_unlock(&group->pasid_array);
-
- return domain;
-}
-EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
-
ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
int ret;
@@ -3480,3 +3447,137 @@ void iommu_free_global_pasid(ioasid_t pasid)
ida_free(&iommu_global_pasid_ida, pasid);
}
EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
+
+/**
+ * iommu_attach_handle_get - Return the attach handle
+ * @group: the iommu group that domain was attached to
+ * @pasid: the pasid within the group
+ * @type: matched domain type, 0 for any match
+ *
+ * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch.
+ *
+ * Return the attach handle to the caller. The life cycle of an iommu attach
+ * handle is from the time when the domain is attached to the time when the
+ * domain is detached. Callers are required to synchronize the call of
+ * iommu_attach_handle_get() with domain attachment and detachment. The attach
+ * handle can only be used during its life cycle.
+ */
+struct iommu_attach_handle *
+iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
+{
+ struct iommu_attach_handle *handle;
+
+ xa_lock(&group->pasid_array);
+ handle = xa_load(&group->pasid_array, pasid);
+ if (!handle)
+ handle = ERR_PTR(-ENOENT);
+ else if (type && handle->domain->type != type)
+ handle = ERR_PTR(-EBUSY);
+ xa_unlock(&group->pasid_array);
+
+ return handle;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL);
+
+/**
+ * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group
+ * @domain: IOMMU domain to attach
+ * @group: IOMMU group that will be attached
+ * @handle: attach handle
+ *
+ * Returns 0 on success and error code on failure.
+ *
+ * This is a variant of iommu_attach_group(). It allows the caller to provide
+ * an attach handle and use it when the domain is attached. This is currently
+ * used by IOMMUFD to deliver the I/O page faults.
+ */
+int iommu_attach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group,
+ struct iommu_attach_handle *handle)
+{
+ int ret;
+
+ if (handle)
+ handle->domain = domain;
+
+ mutex_lock(&group->mutex);
+ ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ if (ret)
+ goto err_unlock;
+
+ ret = __iommu_attach_group(domain, group);
+ if (ret)
+ goto err_erase;
+ mutex_unlock(&group->mutex);
+
+ return 0;
+err_erase:
+ xa_erase(&group->pasid_array, IOMMU_NO_PASID);
+err_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, IOMMUFD_INTERNAL);
+
+/**
+ * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
+ * @domain: IOMMU domain to attach
+ * @group: IOMMU group that will be attached
+ *
+ * Detach the specified IOMMU domain from the specified IOMMU group.
+ * It must be used in conjunction with iommu_attach_group_handle().
+ */
+void iommu_detach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group)
+{
+ mutex_lock(&group->mutex);
+ __iommu_group_set_core_domain(group);
+ xa_erase(&group->pasid_array, IOMMU_NO_PASID);
+ mutex_unlock(&group->mutex);
+}
+EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, IOMMUFD_INTERNAL);
+
+/**
+ * iommu_replace_group_handle - replace the domain that a group is attached to
+ * @group: IOMMU group that will be attached to the new domain
+ * @new_domain: new IOMMU domain to replace with
+ * @handle: attach handle
+ *
+ * This is a variant of iommu_group_replace_domain(). It allows the caller to
+ * provide an attach handle for the new domain and use it when the domain is
+ * attached.
+ */
+int iommu_replace_group_handle(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ struct iommu_attach_handle *handle)
+{
+ void *curr;
+ int ret;
+
+ if (!new_domain)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ if (handle) {
+ ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
+ if (ret)
+ goto err_unlock;
+ }
+
+ ret = __iommu_group_set_domain(group, new_domain);
+ if (ret)
+ goto err_release;
+
+ curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ WARN_ON(xa_is_err(curr));
+
+ mutex_unlock(&group->mutex);
+
+ return 0;
+err_release:
+ xa_release(&group->pasid_array, IOMMU_NO_PASID);
+err_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, IOMMUFD_INTERNAL);
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 34b446146961..cf4605962bea 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
device.o \
+ fault.o \
hw_pagetable.o \
io_pagetable.o \
ioas.o \
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 873630c111c1..9a7ec5997c61 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
refcount_inc(&idev->obj.users);
/* igroup refcount moves into iommufd_device */
idev->igroup = igroup;
+ mutex_init(&idev->iopf_lock);
/*
* If the caller fails after this success it must call
@@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* attachment.
*/
if (list_empty(&idev->igroup->device_list)) {
- rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+ rc = iommufd_hwpt_attach_device(hwpt, idev);
if (rc)
goto err_unresv;
idev->igroup->hwpt = hwpt;
@@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
mutex_lock(&idev->igroup->lock);
list_del(&idev->group_item);
if (list_empty(&idev->igroup->device_list)) {
- iommu_detach_group(hwpt->domain, idev->igroup->group);
+ iommufd_hwpt_detach_device(hwpt, idev);
idev->igroup->hwpt = NULL;
}
if (hwpt_is_paging(hwpt))
@@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev,
goto err_unlock;
}
- rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+ rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
if (rc)
goto err_unresv;
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
new file mode 100644
index 000000000000..54d6cd20a673
--- /dev/null
+++ b/drivers/iommu/iommufd/fault.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2024 Intel Corporation
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/iommufd.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <uapi/linux/iommufd.h>
+
+#include "../iommu-priv.h"
+#include "iommufd_private.h"
+
+static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+ struct device *dev = idev->dev;
+ int ret;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
+ return -EINVAL;
+
+ mutex_lock(&idev->iopf_lock);
+ /* Device iopf has already been on. */
+ if (++idev->iopf_enabled > 1) {
+ mutex_unlock(&idev->iopf_lock);
+ return 0;
+ }
+
+ ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
+ if (ret)
+ --idev->iopf_enabled;
+ mutex_unlock(&idev->iopf_lock);
+
+ return ret;
+}
+
+static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+ mutex_lock(&idev->iopf_lock);
+ if (!WARN_ON(idev->iopf_enabled == 0)) {
+ if (--idev->iopf_enabled == 0)
+ iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+ }
+ mutex_unlock(&idev->iopf_lock);
+}
+
+static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct iommufd_attach_handle *handle;
+ int ret;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->idev = idev;
+ ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ if (ret)
+ kfree(handle);
+
+ return ret;
+}
+
+int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ int ret;
+
+ if (!hwpt->fault)
+ return -EINVAL;
+
+ ret = iommufd_fault_iopf_enable(idev);
+ if (ret)
+ return ret;
+
+ ret = __fault_domain_attach_dev(hwpt, idev);
+ if (ret)
+ iommufd_fault_iopf_disable(idev);
+
+ return ret;
+}
+
+static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle)
+{
+ struct iommufd_fault *fault = hwpt->fault;
+ struct iopf_group *group, *next;
+ unsigned long index;
+
+ if (!fault)
+ return;
+
+ mutex_lock(&fault->mutex);
+ list_for_each_entry_safe(group, next, &fault->deliver, node) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+
+ xa_for_each(&fault->response, index, group) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ mutex_unlock(&fault->mutex);
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev)
+{
+ struct iommu_attach_handle *handle;
+
+ handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+ if (!handle)
+ return NULL;
+
+ return to_iommufd_handle(handle);
+}
+
+void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct iommufd_attach_handle *handle;
+
+ handle = iommufd_device_get_attach_handle(idev);
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ iommufd_auto_response_faults(hwpt, handle);
+ iommufd_fault_iopf_disable(idev);
+ kfree(handle);
+}
+
+static int __fault_domain_replace_dev(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ struct iommufd_attach_handle *handle, *curr = NULL;
+ int ret;
+
+ if (old->fault)
+ curr = iommufd_device_get_attach_handle(idev);
+
+ if (hwpt->fault) {
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->handle.domain = hwpt->domain;
+ handle->idev = idev;
+ ret = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, &handle->handle);
+ } else {
+ ret = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, NULL);
+ }
+
+ if (!ret && curr) {
+ iommufd_auto_response_faults(old, curr);
+ kfree(curr);
+ }
+
+ return ret;
+}
+
+int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ bool iopf_off = !hwpt->fault && old->fault;
+ bool iopf_on = hwpt->fault && !old->fault;
+ int ret;
+
+ if (iopf_on) {
+ ret = iommufd_fault_iopf_enable(idev);
+ if (ret)
+ return ret;
+ }
+
+ ret = __fault_domain_replace_dev(idev, hwpt, old);
+ if (ret) {
+ if (iopf_on)
+ iommufd_fault_iopf_disable(idev);
+ return ret;
+ }
+
+ if (iopf_off)
+ iommufd_fault_iopf_disable(idev);
+
+ return 0;
+}
+
+void iommufd_fault_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
+ struct iopf_group *group, *next;
+
+ /*
+ * The iommufd object's reference count is zero at this point.
+ * We can be confident that no other threads are currently
+ * accessing this pointer. Therefore, acquiring the mutex here
+ * is unnecessary.
+ */
+ list_for_each_entry_safe(group, next, &fault->deliver, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+}
+
+static void iommufd_compose_fault_message(struct iommu_fault *fault,
+ struct iommu_hwpt_pgfault *hwpt_fault,
+ struct iommufd_device *idev,
+ u32 cookie)
+{
+ hwpt_fault->flags = fault->prm.flags;
+ hwpt_fault->dev_id = idev->obj.id;
+ hwpt_fault->pasid = fault->prm.pasid;
+ hwpt_fault->grpid = fault->prm.grpid;
+ hwpt_fault->perm = fault->prm.perm;
+ hwpt_fault->addr = fault->prm.addr;
+ hwpt_fault->length = 0;
+ hwpt_fault->cookie = cookie;
+}
+
+static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
+ struct iommufd_fault *fault = filep->private_data;
+ struct iommu_hwpt_pgfault data;
+ struct iommufd_device *idev;
+ struct iopf_group *group;
+ struct iopf_fault *iopf;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % fault_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while (!list_empty(&fault->deliver) && count > done) {
+ group = list_first_entry(&fault->deliver,
+ struct iopf_group, node);
+
+ if (group->fault_count * fault_size > count - done)
+ break;
+
+ rc = xa_alloc(&fault->response, &group->cookie, group,
+ xa_limit_32b, GFP_KERNEL);
+ if (rc)
+ break;
+
+ idev = to_iommufd_handle(group->attach_handle)->idev;
+ list_for_each_entry(iopf, &group->faults, list) {
+ iommufd_compose_fault_message(&iopf->fault,
+ &data, idev,
+ group->cookie);
+ if (copy_to_user(buf + done, &data, fault_size)) {
+ xa_erase(&fault->response, group->cookie);
+ rc = -EFAULT;
+ break;
+ }
+ done += fault_size;
+ }
+
+ list_del(&group->node);
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
+
+static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t response_size = sizeof(struct iommu_hwpt_page_response);
+ struct iommufd_fault *fault = filep->private_data;
+ struct iommu_hwpt_page_response response;
+ struct iopf_group *group;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % response_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while (count > done) {
+ rc = copy_from_user(&response, buf + done, response_size);
+ if (rc)
+ break;
+
+ group = xa_erase(&fault->response, response.cookie);
+ if (!group) {
+ rc = -EINVAL;
+ break;
+ }
+
+ iopf_group_response(group, response.code);
+ iopf_free_group(group);
+ done += response_size;
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
+
+static __poll_t iommufd_fault_fops_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct iommufd_fault *fault = filep->private_data;
+ __poll_t pollflags = EPOLLOUT;
+
+ poll_wait(filep, &fault->wait_queue, wait);
+ mutex_lock(&fault->mutex);
+ if (!list_empty(&fault->deliver))
+ pollflags |= EPOLLIN | EPOLLRDNORM;
+ mutex_unlock(&fault->mutex);
+
+ return pollflags;
+}
+
+static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
+{
+ struct iommufd_fault *fault = filep->private_data;
+
+ refcount_dec(&fault->obj.users);
+ iommufd_ctx_put(fault->ictx);
+ return 0;
+}
+
+static const struct file_operations iommufd_fault_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .read = iommufd_fault_fops_read,
+ .write = iommufd_fault_fops_write,
+ .poll = iommufd_fault_fops_poll,
+ .release = iommufd_fault_fops_release,
+ .llseek = no_llseek,
+};
+
+int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_fault_alloc *cmd = ucmd->cmd;
+ struct iommufd_fault *fault;
+ struct file *filep;
+ int fdno;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
+ if (IS_ERR(fault))
+ return PTR_ERR(fault);
+
+ fault->ictx = ucmd->ictx;
+ INIT_LIST_HEAD(&fault->deliver);
+ xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
+ mutex_init(&fault->mutex);
+ init_waitqueue_head(&fault->wait_queue);
+
+ filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
+ fault, O_RDWR);
+ if (IS_ERR(filep)) {
+ rc = PTR_ERR(filep);
+ goto out_abort;
+ }
+
+ refcount_inc(&fault->obj.users);
+ iommufd_ctx_get(fault->ictx);
+ fault->filep = filep;
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_fput;
+ }
+
+ cmd->out_fault_id = fault->obj.id;
+ cmd->out_fault_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+ iommufd_object_finalize(ucmd->ictx, &fault->obj);
+
+ fd_install(fdno, fault->filep);
+
+ return 0;
+out_put_fdno:
+ put_unused_fd(fdno);
+out_fput:
+ fput(filep);
+ refcount_dec(&fault->obj.users);
+ iommufd_ctx_put(fault->ictx);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);
+
+ return rc;
+}
+
+int iommufd_fault_iopf_handler(struct iopf_group *group)
+{
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_fault *fault;
+
+ hwpt = group->attach_handle->domain->fault_data;
+ fault = hwpt->fault;
+
+ mutex_lock(&fault->mutex);
+ list_add_tail(&group->node, &fault->deliver);
+ mutex_unlock(&fault->mutex);
+
+ wake_up_interruptible(&fault->wait_queue);
+
+ return 0;
+}
diff