summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-02 07:59:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-02 07:59:23 -0700
commit320b164abb32db876866a4ff8c2cb710524ac6ea (patch)
tree1f79119cde6e24c9f1d01fb1e51252bca7c4cdd5 /drivers/gpu/drm/amd/amdkfd
parent0adb32858b0bddf4ada5f364a84ed60b196dbcda (diff)
parent694f54f680f7fd8e9561928fbfc537d9afbc3d79 (diff)
downloadlinux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.gz
linux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.bz2
linux-320b164abb32db876866a4ff8c2cb710524ac6ea.zip
Merge tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "Cannonlake and Vega12 support are probably the two major things. This pull lacks nouveau, Ben had some unforseen leave and a few other blockers so we'll see how things look or maybe leave it for this merge window. core: - Device links to handle sound/gpu pm dependency - Color encoding/range properties - Plane clipping into plane check helper - Backlight helpers - DP TP4 + HBR3 helper support amdgpu: - Vega12 support - Enable DC by default on all supported GPUs - Powerplay restructuring and cleanup - DC bandwidth calc updates - DC backlight on pre-DCE11 - TTM backing store dropping support - SR-IOV fixes - Adding "wattman" like functionality - DC crc support - Improved DC dual-link handling amdkfd: - GPUVM support for dGPU - KFD events for dGPU - Enable PCIe atomics for dGPUs - HSA process eviction support - Live-lock fixes for process eviction - VM page table allocation fix for large-bar systems panel: - Raydium RM68200 - AUO G104SN02 V2 - KEO TX31D200VM0BAA - ARM Versatile panels i915: - Cannonlake support enabled - AUX-F port support added - Icelake base enabling until internal milestone of forcewake support - Query uAPI interface (used for GPU topology information currently) - Compressed framebuffer support for sprites - kmem cache shrinking when GPU is idle - Avoid boosting GPU when waited item is being processed already - Avoid retraining LSPCON link unnecessarily - Decrease request signaling latency - Deprecation of I915_SET_COLORKEY_NONE - Kerneldoc and compiler warning cleanup for upcoming CI enforcements - Full range ycbcr toggling - HDCP support i915/gvt: - Big refactor for shadow ppgtt - KBL context save/restore via LRI cmd (Weinan) - Properly unmap dma for guest page (Changbin) vmwgfx: - Lots of various improvements etnaviv: - Use the drm gpu scheduler - prep work for GC7000L support vc4: - fix alpha blending - Expose perf counters to userspace pl111: - Bandwidth checking/limiting - Versatile panel support sun4i: - A83T HDMI support - A80 support - YUV plane support - H3/H5 HDMI support omapdrm: - HPD support for DVI connector - remove lots of static variables msm: - DSI updates from 10nm / SDM845 - fix for race condition with a3xx/a4xx fence completion irq - some refactoring/prep work for eventual a6xx support (ie. when we have a userspace) - a5xx debugfs enhancements - some mdp5 fixes/cleanups to prepare for eventually merging writeback - support (ie. when we have a userspace) tegra: - mmap() fixes for fbdev devices - Overlay plane for hw cursor fix - dma-buf cache maintenance support mali-dp: - YUV->RGB conversion support rockchip: - rk3399/chromebook fixes and improvements rcar-du: - LVDS support move to drm bridge - DT bindings for R8A77995 - Driver/DT support for R8A77970 tilcdc: - DRM panel support" * tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux: (1646 commits) drm/i915: Fix hibernation with ACPI S0 target state drm/i915/execlists: Use a locked clear_bit() for synchronisation with interrupt drm/i915: Specify which engines to reset following semaphore/event lockups drm/i915/dp: Write to SET_POWER dpcd to enable MST hub. drm/amdkfd: Use ordered workqueue to restore processes drm/amdgpu: Fix acquiring VM on large-BAR systems drm/amd/pp: clean header file hwmgr.h drm/amd/pp: use mlck_table.count for array loop index limit drm: Fix uabi regression by allowing garbage mode->type from userspace drm/amdgpu: Add an ATPX quirk for hybrid laptop drm/amdgpu: fix spelling mistake: "asssert" -> "assert" drm/amd/pp: Add new asic support in pp_psm.c drm/amd/pp: Clean up powerplay code on Vega12 drm/amd/pp: Add smu irq handlers for legacy asics drm/amd/pp: Fix set wrong temperature range on smu7 drm/amdgpu: Don't change preferred domian when fallback GTT v5 drm/vmwgfx: Bump version patchlevel and date drm/vmwgfx: use monotonic event timestamps drm/vmwgfx: Unpin the screen object backup buffer when not used drm/vmwgfx: Stricter count of legacy surface device resources ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c535
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c356
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c323
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c56
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c93
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c34
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c59
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c357
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.h78
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c44
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c37
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h101
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c664
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h7
25 files changed, 2538 insertions, 326 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index bc5a2945bd2b..ed2f06c9f346 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,7 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
+ depends on DRM_AMDGPU && X86_64
+ imply AMD_IOMMU_V2
help
Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a317e76ffb5e..0d0242240c47 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+ifneq ($(CONFIG_AMD_IOMMU_V2),)
+amdkfd-y += kfd_iommu.o
+endif
+
amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 62c3d9cd6ef1..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
+#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
return 0;
}
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_process_apertures_new_args *args = data;
+ struct kfd_process_device_apertures *pa;
+ struct kfd_process_device *pdd;
+ uint32_t nodes = 0;
+ int ret;
+
+ dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+ if (args->num_of_nodes == 0) {
+ /* Return number of nodes, so that user space can alloacate
+ * sufficient memory
+ */
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p))
+ goto out_unlock;
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ args->num_of_nodes++;
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd);
+
+ goto out_unlock;
+ }
+
+ /* Fill in process-aperture information for all available
+ * nodes, but not more than args->num_of_nodes as that is
+ * the amount of memory allocated by user
+ */
+ pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+ args->num_of_nodes), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ mutex_lock(&p->mutex);
+
+ if (!kfd_has_process_device_data(p)) {
+ args->num_of_nodes = 0;
+ kfree(pa);
+ goto out_unlock;
+ }
+
+ /* Run over all pdd of the process */
+ pdd = kfd_get_first_process_device_data(p);
+ do {
+ pa[nodes].gpu_id = pdd->dev->id;
+ pa[nodes].lds_base = pdd->lds_base;
+ pa[nodes].lds_limit = pdd->lds_limit;
+ pa[nodes].gpuvm_base = pdd->gpuvm_base;
+ pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+ pa[nodes].scratch_base = pdd->scratch_base;
+ pa[nodes].scratch_limit = pdd->scratch_limit;
+
+ dev_dbg(kfd_device,
+ "gpu id %u\n", pdd->dev->id);
+ dev_dbg(kfd_device,
+ "lds_base %llX\n", pdd->lds_base);
+ dev_dbg(kfd_device,
+ "lds_limit %llX\n", pdd->lds_limit);
+ dev_dbg(kfd_device,
+ "gpuvm_base %llX\n", pdd->gpuvm_base);
+ dev_dbg(kfd_device,
+ "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+ dev_dbg(kfd_device,
+ "scratch_base %llX\n", pdd->scratch_base);
+ dev_dbg(kfd_device,
+ "scratch_limit %llX\n", pdd->scratch_limit);
+ nodes++;
+
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd && (nodes < args->num_of_nodes));
+ mutex_unlock(&p->mutex);
+
+ args->num_of_nodes = nodes;
+ ret = copy_to_user(
+ (void __user *)args->kfd_process_device_apertures_ptr,
+ pa,
+ (nodes * sizeof(struct kfd_process_device_apertures)));
+ kfree(pa);
+ return ret ? -EFAULT : 0;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return 0;
+}
+
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_event_args *args = data;
int err;
+ /* For dGPUs the event page is allocated in user mode. The
+ * handle is passed to KFD with the first call to this IOCTL
+ * through the event_page_offset field.
+ */
+ if (args->event_page_offset) {
+ struct kfd_dev *kfd;
+ struct kfd_process_device *pdd;
+ void *mem, *kern_addr;
+ uint64_t size;
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ return -EINVAL;
+ }
+
+ kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+ if (!kfd) {
+ pr_err("Getting device by id failed in %s\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&p->mutex);
+ pdd = kfd_bind_process_to_device(kfd, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto out_unlock;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->event_page_offset));
+ if (!mem) {
+ pr_err("Can't find BO, offset is 0x%llx\n",
+ args->event_page_offset);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ mutex_unlock(&p->mutex);
+
+ err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+ mem, &kern_addr, &size);
+ if (err) {
+ pr_err("Failed to map event page to kernel\n");
+ return err;
+ }
+
+ err = kfd_event_page_set(p, kern_addr, size);
+ if (err) {
+ pr_err("Failed to set event page\n");
+ return err;
+ }
+ }
+
err = kfd_event_create(filp, p, args->event_type,
args->auto_reset != 0, args->node_id,
&args->event_id, &args->event_trigger_data,
@@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_slot_index);
return err;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return err;
}
static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
@@ -901,7 +1049,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
mutex_unlock(&p->mutex);
- if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+ if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
+ pdd->qpd.vmid != 0)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
@@ -954,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
return 0;
}
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+ void *data)
+{
+ struct kfd_ioctl_acquire_vm_args *args = data;
+ struct kfd_process_device *pdd;
+ struct kfd_dev *dev;
+ struct file *drm_file;
+ int ret;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev)
+ return -EINVAL;
+
+ drm_file = fget(args->drm_fd);
+ if (!drm_file)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (pdd->drm_file) {
+ ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_device_init_vm(pdd, drm_file);
+ if (ret)
+ goto err_unlock;
+ /* On success, the PDD keeps the drm_file reference */
+ mutex_unlock(&p->mutex);
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&p->mutex);
+ fput(drm_file);
+ return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+ struct kfd_local_mem_info mem_info;
+
+ if (debug_largebar) {
+ pr_debug("Simulate large-bar allocation on non large-bar machine\n");
+ return true;
+ }
+
+ if (dev->device_info->needs_iommu_device)
+ return false;
+
+ dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+ if (mem_info.local_mem_size_private == 0 &&
+ mem_info.local_mem_size_public > 0)
+ return true;
+ return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+ struct kfd_process_device *pdd;
+ void *mem;
+ struct kfd_dev *dev;
+ int idr_handle;
+ long err;
+ uint64_t offset = args->mmap_offset;
+ uint32_t flags = args->flags;
+
+ if (args->size == 0)
+ return -EINVAL;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev)
+ return -EINVAL;
+
+ if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+ (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+ !kfd_dev_is_large_bar(dev)) {
+ pr_err("Alloc host visible vram on small bar is not allowed\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto err_unlock;
+ }
+
+ err = dev->kfd2kgd->alloc_memory_of_gpu(
+ dev->kgd, args->va_addr, args->size,
+ pdd->vm, (struct kgd_mem **) &mem, &offset,
+ flags);
+
+ if (err)
+ goto err_unlock;
+
+ idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+ if (idr_handle < 0) {
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ mutex_unlock(&p->mutex);
+
+ args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+ args->mmap_offset = offset;
+
+ return 0;
+
+err_free:
+ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+ mutex_unlock(&p->mutex);
+ return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+ struct kfd_process_device *pdd;
+ void *mem;
+ struct kfd_dev *dev;
+ int ret;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ mem = kfd_process_device_translate_handle(
+ pdd, GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+ /* If freeing the buffer failed, leave the handle in place for
+ * clean-up during process tear-down.
+ */
+ if (!ret)
+ kfd_process_device_remove_obj_handle(
+ pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+ struct kfd_process_device *pdd, *peer_pdd;
+ void *mem;
+ struct kfd_dev *dev, *peer;
+ long err = 0;
+ int i;
+ uint32_t *devices_arr = NULL;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ if (!args->n_devices) {
+ pr_debug("Device IDs array empty\n");
+ return -EINVAL;
+ }
+ if (args->n_success > args->n_devices) {
+ pr_debug("n_success exceeds n_devices\n");
+ return -EINVAL;
+ }
+
+ devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+ GFP_KERNEL);
+ if (!devices_arr)
+ return -ENOMEM;
+
+ err = copy_from_user(devices_arr,
+ (void __user *)args->device_ids_array_ptr,
+ args->n_devices * sizeof(*devices_arr));
+ if (err != 0) {
+ err = -EFAULT;
+ goto copy_from_user_failed;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto bind_process_to_device_failed;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ err = -ENOMEM;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ for (i = args->n_success; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (!peer) {
+ pr_debug("Getting device by id failed for 0x%x\n",
+ devices_arr[i]);
+ err = -EINVAL;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd)) {
+ err = PTR_ERR(peer_pdd);
+ goto get_mem_obj_from_handle_failed;
+ }
+ err = peer->kfd2kgd->map_memory_to_gpu(
+ peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ if (err) {
+ pr_err("Failed to map to gpu %d/%d\n",
+ i, args->n_devices);
+ goto map_memory_to_gpu_failed;
+ }
+ args->n_success = i+1;
+ }
+
+ mutex_unlock(&p->mutex);
+
+ err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ if (err) {
+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ goto sync_memory_failed;
+ }
+
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd);
+ }
+
+ kfree(devices_arr);
+
+ return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+ mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+ kfree(devices_arr);
+
+ return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+ struct kfd_process_device *pdd, *peer_pdd;
+ void *mem;
+ struct kfd_dev *dev, *peer;
+ long err = 0;
+ uint32_t *devices_arr = NULL, i;
+
+ dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+ if (!dev)
+ return -EINVAL;
+
+ if (!args->n_devices) {
+ pr_debug("Device IDs array empty\n");
+ return -EINVAL;
+ }
+ if (args->n_success > args->n_devices) {
+ pr_debug("n_success exceeds n_devices\n");
+ return -EINVAL;
+ }
+
+ devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+ GFP_KERNEL);
+ if (!devices_arr)
+ return -ENOMEM;
+
+ err = copy_from_user(devices_arr,
+ (void __user *)args->device_ids_array_ptr,
+ args->n_devices * sizeof(*devices_arr));
+ if (err != 0) {
+ err = -EFAULT;
+ goto copy_from_user_failed;
+ }
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ err = PTR_ERR(pdd);
+ goto bind_process_to_device_failed;
+ }
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(args->handle));
+ if (!mem) {
+ err = -ENOMEM;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ for (i = args->n_success; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (!peer) {
+ err = -EINVAL;
+ goto get_mem_obj_from_handle_failed;
+ }
+
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (!peer_pdd) {
+ err = -ENODEV;
+ goto get_mem_obj_from_handle_failed;
+ }
+ err = dev->kfd2kgd->unmap_memory_to_gpu(
+ peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+ if (err) {
+ pr_err("Failed to unmap from gpu %d/%d\n",
+ i, args->n_devices);
+ goto unmap_memory_from_gpu_failed;
+ }
+ args->n_success = i+1;
+ }
+ kfree(devices_arr);
+
+ mutex_unlock(&p->mutex);
+
+ return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+ mutex_unlock(&p->mutex);
+copy_from_user_failed:
+ kfree(devices_arr);
+ return err;
+}
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1016,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
kfd_ioctl_set_trap_handler, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+ kfd_ioctl_get_process_apertures_new, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+ kfd_ioctl_acquire_vm, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+ kfd_ioctl_alloc_memory_of_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+ kfd_ioctl_free_memory_of_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+ kfd_ioctl_map_memory_to_gpu, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+ kfd_ioctl_unmap_memory_from_gpu, 0),
+
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2bc2816767a7..4f126ef6139b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -22,10 +22,10 @@
#include <linux/pci.h>
#include <linux/acpi.h>
-#include <linux/amd-iommu.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
+#include "kfd_iommu.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
crat_table->length = sizeof(struct crat_header);
status = acpi_get_table("DSDT", 0, &acpi_table);
- if (status == AE_NOT_FOUND)
+ if (status != AE_OK)
pr_warn("DSDT table not found for OEM information\n");
else {
crat_table->oem_revision = acpi_table->revision;
@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_generic *sub_type_hdr;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
- struct amd_iommu_device_info iommu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
int ret = 0;
- const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
- AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
- AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
- iommu_info.flags = 0;
- if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
- if ((iommu_info.flags & required_iommu_flags) ==
- required_iommu_flags)
- cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
- }
+ if (!kfd_iommu_check_device(kdev))
+ cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
@@ -1125,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
+ if (debug_largebar)
+ local_mem_info.local_mem_size_private = 0;
+
if (local_mem_info.local_mem_size_private == 0)
ret = kfd_fill_gpu_memory_affinity(&avail_size,
kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 3da25f7bda6b..9d4af961c5d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -33,6 +33,7 @@
#include "kfd_pm4_headers_diq.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
static DEFINE_MUTEX(kfd_dbgmgr_mutex);
@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
}
/* get actual type of DBGDevice cpsch or not */
- if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
type = DBGDEV_TYPE_NODIQ;
kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a8fa33a08de3..3346699960dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
+#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
@@ -28,9 +30,12 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"
+#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
+static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
+#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
@@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = {
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false,
+ .needs_iommu_device = true,
+ .needs_pci_atomics = false,
};
static const struct kfd_device_info carrizo_device_info = {
@@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = {
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
+ .needs_iommu_device = true,
+ .needs_pci_atomics = false,
};
+#endif
+
+static const struct kfd_device_info hawaii_device_info = {
+ .asic_family = CHIP_HAWAII,
+ .max_pasid_bits = 16,
+ /* max num of queues for KV.TODO should be a dynamic value */
+ .max_no_of_hqd = 24,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = false,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info tonga_device_info = {
+ .asic_family = CHIP_TONGA,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = false,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+};
+
+static const struct kfd_device_info tonga_vf_device_info = {
+ .asic_family = CHIP_TONGA,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = false,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info fiji_device_info = {
+ .asic_family = CHIP_FIJI,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cws