| | | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 07:59:23 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 07:59:23 -0700 |
| commit | 320b164abb32db876866a4ff8c2cb710524ac6ea (patch) | |
| tree | 1f79119cde6e24c9f1d01fb1e51252bca7c4cdd5 /drivers/gpu/drm/amd/amdkfd | |
| parent | 0adb32858b0bddf4ada5f364a84ed60b196dbcda (diff) | |
| parent | 694f54f680f7fd8e9561928fbfc537d9afbc3d79 (diff) | |
| download | linux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.gz linux-320b164abb32db876866a4ff8c2cb710524ac6ea.tar.bz2 linux-320b164abb32db876866a4ff8c2cb710524ac6ea.zip | |
Merge tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"Cannonlake and Vega12 support are probably the two major things. This
pull lacks nouveau; Ben had some unforeseen leave and a few other
blockers, so we'll see how things look or maybe leave it for this merge
window.
core:
- Device links to handle sound/gpu pm dependency
- Color encoding/range properties
- Plane clipping into plane check helper
- Backlight helpers
- DP TP4 + HBR3 helper support
amdgpu:
- Vega12 support
- Enable DC by default on all supported GPUs
- Powerplay restructuring and cleanup
- DC bandwidth calc updates
- DC backlight on pre-DCE11
- TTM backing store dropping support
- SR-IOV fixes
- Adding "wattman" like functionality
- DC crc support
- Improved DC dual-link handling
amdkfd:
- GPUVM support for dGPU
- KFD events for dGPU
- Enable PCIe atomics for dGPUs
- HSA process eviction support
- Live-lock fixes for process eviction
- VM page table allocation fix for large-bar systems
panel:
- Raydium RM68200
- AUO G104SN02 V2
- KOE TX31D200VM0BAA
- ARM Versatile panels
i915:
- Cannonlake support enabled
- AUX-F port support added
- Icelake base enabling until internal milestone of forcewake support
- Query uAPI interface (used for GPU topology information currently)
- Compressed framebuffer support for sprites
- kmem cache shrinking when GPU is idle
- Avoid boosting GPU when waited item is being processed already
- Avoid retraining LSPCON link unnecessarily
- Decrease request signaling latency
- Deprecation of I915_SET_COLORKEY_NONE
- Kerneldoc and compiler warning cleanup for upcoming CI enforcements
- Full range ycbcr toggling
- HDCP support
i915/gvt:
- Big refactor for shadow ppgtt
- KBL context save/restore via LRI cmd (Weinan)
- Properly unmap dma for guest page (Changbin)
vmwgfx:
- Lots of various improvements
etnaviv:
- Use the drm gpu scheduler
- prep work for GC7000L support
vc4:
- fix alpha blending
- Expose perf counters to userspace
pl111:
- Bandwidth checking/limiting
- Versatile panel support
sun4i:
- A83T HDMI support
- A80 support
- YUV plane support
- H3/H5 HDMI support
omapdrm:
- HPD support for DVI connector
- remove lots of static variables
msm:
- DSI updates from 10nm / SDM845
- fix for race condition with a3xx/a4xx fence completion irq
- some refactoring/prep work for eventual a6xx support (ie. when we
have a userspace)
- a5xx debugfs enhancements
- some mdp5 fixes/cleanups to prepare for eventually merging writeback
  support (ie. when we have a userspace)
tegra:
- mmap() fixes for fbdev devices
- Overlay plane for hw cursor fix
- dma-buf cache maintenance support
mali-dp:
- YUV->RGB conversion support
rockchip:
- rk3399/chromebook fixes and improvements
rcar-du:
- LVDS support move to drm bridge
- DT bindings for R8A77995
- Driver/DT support for R8A77970
tilcdc:
- DRM panel support"
* tag 'drm-for-v4.17' of git://people.freedesktop.org/~airlied/linux: (1646 commits)
drm/i915: Fix hibernation with ACPI S0 target state
drm/i915/execlists: Use a locked clear_bit() for synchronisation with interrupt
drm/i915: Specify which engines to reset following semaphore/event lockups
drm/i915/dp: Write to SET_POWER dpcd to enable MST hub.
drm/amdkfd: Use ordered workqueue to restore processes
drm/amdgpu: Fix acquiring VM on large-BAR systems
drm/amd/pp: clean header file hwmgr.h
drm/amd/pp: use mlck_table.count for array loop index limit
drm: Fix uabi regression by allowing garbage mode->type from userspace
drm/amdgpu: Add an ATPX quirk for hybrid laptop
drm/amdgpu: fix spelling mistake: "asssert" -> "assert"
drm/amd/pp: Add new asic support in pp_psm.c
drm/amd/pp: Clean up powerplay code on Vega12
drm/amd/pp: Add smu irq handlers for legacy asics
drm/amd/pp: Fix set wrong temperature range on smu7
drm/amdgpu: Don't change preferred domian when fallback GTT v5
drm/vmwgfx: Bump version patchlevel and date
drm/vmwgfx: use monotonic event timestamps
drm/vmwgfx: Unpin the screen object backup buffer when not used
drm/vmwgfx: Stricter count of legacy surface device resources
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
25 files changed, 2538 insertions, 326 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index bc5a2945bd2b..ed2f06c9f346 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,7 @@
 config HSA_AMD
 	tristate "HSA kernel driver for AMD GPU devices"
-	depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
+	depends on DRM_AMDGPU && X86_64
+	imply AMD_IOMMU_V2
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a317e76ffb5e..0d0242240c47 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
 		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
 
+ifneq ($(CONFIG_AMD_IOMMU_V2),)
+amdkfd-y += kfd_iommu.o
+endif
+
 amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
 
 obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
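The Kconfig hunk above is the key build-system piece of the dGPU work: AMD_IOMMU_V2 drops from a hard dependency to an `imply`, so amdkfd can now be built for discrete GPUs on systems without IOMMUv2, with kfd_iommu.o compiled in only when the IOMMU driver is configured. A minimal sketch of the wrapper shape this enables (an assumed illustration, not the actual contents of the new kfd_iommu.h):

```c
#include <linux/errno.h>
#include <linux/kconfig.h>	/* IS_ENABLED() */

struct kfd_dev;

#if IS_ENABLED(CONFIG_AMD_IOMMU_V2)
int kfd_iommu_check_device(struct kfd_dev *kfd);
#else
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
	return -ENODEV;	/* no IOMMUv2 built in: node reports no ATS */
}
#endif
```

IS_ENABLED() covers both =y and =m, the same condition the Makefile's ifneq test and the #if defined(...) pair in kfd_device.c below check for.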
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 62c3d9cd6ef1..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+			args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = kfd_get_next_process_device_data(p, pdd);
+	} while (pdd && (nodes < args->num_of_nodes));
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 		void *data)
 {
 	struct kfd_ioctl_create_event_args *args = data;
 	int err;
 
+	/* For dGPUs the event page is allocated in user mode. The
+	 * handle is passed to KFD with the first call to this IOCTL
+	 * through the event_page_offset field.
+	 */
+	if (args->event_page_offset) {
+		struct kfd_dev *kfd;
+		struct kfd_process_device *pdd;
+		void *mem, *kern_addr;
+		uint64_t size;
+
+		if (p->signal_page) {
+			pr_err("Event page is already set\n");
+			return -EINVAL;
+		}
+
+		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+		if (!kfd) {
+			pr_err("Getting device by id failed in %s\n", __func__);
+			return -EINVAL;
+		}
+
+		mutex_lock(&p->mutex);
+		pdd = kfd_bind_process_to_device(kfd, p);
+		if (IS_ERR(pdd)) {
+			err = PTR_ERR(pdd);
+			goto out_unlock;
+		}
+
+		mem = kfd_process_device_translate_handle(pdd,
+				GET_IDR_HANDLE(args->event_page_offset));
+		if (!mem) {
+			pr_err("Can't find BO, offset is 0x%llx\n",
+			       args->event_page_offset);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		mutex_unlock(&p->mutex);
+
+		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+				mem, &kern_addr, &size);
+		if (err) {
+			pr_err("Failed to map event page to kernel\n");
+			return err;
+		}
+
+		err = kfd_event_page_set(p, kern_addr, size);
+		if (err) {
+			pr_err("Failed to set event page\n");
+			return err;
+		}
+	}
+
 	err = kfd_event_create(filp, p, args->event_type,
 				args->auto_reset != 0, args->node_id,
 				&args->event_id, &args->event_trigger_data,
@@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 			&args->event_slot_index);
 
 	return err;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
 }
 
 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
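The new apertures handler above implements a common two-call sizing protocol: a first call with num_of_nodes == 0 only reports how many device nodes exist, and a second call fills a user-allocated array. From user space it would be driven roughly like this (a hedged sketch; struct and ioctl names come from this patch, everything else is illustrative):

```c
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Enumerate per-GPU apertures for this process; caller frees the result. */
static struct kfd_process_device_apertures *
get_apertures(int kfd_fd, uint32_t *n_nodes)
{
	struct kfd_ioctl_get_process_apertures_new_args args = {0};
	struct kfd_process_device_apertures *pa;

	/* Call 1: num_of_nodes == 0 asks only for the node count. */
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args) ||
	    args.num_of_nodes == 0)
		return NULL;

	pa = calloc(args.num_of_nodes, sizeof(*pa));
	if (!pa)
		return NULL;

	/* Call 2: the driver fills at most num_of_nodes entries and
	 * writes back how many it actually filled.
	 */
	args.kfd_process_device_apertures_ptr = (uintptr_t)pa;
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args)) {
		free(pa);
		return NULL;
	}

	*n_nodes = args.num_of_nodes;
	return pa;
}
```

This removes the fixed per-process GPU limit of the older GET_PROCESS_APERTURES ioctl, since user space now sizes the buffer itself.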
@@ -901,7 +1049,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
 
 	mutex_unlock(&p->mutex);
 
-	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
+	    pdd->qpd.vmid != 0)
 		dev->kfd2kgd->set_scratch_backing_va(
 			dev->kgd, args->va_addr, pdd->qpd.vmid);
 
@@ -954,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	return 0;
 }
 
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	struct file *drm_file;
+	int ret;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (debug_largebar) {
+		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
+		return true;
+	}
+
+	if (dev->device_info->needs_iommu_device)
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+			mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		flags);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	return 0;
+
+err_free:
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = peer->kfd2kgd->map_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n",
+			       i, args->n_devices);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd);
+	}
+
+	kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (!peer_pdd) {
+			err = -ENODEV;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = dev->kfd2kgd->unmap_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+	kfree(devices_arr);
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1016,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
 			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
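Taken together, the hunks above add the dGPU memory-management uAPI: a process acquires a GPUVM per device from a DRM render-node fd once, allocates GPU memory, then maps it on one or more devices using a handle that packs the gpu_id and an IDR handle (see MAKE_HANDLE/GET_GPU_ID/GET_IDR_HANDLE). A hedged sketch of how a runtime might chain these calls (names come from this patch; the flag choice and error handling are illustrative):

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Map one VRAM buffer on a single GPU. Returns 0 on success. */
static int map_vram_buffer(int kfd_fd, int drm_fd, uint32_t gpu_id,
			   uint64_t va, uint64_t size, uint64_t *handle)
{
	struct kfd_ioctl_acquire_vm_args acquire = {0};
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {0};
	struct kfd_ioctl_map_memory_to_gpu_args map = {0};
	uint32_t device = gpu_id;

	/* Once per device: hand the DRM file's VM over to KFD. Repeating
	 * the call with the same drm_fd succeeds; a different one gets
	 * -EBUSY, per kfd_ioctl_acquire_vm() above.
	 */
	acquire.gpu_id = gpu_id;
	acquire.drm_fd = drm_fd;	/* an open DRM render-node fd */
	if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire))
		return -1;

	alloc.gpu_id = gpu_id;
	alloc.va_addr = va;
	alloc.size = size;
	alloc.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM;	/* flag from this patch */
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	/* alloc.handle packs gpu_id + IDR handle (MAKE_HANDLE above). */
	map.handle = alloc.handle;
	map.device_ids_array_ptr = (uintptr_t)&device;
	map.n_devices = 1;	/* n_success lets an interrupted call resume */
	if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map)) {
		struct kfd_ioctl_free_memory_of_gpu_args free_args = {0};

		free_args.handle = alloc.handle;
		ioctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
		return -1;
	}

	*handle = alloc.handle;
	return 0;
}
```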
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2bc2816767a7..4f126ef6139b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -22,10 +22,10 @@
 
 #include <linux/pci.h>
 #include <linux/acpi.h>
-#include <linux/amd-iommu.h>
 #include "kfd_crat.h"
 #include "kfd_priv.h"
 #include "kfd_topology.h"
+#include "kfd_iommu.h"
 
 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  * GPU processor ID are expressed with Bit[31]=1.
@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 	crat_table->length = sizeof(struct crat_header);
 
 	status = acpi_get_table("DSDT", 0, &acpi_table);
-	if (status == AE_NOT_FOUND)
+	if (status != AE_OK)
 		pr_warn("DSDT table not found for OEM information\n");
 	else {
 		crat_table->oem_revision = acpi_table->revision;
@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	struct crat_subtype_generic *sub_type_hdr;
 	struct crat_subtype_computeunit *cu;
 	struct kfd_cu_info cu_info;
-	struct amd_iommu_device_info iommu_info;
 	int avail_size = *size;
 	uint32_t total_num_of_cu;
 	int num_of_cache_entries = 0;
 	int cache_mem_filled = 0;
 	int ret = 0;
-	const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
-		AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
-		AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
 	struct kfd_local_mem_info local_mem_info;
 
 	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	/* Check if this node supports IOMMU. During parsing this flag will
 	 * translate to HSA_CAP_ATS_PRESENT
 	 */
-	iommu_info.flags = 0;
-	if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
-		if ((iommu_info.flags & required_iommu_flags) ==
-				required_iommu_flags)
-			cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
-	}
+	if (!kfd_iommu_check_device(kdev))
+		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
 
 	crat_table->length += sub_type_hdr->length;
 	crat_table->total_entries++;
@@ -1125,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 			sub_type_hdr->length);
 
+	if (debug_largebar)
+		local_mem_info.local_mem_size_private = 0;
+
 	if (local_mem_info.local_mem_size_private == 0)
 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
 				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
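This file replaces the open-coded ATS/PRI/PASID query with a call into the new kfd_iommu helpers, so kfd_crat.c no longer includes <linux/amd-iommu.h> directly. kfd_iommu.c itself is not part of this excerpt; reconstructed from the removed lines above, kfd_iommu_check_device() plausibly looks something like this (a sketch, not the committed implementation):

```c
#include <linux/amd-iommu.h>
#include "kfd_priv.h"

int kfd_iommu_check_device(struct kfd_dev *kfd)
{
	struct amd_iommu_device_info iommu_info;
	const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
					 AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
					 AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
	int err;

	/* dGPUs added by this series don't use IOMMUv2 at all */
	if (!kfd->device_info->needs_iommu_device)
		return -ENODEV;

	iommu_info.flags = 0;
	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
	if (err)
		return err;

	/* Same ATS/PRI/PASID test the removed lines performed inline */
	if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
		return -ENODEV;

	return 0;
}
```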
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 3da25f7bda6b..9d4af961c5d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -33,6 +33,7 @@
 #include "kfd_pm4_headers_diq.h"
 #include "kfd_dbgmgr.h"
 #include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
 
 static DEFINE_MUTEX(kfd_dbgmgr_mutex);
 
@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
 	}
 
 	/* get actual type of DBGDevice cpsch or not */
-	if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+	if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
 		type = DBGDEV_TYPE_NODIQ;
 
 	kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a8fa33a08de3..3346699960dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,7 +20,9 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 
+#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
 #include <linux/amd-iommu.h>
+#endif
 #include <linux/bsearch.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
@@ -28,9 +30,12 @@
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_vi.h"
 #include "cwsr_trap_handler_gfx8.asm"
+#include "kfd_iommu.h"
 
 #define MQD_SIZE_ALIGNED 768
 
+static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
+#ifdef KFD_SUPPORT_IOMMU_V2
 static const struct kfd_device_info kaveri_device_info = {
 	.asic_family = CHIP_KAVERI,
 	.max_pasid_bits = 16,
@@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = {
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
 	.supports_cwsr = false,
+	.needs_iommu_device = true,
+	.needs_pci_atomics = false,
 };
 
 static const struct kfd_device_info carrizo_device_info = {
@@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = {
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
 	.supports_cwsr = true,
+	.needs_iommu_device = true,
+	.needs_pci_atomics = false,
 };
+#endif
+
+static const struct kfd_device_info hawaii_device_info = {
+	.asic_family = CHIP_HAWAII,
+	.max_pasid_bits = 16,
+	/* max num of queues for KV.TODO should be a dynamic value */
+	.max_no_of_hqd	= 24,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = false,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info tonga_device_info = {
+	.asic_family = CHIP_TONGA,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd	= 24,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = false,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = true,
+};
+
+static const struct kfd_device_info tonga_vf_device_info = {
+	.asic_family = CHIP_TONGA,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd	= 24,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = false,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info fiji_device_info = {
+	.asic_family = CHIP_FIJI,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd	= 24,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cws
...
