Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu'):

 drivers/gpu/drm/amd/amdgpu/Makefile                |    1
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c         |    1
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h         |   91
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  |   66
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |   67
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   | 1506
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c         |    4
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h         |    2
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c            |    7
 9 files changed, 1741 insertions(+), 4 deletions(-)
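One detail worth noting before the hunks: the new amdgpu_amdkfd_gpuvm.c caps KFD's system (GTT) memory usage at 3/8 of system RAM, computed in amdgpu_amdkfd_gpuvm_init_mem_limits() as (mem >> 1) - (mem >> 3), i.e. 1/2 - 1/8. The stand-alone user-space sketch below only illustrates that arithmetic; the helper name and the 16 GiB input are illustrative and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the limit computed in amdgpu_amdkfd_gpuvm_init_mem_limits():
 * max_system_mem_limit = (mem >> 1) - (mem >> 3) == mem * 3/8
 */
static uint64_t kfd_system_mem_limit(uint64_t total_ram_bytes)
{
	return (total_ram_bytes >> 1) - (total_ram_bytes >> 3);
}

int main(void)
{
	uint64_t ram = 16ULL << 30;	/* example input: 16 GiB of system RAM */

	/* For 16 GiB this prints 6144 MiB, i.e. 3/8 of the total. */
	printf("KFD system memory limit: %llu MiB\n",
	       (unsigned long long)(kfd_system_mem_limit(ram) >> 20));
	return 0;
}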
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 5dd317579e77..8522c2ea1f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -130,6 +130,7 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3ec4bada61d7..5a881107a15a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -58,6 +58,7 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + amdgpu_amdkfd_gpuvm_init_mem_limits(); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index d1bab32622df..05a228d60241 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,15 +28,41 @@ #include <linux/types.h> #include <linux/mmu_context.h> #include <kgd_kfd_interface.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include "amdgpu_sync.h" +#include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; struct amdgpu_device; +struct kfd_bo_va_list { + struct list_head bo_list; + struct amdgpu_bo_va *bo_va; + void *kgd_dev; + bool is_mapped; + uint64_t va; + uint64_t pte_flags; +}; + struct kgd_mem { + struct mutex lock; struct amdgpu_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + struct list_head bo_va_list; + /* protected by amdkfd_process_info.lock */ + struct ttm_validate_buffer validate_list; + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; + uint64_t va; + + uint32_t mapping_flags; + + struct amdkfd_process_info *process_info; + + struct amdgpu_sync sync; + + bool aql_queue; }; /* KFD Memory Eviction */ @@ -52,6 +78,41 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ + struct list_head vm_list_head; + /* List head for all KFD BOs that belong to a KFD process. */ + struct list_head kfd_bo_list; + /* Lock to protect kfd_bo_list */ + struct mutex lock; + + /* Number of VMs */ + unsigned int n_vms; + /* Eviction Fence */ + struct amdgpu_amdkfd_fence *eviction_fence; +}; + +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. 
+ */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; +}; + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); @@ -96,4 +157,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); valid; \ }) +/* GPUVM API */ +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); + +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1362181b10d0..65783d1eddca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -143,6 +143,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
@@ -199,7 +203,20 @@ static const struct kfd2kgd_calls kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -855,3 +872,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid\n"); + return 0; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 5130eac7afdd..1b5bf1353f0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -101,6 +101,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
@@ -159,7 +163,20 @@ static const struct kfd2kgd_calls kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -816,3 +833,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bit in use*/ return hdr->common.ucode_version; } + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return -EINVAL; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 000000000000..e0371a9967b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -0,0 +1,1506 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/list.h> +#include <drm/drmP.h> +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_amdkfd.h" + +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. + */ +#define VI_BO_SIZE_ALIGN (0x8000) + +/* Impose limit on how much memory KFD can use */ +static struct { + uint64_t max_system_mem_limit; + int64_t system_mem_used; + spinlock_t mem_limit_lock; +} kfd_mem_limit; + +/* Struct used for amdgpu_amdkfd_bo_validate */ +struct amdgpu_vm_parser { + uint32_t domain; + bool wait; +}; + +static const char * const domain_bit_to_string[] = { + "CPU", + "GTT", + "VRAM", + "GDS", + "GWS", + "OA" +}; + +#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] + + + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, + struct kgd_mem *mem) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) + if (entry->bo_va->base.vm == avm) + return false; + + return true; +} + +/* Set memory usage limits. Current, limits are + * System (kernel) memory - 3/8th System RAM + */ +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ + struct sysinfo si; + uint64_t mem; + + si_meminfo(&si); + mem = si.totalram - si.totalhigh; + mem *= si.mem_unit; + + spin_lock_init(&kfd_mem_limit.mem_limit_lock); + kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20)); +} + +static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + if (kfd_mem_limit.system_mem_used + (acc_size + size) > + kfd_mem_limit.max_system_mem_limit) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += (acc_size + size); + } +err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; +} + +static void unreserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) + kfd_mem_limit.system_mem_used -= (acc_size + size); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting 
unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + + +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's + * reservation object. + * + * @bo: [IN] Remove eviction fence(s) from this BO + * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * is present in the shared list. + * @ef_list: [OUT] Returns list of eviction fences. These fences are removed + * from BO's reservation object shared list. + * @ef_count: [OUT] Number of fences in ef_list. + * + * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be + * called to restore the eviction fences and to avoid memory leak. This is + * useful for shared BOs. + * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. + */ +static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence *ef, + struct amdgpu_amdkfd_fence ***ef_list, + unsigned int *ef_count) +{ + struct reservation_object_list *fobj; + struct reservation_object *resv; + unsigned int i = 0, j = 0, k = 0, shared_count; + unsigned int count = 0; + struct amdgpu_amdkfd_fence **fence_list; + + if (!ef && !ef_list) + return -EINVAL; + + if (ef_list) { + *ef_list = NULL; + *ef_count = 0; + } + + resv = bo->tbo.resv; + fobj = reservation_object_get_list(resv); + + if (!fobj) + return 0; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + /* Go through all the shared fences in the resevation object. If + * ef is specified and it exists in the list, remove it and reduce the + * count. If ef is not specified, then get the count of eviction fences + * present. + */ + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + if (ef) { + if (f->context == ef->base.context) { + dma_fence_put(f); + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } else if (to_amdgpu_amdkfd_fence(f)) + count++; + } + write_seqcount_end(&resv->seq); + preempt_enable(); + + if (ef || !count) + return 0; + + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) + return -ENOMEM; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + j = 0; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + struct amdgpu_amdkfd_fence *efence; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + efence = to_amdgpu_amdkfd_fence(f); + if (efence) { + fence_list[k++] = efence; + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } + + write_seqcount_end(&resv->seq); + preempt_enable(); + + *ef_list = fence_list; + *ef_count = k; + + return 0; +} + +/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's + * reservation object. + * + * @bo: [IN] Add eviction fences to this BO + * @ef_list: [IN] List of eviction fences to be added + * @ef_count: [IN] Number of fences in ef_list. + * + * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this + * function. 
+ */ +static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence **ef_list, + unsigned int ef_count) +{ + int i; + + if (!ef_list || !ef_count) + return; + + for (i = 0; i < ef_count; i++) { + amdgpu_bo_fence(bo, &ef_list[i]->base, true); + /* Re-adding the fence takes an additional reference. Drop that + * reference. + */ + dma_fence_put(&ef_list[i]->base); + } + + kfree(ef_list); +} + +static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), + "Called with userptr BO")) + return -EINVAL; + + amdgpu_ttm_placement_from_domain(bo, domain); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto validate_fail; + if (wait) { + struct amdgpu_amdkfd_fence **ef_list; + unsigned int ef_count; + + ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, + &ef_count); + if (ret) + goto validate_fail; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); + } + +validate_fail: + return ret; +} + +static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +{ + struct amdgpu_vm_parser *p = param; + + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); +} + +/* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling + * during page table updates can invalidate page directory entries + * again. Page directories are only updated after updating page + * tables. + */ +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) +{ + struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); + return ret; + } + + ret = amdgpu_amdkfd_validate(¶m, pd); + if (ret) { + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } + + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; + + if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + return ret; + } + } + + return 0; +} + +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + +static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) +{ + struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + int ret; + + ret = amdgpu_vm_update_directories(adev, vm); + if (ret) + return ret; + + return sync_vm_fence(adev, sync, vm->last_update); +} + +/* add_bo_to_vm - Add a BO to a VM + * + * Everything that needs to bo done only once when a BO is first added + * to a VM. It can later be mapped and unmapped many times without + * repeating these steps. + * + * 1. 
Allocate and initialize BO VA entry data structure + * 2. Add BO to the VM + * 3. Determine ASIC-specific PTE flags + * 4. Alloc page tables and directories if needed + * 4a. Validate new page tables and directories + */ +static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *avm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) +{ + int ret; + struct kfd_bo_va_list *bo_va_entry; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; + unsigned long bo_size = bo->tbo.mem.size; + + if (!va) { + pr_err("Invalid VA when adding BO to VM\n"); + return -EINVAL; + } + + if (is_aql) + va += bo_size; + + bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); + if (!bo_va_entry) + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, avm); + + /* Add BO to VM internal data structures*/ + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + bo_va_entry->va = va; + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); + bo_va_entry->kgd_dev = (void *)adev; + list_add(&bo_va_entry->bo_list, list_bo_va); + + if (p_bo_va_entry) + *p_bo_va_entry = bo_va_entry; + + /* Allocate new page tables if needed and validate + * them. Clearing of new page tables and validate need to wait + * on move fences. We don't want that to trigger the eviction + * fence, so remove it temporarily. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + + ret = vm_validate_pt_pd_bos(kvm); + if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + return 0; + +err_alloc_pts: + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); +err_vmadd: + kfree(bo_va_entry); + return ret; +} + +static void remove_bo_from_vm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, unsigned long size) +{ + pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", + entry->va, + entry->va + size, entry); + amdgpu_vm_bo_rmv(adev, entry->bo_va); + list_del(&entry->bo_list); + kfree(entry); +} + +static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *entry = &mem->validate_list; + struct amdgpu_bo *bo = mem->bo; + + INIT_LIST_HEAD(&entry->head); + entry->shared = true; + entry->bo = &bo->tbo; + mutex_lock(&process_info->lock); + list_add_tail(&entry->head, &process_info->kfd_bo_list); + mutex_unlock(&process_info->lock); +} + +/* Reserving a BO and its page table BOs must happen atomically to + * avoid deadlocks. Some operations update multiple VMs at once. Track + * all the reservation info in a context structure. Optionally a sync + * object can track VM updates. 
+ */ +struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ + unsigned int n_vms; /* Number of VMs reserved */ + struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ + struct ww_acquire_ctx ticket; /* Reservation ticket */ + struct list_head list, duplicates; /* BO lists */ + struct amdgpu_sync *sync; /* Pointer to sync object */ + bool reserved; /* Whether BOs are reserved */ +}; + +enum bo_vm_match { + BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ + BO_VM_MAPPED, /* Match VMs where a BO is mapped */ + BO_VM_ALL, /* Match all VMs a BO was added to */ +}; + +/** + * reserve_bo_and_vm - reserve a BO and a VM unconditionally. + * @mem: KFD BO structure. + * @vm: the VM to reserve. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + */ +static int reserve_bo_and_vm(struct kgd_mem *mem, + struct amdgpu_vm *vm, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + int ret; + + WARN_ON(!vm); + + ctx->reserved = false; + ctx->n_vms = 1; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else { + pr_err("Failed to reserve buffers in ttm\n"); + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. + * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + * + * Returns 0 for success, negative for failure. 
+ */ +static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + struct amdgpu_vm *vm, enum bo_vm_match map_type, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; + unsigned int i; + int ret; + + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), + GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + } + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + i = 0; + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, + &ctx->vm_pd[i]); + i++; + } + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else + pr_err("Failed to reserve buffers in ttm.\n"); + + if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context + * @ctx: Reservation context to unreserve + * @wait: Optionally wait for a sync object representing pending VM updates + * @intr: Whether the wait is interruptible + * + * Also frees any resources allocated in + * reserve_bo_and_(cond_)vm(s). Returns the status from + * amdgpu_sync_wait. + */ +static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) +{ + int ret = 0; + + if (wait) + ret = amdgpu_sync_wait(ctx->sync, intr); + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); |