diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 69 |
1 files changed, 51 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e4d4e55c08ad..11672bfe4fad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -172,6 +172,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0); size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; uint64_t vram_size = 0; @@ -196,7 +198,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return -EINVAL; vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); - if (adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { system_mem_needed = size; ttm_mem_needed = size; } @@ -220,7 +222,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) { + vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) { ret = -ENOMEM; goto release; } @@ -232,7 +234,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, "adev reference can't be null when vram is used"); if (adev && xcp_id >= 0) { adev->kfd.vram_used[xcp_id] += vram_needed; - adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ? + adev->kfd.vram_used_aligned[xcp_id] += + (adev->flags & AMD_IS_APU) ? vram_needed : ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } @@ -260,7 +263,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, if (adev) { adev->kfd.vram_used[xcp_id] -= size; - if (adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { adev->kfd.vram_used_aligned[xcp_id] -= size; kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; @@ -414,6 +417,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, "Called with userptr BO")) return -EINVAL; + /* bo has been pinned, not need validate it */ + if (bo->tbo.pin_count) + return 0; + amdgpu_bo_placement_from_domain(bo, domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -889,7 +896,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev && !adev->gmc.is_app_apu) && + if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ -1087,7 +1094,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { - pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + if (ret == -EAGAIN) + pr_debug("Failed to get user pages, try again\n"); + else + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; } @@ -1188,7 +1198,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -1470,13 +1481,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) if (unlikely(ret)) return ret; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) { + /* + * If bo is not contiguous on VRAM, move to system memory first to ensure + * we can get contiguous VRAM space after evicting other BOs. + */ + if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { + struct ttm_operation_ctx ctx = { true, false }; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (unlikely(ret)) { + pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret); + goto out; + } + } + } + ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); if (ret) pr_err("Error in Pinning BO to domain: %d\n", domain); amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); +out: amdgpu_bo_unreserve(bo); - return ret; } @@ -1647,6 +1675,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0); ssize_t available; uint64_t vram_available, system_mem_available, ttm_mem_available; @@ -1654,9 +1684,10 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) - adev->kfd.vram_used_aligned[xcp_id] - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt; + - reserved_for_pt + - reserved_for_ras; - if (adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { system_mem_available = no_system_mem_limit ? kfd_mem_limit.max_system_mem_limit : kfd_mem_limit.max_system_mem_limit - @@ -1704,7 +1735,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - if (adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1712,6 +1743,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; + + /* For contiguous VRAM allocation */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS) + alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; } xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? 0 : fpriv->xcp_id; @@ -1951,7 +1986,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (size) { if (!is_imported && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || - (adev->gmc.is_app_apu && + ((adev->flags & AMD_IS_APU) && mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else @@ -2373,8 +2408,9 @@ static int import_obj_create(struct amdgpu_device *adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && + !(adev->flags & AMD_IS_APU) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; @@ -2709,7 +2745,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i /* keep mem without hmm range at userptr_inval_list */ if (!mem->range) - continue; + continue; /* Only check mem with hmm range associated */ valid = amdgpu_ttm_tt_get_user_pages_done( @@ -2954,9 +2990,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * if (!attachment->is_mapped) continue; - if (attachment->bo_va->base.bo->tbo.pin_count) - continue; - kfd_mem_dmaunmap_attachment(mem, attachment); ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { |