// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2020-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt
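/* Return the GPU address of a vram page through the direct vram mapping:
 * the page's offset within vram plus the start of the TTM VRAM domain.
 */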
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
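/*
 * svm_migrate_gart_map - map npages system pages into GART window 0
 *
 * Builds a single IB containing one sdma copy command followed by the GART
 * PTEs for the system pages, so the copy engine itself writes the PTEs into
 * the GART table. The GART update and the later buffer copies run on the
 * same ring and are therefore ordered. The GPU address of window 0 is
 * returned in @gart_addr.
 */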
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
uint64_t src_addr, dst_addr;
uint64_t pte_flags;
void *cpu_addr;
int r;
/* use gart window 0 */
*gart_addr = adev->gmc.gart_start;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8;
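	/* IB layout: one copy command (num_dw dwords), then npages 8-byte GART PTEs */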
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED,
&job);
if (r)
return r;
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
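	/*
	 * Emit a copy of the PTEs staged in the IB (right after the command
	 * dwords) into the GART table, updating window 0 when the job runs.
	 */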
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
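	/* After padding, the copy command must still fit in the reserved num_dw dwords */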
WARN_ON(job->ibs[0].length_dw > num_dw);
pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
pte_flags |= AMDGPU_PTE_WRITEABLE;
pte_flags |= adev->gart.gart_pte_flags;
cpu_addr = &job->ibs[0].ptr[num_dw];
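	/* Fill the staged PTE area with mappings for the system pages */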
amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
fence = amdgpu_job_submit(job);
dma_fence_put(fence);
return r;
}
/**
 * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: DMA addresses of the system memory pages to copy
 * @vram: vram page addresses to copy to or from, depending on @direction
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR, FROM_RAM_TO_VRAM or FROM_VRAM_TO_RAM
 * @mfence: output, fence of the last sdma copy, signaled when the copy is done
 *
 * The ram side is addressed through contiguous GART table entries that map the
 * (possibly scattered) system pages; the vram side uses the direct mapping of
 * vram pages, which therefore must be npages contiguous pages.
 * The GART update and the sdma copy use the same buffer funcs ring, so all
 * sdma operations are serialized. The copy is split into transfers of at most
 * GTT_MAX_PAGES pages each; the fence of the last transfer is returned in
 * @mfence so the caller can wait for the whole copy to finish.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
uint64_t *vram, uint64_t npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint64_t gart_s, gart_d;
struct dma_fence *next;
uint64_t size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
while (npages) {
size = min(GTT_MAX_PAGES, npages);
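		/*
		 * Map the system pages through GART window 0 so the copy engine
		 * sees one contiguous range; vram pages are addressed directly.
		 */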
if (direction == FROM_VRAM_TO_RAM) {
gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
} else if (direction == FROM_RAM_TO_VRAM) {
r = svm_migrate_gart_map(ring, size, sys, &gart_s,
KFD_IOCTL_SVM_FLAG_GPU_RO);
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}
if (r) {
dev_err(adev->dev, "fail %d create gart mapping\n", r);
goto out_unlock;
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, false, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
}
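		/* Copies are serialized on the ring; only the last fence needs to be kept */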
dma_fence_put(*mfence);
*mfence = next;
npages -= size;
if (npages) {
sys += size;
vram += size;
}
}
out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}