From 7b19bba58f773b397b6c5a76681c8dc40467d9ff Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Sat, 9 Feb 2019 07:52:55 +0100 Subject: drm/amd/display: Use vrr friendly pageflip throttling in DC. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In VRR mode, keep track of the vblank count of the last completed pageflip in amdgpu_crtc->last_flip_vblank, as recorded in the pageflip completion handler after each completed flip. Use that count to prevent mmio programming a new pageflip within the same vblank in which the last pageflip completed, iow. to throttle pageflips to at most one flip per video frame, while at the same time allowing to request a flip not only before start of vblank, but also anywhere within vblank. The old logic did the same, and made sense for regular fixed refresh rate flipping, but in vrr mode it prevents requesting a flip anywhere inside the possibly huge vblank, thereby reducing framerate in vrr mode instead of improving it, by delaying a slightly delayed flip requests up to a maximum vblank duration + 1 scanout duration. This would limit VRR usefulness to only help applications with a very high GPU demand, which can submit the flip request before start of vblank, but then have to wait long for fences to complete. With this method a flip can be both requested and - after fences have completed - executed, ie. it doesn't matter if the request (amdgpu_dm_do_flip()) gets delayed until deep into the extended vblank due to cpu execution delays. This also allows clients which want to regulate framerate within the vrr range a much more fine-grained control of flip timing, a feature that might be useful for video playback, and is very useful for neuroscience/vision research applications. In regular non-VRR mode, retain the old flip submission behavior. This to keep flip scheduling for fullscreen X11/GLX OpenGL clients intact, if they use the GLX_OML_sync_control extensions glXSwapBufferMscOML(, ..., target_msc,...) function with a specific target_msc target vblank count. glXSwapBuffersMscOML() or DRI3/Present PresentPixmap() will not flip at the proper target_msc for a non-zero target_msc if VRR mode is active with this patch. They'd often flip one frame too early. However, this limitation should not matter much in VRR mode, as scheduling based on vblank counts is pretty futile/unusable under variable refresh duration anyway, so no real extra harm is done. According to some testing already done with this patch by Nicholas on top of my tests, IGT tests didn't report any problems. If fixes stuttering and flickering when flipping at rates below the minimum vrr refresh rate. Fixes: bb47de736661 ("drm/amdgpu: Set FreeSync state using drm VRR properties") Signed-off-by: Mario Kleiner Cc: Cc: Harry Wentland Cc: Alex Deucher Cc: Michel Dänzer Tested-by: Bruno Filipe Reviewed-by: Nicholas Kazlauskas Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 698fd8a2f775..889e443eeee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -406,6 +406,7 @@ struct amdgpu_crtc { struct amdgpu_flip_work *pflip_works; enum amdgpu_flip_status pflip_status; int deferred_flip_completion; + u64 last_flip_vblank; /* pll sharing */ struct amdgpu_atom_ss ss; bool ss_enabled; -- cgit v1.2.3 From f8b18cf4c33452ee17936c63a0dea5278c4ac050 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 10 Feb 2019 21:50:53 -0500 Subject: drm/amdgpu: use BACO on vega12 if platform supports it Use BACO for reset of the platform supports it. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ed89a101f73f..36158db6a82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -461,6 +461,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA12: soc15_asic_get_baco_capability(adev, &baco_reset); break; default: -- cgit v1.2.3 From 57731a07795ae80790c6ae7d8e7001cdbd6d14c0 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 23 Jul 2018 17:45:46 -0400 Subject: drm/amdgpu: use HMM callback to replace mmu notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in DRM_AMDGPU_USERPTR Kconfig. It supports both KFD userptr and gfx userptr paths. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 160 ++++++++++++++------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +- 4 files changed, 72 insertions(+), 98 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 9221e5489069..960a63355705 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - select MMU_NOTIFIER + select HMM_MIRROR help - This option selects CONFIG_MMU_NOTIFIER if it isn't already - selected to enabled full userptr support. + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it + isn't already selected to enabled full userptr support. config DRM_AMDGPU_GART_DEBUGFS bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 466da5954a68..851001ced5e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -172,7 +172,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 3e6823fdd939..f000704f984d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include #include @@ -58,14 +58,12 @@ * * @adev: amdgpu device pointer * @mm: process address space - * @mn: MMU notifier structure * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion + * @mirror: HMM mirror function support * * Data for each amdgpu device and process address space. */ @@ -73,7 +71,6 @@ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; - struct mmu_notifier mn; enum amdgpu_mn_type type; /* only used on destruction */ @@ -85,8 +82,9 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; struct rb_root_cached objects; - struct mutex read_lock; - atomic_t recursion; + + /* HMM mirror */ + struct hmm_mirror mirror; }; /** @@ -103,7 +101,7 @@ struct amdgpu_mn_node { }; /** - * amdgpu_mn_destroy - destroy the MMU notifier + * amdgpu_mn_destroy - destroy the HMM mirror * * @work: previously sheduled work item * @@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work) } up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + + hmm_mirror_unregister(&amn->mirror); kfree(amn); } /** - * amdgpu_mn_release - callback to notify about mm destruction + * amdgpu_hmm_mirror_release - callback to notify about mm destruction * - * @mn: our notifier - * @mm: the mm this callback is about + * @mirror: the HMM mirror (mm) this callback is about * - * Shedule a work item to lazy destroy our notifier. + * Shedule a work item to lazy destroy HMM mirror. */ -static void amdgpu_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); INIT_WORK(&amn->work, amdgpu_mn_destroy); schedule_work(&amn->work); } - /** * amdgpu_mn_lock - take the write side lock for this notifier * @@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { if (blockable) - mutex_lock(&amn->read_lock); - else if (!mutex_trylock(&amn->read_lock)) + down_read(&amn->lock); + else if (!down_read_trylock(&amn->lock)) return -EAGAIN; - if (atomic_inc_return(&amn->recursion) == 1) - down_read_non_owner(&amn->lock); - mutex_unlock(&amn->read_lock); - return 0; } @@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) */ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&amn->recursion) == 0) - up_read_non_owner(&amn->lock); + up_read(&amn->lock); } /** @@ -235,143 +226,128 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change * - * @mn: our notifier - * @range: mmu notifier context + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; /* TODO we should be able to split locking for interval tree and * amdgpu_mn_invalidate_node */ - if (amdgpu_mn_read_lock(amn, range->blockable)) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; - if (!range->blockable) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); - amdgpu_mn_invalidate_node(node, range->start, end); + amdgpu_mn_invalidate_node(node, start, end); } + amdgpu_mn_read_unlock(amn); + return 0; } /** - * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * We temporarily evict all BOs between start and end. This * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; - if (amdgpu_mn_read_lock(amn, range->blockable)) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; - if (!range->blockable) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); list_for_each_entry(bo, &node->bos, mn_list) { struct kgd_mem *mem = bo->kfd_bo; if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - range->start, - end)) - amdgpu_amdkfd_evict_userptr(mem, range->mm); + start, end)) + amdgpu_amdkfd_evict_userptr(mem, amn->mm); } } + amdgpu_mn_read_unlock(amn); + return 0; } -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. */ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - - amdgpu_mn_read_unlock(amn); -} +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { [AMDGPU_MN_TYPE_GFX] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, + .release = amdgpu_hmm_mirror_release }, [AMDGPU_MN_TYPE_HSA] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, + .release = amdgpu_hmm_mirror_release }, }; -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - /** - * amdgpu_mn_get - create notifier context + * amdgpu_mn_get - create HMM mirror context * * @adev: amdgpu device pointer * @type: type of MMU notifier context * - * Creates a notifier context for current->mm. + * Creates a HMM mirror context for current->mm. */ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type) @@ -401,12 +377,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, amn->mm = mm; init_rwsem(&amn->lock); amn->type = type; - amn->mn.ops = &amdgpu_mn_ops[type]; amn->objects = RB_ROOT_CACHED; - mutex_init(&amn->read_lock); - atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&amn->mn, mm); + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; + r = hmm_mirror_register(&amn->mirror, mm); if (r) goto free_amn; @@ -432,7 +406,7 @@ free_amn: * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an MMU notifier for the given BO at the specified address. + * Registers an HMM mirror for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -488,11 +462,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) } /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates * * @bo: amdgpu buffer object * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of HMM mirror updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..0a51fd00021c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -34,7 +34,7 @@ enum amdgpu_mn_type { AMDGPU_MN_TYPE_HSA, }; -#if defined(CONFIG_MMU_NOTIFIER) +#if defined(CONFIG_HMM_MIRROR) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, -- cgit v1.2.3 From 915d3eecfa23693bac9e54cdacf84fb4efdcc5c4 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 13 Dec 2018 15:35:28 -0500 Subject: drm/amdgpu: replace get_user_pages with HMM mirror helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 ++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 138 +++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 178 +++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- 9 files changed, 182 insertions(+), 278 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..0e1711a75b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1921dec3df7a..31e3953dcb6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -499,28 +499,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -533,11 +517,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -596,7 +576,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -660,7 +639,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -1275,15 +1253,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1757,25 +1726,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; + bo->tbo.ttm->pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1784,12 +1739,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. */ } - - /* Mark the BO as valid unless it was invalidated - * again concurrently - */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; } return 0; @@ -1819,7 +1768,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1843,7 +1793,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1859,10 +1809,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1871,16 +1819,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. Put this BO back on - * the userptr_valid_list. If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); + /* Stop HMM track the userptr update. We dont check the return + * value for concurrent CPU page table update because we will + * reschedule the restore worker if process_info->evicted_bos + * is updated. + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1910,8 +1858,15 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52a5e4fdc95b..545302d0955f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -540,14 +539,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -578,7 +577,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -614,79 +612,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + /* Get userptr backing pages. If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. */ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -755,17 +719,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -1224,7 +1178,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; @@ -1234,15 +1187,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ + /* No memory allocation is allowed while holding the mn lock. + * p->mn is hold until amdgpu_cs_submit is finished and fence is added + * to BOs. + */ amdgpu_mn_lock(p->mn); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. + */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -EAGAIN; + goto error_abort; } job->owner = p->filp; @@ -1338,6 +1299,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..555285e329ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index f000704f984d..41ccee49a224 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -220,8 +220,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } @@ -502,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ + if (range) { + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->pfns = NULL; + INIT_LIST_HEAD(&range->list); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 0a51fd00021c..4803e216e174 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,9 +25,10 @@ #define __AMDGPU_MN_H__ /* - * MMU Notifier + * HMM mirror */ struct amdgpu_mn; +struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 73e71e61dc99..1e675048f790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -705,98 +706,102 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; + struct hmm_range range; }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct hmm_range *range = >t->range; + int r = 0, i; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; + amdgpu_hmm_init_range(range); down_read(&mm->mmap_sem); - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + range->vma = find_vma(mm, gtt->userptr); + if (!range_in_vma(range->vma, gtt->userptr, end)) + r = -EFAULT; + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + range->vma->vm_file) + r = -EPERM; + if (r) + goto out; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; - } + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), + GFP_KERNEL); + if (range->pfns == NULL) { + r = -ENOMEM; + goto out; } + range->start = gtt->userptr; + range->end = end; - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + range->pfns[0] = range->flags[HMM_PFN_VALID]; + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : range->flags[HMM_PFN_WRITE]; + for (i = 1; i < ttm->num_pages; i++) + range->pfns[i] = range->pfns[0]; - guptask.task = current; - spin_lock(>t->guptasklock); - list_add(&guptask.list, >t->guptasks); - spin_unlock(>t->guptasklock); + /* This may trigger page table update */ + r = hmm_vma_fault(range, true); + if (r) + goto out_free_pfns; - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); + up_read(&mm->mmap_sem); - spin_lock(>t->guptasklock); - list_del(&guptask.list); - spin_unlock(>t->guptasklock); + for (i = 0; i < ttm->num_pages; i++) + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); - if (r < 0) - goto release_pages; + return 0; - pinned += r; +out_free_pfns: + kvfree(range->pfns); + range->pfns = NULL; +out: + up_read(&mm->mmap_sem); + return r; +} - } while (pinned < ttm->num_pages); +/** + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated + * + * Returns: true if pages are still valid + */ +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + bool r = false; - up_read(&mm->mmap_sem); - return 0; + if (!gtt || !gtt->userptr) + return false; + + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); + if (gtt->range.pfns) { + r = hmm_vma_range_done(>t->range); + kvfree(gtt->range.pfns); + gtt->range.pfns = NULL; + } -release_pages: - release_pages(pages, pinned); - up_read(&mm->mmap_sem); return r; } @@ -809,16 +814,10 @@ release_pages: */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); - + for (i = 0; i < ttm->num_pages; ++i) ttm->pages[i] = pages ? pages[i] : NULL; - } } /** @@ -903,10 +902,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + + if (gtt->range.pfns && + ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1256,11 +1256,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(>t->guptasklock); - INIT_LIST_HEAD(>t->guptasks); - atomic_set(>t->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1289,7 +1284,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1302,48 +1296,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(>t->guptasklock); - list_for_each_entry(entry, >t->guptasks, list) { - if (entry->task == current) { - spin_unlock(>t->guptasklock); - return false; - } - } - spin_unlock(>t->guptasklock); - - atomic_inc(>t->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(>t->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? - */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..8988c87fff9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, -- cgit v1.2.3 From 6b8f7e3dee7883084932bbdfce471a2960c6db5d Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 21 Feb 2019 12:39:21 -0500 Subject: drm/amdgpu: fix HMM config dependency issue Only select HMM_MIRROR will get kernel config dependency warnings if CONFIG_HMM is missing in the config. Add depends on HMM will solve the issue. Add conditional compilation to fix compilation errors if HMM_MIRROR is not enabled as HMM config is not enabled. Remove unused function amdgpu_ttm_tt_mark_user_pages. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 ++++++----------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 13 ++++++++++++- 3 files changed, 19 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 960a63355705..67553effb649 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,6 +26,7 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU + depends on ARCH_HAS_HMM select HMM_MIRROR help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1e675048f790..fee73ff00153 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -712,7 +712,9 @@ struct amdgpu_ttm_tt { uint64_t userptr; struct task_struct *usertask; uint32_t userflags; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) struct hmm_range range; +#endif }; /** @@ -722,6 +724,7 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -804,6 +807,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) return r; } +#endif /** * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. @@ -820,29 +824,6 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) ttm->pages[i] = pages ? pages[i] : NULL; } -/** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty - * - * Called while unpinning userptr pages - */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; - - if (!page) - continue; - - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } -} - /** * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages * @@ -904,9 +885,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) if (gtt->range.pfns && ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 8988c87fff9d..c2b7669004ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -101,10 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + return false; +} +#endif + void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); -- cgit v1.2.3 From aa8e2435b3d4878a0db8917805e86f61001fe174 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 7 Feb 2019 09:59:28 +0100 Subject: drm/ttm: Define a single DRM_FILE_PAGE_OFFSET constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most TTM drivers define the constant DRM_FILE_PAGE_OFFSET of the same value. The only exception is vboxvideo, which is being converted to the new offset by this patch. Unifying the constants in a single place simplifies the driver code. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Hans de Goede Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fee73ff00153..d5349c631ffc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -51,8 +51,6 @@ #include "amdgpu_sdma.h" #include "bif/bif_4_1_d.h" -#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) - static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, -- cgit v1.2.3 From 7d1500f9fbfc78e6a34be3f8f4b5ba13e8227199 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 7 Feb 2019 09:59:29 +0100 Subject: drm/ttm: Remove file_page_offset parameter from ttm_bo_device_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter file_page_offset is a constant shared by all drivers. Just replace it with the constant itself. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Hans de Goede Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5349c631ffc..d5140d0085d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1618,7 +1618,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, - DRM_FILE_PAGE_OFFSET, adev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); -- cgit v1.2.3 From bed2dd84212eb0151ae3bfc39cdaa25a951749c1 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 7 Feb 2019 09:59:30 +0100 Subject: drm/ttm: Quick-test mmap offset in ttm_bo_mmap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A BO's address has to be at least the minimum offset. Sharing this test in ttm_bo_mmap() removes code from drivers. A full buffer-address validation is still done within drm_vma_offset_lockup_locked(). Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Hans de Goede Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5140d0085d4..cd0ccfbbcb84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1823,14 +1823,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) { - struct drm_file *file_priv; - struct amdgpu_device *adev; - - if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) - return -EINVAL; + struct drm_file *file_priv = filp->private_data; + struct amdgpu_device *adev = file_priv->minor->dev->dev_private; - file_priv = filp->private_data; - adev = file_priv->minor->dev->dev_private; if (adev == NULL) return -EINVAL; -- cgit v1.2.3 From 6490bd764c9c63df1a65ae8de290c8fc36f53a21 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 25 Feb 2019 12:56:53 -0500 Subject: drm/amdgpu: Eliminate the set_pde_pte function pointer in amdgpu_gmc_funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the gmc_*_set_pde_pte functions are the same across different ASICs, so we can eliminate the set_pde_pte function pointer and instead use a generic function. Signed-off-by: Yong Zhao Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 27 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 10 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 15 ------ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 26 ---------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 61 +++++++---------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 86 +++++++++++---------------------- 6 files changed, 77 insertions(+), 148 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d73367cab4f3..5a32a0d2ad31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -79,6 +79,33 @@ uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) return pd_addr; } +/** + * amdgpu_gmc_set_pte_pde - update the page tables using CPU + * + * @adev: amdgpu_device pointer + * @cpu_pt_addr: cpu address of the page table + * @gpu_page_idx: entry in the page table to update + * @addr: dst addr to write into pte/pde + * @flags: access flags + * + * Update the page tables using CPU. + */ +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) +{ + void __iomem *ptr = (void *)cpu_pt_addr; + uint64_t value; + + /* + * The following is for PTE only. GART does not have PDEs. + */ + value = addr & 0x0000FFFFFFFFF000ULL; + value |= flags; + writeq(value, ptr + (gpu_page_idx * 8)); + return 0; +} + /** * amdgpu_gmc_agp_addr - return the address in the AGP address space * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 81e6070d255b..d6c10b4d68c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -71,12 +71,6 @@ struct amdgpu_gmc_funcs { /* Change the VMID -> PASID mapping */ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid, unsigned pasid); - /* write pte/pde updates using the cpu */ - int (*set_pte_pde)(struct amdgpu_device *adev, - void *cpu_pt_addr, /* cpu addr of page table */ - uint32_t gpu_page_idx, /* pte/pde to update */ - uint64_t addr, /* addr to write into pte/pde */ - uint64_t flags); /* access flags */ /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); /* set pte flags based per asic */ @@ -155,7 +149,6 @@ struct amdgpu_gmc { #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) @@ -189,6 +182,9 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags); +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags); uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo); uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo); void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9fc3296592fe..8a812e11a50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -383,20 +383,6 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) { @@ -1169,7 +1155,6 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, - .set_pte_pde = gmc_v6_0_set_pte_pde, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 761dcfb2fec0..f9f5bef64feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -460,31 +460,6 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v7_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags - * - * Update the page tables using the CPU. - */ -static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) { @@ -1376,7 +1351,6 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, - .set_pte_pde = gmc_v7_0_set_pte_pde, .set_prt = gmc_v7_0_set_prt, .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, .get_vm_pde = gmc_v7_0_get_vm_pde diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 34440672f938..34d547cae135 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -662,50 +662,26 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v8_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags +/* + * PTE format on VI: + * 63:40 reserved + * 39:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 reserved + * 2 snooped + * 1 system + * 0 valid * - * Update the page tables using the CPU. + * PDE format on VI: + * 63:59 block fragment size + * 58:40 reserved + * 39:1 physical base address of PTE + * bits 5:1 must be 0. + * 0 valid */ -static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - /* - * PTE format on VI: - * 63:40 reserved - * 39:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 reserved - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on VI: - * 63:59 block fragment size - * 58:40 reserved - * 39:1 physical base address of PTE - * bits 5:1 must be 0. - * 0 valid - */ - value = addr & 0x000000FFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) @@ -1743,7 +1719,6 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, - .set_pte_pde = gmc_v8_0_set_pte_pde, .set_prt = gmc_v8_0_set_prt, .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, .get_vm_pde = gmc_v8_0_get_vm_pde diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 600259b4e291..d91274037c9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -466,64 +466,37 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, reg, pasid); } -/** - * gmc_v9_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the pa