Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie: "Here's the main drm pull request for 4.7, it's been a busy one, and I've been a bit more distracted in real life this merge window. Lots more ARM drivers, not sure if it'll ever end. I think I've at least one more coming the next merge window. But changes are all over the place, support for AMD Polaris GPUs is in here, some missing GM108 support for nouveau (found in some Lenovos), a bunch of MST and skylake fixes. I've also noticed a few fixes from Arnd in my inbox, that I'll try and get in asap, but I didn't think they should hold this up. New drivers: - Hisilicon kirin display driver - Mediatek MT8173 display driver - ARC PGU - bitstreamer on Synopsys ARC SDP boards - Allwinner A13 initial RGB output driver - Analogix driver for DisplayPort IP found in exynos and rockchip DRM Core: - UAPI headers fixes and C++ safety - DRM connector reference counting - DisplayID mode parsing for Dell 5K monitors - Removal of struct_mutex from drivers - Connector registration cleanups - MST robustness fixes - MAINTAINERS updates - Lockless GEM object freeing - Generic fbdev deferred IO support panel: - Support for a bunch of new panels i915: - VBT refactoring - PLL computation cleanups - DSI support for BXT - Color manager support - More atomic patches - GEM improvements - GuC fw loading fixes - DP detection fixes - SKL GPU hang fixes - Lots of BXT fixes radeon/amdgpu: - Initial Polaris support - GPUVM/Scheduler/Clock/Power improvements - ASYNC pageflip support - New mesa feature support nouveau: - GM108 support - Power sensor support improvements - GR init + ucode fixes. 
- Use GPU provided topology information vmwgfx: - Add host messaging support gma500: - Some cleanups and fixes atmel: - Bridge support - Async atomic commit support fsl-dcu: - Timing controller for LCD support - Pixel clock polarity support rcar-du: - Misc fixes exynos: - Pipeline clock support - Exynos5433 SoC support - HW trigger mode support - export HDMI_PHY clock - DECON5433 fixes - Use generic prime functions - use DMA mapping APIs rockchip: - Lots of little fixes vc4: - Render node support - Gamma ramp support - DPI output support msm: - Mostly cleanups and fixes - Conversion to generic struct fence etnaviv: - Fix for prime buffer handling - Allow hangcheck to be coalesced with other wakeups tegra: - Gamma table size fix" * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1050 commits) drm/edid: add displayid detailed 1 timings to the modelist. (v1.1) drm/edid: move displayid validation to it's own function. drm/displayid: Iterate over all DisplayID blocks drm/edid: move displayid tiled block parsing into separate function. drm: Nuke ->vblank_disable_allowed drm/vmwgfx: Report vmwgfx version to vmware.log drm/vmwgfx: Add VMWare host messaging capability drm/vmwgfx: Kill some lockdep warnings drm/nouveau/gr/gf100-: fix race condition in fecs/gpccs ucode drm/nouveau/core: recognise GM108 chipsets drm/nouveau/gr/gm107-: fix touching non-existent ppcs in attrib cb setup drm/nouveau/gr/gk104-: share implementation of ppc exception init drm/nouveau/gr/gk104-: move rop_active_fbps init to nonctx drm/nouveau/bios/pll: check BIT table version before trying to parse it drm/nouveau/bios/pll: prevent oops when limits table can't be parsed drm/nouveau/volt/gk104: round up in gk104_volt_set drm/nouveau/fb/gm200: setup mmu debug buffer registers at init() drm/nouveau/fb/gk20a,gm20b: setup mmu debug buffer registers at init() drm/nouveau/fb/gf100-: allocate mmu debug buffers drm/nouveau/fb: allow chipset-specific actions for oneinit() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-23 11:48:48 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-23 11:48:48 -0700
commit: 1d6da87a3241deb13d073c4125d19ed0e5a0c62c (patch)
tree: 42b7a9842618dad2afe7db9709cc6217ced03120 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent: 1f40c49570eb01436786a9b5845c4469a9a1f362 (diff)
parent: a39ed680bddb1ead592e22ed812c7e47286bfc03 (diff)
download: linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.tar.gz
linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.tar.bz2
linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.zip
1 files changed, 202 insertions, 140 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b6c011b83641..ea708cb94862 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -166,74 +166,109 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &vm->ids[ring->idx];
 	struct fence *updates = sync->last_vm_update;
+	struct amdgpu_vm_id *id;
+	unsigned i = ring->idx;
 	int r;
 
 	mutex_lock(&adev->vm_manager.lock);
 
-	/* check if the id is still valid */
-	if (id->mgr_id) {
-		struct fence *flushed = id->flushed_updates;
-		bool is_later;
-		long owner;
+	/* Check if we can use a VMID already assigned to this VM */
+	do {
+		struct fence *flushed;
 
-		if (!flushed)
-			is_later = true;
-		else if (!updates)
-			is_later = false;
-		else
-			is_later = fence_is_later(updates, flushed);
+		id = vm->ids[i++];
+		if (i == AMDGPU_MAX_RINGS)
+			i = 0;
+
+		/* Check all the prerequisites to using this VMID */
+		if (!id)
+			continue;
+
+		if (atomic64_read(&id->owner) != vm->client_id)
+			continue;
+
+		if (pd_addr != id->pd_gpu_addr)
+			continue;
 
-		owner = atomic_long_read(&id->mgr_id->owner);
-		if (!is_later && owner == (long)id &&
-		    pd_addr == id->pd_gpu_addr) {
+		if (id->last_user != ring &&
+		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+			continue;
+
+		flushed  = id->flushed_updates;
+		if (updates && (!flushed || fence_is_later(updates, flushed)))
+			continue;
 
+		/* Good we can use this VMID */
+		if (id->last_user == ring) {
 			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->mgr_id->active);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
+					      id->first);
+			if (r)
+				goto error;
+		}
+
+		/* And remember this submission as user of the VMID */
+		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+		if (r)
+			goto error;
+
+		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+		vm->ids[ring->idx] = id;
+
+		*vm_id = id - adev->vm_manager.ids;
+		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
 
-			fence_put(id->mgr_id->active);
-			id->mgr_id->active = fence_get(fence);
+	} while (i != ring->idx);
 
-			list_move_tail(&id->mgr_id->list,
-				       &adev->vm_manager.ids_lru);
+	id = list_first_entry(&adev->vm_manager.ids_lru,
+			      struct amdgpu_vm_id,
+			      list);
 
-			*vm_id = id->mgr_id - adev->vm_manager.ids;
-			*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-			trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
-						*vm_pd_addr);
+	if (!amdgpu_sync_is_idle(&id->active)) {
+		struct list_head *head = &adev->vm_manager.ids_lru;
+		struct amdgpu_vm_id *tmp;
 
-			mutex_unlock(&adev->vm_manager.lock);
-			return 0;
+		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
+					 list) {
+			if (amdgpu_sync_is_idle(&id->active)) {
+				list_move(&id->list, head);
+				head = &id->list;
+			}
 		}
+		id = list_first_entry(&adev->vm_manager.ids_lru,
+				      struct amdgpu_vm_id,
+				      list);
 	}
 
-	id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_manager_id,
-				      list);
+	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	if (r)
+		goto error;
 
-	r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
-	if (!r) {
-		fence_put(id->mgr_id->active);
-		id->mgr_id->active = fence_get(fence);
+	fence_put(id->first);
+	id->first = fence_get(fence);
 
-		fence_put(id->flushed_updates);
-		id->flushed_updates = fence_get(updates);
+	fence_put(id->last_flush);
+	id->last_flush = NULL;
 
-		id->pd_gpu_addr = pd_addr;
+	fence_put(id->flushed_updates);
+	id->flushed_updates = fence_get(updates);
 
-		list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
-		atomic_long_set(&id->mgr_id->owner, (long)id);
+	id->pd_gpu_addr = pd_addr;
 
-		*vm_id = id->mgr_id - adev->vm_manager.ids;
-		*vm_pd_addr = pd_addr;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
-	}
+	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+	id->last_user = ring;
+	atomic64_set(&id->owner, vm->client_id);
+	vm->ids[ring->idx] = id;
+
+	*vm_id = id - adev->vm_manager.ids;
+	*vm_pd_addr = pd_addr;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 
+error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }
@@ -247,43 +282,62 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  *
  * Emit a VM flush when it is necessary.
  */
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     unsigned vm_id, uint64_t pd_addr,
-		     uint32_t gds_base, uint32_t gds_size,
-		     uint32_t gws_base, uint32_t gws_size,
-		     uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring,
+		    unsigned vm_id, uint64_t pd_addr,
+		    uint32_t gds_base, uint32_t gds_size,
+		    uint32_t gws_base, uint32_t gws_size,
+		    uint32_t oa_base, uint32_t oa_size)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		mgr_id->gds_base != gds_base ||
-		mgr_id->gds_size != gds_size ||
-		mgr_id->gws_base != gws_base ||
-		mgr_id->gws_size != gws_size ||
-		mgr_id->oa_base != oa_base ||
-		mgr_id->oa_size != oa_size);
+		id->gds_base != gds_base ||
+		id->gds_size != gds_size ||
+		id->gws_base != gws_base ||
+		id->gws_size != gws_size ||
+		id->oa_base != oa_base ||
+		id->oa_size != oa_size);
+	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
+		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush &&
+	    pd_addr != AMDGPU_VM_NO_FLUSH) {
+		struct fence *fence;
+
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
 		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+
+		mutex_lock(&adev->vm_manager.lock);
+		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
+			r = amdgpu_fence_emit(ring, &fence);
+			if (r) {
+				mutex_unlock(&adev->vm_manager.lock);
+				return r;
+			}
+			fence_put(id->last_flush);
+			id->last_flush = fence;
+		}
+		mutex_unlock(&adev->vm_manager.lock);
 	}
 
 	if (gds_switch_needed) {
-		mgr_id->gds_base = gds_base;
-		mgr_id->gds_size = gds_size;
-		mgr_id->gws_base = gws_base;
-		mgr_id->gws_size = gws_size;
-		mgr_id->oa_base = oa_base;
-		mgr_id->oa_size = oa_size;
+		id->gds_base = gds_base;
+		id->gds_size = gds_size;
+		id->gws_base = gws_base;
+		id->gws_size = gws_size;
+		id->oa_base = oa_base;
+		id->oa_size = oa_size;
 		amdgpu_ring_emit_gds_switch(ring, vm_id,
 					    gds_base, gds_size,
 					    gws_base, gws_size,
 					    oa_base, oa_size);
 	}
+
+	return 0;
 }
 
 /**
@@ -296,14 +350,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
 {
-	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
-
-	mgr_id->gds_base = 0;
-	mgr_id->gds_size = 0;
-	mgr_id->gws_base = 0;
-	mgr_id->gws_size = 0;
-	mgr_id->oa_base = 0;
-	mgr_id->oa_size = 0;
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+
+	id->gds_base = 0;
+	id->gds_size = 0;
+	id->gws_base = 0;
+	id->gws_size = 0;
+	id->oa_base = 0;
+	id->oa_size = 0;
 }
 
 /**
@@ -335,8 +389,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw access flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
@@ -348,8 +402,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
-				   struct amdgpu_gart *gtt,
-				   uint32_t gtt_flags,
+				   uint64_t src,
+				   dma_addr_t *pages_addr,
 				   struct amdgpu_ib *ib,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
@@ -357,12 +411,11 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
-		uint64_t src = gtt->table_addr + (addr >> 12) * 8;
+	if (src) {
+		src += (addr >> 12) * 8;
 		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
 
-	} else if (gtt) {
-		dma_addr_t *pages_addr = gtt->pages_addr;
+	} else if (pages_addr) {
 		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
 				    count, incr, flags);
 
@@ -412,7 +465,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
+	amdgpu_vm_update_pages(adev, 0, NULL, &job->ibs[0], addr, 0, entries,
 			       0, 0);
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
@@ -522,7 +575,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, NULL, 0, ib,
+				amdgpu_vm_update_pages(adev, 0, NULL, ib,
 						       last_pde, last_pt,
 						       count, incr,
 						       AMDGPU_PTE_VALID);
@@ -537,7 +590,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, last_pde, last_pt,
 				       count, incr, AMDGPU_PTE_VALID);
 
 	if (ib->length_dw != 0) {
@@ -570,8 +623,8 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw mapping flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @ib: IB for the update
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
@@ -579,8 +632,8 @@ error_free:
  * @flags: hw mapping flags
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
-				struct amdgpu_gart *gtt,
-				uint32_t gtt_flags,
+				uint64_t src,
+				dma_addr_t *pages_addr,
 				struct amdgpu_ib *ib,
 				uint64_t pe_start, uint64_t pe_end,
 				uint64_t addr, uint32_t flags)
@@ -618,10 +671,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 		return;
 
 	/* system pages are non continuously */
-	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
+	if (src || pages_addr || !(flags & AMDGPU_PTE_VALID) ||
+	    (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
+		amdgpu_vm_update_pages(adev, src, pages_addr, ib, pe_start,
 				       addr, count, AMDGPU_GPU_PAGE_SIZE,
 				       flags);
 		return;
@@ -630,21 +684,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, pe_start, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
+	amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_start, addr, count,
 			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_end, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
@@ -653,8 +707,8 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw mapping flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
@@ -664,8 +718,8 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * Update the page tables in the range @start - @end.
  */
 static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				  struct amdgpu_gart *gtt,
-				  uint32_t gtt_flags,
+				  uint64_t src,
+				  dma_addr_t *pages_addr,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_ib *ib,
 				  uint64_t start, uint64_t end,
@@ -693,7 +747,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 
 		if (last_pe_end != pe_start) {
 
-			amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+			amdgpu_vm_frag_ptes(adev, src, pages_addr, ib,
 					    last_pe_start, last_pe_end,
 					    last_dst, flags);
 
@@ -708,17 +762,16 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-			    last_pe_start, last_pe_end,
-			    last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, src, pages_addr, ib, last_pe_start,
+			    last_pe_end, last_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: flags as they are used for GTT
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of mapped range
  * @last: last mapped entry
@@ -730,8 +783,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct amdgpu_gart *gtt,
-				       uint32_t gtt_flags,
+				       uint64_t src,
+				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint32_t flags, uint64_t addr,
@@ -762,11 +815,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (gtt) {
+	} else if (pages_addr) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -796,8 +849,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
-			      addr, flags);
+	amdgpu_vm_update_ptes(adev, src, pages_addr, vm, ib, start,
+			      last + 1, addr, flags);
 
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > ndw);
@@ -823,11 +876,12 @@ error_free:
  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
+ * @gtt_flags: flags as they are used for GTT
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
  * @addr: addr to set the area to
- * @gtt_flags: flags as they are used for GTT
+ * @flags: HW flags for the mapping
  * @fence: optional resulting fence
  *
  * Split the mapping into smaller chunks so that each update fits
@@ -835,16 +889,16 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
-				      struct amdgpu_gart *gtt,
 				      uint32_t gtt_flags,
+				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
-				      uint64_t addr, struct fence **fence)
+				      uint32_t flags, uint64_t addr,
+				      struct fence **fence)
 {
 	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
 
-	uint64_t start = mapping->it.start;
-	uint32_t flags = gtt_flags;
+	uint64_t src = 0, start = mapping->it.start;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -857,10 +911,15 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 
 	trace_amdgpu_vm_bo_update(mapping);
 
+	if (pages_addr) {
+		if (flags == gtt_flags)
+			src = adev->gart.table_addr + (addr >> 12) * 8;
+		addr = 0;
+	}
 	addr += mapping->offset;
 
-	if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
-		return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+	if (!pages_addr || src)
+		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
 
@@ -868,7 +927,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;
 
 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -899,16 +958,20 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm *vm = bo_va->vm;
 	struct amdgpu_bo_va_mapping *mapping;
-	struct amdgpu_gart *gtt = NULL;
-	uint32_t flags;
+	dma_addr_t *pages_addr = NULL;
+	uint32_t gtt_flags, flags;
 	uint64_t addr;
 	int r;
 
 	if (mem) {
+		struct ttm_dma_tt *ttm;
+
 		addr = (u64)mem->start << PAGE_SHIFT;
 		switch (mem->mem_type) {
 		case TTM_PL_TT:
-			gtt = &bo_va->bo->adev->gart;
+			ttm = container_of(bo_va->bo->tbo.ttm, struct
+					   ttm_dma_tt, ttm);
+			pages_addr = ttm->dma_address;
 			break;
 
 		case TTM_PL_VRAM:
@@ -923,6 +986,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
+	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->vm_status))
@@ -930,7 +994,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
+		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+					       mapping, flags, addr,
 					       &bo_va->last_pt_update);
 		if (r)
 			return r;
@@ -976,8 +1041,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
-					       0, NULL);
+		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
+					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;
@@ -1320,11 +1385,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amd_sched_rq *rq;
 	int i, r;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		vm->ids[i].mgr_id = NULL;
-		vm->ids[i].flushed_updates = NULL;
-	}
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		vm->ids[i] = NULL;
 	vm->va = RB_ROOT;
+	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
@@ -1416,15 +1480,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_vm_id *id = &vm->ids[i];
-
-		if (id->mgr_id)
-			atomic_long_cmpxchg(&id->mgr_id->owner,
-					    (long)id, 0);
-		fence_put(id->flushed_updates);
-	}
 }
 
 /**
@@ -1443,11 +1498,13 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
 		amdgpu_vm_reset_id(adev, i);
+		amdgpu_sync_create(&adev->vm_manager.ids[i].active);
 		list_add_tail(&adev->vm_manager.ids[i].list,
 			      &adev->vm_manager.ids_lru);
 	}
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
+	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
 
 /**
@@ -1461,6 +1518,11 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
 	unsigned i;
 
-	for (i = 0; i < AMDGPU_NUM_VM; ++i)
-		fence_put(adev->vm_manager.ids[i].active);
+	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
+		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
+
+		fence_put(adev->vm_manager.ids[i].first);
+		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
+		fence_put(id->flushed_updates);
+	}
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-23 11:48:48 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-23 11:48:48 -0700
commit	1d6da87a3241deb13d073c4125d19ed0e5a0c62c (patch)
tree	42b7a9842618dad2afe7db9709cc6217ced03120 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent	1f40c49570eb01436786a9b5845c4469a9a1f362 (diff)
parent	a39ed680bddb1ead592e22ed812c7e47286bfc03 (diff)
download	linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.tar.gz linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.tar.bz2 linux-1d6da87a3241deb13d073c4125d19ed0e5a0c62c.zip