summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2019-11-28 12:39:50 +1000
committerDave Airlie <airlied@redhat.com>2019-11-28 14:33:01 +1000
commit0a6cad5df541108cfd3fbd79eef48eb824c89bdc (patch)
tree767d41bf88ec067b85fe3d87a190ef08e59a1ce7
parentacc61b8929365e63a3e8c8c8913177795aa45594 (diff)
parent9ca7d19ff8ba6207bccab46536814fe4839df80a (diff)
downloadlinux-0a6cad5df541108cfd3fbd79eef48eb824c89bdc.tar.gz
linux-0a6cad5df541108cfd3fbd79eef48eb824c89bdc.tar.bz2
linux-0a6cad5df541108cfd3fbd79eef48eb824c89bdc.zip
Merge branch 'vmwgfx-coherent' of git://people.freedesktop.org/~thomash/linux into drm-next
Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation is signaled. Paravirtual drivers that otherwise require explicit synchronization need to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. Provide mm helpers needed for this and that also allow for huge pmd- and pud entries (patch 1-3), and the associated vmwgfx code (patch 4-7). The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Thomas Hellstrom <thomas_os@shipmail.org> Link: https://patchwork.freedesktop.org/patch/msgid/20191113131639.4653-1-thomas_os@shipmail.org
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c174
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig1
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile2
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h233
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h44
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c488
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c193
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h13
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c395
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c15
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c74
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h16
-rw-r--r--include/drm/ttm/ttm_bo_api.h14
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/mm.h13
-rw-r--r--include/linux/pagewalk.h9
-rw-r--r--include/uapi/drm/vmwgfx_drm.h4
-rw-r--r--mm/Kconfig3
-rw-r--r--mm/Makefile1
-rw-r--r--mm/mapping_dirty_helpers.c315
-rw-r--r--mm/pagewalk.c99
23 files changed, 1996 insertions, 123 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 4b34a278d65b..11863fbdd5d6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
-#define TTM_BO_VM_NUM_PREFAULT 16
-
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mmap_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ * 0 on success and the bo was reserved.
+ * VM_FAULT_RETRY if blocking wait.
+ * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
- vma->vm_private_data;
- struct ttm_bo_device *bdev = bo->bdev;
- unsigned long page_offset;
- unsigned long page_last;
- unsigned long pfn;
- struct ttm_tt *ttm = NULL;
- struct page *page;
- int err;
- int i;
- vm_fault_t ret = VM_FAULT_NOPAGE;
- unsigned long address = vmf->address;
- struct ttm_mem_type_manager *man =
- &bdev->man[bo->mem.mem_type];
- struct vm_area_struct cvma;
-
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
+ return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvise settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ * VM_FAULT_NOPAGE on success or pending signal
+ * VM_FAULT_SIGBUS on unspecified error
+ * VM_FAULT_OOM on out-of-memory
+ * VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+ pgoff_t num_prefault)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vm_area_struct cvma = *vma;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ struct ttm_bo_device *bdev = bo->bdev;
+ unsigned long page_offset;
+ unsigned long page_last;
+ unsigned long pfn;
+ struct ttm_tt *ttm = NULL;
+ struct page *page;
+ int err;
+ pgoff_t i;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ unsigned long address = vmf->address;
+ struct ttm_mem_type_manager *man =
+ &bdev->man[bo->mem.mem_type];
+
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
- if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
- }
+ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+ return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
+ return VM_FAULT_NOPAGE;
default:
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
- if (unlikely(ret != 0)) {
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* The BO has already been unreserved. */
- return ret;
- }
-
- goto out_unlock;
- }
+ if (unlikely(ret != 0))
+ return ret;
err = ttm_mem_io_lock(man, true);
- if (unlikely(err != 0)) {
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
- }
+ if (unlikely(err != 0))
+ return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
- /*
- * Make a local vma copy to modify the page_prot member
- * and vm_flags if necessary. The vma parameter is protected
- * by mmap_sem in write mode.
- */
- cvma = *vma;
- cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
-
- if (bo->mem.bus.is_iomem) {
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
- } else {
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+ if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
-
- /* Allocate all page at once, most common usage */
- if (ttm_tt_populate(ttm, &ctx)) {
+ if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
+ } else {
+ /* Iomem should not be marked encrypted */
+ cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
- for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+ for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
- /* Iomem should not be marked encrypted */
- cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
-out_unlock:
+ return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ pgprot_t prot;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
dma_resv_unlock(bo->base.resv);
+
return ret;
}
-static void ttm_bo_vm_open(struct vm_area_struct *vma)
+void ttm_bo_vm_open(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo =
- (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
+EXPORT_SYMBOL(ttm_bo_vm_open);
-static void ttm_bo_vm_close(struct vm_area_struct *vma)
+void ttm_bo_vm_close(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
+EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 6b28a326f8bb..15acdf2a7c0f 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
+ select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 8841bd30e1e5..c877a21a0739 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
- vmwgfx_validation.o \
+ vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
-
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+ size_t bytes;
+ size_t img_stride;
+ size_t row_stride;
+ struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+ const struct svga3d_surface_desc *desc;
+ struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+ size_t mip_chain_bytes;
+ size_t sheet_bytes;
+ u32 num_mip_levels;
+ u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+ u32 sub_resource;
+ u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+ u32 mip_level, u32 layer)
+{
+ return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+ SVGA3dSurfaceFormat format,
+ u32 num_mip_levels,
+ u32 num_layers,
+ u32 num_samples,
+ struct svga3dsurface_cache *cache)
+{
+ const struct svga3d_surface_desc *desc;
+ u32 i;
+
+ memset(cache, 0, sizeof(*cache));
+ cache->desc = desc = svga3dsurface_get_desc(format);
+ cache->num_mip_levels = num_mip_levels;
+ cache->num_layers = num_layers;
+ for (i = 0; i < cache->num_mip_levels; i++) {
+ struct svga3dsurface_mip *mip = &cache->mip[i];
+
+ mip->size = svga3dsurface_get_mip_size(*size, i);
+ mip->bytes = svga3dsurface_get_image_buffer_size
+ (desc, &mip->size, 0);
+ mip->row_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+ desc->bytes_per_block * num_samples;
+ if (!mip->row_stride)
+ goto invalid_dim;
+
+ mip->img_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.height,
+ desc->block_size.height) *
+ mip->row_stride;
+ if (!mip->img_stride)
+ goto invalid_dim;
+
+ cache->mip_chain_bytes += mip->bytes;
+ }
+ cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
+ if (!cache->sheet_bytes)
+ goto invalid_dim;
+
+ return 0;
+
+invalid_dim:
+ VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
+ return -EINVAL;
+}
+
+/**
+ * svga3dsurface_get_loc - Get a surface location from an offset into the
+ * backing store
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ * @offset: Offset into the surface backing store.
+ */
+static inline void
+svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc,
+ size_t offset)
+{
+ const struct svga3dsurface_mip *mip = &cache->mip[0];
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 layer;
+ int i;
+
+ if (offset >= cache->sheet_bytes)
+ offset %= cache->sheet_bytes;
+
+ layer = offset / cache->mip_chain_bytes;
+ offset -= layer * cache->mip_chain_bytes;
+ for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
+ if (mip->bytes > offset)
+ break;
+ offset -= mip->bytes;
+ }
+
+ loc->sub_resource = svga3dsurface_subres(cache, i, layer);
+ loc->z = offset / mip->img_stride;
+ offset -= loc->z * mip->img_stride;
+ loc->z *= desc->block_size.depth;
+ loc->y = offset / mip->row_stride;
+ offset -= loc->y * mip->row_stride;
+ loc->y *= desc->block_size.height;
+ loc->x = offset / desc->bytes_per_block;
+ loc->x *= desc->block_size.width;
+}
+
+/**
+ * svga3dsurface_inc_loc - Clamp increment a surface location with one block
+ * size
+ * in each dimension.
+ * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
+ *
+ * When computing the size of a range as size = end - start, the range does not
+ * include the end element. However a location representing the last byte
+ * of a touched region in the backing store *is* included in the range.
+ * This function modifies such a location to match the end definition
+ * given as start + size which is the one used in a SVGA3dBox.
+ */
+static inline void
+svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc)
+{
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 mip = loc->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+
+ loc->sub_resource++;
+ loc->x += desc->block_size.width;
+ if (loc->x > size->width)
+ loc->x = size->width;
+ loc->y += desc->block_size.height;
+ if (loc->y > size->height)
+ loc->y = size->height;
+ loc->z += desc->block_size.depth;
+ if (loc->z > size->depth)
+ loc->z = size->depth;
+}
+
+/**
+ * svga3dsurface_min_loc - The start location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ */
+static inline void
+svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ loc->sub_resource = sub_resource;
+ loc->x = loc->y = loc->z = 0;
+}
+
+/**
+ * svga3dsurface_max_loc - The end location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ *
+ * Following the end definition given in svga3dsurface_inc_loc(),
+ * compute the end location of a surface subresource.
+ */
+static inline void
+svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ const struct drm_vmw_size *size;
+ u32 mip;
+
+ loc->sub_resource = sub_resource + 1;
+ mip = sub_resource % cache->num_mip_levels;
+ size = &cache->mip[mip].size;
+ loc->x = size->width;
+ loc->y = size->height;
+ loc->z = size->depth;
+}
+
#endif /* _SVGA3D_SURFACEDEFS_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 74016a08d118..8b71bf6b58ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
+ WARN_ON(vmw_bo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
+ struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
- vmw_bo_unmap(&vmw_user_bo->vbo);
+ WARN_ON(vbo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
+ vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
- INIT_LIST_HEAD(&vmw_bo->res_list);
+ vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..a31e726d6d71 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180704"
+#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
+ * @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
- struct list_head res_list;
+ struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
+ struct vmw_bo_dirty *dirty;
};
/**
@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
- * Protecte by resource reserved.
+ * Protected by resource reserved.
+ * @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset
@@ -157,29 +160,32 @@ struct vmw_res_func;
* pin-count greater than zero. It is not on the resource LRU lists and its
* backup buffer is pinned. Hence it can't be evicted.
* @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
* @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
* @binding_head: List head for the context binding list. Protected by
* the @dev_priv::binding_mutex
* @res_free: The resource destructor.
* @hw_destroy: Callback to destroy the resource on the device, as part of
* resource destruction.
*/
+struct vmw_resource_dirty;
struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
- bool res_dirty;
- bool backup_dirty;
+ u32 res_dirty : 1;
+ u32 backup_dirty : 1;
+ u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+ struct rb_node mob_node;
struct list_head lru_head;
- struct list_head mob_head;
struct list_head binding_head;
+ struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
};
@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
extern struct vmw_resource *
vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying);
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
void vmw_resource_mob_attach(struct vmw_resource *res);
void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault);
/**
* vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res);
*/
static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
{
- return !list_empty(&res->mob_head);
+ return !RB_EMPTY_NODE(&res->mob_node);
}
/**
@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log);
#define VMW_DEBUG_USER(fmt, ...) \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index ff86d49dc5e8..934ad7c0c342 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
offsetof(typeof(*cmd), sid));
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
VMW_RES_DIRTY_NONE, user_surface_converter,
&cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index 000000000000..f07aa857587c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/**************************************************************************
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+ VMW_BO_DIRTY_PAGETABLE,
+ VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method,
+ * similarly a certain percentage of dirty pages trigger a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers
+ * @ref_count: Reference count for this structure
+ * @bitmap_size: The size of the bitmap in bits. Typically equal to the
+ * number of pages in the bo.
+ * @size: The accounting size for this struct.
+ * @bitmap: A bitmap where each bit represents a page. A set bit means a
+ * dirty page.
+ */
+struct vmw_bo_dirty {