From 6545135a5ed2eac064f23bee3a19a81cfffbe573 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 20 Jun 2017 13:39:14 +0200 Subject: drm/qxl: fix __user annotations Drop them from u64 fields, tag local variables correctly instead. While being at it switch the code to use u64_to_user_ptr(). Signed-off-by: Gerd Hoffmann Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170620113916.6967-2-kraxel@redhat.com --- include/uapi/drm/qxl_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h index 7eef42213051..880999d2d863 100644 --- a/include/uapi/drm/qxl_drm.h +++ b/include/uapi/drm/qxl_drm.h @@ -80,8 +80,8 @@ struct drm_qxl_reloc { }; struct drm_qxl_command { - __u64 __user command; /* void* */ - __u64 __user relocs; /* struct drm_qxl_reloc* */ + __u64 command; /* void* */ + __u64 relocs; /* struct drm_qxl_reloc* */ __u32 type; __u32 command_size; __u32 relocs_num; @@ -91,7 +91,7 @@ struct drm_qxl_command { struct drm_qxl_execbuffer { __u32 flags; /* for future use */ __u32 commands_num; - __u64 __user commands; /* struct drm_qxl_command* */ + __u64 commands; /* struct drm_qxl_command* */ }; struct drm_qxl_update_area { -- cgit v1.2.3 From f30994622b2bf8e4fa224237ac65304b27a9cb6a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 11:27:17 -0700 Subject: drm/vc4: Add an ioctl for labeling GEM BOs for summary stats This has proven immensely useful for debugging memory leaks and overallocation (which is a rather serious concern on the platform, given that we typically run at about 256MB of CMA out of up to 1GB total memory, with framebuffers that are about 8MB ecah). The state of the art without this is to dump debug logs from every GL application, guess as to kernel allocations based on bo_stats, and try to merge that all together into a global picture of memory allocation state. With this, you can add a couple of calls to the debug build of the 3D driver and get a pretty detailed view of GPU memory usage from /debug/dri/0/bo_stats (or when we debug print to dmesg on allocation failure). The Mesa side currently labels at the gallium resource level (so you see that a 1920x20 pixmap has been created, presumably for the window system panel), but we could extend that to be even more useful with glObjectLabel() names being sent all the way down to the kernel. (partial) example of sorted debugfs output with Mesa labeling all resources: kernel BO cache: 16392kb BOs (3) tiling shadow 1920x1080: 8160kb BOs (1) resource 1920x1080@32/0: 8160kb BOs (1) scanout resource 1920x1080@32/0: 8100kb BOs (1) kernel: 8100kb BOs (1) v2: Use strndup_user(), use lockdep assertion instead of just a comment, fix an array[-1] reference, extend comment about name freeing. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net Reviewed-by: Chris Wilson --- include/uapi/drm/vc4_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 6ac4c5c014cb..551628e571f9 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -40,6 +40,7 @@ extern "C" { #define DRM_VC4_GET_PARAM 0x07 #define DRM_VC4_SET_TILING 0x08 #define DRM_VC4_GET_TILING 0x09 +#define DRM_VC4_LABEL_BO 0x0a #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -51,6 +52,7 @@ extern "C" { #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) #define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling) #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) +#define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -311,6 +313,15 @@ struct drm_vc4_set_tiling { __u64 modifier; }; +/** + * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes. + */ +struct drm_vc4_label_bo { + __u32 handle; + __u32 len; + __u64 name; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From e6fc3b68558e4c6d8d160b5daf2511b99afa8814 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jul 2017 20:46:38 -0700 Subject: drm: Plumb modifiers through plane init This is the plumbing for supporting fb modifiers on planes. Modifiers have already been introduced to some extent, but this series will extend this to allow querying modifiers per plane. Based on this, the client to enable optimal modifications for framebuffers. This patch simply allows the DRM drivers to initialize their list of supported modifiers upon initializing the plane. v2: A minor addition from Daniel v3: * Updated commit message * s/INVALID/DRM_FORMAT_MOD_INVALID (Liviu) * Remove some excess newlines (Liviu) * Update comment for > 64 modifiers (Liviu) v4: Minor comment adjustments (Liviu) v5: Some new platforms added due to rebase v6: Add some missed plane inits (or maybe they're new - who knows at this point) (Daniel) Signed-off-by: Ben Widawsky Reviewed-by: Daniel Stone (v2) Reviewed-by: Liviu Dudau Signed-off-by: Daniel Stone --- include/uapi/drm/drm_fourcc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 7586c46f68bf..76c9101a7fc6 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -185,6 +185,8 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 /* add more to the end as needed */ +#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + #define fourcc_mod_code(vendor, val) \ ((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL)) @@ -196,6 +198,15 @@ extern "C" { * authoritative source for all of these. */ +/* + * Invalid Modifier + * + * This modifier can be used as a sentinel to terminate the format modifiers + * list, or to initialize a variable with an invalid modifier. It might also be + * used to report an error back to userspace for certain APIs. + */ +#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED) + /* * Linear Layout * -- cgit v1.2.3 From db1689aa61bd1efb5ce9b896e7aa860a85b7f1b6 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jul 2017 20:46:39 -0700 Subject: drm: Create a format/modifier blob Updated blob layout (Rob, Daniel, Kristian, xerpi) v2: * Removed __packed, and alignment (.+) * Fix indent in drm_format_modifier fields (Liviu) * Remove duplicated modifier > 64 check (Liviu) * Change comment about modifier (Liviu) * Remove arguments to blob creation, use plane instead (Liviu) * Fix data types (Ben) * Make the blob part of uapi (Daniel) v3: Remove unused ret field. Change i, and j to unsigned int (Emil) v4: Use plane->modifier_count instead of recounting (Daniel) v5: Rename modifiers to modifiers_property (Ville) Use sizeof(__u32) instead to reflect UAPI nature (Ville) Make BUILD_BUG_ON for blob header size Cc: Rob Clark Cc: Kristian H. Kristensen Signed-off-by: Ben Widawsky Reviewed-by: Daniel Stone (v2) Reviewed-by: Liviu Dudau (v2) Reviewed-by: Emil Velikov (v3) Signed-off-by: Daniel Stone Link: http://patchwork.freedesktop.org/patch/msgid/20170724034641.13369-2-ben@bwidawsk.net --- include/uapi/drm/drm_mode.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 403339f98a92..a2bb7161f020 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -712,6 +712,56 @@ struct drm_mode_atomic { __u64 user_data; }; +struct drm_format_modifier_blob { +#define FORMAT_BLOB_CURRENT 1 + /* Version of this blob format */ + u32 version; + + /* Flags */ + u32 flags; + + /* Number of fourcc formats supported */ + u32 count_formats; + + /* Where in this blob the formats exist (in bytes) */ + u32 formats_offset; + + /* Number of drm_format_modifiers */ + u32 count_modifiers; + + /* Where in this blob the modifiers exist (in bytes) */ + u32 modifiers_offset; + + /* u32 formats[] */ + /* struct drm_format_modifier modifiers[] */ +}; + +struct drm_format_modifier { + /* Bitmask of formats in get_plane format list this info applies to. The + * offset allows a sliding window of which 64 formats (bits). + * + * Some examples: + * In today's world with < 65 formats, and formats 0, and 2 are + * supported + * 0x0000000000000005 + * ^-offset = 0, formats = 5 + * + * If the number formats grew to 128, and formats 98-102 are + * supported with the modifier: + * + * 0x0000003c00000000 0000000000000000 + * ^ + * |__offset = 64, formats = 0x3c00000000 + * + */ + __u64 formats; + __u32 offset; + __u32 pad; + + /* The modifier that applies to the >get_plane format list bitmask. */ + __u64 modifier; +}; + /** * Create a new 'blob' data property, copying length bytes from data pointer, * and returning new blob ID. -- cgit v1.2.3 From f89823c212246d0671cc51e69894a3df1a743aee Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Aug 2017 18:05:50 +0100 Subject: drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Andrzej Datczuk Reviewed-by: Andrzej Datczuk Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com --- include/uapi/drm/i915_drm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ccbd6a2bbe0..ce3833fa1e06 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1467,6 +1471,22 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; +/** + * Structure to upload perf dynamic configuration into the kernel. + */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + __u64 __user mux_regs_ptr; + __u64 __user boolean_regs_ptr; + __u64 __user flex_regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From adb8a5a5eb9f16997f11ecacf25a647134011dd7 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 6 Aug 2017 18:44:23 +0200 Subject: uapi drm/armada_drm.h: use __u32 and __u64 instead of uint32_t and uint64_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are defined in linux/types.h or drm/drm.h. Fixes user space compilation errors like: drm/armada_drm.h:26:2: error: unknown type name ‘uint32_t’ uint32_t handle; ^~~~~~~~ Signed-off-by: Mikko Rapeli Cc: Emil Velikov Cc: Gabriel Laskar Cc: Russell King Cc: Rob Clark Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170806164428.2273-33-mikko.rapeli@iki.fi --- include/uapi/drm/armada_drm.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h index 72e326f9c7de..0cb932416cfe 100644 --- a/include/uapi/drm/armada_drm.h +++ b/include/uapi/drm/armada_drm.h @@ -23,27 +23,27 @@ extern "C" { DRM_##dir(DRM_COMMAND_BASE + DRM_ARMADA_##name, struct drm_armada_##str) struct drm_armada_gem_create { - uint32_t handle; - uint32_t size; + __u32 handle; + __u32 size; }; #define DRM_IOCTL_ARMADA_GEM_CREATE \ ARMADA_IOCTL(IOWR, GEM_CREATE, gem_create) struct drm_armada_gem_mmap { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t addr; + __u32 handle; + __u32 pad; + __u64 offset; + __u64 size; + __u64 addr; }; #define DRM_IOCTL_ARMADA_GEM_MMAP \ ARMADA_IOCTL(IOWR, GEM_MMAP, gem_mmap) struct drm_armada_gem_pwrite { - uint64_t ptr; - uint32_t handle; - uint32_t offset; - uint32_t size; + __u64 ptr; + __u32 handle; + __u32 offset; + __u32 size; }; #define DRM_IOCTL_ARMADA_GEM_PWRITE \ ARMADA_IOCTL(IOW, GEM_PWRITE, gem_pwrite) -- cgit v1.2.3 From 3be8eddd9d58a925b461b582fa5aa422a9c145ee Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 25 Jul 2017 09:27:33 -0700 Subject: drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order. This is useful to allow GL to provide defined results for overlapping glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited window movement without first blitting to a temporary. x11perf -copywinwin100 goes from 1850/sec to 4850/sec. v2: Default to the same behavior as before when the flags aren't passed. (suggested by Boris) Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20170725162733.28007-2-eric@anholt.net Reviewed-by: Boris Brezillon --- include/uapi/drm/vc4_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 551628e571f9..afae87004963 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -155,6 +155,16 @@ struct drm_vc4_submit_cl { __u32 pad:24; #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) +/* By default, the kernel gets to choose the order that the tiles are + * rendered in. If this is set, then the tiles will be rendered in a + * raster order, with the right-to-left vs left-to-right and + * top-to-bottom vs bottom-to-top dictated by + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping + * blits to be implemented using the 3D engine. + */ +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -294,6 +304,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 struct drm_vc4_get_param { __u32 param; -- cgit v1.2.3 From bbfb6ce86c9889a5d434e2e603d41e0ce5b552e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Aug 2017 09:58:12 -0700 Subject: drm/i915: Implement .get_format_info() hook for CCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involved telling the display engine the location of the color control surfae (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. By providing our own format information for the CCS formats, we should be able to make framebuffer_check() do the right thing for the CCS surface as well. Note that we'll return the same format info for both Y and Yf tiled format as that's what happens with the non-CCS Y vs. Yf as well. If desired, we could potentially return a unique pointer for each pixel_format+tiling+ccs combination, in which case we immediately be able to tell if any of that stuff changed by just comparing the pointers. But that does sound a bit wasteful space wise. v2: Drop the 'dev' argument from the hook v3: Include the description of the CCS surface layout v4: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason) v5: Re-drop 'dev', fix commit message, add missing drm_fourcc.h description of CCS layout. (daniels) Cc: Daniel Vetter Cc: Ben Widawsky Cc: Jason Ekstrand Acked-by: Jason Ekstrand Reviewed-by: Ben Widawsky (v3) Reviewed-by: Daniel Stone Signed-off-by: Ville Syrjä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Stone --- include/uapi/drm/drm_fourcc.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 76c9101a7fc6..3ad838d3f93f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -263,6 +263,26 @@ extern "C" { */ #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) +/* + * Intel color control surface (CCS) for render compression + * + * The framebuffer format must be one of the 8:8:8:8 RGB formats. + * The main surface will be plane index 0 and must be Y/Yf-tiled, + * the CCS will be plane index 1. + * + * Each CCS tile matches a 1024x512 pixel area of the main surface. + * To match certain aspects of the 3D hardware the CCS is + * considered to be made up of normal 128Bx32 Y tiles, Thus + * the CCS pitch must be specified in multiples of 128 bytes. + * + * In reality the CCS tile appears to be a 64Bx64 Y tile, composed + * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks. + * But that fact is not relevant unless the memory is accessed + * directly. + */ +#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4) +#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.2.3 From cf6e7bac6357f0ccca51fcb5eb325e724f6b4c95 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Aug 2017 15:57:33 +0100 Subject: drm/i915: Add support for drm syncobjs This commit adds support for waiting on or signaling DRM syncobjs as part of execbuf. It does so by hijacking the currently unused cliprects pointer to instead point to an array of i915_gem_exec_fence structs which containe a DRM syncobj and a flags parameter which specifies whether to wait on it or to signal it. This implementation theoretically allows for both flags to be set in which case it waits on the dma_fence that was in the syncobj and then immediately replaces it with the dma_fence from the current execbuf. v2: - Rebase on new syncobj API v3: - Pull everything out into helpers - Do all allocation in gem_execbuffer2 - Pack the flags in the bottom 2 bits of the drm_syncobj* v4: - Prevent a potential race on syncobj->fence Testcase: igt/gem_exec_fence/syncobj* Signed-off-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/1499289202-25441-1-git-send-email-jason.ekstrand@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170815145733.4562-1-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ce3833fa1e06..6598fb76d2c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -435,6 +435,11 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + typedef struct drm_i915_getparam { __s32 param; /* @@ -816,6 +821,17 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -830,7 +846,11 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -931,7 +951,14 @@ struct drm_i915_gem_execbuffer2 { * element). */ #define I915_EXEC_BATCH_FIRST (1<<18) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Tue, 15 Aug 2017 23:00:20 -0400 Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2 v2: * Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA * Removed size parameter from the ioctl, it was unused * Removed hole in ioctl number space * No more call to write_config_static_mem * Return correct error code from ioctl Signed-off-by: Moses Reuben Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6833426fdef..1b9c5609d523 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args { uint32_t wait_result; /* from KFD */ }; +struct kfd_ioctl_set_scratch_backing_va_args { + uint64_t va_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x11 +#define AMDKFD_COMMAND_END 0x12 #endif -- cgit v1.2.3 From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Aug 2017 23:00:22 -0400 Subject: drm/amdkfd: Implement image tiling mode support v2 v2: Removed hole in ioctl number space Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1b9c5609d523..7b4567bacfc2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args { uint32_t pad; }; +struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ + uint64_t tile_config_ptr; + /* to KFD: pointer to macro tile array */ + uint64_t macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_macro_tile_configs; + + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args { #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x12 +#define AMDKFD_COMMAND_END 0x13 #endif -- cgit v1.2.3 From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2017 16:08:14 +0100 Subject: drm: rename u32 in __u32 in uapi All other fields use __ Cc: Ben Widawsky Fixes: db1689aa61b ("drm: Create a format/modifier blob") Signed-off-by: Lionel Landwerlin Signed-off-by: Daniel Stone Reviewed-by: Chris Wilson Reviewed-by: Emil Velikov Reviewed-by: Ben Widawsky Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com --- include/uapi/drm/drm_mode.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a2bb7161f020..54fc38c3c3f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -715,24 +715,24 @@ struct drm_mode_atomic { struct drm_format_modifier_blob { #define FORMAT_BLOB_CURRENT 1 /* Version of this blob format */ - u32 version; + __u32 version; /* Flags */ - u32 flags; + __u32 flags; /* Number of fourcc formats supported */ - u32 count_formats; + __u32 count_formats; /* Where in this blob the formats exist (in bytes) */ - u32 formats_offset; + __u32 formats_offset; /* Number of drm_format_modifiers */ - u32 count_modifiers; + __u32 count_modifiers; /* Where in this blob the modifiers exist (in bytes) */ - u32 modifiers_offset; + __u32 modifiers_offset; - /* u32 formats[] */ + /* __u32 formats[] */ /* struct drm_format_modifier modifiers[] */ }; -- cgit v1.2.3 From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 5 Jul 2017 01:37:55 -0700 Subject: drm/vmwgfx: Prepare to support fence fd Make the fields and flags available. Signed-off-by: Sinclair Yeh Reviewed-by: Deepak Singh Rawat Reviewed-by: Thomas Hellstrom --- include/uapi/drm/vmwgfx_drm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d9dfde9aa757..0bc784f5e0db 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -345,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; -- cgit v1.2.3 From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 25 Aug 2017 10:52:22 -0700 Subject: drm/syncobj: add sync obj wait interface. (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. (-ETIME is suggested by ickle, I can feel a shed painting) v7: talked to Daniel/Arnd, use ktime and ns everywhere. v8: be more careful in the timeout calculations use uint32_t for counter variables so we don't overflow graciously handle -ENOINT being returned from dma_fence_wait_timeout Signed-off-by: Dave Airlie Reviewed-by: Jason Ekstrand Acked-by: Christian König Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 101593ab10ac..0757c1a41821 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,17 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +851,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:25 -0700 Subject: drm/syncobj: Add a CREATE_SIGNALED flag This requests that the driver create the sync object such that it already has a signaled dma_fence attached. Because we don't need anything in particular (just something signaled), we use a dummy null fence. This is useful for Vulkan which has a similar flag that can be passed to vkCreateFence. Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 0757c1a41821..ade7f68d32b5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; -- cgit v1.2.3 From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:24 -0700 Subject: drm/syncobj: Allow wait for submit and signal behavior (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand Cc: Dave Airlie Cc: Chris Wilson Cc: Christian König Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ade7f68d32b5..4c746597225e 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -720,6 +720,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- cgit v1.2.3 From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:27 -0700 Subject: drm/syncobj: Add a reset ioctl (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just resets the dma_fence to NULL so it looks like it's never been signaled. This will be useful once we add the new wait API for allowing wait on "submit and signal" behavior. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Reviewed-by: Christian König (v1) Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4c746597225e..b037fdf9e43b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -731,6 +731,12 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -854,6 +860,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3 From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:28 -0700 Subject: drm/syncobj: Add a signal ioctl (v3) This IOCTL provides a mechanism for userspace to trigger a sync object directly. There are other ways that userspace can trigger a syncobj such as submitting a dummy batch somewhere or hanging on to a triggered sync_file and doing an import. This just provides an easy way to manually trigger the sync object without weird hacks. The motivation for this IOCTL is Vulkan fences. Vulkan lets you create a fence already in the signaled state so that you can wait on it immediatly without stalling. We could also handle this with a new create flag to ask the driver to create a syncobj that is already signaled but the IOCTL seemed a bit cleaner and more generic. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index b037fdf9e43b..97677cd6964d 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -861,6 +861,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit v1.2.3