summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/arch/x86/include/asm/amd-ibs.h132
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/include/uapi/asm-generic/unistd.h14
-rw-r--r--tools/include/uapi/drm/drm.h14
-rw-r--r--tools/include/uapi/drm/i915_drm.h498
-rw-r--r--tools/include/uapi/linux/fs.h1
-rw-r--r--tools/include/uapi/linux/in.h42
-rw-r--r--tools/include/uapi/linux/kvm.h11
-rw-r--r--tools/include/uapi/linux/mount.h3
-rw-r--r--tools/include/uapi/linux/prctl.h12
-rw-r--r--tools/include/uapi/sound/asound.h1
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Makefile.config47
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl12
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl14
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl3
-rwxr-xr-xtools/perf/check-headers.sh1
-rwxr-xr-xtools/perf/scripts/python/bin/stackcollapse-report2
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h9
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh2
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/amd-sample-raw.c289
-rw-r--r--tools/perf/util/bpf-event.c8
-rw-r--r--tools/perf/util/dso.c10
-rw-r--r--tools/perf/util/env.c78
-rw-r--r--tools/perf/util/env.h5
-rw-r--r--tools/perf/util/evsel.c20
-rw-r--r--tools/perf/util/evsel.h3
-rw-r--r--tools/perf/util/parse-events-hybrid.c18
-rw-r--r--tools/perf/util/parse-events.c27
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c5
-rw-r--r--tools/perf/util/sample-raw.c8
-rw-r--r--tools/perf/util/sample-raw.h6
-rw-r--r--tools/perf/util/symbol.c20
36 files changed, 1147 insertions, 175 deletions
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
new file mode 100644
index 000000000000..174e7d83fcbd
--- /dev/null
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * From PPR Vol 1 for AMD Family 19h Model 01h B1
+ * 55898 Rev 0.35 - Feb 5, 2021
+ */
+
+#include "msr-index.h"
+
+/*
+ * IBS Hardware MSRs
+ */
+
+/* MSR 0xc0011030: IBS Fetch Control */
+union ibs_fetch_ctl {
+ __u64 val;
+ struct {
+ __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
+ fetch_cnt:16, /* 16-31: instruction fetch count */
+ fetch_lat:16, /* 32-47: instruction fetch latency */
+ fetch_en:1, /* 48: instruction fetch enable */
+ fetch_val:1, /* 49: instruction fetch valid */
+ fetch_comp:1, /* 50: instruction fetch complete */
+ ic_miss:1, /* 51: i-cache miss */
+ phy_addr_valid:1,/* 52: physical address valid */
+ l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
+ * (needs IbsPhyAddrValid) */
+ l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
+ l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
+ rand_en:1, /* 57: random tagging enable */
+ fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
+ * (needs IbsFetchComp) */
+ reserved:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011033: IBS Execution Control */
+union ibs_op_ctl {
+ __u64 val;
+ struct {
+ __u64 opmaxcnt:16, /* 0-15: periodic op max. count */
+ reserved0:1, /* 16: reserved */
+ op_en:1, /* 17: op sampling enable */
+ op_val:1, /* 18: op sample valid */
+ cnt_ctl:1, /* 19: periodic op counter control */
+ opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
+ reserved1:5, /* 27-31: reserved */
+ opcurcnt:27, /* 32-58: periodic op counter current count */
+ reserved2:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011035: IBS Op Data 1 */
+union ibs_op_data {
+ __u64 val;
+ struct {
+ __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
+ tag_to_ret_ctr:16, /* 16-31: op tag to retire count */
+ reserved1:2, /* 32-33: reserved */
+ op_return:1, /* 34: return op */
+ op_brn_taken:1, /* 35: taken branch op */
+ op_brn_misp:1, /* 36: mispredicted branch op */
+ op_brn_ret:1, /* 37: branch op retired */
+ op_rip_invalid:1, /* 38: RIP is invalid */
+ op_brn_fuse:1, /* 39: fused branch op */
+ op_microcode:1, /* 40: microcode op */
+ reserved2:23; /* 41-63: reserved */
+ };
+};
+
+/* MSR 0xc0011036: IBS Op Data 2 */
+union ibs_op_data2 {
+ __u64 val;
+ struct {
+ __u64 data_src:3, /* 0-2: data source */
+ reserved0:1, /* 3: reserved */
+ rmt_node:1, /* 4: destination node */
+ cache_hit_st:1, /* 5: cache hit state */
+ reserved1:57; /* 6-62: reserved (57 bits; NOTE(review): 58 would reach bit 63) */
+ };
+};
+
+/* MSR 0xc0011037: IBS Op Data 3 */
+union ibs_op_data3 {
+ __u64 val;
+ struct {
+ __u64 ld_op:1, /* 0: load op */
+ st_op:1, /* 1: store op */
+ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
+ dc_l2tlb_miss:1, /* 3: data cache L2TLB miss */
+ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
+ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
+ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
+ dc_miss:1, /* 7: data cache miss */
+ dc_mis_acc:1, /* 8: misaligned access */
+ reserved:4, /* 9-12: reserved */
+ dc_wc_mem_acc:1, /* 13: write combining memory access */
+ dc_uc_mem_acc:1, /* 14: uncacheable memory access */
+ dc_locked_op:1, /* 15: locked operation */
+ dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
+ dc_lin_addr_valid:1, /* 17: data cache linear address valid */
+ dc_phy_addr_valid:1, /* 18: data cache physical address valid */
+ dc_l2_tlb_hit_1g:1, /* 19: data cache L2TLB hit in 1GB page */
+ l2_miss:1, /* 20: L2 cache miss */
+ sw_pf:1, /* 21: software prefetch */
+ op_mem_width:4, /* 22-25: load/store size in bytes */
+ op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
+ dc_miss_lat:16, /* 32-47: data cache miss latency */
+ tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
+ };
+};
+
+/* MSR 0xc001103c: IBS Fetch Control Extended */
+union ic_ibs_extd_ctl {
+ __u64 val;
+ struct {
+ __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
+ reserved:48; /* 16-63: reserved */
+ };
+};
+
+/*
+ * IBS driver related
+ */
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index a6c327f8ad9e..2ef1f6513c68 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
#define KVM_GUESTDBG_INJECT_DB 0x00040000
#define KVM_GUESTDBG_INJECT_BP 0x00080000
+#define KVM_GUESTDBG_BLOCKIRQ 0x00100000
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index a9d6fcd95f42..1c5fb86d455a 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
#define __NR_remap_file_pages 234
__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
#define __NR_mbind 235
-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
#define __NR_get_mempolicy 236
-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
#define __NR_set_mempolicy 237
-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
#define __NR_migrate_pages 238
-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
#define __NR_move_pages 239
-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
#endif
#define __NR_rt_tgsigqueueinfo 240
@@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
#define __NR_memfd_secret 447
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#endif
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#undef __NR_syscalls
-#define __NR_syscalls 448
+#define __NR_syscalls 449
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index d043752a74cf..3b810b53ba8b 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -635,8 +635,8 @@ struct drm_gem_open {
/**
* DRM_CAP_VBLANK_HIGH_CRTC
*
- * If set to 1, the kernel supports specifying a CRTC index in the high bits of
- * &drm_wait_vblank_request.type.
+ * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
+ * in the high bits of &drm_wait_vblank_request.type.
*
* Starting kernel version 2.6.39, this capability is always set to 1.
*/
@@ -1050,6 +1050,16 @@ extern "C" {
#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+/**
+ * DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
+ *
+ * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * Warning: removing a framebuffer currently in-use on an enabled plane will
+ * disable that plane. The CRTC the plane is linked to may also be disabled
+ * (depending on driver capabilities).
+ */
#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index c2c7759b7d2e..bde5860b3686 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
+/*
+ * Indicates the 2k user priority levels are statically mapped into 3 buckets as
+ * follows:
+ *
+ * -1k to -1 Low priority
+ * 0 Normal priority
+ * 1 to 1k Highest priority
+ */
+#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
#define I915_PARAM_HUC_STATUS 42
@@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
+#define I915_PARAM_HAS_USERPTR_PROBE 56
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt {
__u64 offset;
};
+/**
+ * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
+ *
+ * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
+ * and is used to retrieve the fake offset to mmap an object specified by &handle.
+ *
+ * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
+ * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
+ * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
+ */
struct drm_i915_gem_mmap_offset {
- /** Handle for the object being mapped. */
+ /** @handle: Handle for the object being mapped. */
__u32 handle;
+ /** @pad: Must be zero */
__u32 pad;
/**
- * Fake offset to use for subsequent mmap call
+ * @offset: The fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
/**
- * Flags for extended behaviour.
+ * @flags: Flags for extended behaviour.
+ *
+ * It is mandatory that one of the `MMAP_OFFSET` types
+ * should be included:
*
- * It is mandatory that one of the MMAP_OFFSET types
- * (GTT, WC, WB, UC, etc) should be included.
+ * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
+ * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
+ * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
+ * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
+ *
+ * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
+ * type. On devices without local memory, this caching mode is invalid.
+ *
+ * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
+ * be used, depending on the object placement on creation. WB will be used
+ * when the object can only exist in system memory, WC otherwise.
*/
__u64 flags;
-#define I915_MMAP_OFFSET_GTT 0
-#define I915_MMAP_OFFSET_WC 1
-#define I915_MMAP_OFFSET_WB 2
-#define I915_MMAP_OFFSET_UC 3
- /*
- * Zero-terminated chain of extensions.
+#define I915_MMAP_OFFSET_GTT 0
+#define I915_MMAP_OFFSET_WC 1
+#define I915_MMAP_OFFSET_WB 2
+#define I915_MMAP_OFFSET_UC 3
+#define I915_MMAP_OFFSET_FIXED 4
+
+ /**
+ * @extensions: Zero-terminated chain of extensions.
*
* No current extensions defined; mbz.
*/
__u64 extensions;
};
+/**
+ * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
+ * preparation for accessing the pages via some CPU domain.
+ *
+ * Specifying a new write or read domain will flush the object out of the
+ * previous domain(if required), before then updating the objects domain
+ * tracking with the new domain.
+ *
+ * Note this might involve waiting for the object first if it is still active on
+ * the GPU.
+ *
+ * Supported values for @read_domains and @write_domain:
+ *
+ * - I915_GEM_DOMAIN_WC: Uncached write-combined domain
+ * - I915_GEM_DOMAIN_CPU: CPU cache domain
+ * - I915_GEM_DOMAIN_GTT: Mappable aperture domain
+ *
+ * All other domains are rejected.
+ *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ * mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ * guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ */
struct drm_i915_gem_set_domain {
- /** Handle for the object */
+ /** @handle: Handle for the object. */
__u32 handle;
- /** New read domains */
+ /** @read_domains: New read domains. */
__u32 read_domains;
- /** New write domain */
+ /**
+ * @write_domain: New write domain.
+ *
+ * Note that having something in the write domain implies it's in the
+ * read domain, and only that read domain.
+ */
__u32 write_domain;
};
@@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy {
* reading from the object simultaneously.
*
* The value of each engine class is the same as specified in the
- * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
+ * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
* I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
- * reported as active itself. Some hardware may have parallel
- * execution engines, e.g. multiple media engines, which are
- * mapped to the same class identifier and so are not separately
- * reported for busyness.
+ * Some hardware may have parallel execution engines, e.g. multiple
+ * media engines, which are mapped to the same class identifier and so
+ * are not separately reported for busyness.
*
* Caveat emptor:
* Only the boolean result of this query is reliable; that is whether
@@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy {
};
/**
- * I915_CACHING_NONE
- *
- * GPU access is not coherent with cpu caches. Default for machines without an
- * LLC.
- */
-#define I915_CACHING_NONE 0
-/**
- * I915_CACHING_CACHED
- *
- * GPU access is coherent with cpu caches and furthermore the data is cached in
- * last-level caches shared between cpu cores and the gpu GT. Default on
- * machines with HAS_LLC.
+ * struct drm_i915_gem_caching - Set or get the caching for given object
+ * handle.
+ *
+ * Allow userspace to control the GTT caching bits for a given object when the
+ * object is later mapped through the ppGTT(or GGTT on older platforms lacking
+ * ppGTT support, or if the object is used for scanout). Note that this might
+ * require unbinding the object from the GTT first, if its current caching value
+ * doesn't match.
+ *
+ * Note that this all changes on discrete platforms, starting from DG1, the
+ * set/get caching is no longer supported, and is now rejected. Instead the CPU
+ * caching attributes(WB vs WC) will become an immutable creation time property
+ * for the object, along with the GTT caching level. For now we don't expose any
+ * new uAPI for this, instead on DG1 this is all implicit, although this largely
+ * shouldn't matter since DG1 is coherent by default(without any way of
+ * controlling it).
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ * mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ * guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ *
+ * Side note: Part of the reason for this is that changing the at-allocation-time CPU
+ * caching attributes for the pages might be required(and is expensive) if we
+ * need to then CPU map the pages later with different caching attributes. This
+ * inconsistent caching behaviour, while supported on x86, is not universally
+ * supported on other architectures. So for simplicity we opt for setting
+ * everything at creation time, whilst also making it immutable, on discrete
+ * platforms.
*/
-#define I915_CACHING_CACHED 1
-/**
- * I915_CACHING_DISPLAY
- *
- * Special GPU caching mode which is coherent with the scanout engines.
- * Transparently falls back to I915_CACHING_NONE on platforms where no special
- * cache mode (like write-through or gfdt flushing) is available. The kernel
- * automatically sets this mode when using a buffer as a scanout target.
- * Userspace can manually set this mode to avoid a costly stall and clflush in
- * the hotpath of drawing the first frame.
- */
-#define I915_CACHING_DISPLAY 2
-
struct drm_i915_gem_caching {
/**
- * Handle of the buffer to set/get the caching level of. */
+ * @handle: Handle of the buffer to set/get the caching level.
+ */
__u32 handle;
/**
- * Cacheing level to apply or return value
+ * @caching: The GTT caching level to apply or possible return value.
+ *
+ * The supported @caching values:
*
- * bits0-15 are for generic caching control (i.e. the above defined
- * values). bits16-31 are reserved for platform-specific variations
- * (e.g. l3$ caching on gen7). */
+ * I915_CACHING_NONE:
+ *
+ * GPU access is not coherent with CPU caches. Default for machines
+ * without an LLC. This means manual flushing might be needed, if we
+ * want GPU access to be coherent.
+ *
+ * I915_CACHING_CACHED:
+ *
+ * GPU access is coherent with CPU caches and furthermore the data is
+ * cached in last-level caches shared between CPU cores and the GPU GT.
+ *
+ * I915_CACHING_DISPLAY:
+ *
+ * Special GPU caching mode which is coherent with the scanout engines.
+ * Transparently falls back to I915_CACHING_NONE on platforms where no
+ * special cache mode (like write-through or gfdt flushing) is
+ * available. The kernel automatically sets this mode when using a
+ * buffer as a scanout target. Userspace can manually set this mode to
+ * avoid a costly stall and clflush in the hotpath of drawing the first
+ * frame.
+ */
+#define I915_CACHING_NONE 0
+#define I915_CACHING_CACHED 1
+#define I915_CACHING_DISPLAY 2
__u32 caching;
};
@@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param {
__u32 size;
__u64 param;
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
+/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
+ * someone somewhere has attempted to use it, never re-use this context
+ * param number.
+ */
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
@@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param {
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
-/*
- * I915_CONTEXT_PARAM_RINGSIZE:
- *
- * Sets the size of the CS ringbuffer to use for logical ring contexts. This
- * applies a limit of how many batches can be queued to HW before the caller
- * is blocked due to lack of space for more commands.
- *
- * Only reliably possible to be set prior to first use, i.e. during
- * construction. At any later point, the current execution must be flushed as
- * the ring can only be changed while the context is idle. Note, the ringsize
- * can be specified as a constructor property, see
- * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
- *
- * Only applies to the current set of engine and lost when those engines
- * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
- *
- * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
- * Default is 16 KiB.
+/* This API has been removed. On the off chance someone somewhere has
+ * attempted to use it, never re-use this context param number.
*/
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
/* Must be kept compact -- no holes and well documented */
@@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu {
__u32 rsvd;
};
+/**
+ * DOC: Virtual Engine uAPI
+ *
+ * Virtual engine is a concept where userspace is able to configure a set of
+ * physical engines, submit a batch buffer, and let the driver execute it on any
+ * engine from the set as it sees fit.
+ *
+ * This is primarily useful on parts which have multiple instances of a same
+ * class engine, like for example GT3+ Skylake parts with their two VCS engines.
+ *
+ * For instance userspace can enumerate all engines of a certain class using the
+ * previously described `Engine Discovery uAPI`_. After that userspace can
+ * create a GEM context with a placeholder slot for the virtual engine (using
+ * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
+ * and instance respectively) and finally using the
+ * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
+ * the same reserved slot.
+ *
+ * Example of creating a virtual engine and submitting a batch buffer to it:
+ *
+ * .. code-block:: C
+ *
+ * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
+ * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+ * .engine_index = 0, // Place this virtual engine into engine map slot 0
+ * .num_siblings = 2,
+ * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
+ * { I915_ENGINE_CLASS_VIDEO, 1 }, },
+ * };
+ * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
+ * .engines = { { I915_ENGINE_CLASS_INVALID,
+ * I915_ENGINE_CLASS_INVALID_NONE } },
+ * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension
+ * };
+ * struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_ENGINES,
+ * .value = to_user_pointer(&engines),
+ * .size = sizeof(engines),
+ * },
+ * };
+ * struct drm_i915_gem_context_create_ext create = {
+ * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * .extensions = to_user_pointer(&p_engines),
+ * };
+ *
+ * ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * // Now we have created a GEM context with its engine map containing a
+ * // single virtual engine. Submissions to this slot can go either to
+ * // vcs0 or vcs1, depending on the load balancing algorithm used inside
+ * // the driver. The load balancing is dynamic from one batch buffer to
+ * // another and transparent to userspace.
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 0; // Submits to index 0 which is the virtual engine
+ * gem_execbuf(drm_fd, &execbuf);
+ */
+
/*
* i915_context_engines_load_balance:
*
@@ -1883,6 +2049,61 @@ struct i915_context_engines_bond {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
+/**
+ * DOC: Context Engine Map uAPI
+ *
+ * Context engine map is a new way of addressing engines when submitting batch-
+ * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
+ * inside the flags field of `struct drm_i915_gem_execbuffer2`.
+ *
+ * To use it created GEM contexts need to be configured with a list of engines
+ * the user is intending to submit to. This is accomplished using the
+ * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
+ * i915_context_param_engines`.
+ *
+ * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
+ * configured map.
+ *
+ * Example of creating such context and submitting against it:
+ *
+ * .. code-block:: C
+ *
+ * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
+ * .engines = { { I915_ENGINE_CLASS_RENDER, 0 },
+ * { I915_ENGINE_CLASS_COPY, 0 } }
+ * };
+ * struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * .base = {
+ * .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * },
+ * .param = {
+ * .param = I915_CONTEXT_PARAM_ENGINES,
+ * .value = to_user_pointer(&engines),
+ * .size = sizeof(engines),
+ * },
+ * };
+ * struct drm_i915_gem_context_create_ext create = {
+ * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * .extensions = to_user_pointer(&p_engines),
+ * };
+ *
+ * ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * // We have now created a GEM context with two engines in the map:
+ * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines
+ * // will not be accessible from this context.
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
+ * gem_execbuf(drm_fd, &execbuf);
+ *
+ * ...
+ * execbuf.rsvd1 = ctx_id;
+ * execbuf.flags = 1; // Submits to index 1, which is bcs0 for this context
+ * gem_execbuf(drm_fd, &execbuf);
+ */
+
struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
@@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam {
struct drm_i915_gem_context_param param;
};
-struct drm_i915_gem_context_create_ext_clone {
+/* This API has been removed. On the off chance someone somewhere has
+ * attempted to use it, never re-use this extension number.
+ */
#define I915_CONTEXT_CREATE_EXT_CLONE 1
- struct i915_user_extension base;
- __u32 clone_id;
- __u32 flags;
-#define I915_CONTEXT_CLONE_ENGINES (1u << 0)
-#define I915_CONTEXT_CLONE_FLAGS (1u << 1)
-#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2)
-#define I915_CONTEXT_CLONE_SSEU (1u << 3)
-#define I915_CONTEXT_CLONE_TIMELINE (1u << 4)
-#define I915_CONTEXT_CLONE_VM (1u << 5)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
- __u64 rsvd;
-};
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
@@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats {
__u32 pad;
};
+/**
+ * struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
+ *
+ * Userptr objects have several restrictions on what ioctls can be used with the
+ * object handle.
+ */
struct drm_i915_gem_userptr {
+ /**
+ * @user_ptr: The pointer to the allocated memory.
+ *
+ * Needs to be aligned to PAGE_SIZE.
+ */
__u64 user_ptr;
+
+ /**
+ * @user_size:
+ *
+ * The size in bytes for the allocated memory. This will also become the
+ * object size.
+ *
+ * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
+ * or larger.
+ */
__u64 user_size;
+
+ /**
+ * @flags:
+ *
+ * Supported flags:
+ *
+ * I915_USERPTR_READ_ONLY:
+ *
+ * Mark the object as readonly, this also means GPU access can only be
+ * readonly. This is only supported on HW which supports readonly access
+ * through the GTT. If the HW can't support readonly access, an error is
+ * returned.
+ *
+ * I915_USERPTR_PROBE:
+ *
+ * Probe the provided @user_ptr range and validate that the @user_ptr is
+ * indeed pointing to normal memory and that the range is also vali