diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-11 10:54:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-11 10:54:24 -0700 |
commit | b1412bd75abe8b1c57ecca4a85f92c8ddb4ccd39 (patch) | |
tree | ebfa509236729b322b56b56e84003527581ef5f0 | |
parent | de5540965853e514a85d3b775e9049deb85a2ff3 (diff) | |
parent | eb7261f14e1a86f0fd299a2ec408990d349ce3d1 (diff) | |
download | linux-b1412bd75abe8b1c57ecca4a85f92c8ddb4ccd39.tar.gz linux-b1412bd75abe8b1c57ecca4a85f92c8ddb4ccd39.tar.bz2 linux-b1412bd75abe8b1c57ecca4a85f92c8ddb4ccd39.zip |
Merge tag 'perf-tools-for-v5.14-2021-07-10' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tool updates from Arnaldo Carvalho de Melo:
"New features:
- Enable use of BPF counters with 'perf stat --for-each-cgroup',
using per-CPU 'cgroup-switch' events with an attached BPF program
that does aggregation per-cgroup in the kernel instead of using
per-cgroup perf events.
- Add Topdown metrics L2 events as default events in 'perf stat' for
systems having those events.
Hardware tracing:
- Add a config for max loops without consuming a packet in the Intel
PT packet decoder, set via 'perf config intel-pt.max-loops=N'
Hardware enablement:
- Disable misleading NMI watchdog message in 'perf stat' on hybrid
systems such as Intel Alder Lake.
- Add a dummy event on hybrid systems to collect metadata records.
- Add 24x7 nest metric events for the Power10 platform.
Fixes:
- Fix event parsing for PMUs starting with the same prefix.
- Fix the 'perf trace' 'trace' alias installation dir.
- Fix buffer size to report iregs in perf script python scripts,
supporting the extended registers in PowerPC.
- Fix overflow in elf_sec__is_text().
- Fix 's' on source line when disasm is empty in the annotation TUI,
accessible via 'perf annotate', 'perf report' and 'perf top'.
- Plug leaks in scandir() returned dirent entries in 'perf test' when
sorting the shell tests.
- Fix --task and --stat with pipe input in 'perf report'.
- Fix 'perf probe' use of debuginfo files by build id.
- If a DSO has both dynsym and symtab ELF sections, read from both
when loading the symbol table, fixing a problem processing Fedora
32 glibc DSOs.
Libraries:
- Add grouping of events to libperf, from code in tools/perf,
allowing libperf users to use that mode.
Misc:
- Filter plt stubs from the 'perf probe --functions' output.
- Update UAPI header copies for asound, DRM, mman-common.h and the
ones affected by the quotactl_fd syscall"
* tag 'perf-tools-for-v5.14-2021-07-10' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (29 commits)
perf test: Add free() calls for scandir() returned dirent entries
libperf: Add tests for perf_evlist__set_leader()
libperf: Remove BUG_ON() from library code in get_group_fd()
libperf: Add group support to perf_evsel__open()
perf tools: Fix pattern matching for same substring in different PMU type
perf record: Add a dummy event on hybrid systems to collect metadata records
perf stat: Add Topdown metrics L2 events as default events
libperf: Adopt evlist__set_leader() from tools/perf as perf_evlist__set_leader()
libperf: Move 'nr_groups' from tools/perf to evlist::nr_groups
libperf: Move 'leader' from tools/perf to perf_evsel::leader
libperf: Move 'idx' from tools/perf to perf_evsel::idx
libperf: Change tests to single static and shared binaries
perf intel-pt: Add a config for max loops without consuming a packet
perf stat: Disable the NMI watchdog message on hybrid
perf vendor events power10: Adds 24x7 nest metric events for power10 platform
perf script python: Fix buffer size to report iregs in perf script
perf trace: Fix the perf trace link location
perf top: Fix overflow in elf_sec__is_text()
perf annotate: Fix 's' on source line when disasm is empty
perf probe: Do not show @plt function by default
...
77 files changed, 1872 insertions, 331 deletions
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f94f65d429be..1567a3294c3d 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d2a942086fcb..f211961ce1da 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,7 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -/* 443 is reserved for quotactl_path */ +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 67b94bc3c885..d043752a74cf 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -777,9 +777,12 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_STEREO_3D * - * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * If set to 1, the DRM core will expose the stereo 3D capabilities of the * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. */ #define DRM_CLIENT_CAP_STEREO_3D 1 @@ -788,6 +791,9 @@ struct drm_get_cap { * * If set to 1, the DRM core will expose all planes (overlay, primary, and * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. */ #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 @@ -797,6 +803,13 @@ struct drm_get_cap { * If set to 1, the DRM core will expose atomic properties to userspace. This * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -804,6 +817,10 @@ struct drm_get_cap { * DRM_CLIENT_CAP_ASPECT_RATIO * * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 @@ -811,8 +828,11 @@ struct drm_get_cap { * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS * * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. Depends on client - * also supporting DRM_CLIENT_CAP_ATOMIC + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index ddc47bbf48b6..c2c7759b7d2e 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -62,8 +62,8 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" -/* - * i915_user_extension: Base class for defining a chain of extensions +/** + * struct i915_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -76,12 +76,58 @@ extern "C" { * increasing complexity, and for large parts of that interface to be * entirely optional. The downside is more pointer chasing; chasing across * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * */ struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ __u32 name; - __u32 flags; /* All undefined bits must be zero. */ - __u32 rsvd[4]; /* Reserved for future use; must be zero. */ + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; }; /* @@ -360,6 +406,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_QUERY 0x39 #define DRM_I915_GEM_VM_CREATE 0x3a #define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -392,6 +439,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) @@ -1054,12 +1102,12 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; -/** +/* * See drm_i915_gem_execbuffer_ext_timeline_fences. */ #define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 -/** +/* * This structure describes an array of drm_syncobj and associated points for * timeline variants of drm_syncobj. It is invalid to append this structure to * the execbuf if I915_EXEC_FENCE_ARRAY is set. @@ -1700,7 +1748,7 @@ struct drm_i915_gem_context_param { __u64 value; }; -/** +/* * Context SSEU programming * * It may be necessary for either functional or performance reason to configure @@ -2067,7 +2115,7 @@ struct drm_i915_perf_open_param { __u64 properties_ptr; }; -/** +/* * Enable data capture for a stream that was either opened in a disabled state * via I915_PERF_FLAG_DISABLED or was later disabled via * I915_PERF_IOCTL_DISABLE. @@ -2081,7 +2129,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) -/** +/* * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. @@ -2090,7 +2138,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -/** +/* * Change metrics_set captured by a stream. * * If the stream is bound to a specific context, the configuration change @@ -2103,7 +2151,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) -/** +/* * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -2151,7 +2199,7 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; -/** +/* * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { @@ -2172,53 +2220,95 @@ struct drm_i915_perf_oa_config { __u64 flex_regs_ptr; }; +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. + */ struct drm_i915_query_item { + /** @query_id: The id for this query */ __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 #define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 /* Must be kept compact -- no holes and well documented */ - /* + /** + * @length: + * * When set to zero by userspace, this is filled with the size of the - * data to be written at the data_ptr pointer. The kernel sets this + * data to be written at the @data_ptr pointer. The kernel sets this * value to a negative value to signal an error on a particular query * item. */ __s32 length; - /* + /** + * @flags: + * * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. * * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the - * following : - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * following: + * + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - /* - * Data will be written at the location pointed by data_ptr when the - * value of length matches the length of the data to be written by the + /** + * @data_ptr: + * + * Data will be written at the location pointed by @data_ptr when the + * value of @length matches the length of the data to be written by the * kernel. */ __u64 data_ptr; }; +/** + * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the + * kernel to fill out. + * + * Note that this is generally a two step process for each struct + * drm_i915_query_item in the array: + * + * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct + * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The + * kernel will then fill in the size, in bytes, which tells userspace how + * memory it needs to allocate for the blob(say for an array of properties). + * + * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the + * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that + * the &drm_i915_query_item.length should still be the same as what the + * kernel previously set. At this point the kernel can fill in the blob. + * + * Note that for some query items it can make sense for userspace to just pass + * in a buffer/blob equal to or larger than the required size. In this case only + * a single ioctl call is needed. For some smaller query items this can work + * quite well. + * + */ struct drm_i915_query { + /** @num_items: The number of elements in the @items_ptr array */ __u32 num_items; - /* - * Unused for now. Must be cleared to zero. + /** + * @flags: Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * This points to an array of num_items drm_i915_query_item structures. + /** + * @items_ptr: + * + * Pointer to an array of struct drm_i915_query_item. The number of + * array elements is @num_items. */ __u64 items_ptr; }; @@ -2292,21 +2382,21 @@ struct drm_i915_query_topology_info { * Describes one engine and it's capabilities as known to the driver. */ struct drm_i915_engine_info { - /** Engine class and instance. */ + /** @engine: Engine class and instance. */ struct i915_engine_class_instance engine; - /** Reserved field. */ + /** @rsvd0: Reserved field. */ __u32 rsvd0; - /** Engine flags. */ + /** @flags: Engine flags. */ __u64 flags; - /** Capabilities of this engine. */ + /** @capabilities: Capabilities of this eng |