From 571d91dcadfa3cef499010b4eddb9b58b0da4d24 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:19 -0700 Subject: perf: Add branch stack counters Currently, the additional information of a branch entry is stored in a u64 space. With more and more information added, the space is running out. For example, the information of occurrences of events will be added for each branch. Two places were suggested to append the counters. https://lore.kernel.org/lkml/20230802215814.GH231007@hirez.programming.kicks-ass.net/ One place is right after the flags of each branch entry. It changes the existing struct perf_branch_entry. The later ARCH specific implementation has to be really careful to consistently pick the right struct. The other place is right after the entire struct perf_branch_stack. The disadvantage is that the pointer of the extra space has to be recorded. The common interface perf_sample_save_brstack() has to be updated. The latter is much straightforward, and should be easily understood and maintained. It is implemented in the patch. Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate the event which is recorded in the branch info. The "u64 counters" may store the occurrences of several events. The information regarding the number of events/counters and the width of each counter should be exposed via sysfs as a reference for the perf tool. Define the branch_counter_nr and branch_counter_width ABI here. The support will be implemented later in the Intel-specific patch. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231025201626.3000228-1-kan.liang@linux.intel.com --- include/linux/perf_event.h | 17 ++++++++++++++++- include/uapi/linux/perf_event.h | 10 ++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0367d748fae0..7897ef066027 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1139,6 +1139,10 @@ static inline bool branch_sample_priv(const struct perf_event *event) return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; } +static inline bool branch_sample_counters(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; +} struct perf_sample_data { /* @@ -1173,6 +1177,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + u64 *br_stack_cntr; union perf_sample_weight weight; union perf_mem_data_src data_src; u64 txn; @@ -1250,7 +1255,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, - struct perf_branch_stack *brs) + struct perf_branch_stack *brs, + u64 *brs_cntr) { int size = sizeof(u64); /* nr */ @@ -1258,7 +1264,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); + /* + * The extension space for counters is appended after the + * struct perf_branch_stack. It is used to store the occurrences + * of events of each branch. + */ + if (brs_cntr) + size += brs->nr * sizeof(u64); + data->br_stack = brs; + data->br_stack_cntr = brs_cntr; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 39c6a250dd1b..4461f380425b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -235,6 +237,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -982,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. + * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi -- cgit v1.2.3 From 1f2376cd03dd3b965d130ed46a7c92769d614ba1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:21 -0700 Subject: perf: Add branch_sample_call_stack Add a helper function to check call stack sample type. The later patch will invoke the function in several places. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231025201626.3000228-3-kan.liang@linux.intel.com --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7897ef066027..ac1a59c1f252 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1144,6 +1144,11 @@ static inline bool branch_sample_counters(const struct perf_event *event) return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; } +static inline bool branch_sample_call_stack(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} + struct perf_sample_data { /* * Fields set by perf_sample_data_init() unconditionally, -- cgit v1.2.3 From 33744916196b4ed7a50f6f47af7c3ad46b730ce6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:23 -0700 Subject: perf/x86/intel: Support branch counters logging The branch counters logging (A.K.A LBR event logging) introduces a per-counter indication of precise event occurrences in LBRs. It can provide a means to attribute exposed retirement latency to combinations of events across a block of instructions. It also provides a means of attributing Timed LBR latencies to events. The feature is first introduced on SRF/GRR. It is an enhancement of the ARCH LBR. It adds new fields in the LBR_INFO MSRs to log the occurrences of events on the GP counters. The information is displayed by the order of counters. The design proposed in this patch requires that the events which are logged must be in a group with the event that has LBR. If there are more than one LBR group, the counters logging information only from the current group (overflowed) are stored for the perf tool, otherwise the perf tool cannot know which and when other groups are scheduled especially when multiplexing is triggered. The user can ensure it uses the maximum number of counters that support LBR info (4 by now) by making the group large enough. The HW only logs events by the order of counters. The order may be different from the order of enabling which the perf tool can understand. When parsing the information of each branch entry, convert the counter order to the enabled order, and store the enabled order in the extension space. Unconditionally reset LBRs for an LBR event group when it's deleted. The logged counter information is only valid for the current LBR group. If another LBR group is scheduled later, the information from the stale LBRs would be otherwise wrongly interpreted. Add a sanity check in intel_pmu_hw_config(). Disable the feature if other counter filters (inv, cmask, edge, in_tx) are set or LBR call stack mode is enabled. (For the LBR call stack mode, we cannot simply flush the LBR, since it will break the call stack. Also, there is no obvious usage with the call stack mode for now.) Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't require any branch stack setup. Expose the maximum number of supported counters and the width of the counters into the sysfs. The perf tool can use the information to parse the logged counters in each branch. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231025201626.3000228-5-kan.liang@linux.intel.com --- include/uapi/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4461f380425b..3a64499b0f5d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1437,6 +1437,9 @@ struct perf_branch_entry { reserved:31; }; +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + union perf_sample_weight { __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From d23b5c577715892c87533b13923306acc6243f93 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 29 Oct 2023 06:14:29 +0000 Subject: cgroup: Make operations on the cgroup root_list RCU safe At present, when we perform operations on the cgroup root_list, we must hold the cgroup_mutex, which is a relatively heavyweight lock. In reality, we can make operations on this list RCU-safe, eliminating the need to hold the cgroup_mutex during traversal. Modifications to the list only occur in the cgroup root setup and destroy paths, which should be infrequent in a production environment. In contrast, traversal may occur frequently. Therefore, making it RCU-safe would be beneficial. Signed-off-by: Yafang Shao Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4a6b6b77ccb6..4caab0c6b361 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -563,6 +563,7 @@ struct cgroup_root { /* A list running through the active hierarchies */ struct list_head root_list; + struct rcu_head rcu; /* Hierarchy-specific flags */ unsigned int flags; -- cgit v1.2.3 From aecd408b7e50742868b3305c24325a89024e2a30 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 29 Oct 2023 06:14:32 +0000 Subject: cgroup: Add a new helper for cgroup1 hierarchy A new helper is added for cgroup1 hierarchy: - task_get_cgroup1 Acquires the associated cgroup of a task within a specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its hierarchy ID. This helper function is added to facilitate the tracing of tasks within a particular container or cgroup dir in BPF programs. It's important to note that this helper is designed specifically for cgroup1 only. tj: Use irsqsave/restore as suggested by Hou Tao . Suggested-by: Tejun Heo Signed-off-by: Yafang Shao Cc: Hou Tao Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0ef0af66080e..34aaf0e87def 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -69,6 +69,7 @@ struct css_task_iter { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include @@ -386,7 +387,6 @@ static inline void cgroup_unlock(void) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ @@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); + #endif /* _LINUX_CGROUP_H */ -- cgit v1.2.3 From 74523c06ae20b83c5508a98af62393ac34913362 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 6 Nov 2023 20:57:23 -0800 Subject: bpf: Add __bpf_dynptr_data* for in kernel use Different types of bpf dynptr have different internal data storage. Specifically, SKB and XDP type of dynptr may have non-continuous data. Therefore, it is not always safe to directly access dynptr->data. Add __bpf_dynptr_data and __bpf_dynptr_data_rw to replace direct access to dynptr->data. Update bpf_verify_pkcs7_signature to use __bpf_dynptr_data instead of dynptr->data. Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/bpf/20231107045725.2278852-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4825d3cdb29..eb84caf133df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1222,6 +1222,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -- cgit v1.2.3 From 790ce3cfefb1b768dccd4eee324ddef0f0ce3db4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:37 -0800 Subject: bpf: Move GRAPH_{ROOT,NODE}_MASK macros into btf_field_type enum This refactoring patch removes the unused BPF_GRAPH_NODE_OR_ROOT btf_field_type and moves BPF_GRAPH_{NODE,ROOT} macros into the btf_field_type enum. Further patches in the series will use BPF_GRAPH_NODE, so let's move this useful definition out of btf.c. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-5-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eb84caf133df..4001d11be151 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,8 +186,8 @@ enum btf_field_type { BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), }; -- cgit v1.2.3 From 155addf0814a92d08fce26a11b27e3315cdba977 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 3 Nov 2023 19:49:00 -0700 Subject: bpf: Use named fields for certain bpf uapi structs Martin and Vadim reported a verifier failure with bpf_dynptr usage. The issue is mentioned but Vadim workarounded the issue with source change ([1]). The below describes what is the issue and why there is a verification failure. int BPF_PROG(skb_crypto_setup) { struct bpf_dynptr algo, key; ... bpf_dynptr_from_mem(..., ..., 0, &algo); ... } The bpf program is using vmlinux.h, so we have the following definition in vmlinux.h: struct bpf_dynptr { long: 64; long: 64; }; Note that in uapi header bpf.h, we have struct bpf_dynptr { long: 64; long: 64; } __attribute__((aligned(8))); So we lost alignment information for struct bpf_dynptr by using vmlinux.h. Let us take a look at a simple program below: $ cat align.c typedef unsigned long long __u64; struct bpf_dynptr_no_align { __u64 :64; __u64 :64; }; struct bpf_dynptr_yes_align { __u64 :64; __u64 :64; } __attribute__((aligned(8))); void bar(void *, void *); int foo() { struct bpf_dynptr_no_align a; struct bpf_dynptr_yes_align b; bar(&a, &b); return 0; } $ clang --target=bpf -O2 -S -emit-llvm align.c Look at the generated IR file align.ll: ... %a = alloca %struct.bpf_dynptr_no_align, align 1 %b = alloca %struct.bpf_dynptr_yes_align, align 8 ... The compiler dictates the alignment for struct bpf_dynptr_no_align is 1 and the alignment for struct bpf_dynptr_yes_align is 8. So theoretically compiler could allocate variable %a with alignment 1 although in reallity the compiler may choose a different alignment by considering other local variables. In [1], the verification failure happens because variable 'algo' is allocated on the stack with alignment 4 (fp-28). But the verifer wants its alignment to be 8. To fix the issue, the RFC patch ([1]) tried to add '__attribute__((aligned(8)))' to struct bpf_dynptr plus other similar structs. Andrii suggested that we could directly modify uapi struct with named fields like struct 'bpf_iter_num': struct bpf_iter_num { /* opaque iterator state; having __u64 here allows to preserve correct * alignment requirements in vmlinux.h, generated from BTF */ __u64 __opaque[1]; } __attribute__((aligned(8))); Indeed, adding named fields for those affected structs in this patch can preserve alignment when bpf program references them in vmlinux.h. With this patch, the verification failure in [1] can also be resolved. [1] https://lore.kernel.org/bpf/1b100f73-7625-4c1f-3ae5-50ecf84d3ff0@linux.dev/ [2] https://lore.kernel.org/bpf/20231103055218.2395034-1-yonghong.song@linux.dev/ Cc: Vadim Fedorenko Cc: Martin KaFai Lau Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231104024900.1539182-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { -- cgit v1.2.3 From 689b097a06bafb461ec162fc3b3ecc9765cea67b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 6 Nov 2023 03:18:02 +0000 Subject: compiler-gcc: Suppress -Wmissing-prototypes warning for all supported GCC The kernel supports a minimum GCC version of 5.1.0 for building. However, the "__diag_ignore_all" directive only suppresses the "-Wmissing-prototypes" warning for GCC versions >= 8.0.0. As a result, when building the kernel with older GCC versions, warnings may be triggered. The example below illustrates the warnings reported by the kernel test robot using GCC 7.5.0: compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0 All warnings (new ones prefixed by >>): kernel/bpf/helpers.c:1893:19: warning: no previous prototype for 'bpf_obj_new_impl' [-Wmissing-prototypes] __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) ^~~~~~~~~~~~~~~~ kernel/bpf/helpers.c:1907:19: warning: no previous prototype for 'bpf_percpu_obj_new_impl' [-Wmissing-prototypes] __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) [...] To address this, we should also suppress the "-Wmissing-prototypes" warning for older GCC versions. "#pragma GCC diagnostic push" is supported as of GCC 4.6, and both "-Wmissing-prototypes" and "-Wmissing-declarations" are supported for all the GCC versions that we currently support. Therefore, it is reasonable to suppress these warnings for all supported GCC versions. With this adjustment, it's important to note that after implementing "__diag_ignore_all", it will effectively suppress warnings for all the supported GCC versions. In the future, if you wish to suppress warnings that are only supported on higher GCC versions, it is advisable to explicitly use "__diag_ignore" to specify the GCC version you are targeting. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311031651.A7crZEur-lkp@intel.com/ Suggested-by: Arnd Bergmann Signed-off-by: Yafang Shao Cc: Kumar Kartikeya Dwivedi Cc: Arnd Bergmann Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231106031802.4188-1-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2ceba3fe4ec1..aebb65bf95a7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -136,7 +136,7 @@ #endif #define __diag_ignore_all(option, comment) \ - __diag_GCC(8, ignore, option) + __diag(__diag_GCC_ignore option) /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" -- cgit v1.2.3 From b8e3a87a627b575896e448021e5c2f8a3bc19931 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 8 Nov 2023 03:23:34 -0800 Subject: bpf: Add crosstask check to __bpf_get_stack Currently get_perf_callchain only supports user stack walking for the current task. Passing the correct *crosstask* param will return 0 frames if the task passed to __bpf_get_stack isn't the current one instead of a single incorrect frame/address. This change passes the correct *crosstask* param but also does a preemptive check in __bpf_get_stack if the task is current and returns -EOPNOTSUPP if it is not. This issue was found using bpf_get_task_stack inside a BPF iterator ("iter/task"), which iterates over all tasks. bpf_get_task_stack works fine for fetching kernel stacks but because get_perf_callchain relies on the caller to know if the requested *task* is the current one (via *crosstask*) it was failing in a confusing way. It might be possible to get user stacks for all tasks utilizing something like access_process_vm but that requires the bpf program calling bpf_get_task_stack to be sleepable and would therefore be a breaking change. Fixes: fa28dcb82a38 ("bpf: Introduce helper bpf_get_task_stack()") Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231108112334.3433136-1-jordalgo@meta.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. -- cgit v1.2.3 From fe28f631fa941fba583d1c4f25895284b90af671 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 25 Oct 2023 14:25:52 -0400 Subject: workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs from wq_unbound_cpumask When the "isolcpus" boot command line option is used to add a set of isolated CPUs, those CPUs will be excluded automatically from wq_unbound_cpumask to avoid running work functions from unbound workqueues. Recently cpuset has been extended to allow the creation of partitions of isolated CPUs dynamically. To make it closer to the "isolcpus" in functionality, the CPUs in those isolated cpuset partitions should be excluded from wq_unbound_cpumask as well. This can be done currently by explicitly writing to the workqueue's cpumask sysfs file after creating the isolated partitions. However, this process can be error prone. Ideally, the cpuset code should be allowed to request the workqueue code to exclude those isolated CPUs from wq_unbound_cpumask so that this operation can be done automatically and the isolated CPUs will be returned back to wq_unbound_cpumask after the destructions of the isolated cpuset partitions. This patch adds a new workqueue_unbound_exclude_cpumask() function to enable that. This new function will exclude the specified isolated CPUs from wq_unbound_cpumask. To be able to restore those isolated CPUs back after the destruction of isolated cpuset partitions, a new wq_requested_unbound_cpumask is added to store the user provided unbound cpumask either from the boot command line options or from writing to the cpumask sysfs file. This new cpumask provides the basis for CPU exclusion. To enable users to understand how the wq_unbound_cpumask is being modified internally, this patch also exposes the newly introduced wq_requested_unbound_cpumask as well as a wq_isolated_cpumask to store the cpumask to be excluded from wq_unbound_cpumask as read-only sysfs files. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 24b1e5070f4d..b0b9604b76b8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -491,7 +491,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); +extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -- cgit v1.2.3 From e76d28bdf9ba5388b8c4835a5199dc427b603188 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 3 Nov 2023 23:13:01 -0400 Subject: cgroup/rstat: Reduce cpu_lock hold time in cgroup_rstat_flush_locked() When cgroup_rstat_updated() isn't being called concurrently with cgroup_rstat_flush_locked(), its run time is pretty short. When both are called concurrently, the cgroup_rstat_updated() run time can spike to a pretty high value due to high cpu_lock hold time in cgroup_rstat_flush_locked(). This can be problematic if the task calling cgroup_rstat_updated() is a realtime task running on an isolated CPU with a strict latency requirement. The cgroup_rstat_updated() call can happen when there is a page fault even though the task is running in user space most of the time. The percpu cpu_lock is used to protect the update tree - updated_next and updated_children. This protection is only needed when cgroup_rstat_cpu_pop_updated() is being called. The subsequent flushing operation which can take a much longer time does not need that protection as it is already protected by cgroup_rstat_lock. To reduce the cpu_lock hold time, we need to perform all the cgroup_rstat_cpu_pop_updated() calls up front with the lock released afterward before doing any flushing. This patch adds a new cgroup_rstat_updated_list() function to return a singly linked list of cgroups to be flushed. Some instrumentation code are added to measure the cpu_lock hold time right after lock acquisition to after releasing the lock. Parallel kernel build on a 2-socket x86-64 server is used as the benchmarking tool for measuring the lock hold time. The maximum cpu_lock hold time before and after the patch are 100us and 29us respectively. So the worst case time is reduced to about 30% of the original. However, there may be some OS or hardware noises like NMI or SMI in the test system that can worsen the worst case value. Those noises are usually tuned out in a real production environment to get a better result. OTOH, the lock hold time frequency distribution should give a better idea of the performance benefit of the patch. Below were the frequency distribution before and after the patch: Hold time Before patch After patch --------- ------------ ----------- 0-01 us 804,139 13,738,708 01-05 us 9,772,767 1,177,194 05-10 us 4,595,028 4,984 10-15 us 303,481 3,562 15-20 us 78,971 1,314 20-25 us 24,583 18 25-30 us 6,908 12 30-40 us 8,015 40-50 us 2,192 50-60 us 316 60-70 us 43 70-80 us 7 80-90 us 2 >90 us 3 Signed-off-by: Waiman Long Reviewed-by: Yosry Ahmed Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4caab0c6b361..37518436cfe7 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -496,6 +496,13 @@ struct cgroup { struct cgroup_rstat_cpu __percpu *rstat_cpu; struct list_head rstat_css_list; + /* + * A singly-linked list of cgroup structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + */ + struct cgroup *rstat_flush_next; + /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; -- cgit v1.2.3 From 89ef42088b3ba884a007ad10bd89ce8a81b9dedd Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Thu, 9 Nov 2023 15:59:00 +0200 Subject: ASoC: SOF: Add support for configuring PDM interface from topology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we only support configuration for number of channels and sample rate. Reviewed-by: Péter Ujfalusi Reviewed-by: Iuliana Prodan Signed-off-by: Daniel Baluta Link: https://lore.kernel.org/r/20231109135900.88310-3-daniel.baluta@oss.nxp.com Signed-off-by: Mark Brown --- include/sound/sof/dai-imx.h | 7 +++++++ include/sound/sof/dai.h | 2 ++ include/uapi/sound/sof/tokens.h | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/sound/sof/dai-imx.h b/include/sound/sof/dai-imx.h index ca8325353d41..6bc987bd4761 100644 --- a/include/sound/sof/dai-imx.h +++ b/include/sound/sof/dai-imx.h @@ -51,4 +51,11 @@ struct sof_ipc_dai_sai_params { uint16_t tdm_slot_width; uint16_t reserved2; /* alignment */ } __packed; + +/* MICFIL Configuration Request - SOF_IPC_DAI_MICFIL_CONFIG */ +struct sof_ipc_dai_micfil_params { + uint32_t pdm_rate; + uint32_t pdm_ch; +} __packed; + #endif diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 3041f5805b7b..4773a5f616a4 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -88,6 +88,7 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_HS, /**< Amd HS */ SOF_DAI_AMD_SP_VIRTUAL, /**< AMD ACP SP VIRTUAL */ SOF_DAI_AMD_HS_VIRTUAL, /**< AMD ACP HS VIRTUAL */ + SOF_DAI_IMX_MICFIL, /** < i.MX MICFIL PDM */ }; /* general purpose DAI configuration */ @@ -117,6 +118,7 @@ struct sof_ipc_dai_config { struct sof_ipc_dai_acpdmic_params acpdmic; struct sof_ipc_dai_acp_params acphs; struct sof_ipc_dai_mtk_afe_params afe; + struct sof_ipc_dai_micfil_params micfil; }; } __packed; diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 453cab2a1209..0fb39780f9bd 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -213,4 +213,8 @@ #define SOF_TKN_AMD_ACPI2S_CH 1701 #define SOF_TKN_AMD_ACPI2S_TDM_MODE 1702 +/* MICFIL PDM */ +#define SOF_TKN_IMX_MICFIL_RATE 2000 +#define SOF_TKN_IMX_MICFIL_CH 2001 + #endif -- cgit v1.2.3 From 8156c7dd47b92fc4a70c9ea58e7a9e88c8bc32be Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 26 Oct 2023 16:48:21 +0200 Subject: regulator: Introduce handling for system-critical under-voltage events Handle under-voltage events for crucial regulators to maintain system stability and avoid issues during power drops. Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20231026144824.4065145-3-o.rempel@pengutronix.de Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 621b7f4a3639..e0ddfb5593c9 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -49,6 +49,13 @@ struct regulator; #define DISABLE_IN_SUSPEND 1 #define ENABLE_IN_SUSPEND 2 +/* + * Default time window (in milliseconds) following a critical under-voltage + * event during which less critical actions can be safely carried out by the + * system. + */ +#define REGULATOR_DEF_UV_LESS_CRITICAL_WINDOW_MS 10 + /* Regulator active discharge flags */ enum regulator_active_discharge { REGULATOR_ACTIVE_DISCHARGE_DEFAULT, @@ -127,6 +134,8 @@ struct notification_limit { * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. + * @system_critical: Set if the regulator is critical to system stability or + * functionality. * @over_current_protection: Auto disable on over current event. * * @over_current_detection: Configure over current limits. @@ -214,6 +223,7 @@ struct regulation_constraints { unsigned ramp_disable:1; /* disable ramp delay */ unsigned soft_start:1; /* ramp voltage slowly */ unsigned pull_down:1; /* pull down resistor when regulator off */ + unsigned system_critical:1; /* critical to system stability */ unsigned over_current_protection:1; /* auto disable on over current */ unsigned over_current_detection:1; /* notify on over current */ unsigned over_voltage_detection:1; /* notify on over voltage */ -- cgit v1.2.3 From 1e22152aa59d793743fc53051dd7a042f362aecb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 26 Oct 2023 16:48:24 +0200 Subject: regulator: Implement uv_survival_time for handling under-voltage events Add 'uv_survival_time' field to regulation_constraints for specifying survival time post critical under-voltage event. Update the regulator notifier call chain and Device Tree property parsing to use this new field, allowing a configurable timeout before emergency shutdown. Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20231026144824.4065145-6-o.rempel@pengutronix.de Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index e0ddfb5593c9..0cd76d264727 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -162,6 +162,13 @@ struct notification_limit { * regulator_active_discharge values are used for * initialisation. * @enable_time: Turn-on time of the rails (unit: microseconds) + * @uv_less_critical_window_ms: Specifies the time window (in milliseconds) + * following a critical under-voltage (UV) event + * during which less critical actions can be + * safely carried out by the system (for example + * logging). After this time window more critical + * actions should be done (for example prevent + * HW damage). */ struct regulation_constraints { @@ -213,6 +220,7 @@ struct regulation_constraints { unsigned int settling_time_up; unsigned int settling_time_down; unsigned int enable_time; + unsigned int uv_less_critical_window_ms; unsigned int active_discharge; -- cgit v1.2.3 From f3b8788cde61b02f1e6c202f8fac4360e6adbafc Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:46 -0700 Subject: LSM: Identify modules by more than name Create a struct lsm_id to contain identifying information about Linux Security Modules (LSMs). At inception this contains the name of the module and an identifier associated with the security module. Change the security_add_hooks() interface to use this structure. Change the individual modules to maintain their own struct lsm_id and pass it to security_add_hooks(). The values are for LSM identifiers are defined in a new UAPI header file linux/lsm.h. Each existing LSM has been updated to include it's LSMID in the lsm_id. The LSM ID values are sequential, with the oldest module LSM_ID_CAPABILITY being the lowest value and the existing modules numbered in the order they were included in the main line kernel. This is an arbitrary convention for assigning the values, but none better presents itself. The value 0 is defined as being invalid. The values 1-99 are reserved for any special case uses which may arise in the future. This may include attributes of the LSM infrastructure itself, possibly related to namespacing or network attribute management. A special range is identified for such attributes to help reduce confusion for developers unfamiliar with LSMs. LSM attribute values are defined for the attributes presented by modules that are available today. As with the LSM IDs, The value 0 is defined as being invalid. The values 1-99 are reserved for any special case uses which may arise in the future. Cc: linux-security-module Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: Mickael Salaun Reviewed-by: John Johansen Signed-off-by: Kees Cook Nacked-by: Tetsuo Handa [PM: forward ported beyond v6.6 due merge window changes] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 16 ++++++++++++-- include/uapi/linux/lsm.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 include/uapi/linux/lsm.h (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index dcb5e5b5eb13..7f0adb33caaa 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -42,6 +42,18 @@ struct security_hook_heads { #undef LSM_HOOK } __randomize_layout; +/** + * struct lsm_id - Identify a Linux Security Module. + * @lsm: name of the LSM, must be approved by the LSM maintainers + * @id: LSM ID number from uapi/linux/lsm.h + * + * Contains the information that identifies the LSM. + */ +struct lsm_id { + const char *name; + u64 id; +}; + /* * Security module hook list structure. * For use with generic list macros for common operations. @@ -50,7 +62,7 @@ struct security_hook_list { struct hlist_node list; struct hlist_head *head; union security_list_options hook; - const char *lsm; + const struct lsm_id *lsmid; } __randomize_layout; /* @@ -104,7 +116,7 @@ extern struct security_hook_heads security_hook_heads; extern char *lsm_names; extern void security_add_hooks(struct security_hook_list *hooks, int count, - const char *lsm); + const struct lsm_id *lsmid); #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h new file mode 100644 index 000000000000..f27c9a9cc376 --- /dev/null +++ b/include/uapi/linux/lsm.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Linux Security Modules (LSM) - User space API + * + * Copyright (C) 2022 Casey Schaufler + * Copyright (C) 2022 Intel Corporation + */ + +#ifndef _UAPI_LINUX_LSM_H +#define _UAPI_LINUX_LSM_H + +/* + * ID tokens to identify Linux Security Modules (LSMs) + * + * These token values are used to uniquely identify specific LSMs + * in the kernel as well as in the kernel's LSM userspace API. + * + * A value of zero/0 is considered undefined and should not be used + * outside the kernel. Values 1-99 are reserved for potential + * future use. + */ +#define LSM_ID_UNDEF 0 +#define LSM_ID_CAPABILITY 100 +#define LSM_ID_SELINUX 101 +#define LSM_ID_SMACK 102 +#define LSM_ID_TOMOYO 103 +#define LSM_ID_IMA 104 +#define LSM_ID_APPARMOR 105 +#define LSM_ID_YAMA 106 +#define LSM_ID_LOADPIN 107 +#define LSM_ID_SAFESETID 108 +#define LSM_ID_LOCKDOWN 109 +#define LSM_ID_BPF 110 +#define LSM_ID_LANDLOCK 111 + +/* + * LSM_ATTR_XXX definitions identify different LSM attributes + * which are used in the kernel's LSM userspace API. Support + * for these attributes vary across the different LSMs. None + * are required. + * + * A value of zero/0 is considered undefined and should not be used + * outside the kernel. Values 1-99 are reserved for potential + * future use. + */ +#define LSM_ATTR_UNDEF 0 +#define LSM_ATTR_CURRENT 100 +#define LSM_ATTR_EXEC 101 +#define LSM_ATTR_FSCREATE 102 +#define LSM_ATTR_KEYCREATE 103 +#define LSM_ATTR_PREV 104 +#define LSM_ATTR_SOCKCREATE 105 + +#endif /* _UAPI_LINUX_LSM_H */ -- cgit v1.2.3 From 9285c5ad9d00abfe0f4e2ce4039c8127e7a09738 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:47 -0700 Subject: LSM: Maintain a table of LSM attribute data As LSMs are registered add their lsm_id pointers to a table. This will be used later for attribute reporting. Determine the number of possible security modules based on their respective CONFIG options. This allows the number to be known at build time. This allows data structures and tables to use the constant. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: Mickael Salaun Reviewed-by: John Johansen Signed-off-by: Paul Moore --- include/linux/security.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 1d1df326c881..50c178019a58 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -138,6 +138,8 @@ enum lockdown_reason { }; extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; +extern u32 lsm_active_cnt; +extern const struct lsm_id *lsm_idlist[]; /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, -- cgit v1.2.3 From 267c068e5f8b81b68cc4247c94dbba90a21a634e Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:48 -0700 Subject: proc: Use lsmids instead of lsm names for attrs Use the LSM ID number instead of the LSM name to identify which security module's attibute data should be shown in /proc/self/attr. The security_[gs]etprocattr() functions have been changed to expect the LSM ID. The change from a string comparison to an integer comparison in these functions will provide a minor performance improvement. Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: Mickael Salaun Reviewed-by: John Johansen Signed-off-by: Paul Moore --- include/linux/security.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 50c178019a58..c81bca77f4f2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -472,10 +472,9 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, const char *lsm, const char *name, +int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); -int security_setprocattr(const char *lsm, const char *name, void *value, - size_t size); +int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -1339,14 +1338,14 @@ static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) { } -static inline int security_getprocattr(struct task_struct *p, const char *lsm, +static inline int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { return -EINVAL; } -static inline int security_setprocattr(const char *lsm, char *name, - void *value, size_t size) +static inline int security_setprocattr(int lsmid, char *name, void *value, + size_t size) { return -EINVAL; } -- cgit v1.2.3 From a04a1198088a1378d0389c250cc684f649bcc91e Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:49 -0700 Subject: LSM: syscalls for current process attributes Create a system call lsm_get_self_attr() to provide the security module maintained attributes of the current process. Create a system call lsm_set_self_attr() to set a security module maintained attribute of the current process. Historically these attributes have been exposed to user space via entries in procfs under /proc/self/attr. The attribute value is provided in a lsm_ctx structure. The structure identifies the size of the attribute, and the attribute value. The format of the attribute value is defined by the security module. A flags field is included for LSM specific information. It is currently unused and must be 0. The total size of the data, including the lsm_ctx structure and any padding, is maintained as well. struct lsm_ctx { __u64 id; __u64 flags; __u64 len; __u64 ctx_len; __u8 ctx[]; }; Two new LSM hooks are used to interface with the LSMs. security_getselfattr() collects the lsm_ctx values from the LSMs that support the hook, accounting for space requirements. security_setselfattr() identifies which LSM the attribute is intended for and passes it along. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: John Johansen Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++++ include/linux/lsm_hooks.h | 1 + include/linux/security.h | 19 +++++++++++++++++++ include/linux/syscalls.h | 5 +++++ include/uapi/linux/lsm.h | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ff217a5ce552..c925a0d26edf 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -262,6 +262,10 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) +LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr, + struct lsm_ctx __user *ctx, size_t *size, u32 flags) +LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr, + struct lsm_ctx *ctx, size_t size, u32 flags) LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7f0adb33caaa..a2ade0ffe9e7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -25,6 +25,7 @@ #ifndef __LINUX_LSM_HOOKS_H #define __LINUX_LSM_HOOKS_H +#include #include #include #include diff --git a/include/linux/security.h b/include/linux/security.h index c81bca77f4f2..dd1fe487385d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -60,6 +60,7 @@ struct fs_parameter; enum fs_value_type; struct watch; struct watch_notification; +struct lsm_ctx; /* Default (no) options for the capable function */ #define CAP_OPT_NONE 0x0 @@ -472,6 +473,10 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); +int security_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + size_t __user *size, u32 flags); +int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + size_t size, u32 flags); int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); int security_setprocattr(int lsmid, const char *name, void *value, size_t size); @@ -1338,6 +1343,20 @@ static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) { } +static inline int security_getselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t __user *size, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int security_setselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t size, u32 flags) +{ + return -EOPNOTSUPP; +} + static inline int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fd9d12de7e92..4e1e56a24f1e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -71,6 +71,7 @@ struct clone_args; struct open_how; struct mount_attr; struct landlock_ruleset_attr; +struct lsm_ctx; enum landlock_rule_type; struct cachestat_range; struct cachestat; @@ -949,6 +950,10 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t *size, __u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t size, __u32 flags); /* * Architecture-specific system calls diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index f27c9a9cc376..eeda59a77c02 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -9,6 +9,36 @@ #ifndef _UAPI_LINUX_LSM_H #define _UAPI_LINUX_LSM_H +#include +#include + +/** + * struct lsm_ctx - LSM context information + * @id: the LSM id number, see LSM_ID_XXX + * @flags: LSM specific flags + * @len: length of the lsm_ctx struct, @ctx and any other data or padding + * @ctx_len: the size of @ctx + * @ctx: the LSM context value + * + * The @len field MUST be equal to the size of the lsm_ctx struct + * plus any additional padding and/or data placed after @ctx. + * + * In all cases @ctx_len MUST be equal to the length of @ctx. + * If @ctx is a string value it should be nul terminated with + * @ctx_len equal to `strlen(@ctx) + 1`. Binary values are + * supported. + * + * The @flags and @ctx fields SHOULD only be interpreted by the + * LSM specified by @id; they MUST be set to zero/0 when not used. + */ +struct lsm_ctx { + __u64 id; + __u64 flags; + __u64 len; + __u64 ctx_len; + __u8 ctx[]; +}; + /* * ID tokens to identify Linux Security Modules (LSMs) * @@ -51,4 +81,10 @@ #define LSM_ATTR_PREV 104 #define LSM_ATTR_SOCKCREATE 105 +/* + * LSM_FLAG_XXX definitions identify special handling instructions + * for the API. + */ +#define LSM_FLAG_SINGLE 0x0001 + #endif /* _UAPI_LINUX_LSM_H */ -- cgit v1.2.3 From ad4aff9ec25f400608283c10d634cc4eeda83a02 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:50 -0700 Subject: LSM: Create lsm_list_modules system call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a system call to report the list of Linux Security Modules that are active on the system. The list is provided as an array of LSM ID numbers. The calling application can use this list determine what LSM specific actions it might take. That might include choosing an output format, determining required privilege or bypassing security module specific behavior. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Reviewed-by: Serge Hallyn Reviewed-by: John Johansen Reviewed-by: Mickaël Salaün Signed-off-by: Paul Moore --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4e1e56a24f1e..feec5719750b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -954,6 +954,7 @@ asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, size_t *size, __u32 flags); asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, size_t size, __u32 flags); +asmlinkage long sys_lsm_list_modules(u64 *ids, size_t *size, u32 flags); /* * Architecture-specific system calls -- cgit v1.2.3 From 5f42375904b08890f2e8e7cd955c5bf0c2c0d05a Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:51 -0700 Subject: LSM: wireup Linux Security Module syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wireup lsm_get_self_attr, lsm_set_self_attr and lsm_list_modules system calls. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Acked-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Cc: linux-api@vger.kernel.org Reviewed-by: Mickaël Salaün [PM: forward ported beyond v6.6 due merge window changes] Signed-off-by: Paul Moore --- include/uapi/asm-generic/unistd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 756b013fb832..55cc0bcfb58d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -829,8 +829,15 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_lsm_get_self_attr 457 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 458 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 459 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 460 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From e1ca7129db2c3b3c4d261702905a752e6b2710b4 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:52 -0700 Subject: LSM: Helpers for attribute names and filling lsm_ctx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add lsm_name_to_attr(), which translates a text string to a LSM_ATTR value if one is available. Add lsm_fill_user_ctx(), which fills a struct lsm_ctx, including the trailing attribute value. Both are used in module specific components of LSM system calls. Signed-off-by: Casey Schaufler Reviewed-by: John Johansen Reviewed-by: Serge Hallyn Reviewed-by: Mickaël Salaün Signed-off-by: Paul Moore --- include/linux/security.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index dd1fe487385d..334f75aa7289 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -264,6 +265,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb); /* prototypes */ extern int security_init(void); extern int early_security_init(void); +extern u64 lsm_name_to_attr(const char *name); /* Security operations */ int security_binder_set_context_mgr(const struct cred *mgr); @@ -490,6 +492,8 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int lsm_fill_user_ctx(struct lsm_ctx __user *ctx, void *context, + size_t context_size, u64 id, u64 flags); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -507,6 +511,11 @@ static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb) return 0; } +static inline u64 lsm_name_to_attr(const char *name) +{ + return LSM_ATTR_UNDEF; +} + static inline void security_free_mnt_opts(void **mnt_opts) { } @@ -1415,6 +1424,11 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } +static inline int lsm_fill_user_ctx(struct lsm_ctx __user *ctx, void *context, + size_t context_size, u64 id, u64 flags) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) -- cgit v1.2.3 From edd71f8e266c7ba15eedfec338864e53ddde1c25 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 18 Oct 2023 17:41:41 -0400 Subject: lsm: drop LSM_ID_IMA When IMA becomes a proper LSM we will reintroduce an appropriate LSM ID, but drop it from the userspace API for now in an effort to put an end to debates around the naming of the LSM ID macro. Reviewed-by: Roberto Sassu Signed-off-by: Paul Moore --- include/uapi/linux/lsm.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/lsm.h b/include/uapi/linux/lsm.h index eeda59a77c02..f0386880a78e 100644 --- a/include/uapi/linux/lsm.h +++ b/include/uapi/linux/lsm.h @@ -54,14 +54,13 @@ struct lsm_ctx { #define LSM_ID_SELINUX 101 #define LSM_ID_SMACK 102 #define LSM_ID_TOMOYO 103 -#define LSM_ID_IMA 104 -#define LSM_ID_APPARMOR 105 -#define LSM_ID_YAMA 106 -#define LSM_ID_LOADPIN 107 -#define LSM_ID_SAFESETID 108 -#define LSM_ID_LOCKDOWN 109 -#define LSM_ID_BPF 110 -#define LSM_ID_LANDLOCK 111 +#define LSM_ID_APPARMOR 104 +#define LSM_ID_YAMA 105 +#define LSM_ID_LOADPIN 106 +#define LSM_ID_SAFESETID 107 +#define LSM_ID_LOCKDOWN 108 +#define LSM_ID_BPF 109 +#define LSM_ID_LANDLOCK 110 /* * LSM_ATTR_XXX definitions identify different LSM attributes -- cgit v1.2.3 From d7cf3412a9f6c547e5ee443fa7644e08898aa3e2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 24 Oct 2023 14:44:00 -0400 Subject: lsm: consolidate buffer size handling into lsm_fill_user_ctx() While we have a lsm_fill_user_ctx() helper function designed to make life easier for LSMs which return lsm_ctx structs to userspace, we didn't include all of the buffer length safety checks and buffer padding adjustments in the helper. This led to code duplication across the different LSMs and the possibility for mistakes across the different LSM subsystems. In order to reduce code duplication and decrease the chances of silly mistakes, we're consolidating all of this code into the lsm_fill_user_ctx() helper. The buffer padding is also modified from a fixed 8-byte alignment to an alignment that matches the word length of the machine (BITS_PER_LONG / 8). Signed-off-by: Paul Moore --- include/linux/security.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 334f75aa7289..750130a7b9dd 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -492,8 +492,8 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); -int lsm_fill_user_ctx(struct lsm_ctx __user *ctx, void *context, - size_t context_size, u64 id, u64 flags); +int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, size_t *uctx_len, + void *val, size_t val_len, u64 id, u64 flags); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1424,8 +1424,9 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } -static inline int lsm_fill_user_ctx(struct lsm_ctx __user *ctx, void *context, - size_t context_size, u64 id, u64 flags) +static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, + size_t *uctx_len, void *val, size_t val_len, + u64 id, u64 flags) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 48f996d4adf15a0a0af8b8184d3ec6042a684ea4 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Mon, 23 Oct 2023 07:03:23 -0700 Subject: RDMA/bnxt_re: Remove roundup_pow_of_two depth for all hardware