summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.kexec2
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/auditsc.c8
-rw-r--r--kernel/bpf/bpf_inode_storage.c1
-rw-r--r--kernel/bpf/bpf_lsm.c4
-rw-r--r--kernel/bpf/bpf_task_storage.c1
-rw-r--r--kernel/bpf/btf.c15
-rw-r--r--kernel/bpf/cgroup.c19
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/devmap.c11
-rw-r--r--kernel/bpf/helpers.c64
-rw-r--r--kernel/bpf/inode.c5
-rw-r--r--kernel/bpf/log.c3
-rw-r--r--kernel/bpf/lpm_trie.c2
-rw-r--r--kernel/bpf/memalloc.c14
-rw-r--r--kernel/bpf/ringbuf.c14
-rw-r--r--kernel/bpf/syscall.c45
-rw-r--r--kernel/bpf/task_iter.c8
-rw-r--r--kernel/bpf/token.c1
-rw-r--r--kernel/bpf/verifier.c134
-rw-r--r--kernel/cgroup/cgroup.c25
-rw-r--r--kernel/debug/gdbstub.c2
-rw-r--r--kernel/events/core.c67
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c46
-rw-r--r--kernel/freezer.c7
-rw-r--r--kernel/irq/msi.c2
-rw-r--r--kernel/kcmp.c4
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/module/dups.c1
-rw-r--r--kernel/module/kmod.c1
-rw-r--r--kernel/module/main.c15
-rw-r--r--kernel/nsproxy.c5
-rw-r--r--kernel/pid.c20
-rw-r--r--kernel/rcu/tasks.h9
-rw-r--r--kernel/rcu/tree.c9
-rw-r--r--kernel/rcu/tree_nocb.h8
-rw-r--r--kernel/resource.c4
-rw-r--r--kernel/resource_kunit.c18
-rw-r--r--kernel/sched/core.c97
-rw-r--r--kernel/sched/deadline.c2
-rw-r--r--kernel/sched/ext.c599
-rw-r--r--kernel/sched/ext.h2
-rw-r--r--kernel/sched/fair.c52
-rw-r--r--kernel/sched/psi.c26
-rw-r--r--kernel/sched/sched.h44
-rw-r--r--kernel/sched/stats.h48
-rw-r--r--kernel/sched/syscalls.c55
-rw-r--r--kernel/signal.c32
-rw-r--r--kernel/sys.c15
-rw-r--r--kernel/task_work.c15
-rw-r--r--kernel/taskstats.c18
-rw-r--r--kernel/time/posix-clock.c3
-rw-r--r--kernel/time/tick-sched.c6
-rw-r--r--kernel/time/timekeeping.c105
-rw-r--r--kernel/time/timekeeping_debug.c13
-rw-r--r--kernel/time/timekeeping_internal.h15
-rw-r--r--kernel/trace/bpf_trace.c42
-rw-r--r--kernel/trace/fgraph.c41
-rw-r--r--kernel/trace/ring_buffer.c44
-rw-r--r--kernel/trace/trace.c53
-rw-r--r--kernel/trace/trace_eprobe.c7
-rw-r--r--kernel/trace/trace_fprobe.c6
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/trace/trace_kprobe.c6
-rw-r--r--kernel/trace/trace_osnoise.c22
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/trace/trace_selftest.c2
-rw-r--r--kernel/trace/trace_uprobe.c13
-rw-r--r--kernel/ucount.c9
-rw-r--r--kernel/umh.c1
-rw-r--r--kernel/watch_queue.c6
73 files changed, 1201 insertions, 805 deletions
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 6c34e63c88ff..4d111f871951 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -97,7 +97,7 @@ config KEXEC_JUMP
config CRASH_DUMP
bool "kernel crash dumps"
- default y
+ default ARCH_DEFAULT_CRASH_DUMP
depends on ARCH_SUPPORTS_CRASH_DUMP
depends on KEXEC_CORE
select VMCORE_INFO
diff --git a/kernel/audit.c b/kernel/audit.c
index 1edaa4846a47..53e3bddcc327 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2102,8 +2102,8 @@ bool audit_string_contains_control(const char *string, size_t len)
/**
* audit_log_n_untrustedstring - log a string that may contain random characters
* @ab: audit_buffer
- * @len: length of string (not including trailing null)
* @string: string to be logged
+ * @len: length of string (not including trailing null)
*
* This code will escape a string that is passed to it if the string
* contains a control character, unprintable character, double quote mark,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index cd57053b4a69..0627e74585ce 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1653,8 +1653,8 @@ static void audit_log_uring(struct audit_context *ctx)
audit_log_format(ab, "uring_op=%d", ctx->uring_op);
if (ctx->return_valid != AUDITSC_INVALID)
audit_log_format(ab, " success=%s exit=%ld",
- (ctx->return_valid == AUDITSC_SUCCESS ?
- "yes" : "no"),
+ str_yes_no(ctx->return_valid ==
+ AUDITSC_SUCCESS),
ctx->return_code);
audit_log_format(ab,
" items=%d"
@@ -1696,8 +1696,8 @@ static void audit_log_exit(void)
audit_log_format(ab, " per=%lx", context->personality);
if (context->return_valid != AUDITSC_INVALID)
audit_log_format(ab, " success=%s exit=%ld",
- (context->return_valid == AUDITSC_SUCCESS ?
- "yes" : "no"),
+ str_yes_no(context->return_valid ==
+ AUDITSC_SUCCESS),
context->return_code);
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d",
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 29da6d3838f6..e16e79f8cd6d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -16,7 +16,6 @@
#include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
-#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>
DEFINE_BPF_STORAGE_CACHE(inode_cache);
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 6292ac5f9bd1..3bc61628ab25 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -339,10 +339,6 @@ BTF_ID(func, bpf_lsm_path_chmod)
BTF_ID(func, bpf_lsm_path_chown)
#endif /* CONFIG_SECURITY_PATH */
-#ifdef CONFIG_KEYS
-BTF_ID(func, bpf_lsm_key_free)
-#endif /* CONFIG_KEYS */
-
BTF_ID(func, bpf_lsm_mmap_file)
BTF_ID(func, bpf_lsm_netlink_send)
BTF_ID(func, bpf_lsm_path_notify)
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index adf6dfe0ba68..1eb9852a9f8e 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -16,7 +16,6 @@
#include <linux/filter.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
-#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>
DEFINE_BPF_STORAGE_CACHE(task_cache);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 75e4fe83c509..5cd1c7a23848 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3523,7 +3523,7 @@ end:
* (i + 1) * elem_size
* where i is the repeat index and elem_size is the size of an element.
*/
-static int btf_repeat_fields(struct btf_field_info *info,
+static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
u32 field_cnt, u32 repeat_cnt, u32 elem_size)
{
u32 i, j;
@@ -3543,6 +3543,12 @@ static int btf_repeat_fields(struct btf_field_info *info,
}
}
+ /* The type of struct size or variable size is u32,
+ * so the multiplication will not overflow.
+ */
+ if (field_cnt * (repeat_cnt + 1) > info_cnt)
+ return -E2BIG;
+
cur = field_cnt;
for (i = 0; i < repeat_cnt; i++) {
memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0]));
@@ -3587,7 +3593,7 @@ static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *
info[i].off += off;
if (nelems > 1) {
- err = btf_repeat_fields(info, ret, nelems - 1, t->size);
+ err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size);
if (err == 0)
ret *= nelems;
else
@@ -3681,10 +3687,10 @@ static int btf_find_field_one(const struct btf *btf,
if (ret == BTF_FIELD_IGNORE)
return 0;
- if (nelems > info_cnt)
+ if (!info_cnt)
return -E2BIG;
if (nelems > 1) {
- ret = btf_repeat_fields(info, 1, nelems - 1, sz);
+ ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz);
if (ret < 0)
return ret;
}
@@ -8961,6 +8967,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
if (!type) {
bpf_log(ctx->log, "relo #%u: bad type id %u\n",
relo_idx, relo->type_id);
+ kfree(specs);
return -EINVAL;
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index e7113d700b87..025d7e2214ae 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -24,6 +24,23 @@
DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+/*
+ * cgroup bpf destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions. Use a separate workqueue so that cgroup bpf
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_bpf_destroy_wq;
+
+static int __init cgroup_bpf_wq_init(void)
+{
+ cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
+ if (!cgroup_bpf_destroy_wq)
+ panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
+ return 0;
+}
+core_initcall(cgroup_bpf_wq_init);
+
/* __always_inline is necessary to prevent indirect call through run_prog
* function pointer.
*/
@@ -334,7 +351,7 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
- queue_work(system_wq, &cgrp->bpf.release_work);
+ queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
}
/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e07cc057d6f..5e77c58e0601 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -40,7 +40,7 @@
#include <linux/execmem.h>
#include <asm/barrier.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* Registers */
#define BPF_R0 regs[BPF_REG_0]
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 9e0e3b0a18e4..7878be18e9d2 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -333,9 +333,11 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
struct xdp_frame **frames, int n,
- struct net_device *dev)
+ struct net_device *tx_dev,
+ struct net_device *rx_dev)
{
- struct xdp_txq_info txq = { .dev = dev };
+ struct xdp_txq_info txq = { .dev = tx_dev };
+ struct xdp_rxq_info rxq = { .dev = rx_dev };
struct xdp_buff xdp;
int i, nframes = 0;
@@ -346,6 +348,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
xdp_convert_frame_to_buff(xdpf, &xdp);
xdp.txq = &txq;
+ xdp.rxq = &rxq;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
switch (act) {
@@ -360,7 +363,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
bpf_warn_invalid_xdp_action(NULL, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(dev, xdp_prog, act);
+ trace_xdp_exception(tx_dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
xdp_return_frame_rx_napi(xdpf);
@@ -388,7 +391,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
}
if (bq->xdp_prog) {
- to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+ to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev, bq->dev_rx);
if (!to_send)
goto out;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1a43d06eab28..3d45ebe8afb4 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -111,7 +111,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
@@ -124,7 +124,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
@@ -538,7 +538,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
.arg4_size = sizeof(s64),
};
@@ -566,7 +566,7 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
.arg4_size = sizeof(u64),
};
@@ -1742,7 +1742,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
+ .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
};
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
@@ -2851,21 +2851,47 @@ struct bpf_iter_bits {
__u64 __opaque[2];
} __aligned(8);
+#define BITS_ITER_NR_WORDS_MAX 511
+
struct bpf_iter_bits_kern {
union {
- unsigned long *bits;
- unsigned long bits_copy;
+ __u64 *bits;
+ __u64 bits_copy;
};
- u32 nr_bits;
+ int nr_bits;
int bit;
} __aligned(8);
+/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
+ * a u64 pointer and an unsigned long pointer to find_next_bit() will
+ * return the same result, as both point to the same 8-byte area.
+ *
+ * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
+ * pointer also makes no difference. This is because the first iterated
+ * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
+ * long is composed of bits 32-63 of the u64.
+ *
+ * However, for 32-bit big-endian hosts, this is not the case. The first
+ * iterated unsigned long will be bits 32-63 of the u64, so swap these two
+ * ulong values within the u64.
+ */
+static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
+{
+#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
+ unsigned int i;
+
+ for (i = 0; i < nr; i++)
+ bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
+#endif
+}
+
/**
* bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
* @it: The new bpf_iter_bits to be created
* @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
* @nr_words: The size of the specified memory area, measured in 8-byte units.
- * Due to the limitation of memalloc, it can't be greater than 512.
+ * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
+ * further reduced by the BPF memory allocator implementation.
*
* This function initializes a new bpf_iter_bits structure for iterating over
* a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
@@ -2892,6 +2918,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
if (!unsafe_ptr__ign || !nr_words)
return -EINVAL;
+ if (nr_words > BITS_ITER_NR_WORDS_MAX)
+ return -E2BIG;
/* Optimization for u64 mask */
if (nr_bits == 64) {
@@ -2899,10 +2927,15 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
if (err)
return -EFAULT;
+ swap_ulong_in_u64(&kit->bits_copy, nr_words);
+
kit->nr_bits = nr_bits;
return 0;
}
+ if (bpf_mem_alloc_check_size(false, nr_bytes))
+ return -E2BIG;
+
/* Fallback to memalloc */
kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
if (!kit->bits)
@@ -2914,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
return err;
}
+ swap_ulong_in_u64(kit->bits, nr_words);
+
kit->nr_bits = nr_bits;
return 0;
}
@@ -2930,17 +2965,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
struct bpf_iter_bits_kern *kit = (void *)it;
- u32 nr_bits = kit->nr_bits;
- const unsigned long *bits;
- int bit;
+ int bit = kit->bit, nr_bits = kit->nr_bits;
+ const void *bits;
- if (nr_bits == 0)
+ if (!nr_bits || bit >= nr_bits)
return NULL;
bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
- bit = find_next_bit(bits, nr_bits, kit->bit + 1);
+ bit = find_next_bit(bits, nr_bits, bit + 1);
if (bit >= nr_bits) {
- kit->nr_bits = 0;
+ kit->bit = bit;
return NULL;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index d8fc5eba529d..9aaf5124648b 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -880,7 +880,7 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
const struct btf_type *enum_t;
const char *enum_pfx;
u64 *delegate_msk, msk = 0;
- char *p;
+ char *p, *str;
int val;
/* ignore errors, fallback to hex */
@@ -911,7 +911,8 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -EINVAL;
}
- while ((p = strsep(&param->string, ":"))) {
+ str = param->string;
+ while ((p = strsep(&str, ":"))) {
if (strcmp(p, "any") == 0) {
msk |= ~0ULL;
} else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) {
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 5aebfc3051e3..4a858fdb6476 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -688,8 +688,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
- /* reg->off should be 0 for SCALAR_VALUE */
- verbose_snum(env, reg->var_off.value + reg->off);
+ verbose_snum(env, reg->var_off.value);
return;
}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 0