diff options
78 files changed, 1619 insertions, 476 deletions
@@ -721,7 +721,8 @@ Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com> Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com> Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com> -Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org> +Sibi Sankar <sibi.sankar@oss.qualcomm.com> <sibis@codeaurora.org> +Sibi Sankar <sibi.sankar@oss.qualcomm.com> <quic_sibis@quicinc.com> Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org> Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu> Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@ffwll.ch> diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 8ccc5af5ea1e..86d7902a657f 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -134,47 +134,72 @@ The above command can be used with -v to get more debug information. After the system starts, use `delaytop` to get the system-wide delay information, which includes system-wide PSI information and Top-N high-latency tasks. +Note: PSI support requires `CONFIG_PSI=y` and `psi=1` for full functionality. -`delaytop` supports sorting by CPU latency in descending order by default, -displays the top 20 high-latency tasks by default, and refreshes the latency -data every 2 seconds by default. +`delaytop` is an interactive tool for monitoring system pressure and task delays. +It supports multiple sorting options, display modes, and real-time keyboard controls. -Get PSI information and Top-N tasks delay, since system boot:: +Basic usage with default settings (sorts by CPU delay, shows top 20 tasks, refreshes every 2 seconds):: bash# ./delaytop - System Pressure Information: (avg10/avg60/avg300/total) - CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) + System Pressure Information: (avg10/avg60vg300/total) + CPU some: 0.0%/ 0.0%/ 0.0%/ 106137(ms) CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) - IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) - IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) + IO full: 0.0%/ 0.0%/ 0.0%/ 2240(ms) + IO some: 0.0%/ 0.0%/ 0.0%/ 2783(ms) IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) - Top 20 processes (sorted by CPU delay): - PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) - ---------------------------------------------------------------------------------------------- - 161 161 zombie_memcg_re 1.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 130 130 blkcg_punt_bio 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 444 444 scsi_tmf_0 0.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1280 1280 rsyslogd 0.53 0.04 0.00 0.00 0.00 0.00 0.00 0.00 - 12 12 ksoftirqd/0 0.47 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1277 1277 nbd-server 0.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 308 308 kworker/2:2-sys 0.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 55 55 netns 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1187 1187 acpid 0.31 0.03 0.00 0.00 0.00 0.00 0.00 0.00 - 6184 6184 kworker/1:2-sys 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 186 186 kaluad 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 18 18 ksoftirqd/1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 185 185 kmpath_rdacd 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 190 190 kstrp 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 2759 2759 agetty 0.20 0.03 0.00 0.00 0.00 0.00 0.00 0.00 - 1190 1190 kworker/0:3-sys 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 1272 1272 sshd 0.15 0.04 0.00 0.00 0.00 0.00 0.00 0.00 - 1156 1156 license 0.15 0.11 0.00 0.00 0.00 0.00 0.00 0.00 - 134 134 md 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - 6142 6142 kworker/3:2-xfs 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - -Dynamic interactive interface of delaytop:: + [o]sort [M]memverbose [q]quit + Top 20 processes (sorted by cpu delay): + PID TGID COMMAND CPU(ms) IO(ms) IRQ(ms) MEM(ms) + ------------------------------------------------------------------------ + 110 110 kworker/15:0H-s 27.91 0.00 0.00 0.00 + 57 57 cpuhp/7 3.18 0.00 0.00 0.00 + 99 99 cpuhp/14 2.97 0.00 0.00 0.00 + 51 51 cpuhp/6 0.90 0.00 0.00 0.00 + 44 44 kworker/4:0H-sy 0.80 0.00 0.00 0.00 + 60 60 ksoftirqd/7 0.74 0.00 0.00 0.00 + 76 76 idle_inject/10 0.31 0.00 0.00 0.00 + 100 100 idle_inject/14 0.30 0.00 0.00 0.00 + 1309 1309 systemsettings 0.29 0.00 0.00 0.00 + 45 45 cpuhp/5 0.22 0.00 0.00 0.00 + 63 63 cpuhp/8 0.20 0.00 0.00 0.00 + 87 87 cpuhp/12 0.18 0.00 0.00 0.00 + 93 93 cpuhp/13 0.17 0.00 0.00 0.00 + 1265 1265 acpid 0.17 0.00 0.00 0.00 + 1552 1552 sshd 0.17 0.00 0.00 0.00 + 2584 2584 sddm-helper 0.16 0.00 0.00 0.00 + 1284 1284 rtkit-daemon 0.15 0.00 0.00 0.00 + 1326 1326 nde-netfilter 0.14 0.00 0.00 0.00 + 27 27 cpuhp/2 0.13 0.00 0.00 0.00 + 631 631 kworker/11:2-rc 0.11 0.00 0.00 0.00 + +Interactive keyboard controls during runtime:: + + o - Select sort field (CPU, IO, IRQ, Memory, etc.) + M - Toggle display mode (Default/Memory Verbose) + q - Quit + +Available sort fields(use -s/--sort or interactive command):: + + cpu(c) - CPU delay + blkio(i) - I/O delay + irq(q) - IRQ delay + mem(m) - Total memory delay + swapin(s) - Swapin delay (memory verbose mode only) + freepages(r) - Freepages reclaim delay (memory verbose mode only) + thrashing(t) - Thrashing delay (memory verbose mode only) + compact(p) - Compaction delay (memory verbose mode only) + wpcopy(w) - Write page copy delay (memory verbose mode only) + +Advanced usage examples:: + + # ./delaytop -s blkio + Sorted by IO delay + + # ./delaytop -s mem -M + Sorted by memory delay in memory verbose mode # ./delaytop -p pid Print delayacct stats diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e019db1633fd..74ca438d2d6d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4603,7 +4603,7 @@ bit 2: print timer info bit 3: print locks info if CONFIG_LOCKDEP is on bit 4: print ftrace buffer - bit 5: replay all messages on consoles at the end of panic + bit 5: replay all kernel messages on consoles at the end of panic bit 6: print all CPUs backtrace (if available in the arch) bit 7: print only tasks in uninterruptible (blocked) state *Be aware* that this option may print a _lot_ of lines, diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 8b49eab937d0..f3ee807b5d8b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -890,7 +890,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer -bit 5 replay all messages on consoles at the end of panic +bit 5 replay all kernel messages on consoles at the end of panic bit 6 print all CPUs backtrace (if available in the arch) bit 7 print only tasks in uninterruptible (blocked) state ===== ============================================ diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 6611434e2dd2..8127849d40f5 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -361,7 +361,12 @@ local tasks spawned by the process and the global task that handles USB bus #1: */ sleep(2); - n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + /* + * The load to the coverage count should be an acquire to pair with + * pair with the corresponding write memory barrier (smp_wmb()) on + * the kernel-side in kcov_move_area(). + */ + n = __atomic_load_n(&cover[0], __ATOMIC_ACQUIRE); for (i = 0; i < n; i++) printf("0x%lx\n", cover[i + 1]); if (ioctl(fd, KCOV_DISABLE, 0)) diff --git a/MAINTAINERS b/MAINTAINERS index 6a6cf856c98b..4abe33914884 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20987,7 +20987,7 @@ F: Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml F: drivers/pmdomain/qcom/cpr.c QUALCOMM CPUCP MAILBOX DRIVER -M: Sibi Sankar <quic_sibis@quicinc.com> +M: Sibi Sankar <sibi.sankar@oss.qualcomm.com> L: linux-arm-msm@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/mailbox/qcom,cpucp-mbox.yaml diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c6b12bed173d..335fd2ee9766 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -165,14 +165,23 @@ static struct crash_mem *fill_up_crash_elf_data(void) /* * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges * may cause range splits. So add extra slots here. + * + * Exclusion of low 1M may not cause another range split, because the + * range of exclude is [0, 1M] and the condition for splitting a new + * region is that the start, end parameters are both in a certain + * existing region in cmem and cannot be equal to existing region's + * start or end. Obviously, the start of [0, 1M] cannot meet this + * condition. + * + * But in order to lest the low 1M could be changed in the future, + * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 2 + crashk_cma_cnt; + nr_ranges += 3 + crashk_cma_cnt; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; return cmem; } @@ -323,16 +332,20 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) struct crash_mem *cmem; /* - * Using random kexec_buf for passing dm crypt keys may cause a range - * split. So use two slots here. + * In the current x86 architecture code, the elfheader is always + * allocated at crashk_res.start. But it depends on the allocation + * position of elfheader in crashk_res. To avoid potential out of + * bounds in future, add an extra slot. + * + * And using random kexec_buf for passing dm crypt keys may cause a + * range split too, add another extra slot here. */ - nr_ranges = 2; + nr_ranges = 3; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return -ENOMEM; cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; memset(&cmd, 0, sizeof(struct crash_memmap_data)); cmd.params = params; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 24a41f0e0cf1..c3244ac680d1 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -16,6 +16,8 @@ #include <linux/kexec.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/libfdt.h> +#include <linux/of_fdt.h> #include <linux/efi.h> #include <linux/random.h> @@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, } #endif /* CONFIG_EFI */ +#ifdef CONFIG_OF_FLATTREE +static void setup_dtb(struct boot_params *params, + unsigned long params_load_addr, + unsigned int dtb_setup_data_offset) +{ + struct setup_data *sd = (void *)params + dtb_setup_data_offset; + unsigned long setup_data_phys, dtb_len; + + dtb_len = fdt_totalsize(initial_boot_params); + sd->type = SETUP_DTB; + sd->len = dtb_len; + + /* Carry over current boot DTB with setup_data */ + memcpy(sd->data, initial_boot_params, dtb_len); + + /* Add setup data */ + setup_data_phys = params_load_addr + dtb_setup_data_offset; + sd->next = params->hdr.setup_data; + params->hdr.setup_data = setup_data_phys; +} +#endif /* CONFIG_OF_FLATTREE */ + static void setup_ima_state(const struct kimage *image, struct boot_params *params, unsigned long params_load_addr, @@ -336,6 +360,17 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, sizeof(struct efi_setup_data); #endif +#ifdef CONFIG_OF_FLATTREE + if (image->force_dtb && initial_boot_params) { + setup_dtb(params, params_load_addr, setup_data_offset); + setup_data_offset += sizeof(struct setup_data) + + fdt_totalsize(initial_boot_params); + } else { + pr_debug("Not carrying over DTB, force_dtb = %d\n", + image->force_dtb); + } +#endif + if (IS_ENABLED(CONFIG_IMA_KEXEC)) { /* Setup IMA log buffer state */ setup_ima_state(image, params, params_load_addr, @@ -529,6 +564,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel, sizeof(struct setup_data) + RNG_SEED_LENGTH; +#ifdef CONFIG_OF_FLATTREE + if (image->force_dtb && initial_boot_params) + kbuf.bufsz += sizeof(struct setup_data) + + fdt_totalsize(initial_boot_params); +#endif + if (IS_ENABLED(CONFIG_IMA_KEXEC)) kbuf.bufsz += sizeof(struct setup_data) + sizeof(struct ima_setup_data); @@ -537,7 +578,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.bufsz += sizeof(struct setup_data) + sizeof(struct kho_data); - params = kzalloc(kbuf.bufsz, GFP_KERNEL); + params = kvzalloc(kbuf.bufsz, GFP_KERNEL); if (!params) return ERR_PTR(-ENOMEM); efi_map_offset = params_cmdline_sz; @@ -647,7 +688,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, return ldata; out_free_params: - kfree(params); + kvfree(params); return ERR_PTR(ret); } @@ -659,7 +700,7 @@ static int bzImage64_cleanup(void *loader_data) if (!ldata) return 0; - kfree(ldata->bootparams_buf); + kvfree(ldata->bootparams_buf); ldata->bootparams_buf = NULL; return 0; diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index a00e07b853f2..a65c2d5b9e7b 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/fwnode.h> #include <linux/init.h> +#include <linux/kexec_handover.h> #include <linux/memblock.h> #include <linux/mm_types.h> #include <linux/of.h> @@ -164,12 +165,32 @@ static __init void reserve_regions(void) pr_info("Processing EFI memory map:\n"); /* - * Discard memblocks discovered so far: if there are any at this - * point, they originate from memory nodes in the DT, and UEFI - * uses its own memory map instead. + * Discard memblocks discovered so far except for KHO scratch + * regions. Most memblocks at this point originate from memory nodes + * in the DT and UEFI uses its own memory map instead. However, if + * KHO is enabled, scratch regions, which are good known memory + * must be preserved. */ memblock_dump_all(); - memblock_remove(0, PHYS_ADDR_MAX); + + if (is_kho_boot()) { + struct memblock_region *r; + + /* Remove all non-KHO regions */ + for_each_mem_region(r) { + if (!memblock_is_kho_scratch(r)) { + memblock_remove(r->base, r->size); + r--; + } + } + } else { + /* + * KHO is disabled. Discard memblocks discovered so far: + * if there are any at this point, they originate from memory + * nodes in the DT, and UEFI uses its own memory map instead. + */ + memblock_remove(0, PHYS_ADDR_MAX); + } for_each_efi_memory_desc(md) { paddr = md->phys_addr; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 5940e2eb9231..96cc9b389246 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -279,14 +279,7 @@ static int fbcon_get_rotate(struct fb_info *info) static bool fbcon_skip_panic(struct fb_info *info) { -/* panic_cpu is not exported, and can't be used if built as module. Use - * oops_in_progress instead, but non-fatal oops won't be printed. - */ -#if defined(MODULE) - return (info->skip_panic && unlikely(oops_in_progress)); -#else - return (info->skip_panic && unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID)); -#endif + return (info->skip_panic && unlikely(panic_in_progress())); } static inline bool fbcon_is_active(struct vc_data *vc, struct fb_info *info) |
