diff options
77 files changed, 826 insertions, 873 deletions
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 1b8b46deeb29..197fe319cbec 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -13,6 +13,8 @@ a) waiting for a CPU (while being runnable) b) completion of synchronous block I/O initiated by the task c) swapping in pages d) memory reclaim +e) thrashing page cache +f) direct compact and makes these statistics available to userspace through the taskstats interface. @@ -41,11 +43,12 @@ generic data structure to userspace corresponding to per-pid and per-tgid statistics. The delay accounting functionality populates specific fields of this structure. See - include/linux/taskstats.h + include/uapi/linux/taskstats.h for a description of the fields pertaining to delay accounting. It will generally be in the form of counters returning the cumulative -delay seen for cpu, sync block I/O, swapin, memory reclaim etc. +delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page +cache, direct compact etc. Taking the difference of two successive readings of a given counter (say cpu_delay_total) for a task will give the delay @@ -88,41 +91,37 @@ seen. General format of the getdelays command:: - getdelays [-t tgid] [-p pid] [-c cmd...] - + getdelays [-dilv] [-t tgid] [-p pid] Get delays, since system boot, for pid 10:: - # ./getdelays -p 10 + # ./getdelays -d -p 10 (output similar to next case) Get sum of delays, since system boot, for all pids with tgid 5:: - # ./getdelays -t 5 - - - CPU count real total virtual total delay total - 7876 92005750 100000000 24001500 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 + # ./getdelays -d -t 5 + print delayacct stats ON + TGID 5 -Get delays seen in executing a given simple command:: - # ./getdelays -c ls / + CPU count real total virtual total delay total delay average + 8 7000000 6872122 3382277 0.423ms + IO count delay total delay average + 0 0 0ms + SWAP count delay total delay average + 0 0 0ms + RECLAIM count delay total delay average + 0 0 0ms + THRASHING count delay total delay average + 0 0 0ms + COMPACT count delay total delay average + 0 0 0ms - bin data1 data3 data5 dev home media opt root srv sys usr - boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var +Get IO accounting for pid 1, it works only with -p:: + # ./getdelays -i -p 1 + printing IO accounting + linuxrc: read=65536, write=0, cancelled_write=0 - CPU count real total virtual total delay total - 6 4000250 4000000 0 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 +The above command can be used with -v to get more debug information. diff --git a/arch/Kconfig b/arch/Kconfig index 874c65d9e963..678a80713b21 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -997,6 +997,10 @@ config PAGE_SIZE_LESS_THAN_64KB depends on !PAGE_SIZE_64KB depends on !PARISC_PAGE_SIZE_64KB depends on !PPC_64K_PAGES + depends on PAGE_SIZE_LESS_THAN_256KB + +config PAGE_SIZE_LESS_THAN_256KB + def_bool y depends on !PPC_256K_PAGES depends on !PAGE_SIZE_256KB diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f6e333b59314..dc10d26cb432 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1136,6 +1136,10 @@ config NUMA select GENERIC_ARCH_NUMA select ACPI_NUMA if ACPI select OF_NUMA + select HAVE_SETUP_PER_CPU_AREA + select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PER_CPU_PAGE_FIRST_CHUNK + select USE_PERCPU_NUMA_NODE_ID help Enable NUMA (Non-Uniform Memory Access) support. @@ -1152,22 +1156,6 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. -config USE_PERCPU_NUMA_NODE_ID - def_bool y - depends on NUMA - -config HAVE_SETUP_PER_CPU_AREA - def_bool y - depends on NUMA - -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y - depends on NUMA - -config NEED_PER_CPU_PAGE_FIRST_CHUNK - def_bool y - depends on NUMA - source "kernel/Kconfig.hz" config ARCH_SPARSEMEM_ENABLE diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 1e33666fa679..703952819e10 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -32,6 +32,7 @@ config IA64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE if (!ITANIUM) select HAVE_FUNCTION_TRACER + select HAVE_SETUP_PER_CPU_AREA select TTY select HAVE_ARCH_TRACEHOOK select HAVE_VIRT_CPU_ACCOUNTING @@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config HAVE_SETUP_PER_CPU_AREA - def_bool y - config DMI bool default y @@ -292,6 +290,7 @@ config NUMA bool "NUMA support" depends on !FLATMEM select SMP + select USE_PERCPU_NUMA_NODE_ID help Say Y to compile the kernel to support NUMA (Non-Uniform Memory Access). This option is for configuring high-end multiprocessor @@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION def_bool y depends on NUMA -config USE_PERCPU_NUMA_NODE_ID - def_bool y - depends on NUMA - config HAVE_MEMORYLESS_NODES def_bool NUMA diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3dd8c4618293..edf6c1577449 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2674,6 +2674,8 @@ config NUMA bool "NUMA Support" depends on SYS_SUPPORTS_NUMA select SMP + select HAVE_SETUP_PER_CPU_AREA + select NEED_PER_CPU_EMBED_FIRST_CHUNK help Say Y to compile the kernel to support NUMA (Non-Uniform Memory Access). This option improves performance on systems with more @@ -2684,14 +2686,6 @@ config NUMA config SYS_SUPPORTS_NUMA bool -config HAVE_SETUP_PER_CPU_AREA - def_bool y - depends on NUMA - -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y - depends on NUMA - config RELOCATABLE bool "Relocatable kernel" depends on SYS_SUPPORTS_RELOCATABLE diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 325e1552cbea..5a8002839550 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -519,17 +519,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) return node_distance(cpu_to_node(from), cpu_to_node(to)); } -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, - size_t align) +static int __init pcpu_cpu_to_node(int cpu) { - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - cpu_to_node(cpu)); -} - -static void __init pcpu_fc_free(void *ptr, size_t size) -{ - memblock_free(ptr, size); + return cpu_to_node(cpu); } void __init setup_per_cpu_areas(void) @@ -545,7 +537,7 @@ void __init setup_per_cpu_areas(void) rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); + pcpu_cpu_to_node); if (rc < 0) panic("Failed to initialize percpu areas."); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0631c9241af3..b779603978e1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K) default 11 # 11 = 23 (8MB) - 12 (4K) -config HAVE_SETUP_PER_CPU_AREA - def_bool PPC64 - -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y if PPC64 - -config NEED_PER_CPU_PAGE_FIRST_CHUNK - def_bool y if PPC64 - config NR_IRQS int "Number of virtual interrupt numbers" range 32 1048576 @@ -241,6 +232,7 @@ config PPC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) @@ -255,6 +247,8 @@ config PPC select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE + select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64 + select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64 select NEED_SG_DMA_LENGTH select OF select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE @@ -660,6 +654,7 @@ config NUMA bool "NUMA Memory Allocation and Scheduler Support" depends on PPC64 && SMP default y if PPC_PSERIES || PPC_POWERNV + select USE_PERCPU_NUMA_NODE_ID help Enable NUMA (Non-Uniform Memory Access) support. @@ -673,10 +668,6 @@ config NODES_SHIFT default "4" depends on NUMA -config USE_PERCPU_NUMA_NODE_ID - def_bool y - depends on NUMA - config HAVE_MEMORYLESS_NODES def_bool y depends on NUMA diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d87f7c1103ce..be8577ac9397 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -771,50 +771,6 @@ void __init emergency_stack_init(void) } #ifdef CONFIG_SMP -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. - */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, - size_t align) -{ - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NUMA - int node = early_cpu_to_node(cpu); - void *ptr; - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", - cpu, size, __pa(ptr)); - } else { - ptr = memblock_alloc_try_nid(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, node); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); - } - return ptr; -#else - return memblock_alloc_from(size, align, goal); -#endif -} - -static void __init pcpu_free_bootmem(void *ptr, size_t size) -{ - memblock_free(ptr, size); -} - static int pcpu_cpu_distance(unsigned int from, unsigned int to) { if (early_cpu_to_node(from) == early_cpu_to_node(to)) @@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init pcpu_populate_pte(unsigned long addr) +static __init int pcpu_cpu_to_node(int cpu) { - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + return early_cpu_to_node(cpu); } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { @@ -900,7 +816,7 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_PAGE) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_alloc_bootmem, pcpu_free_bootmem); + pcpu_cpu_to_node); if (rc) pr_warn("PERCPU: %s allocator failed (%d), " "falling back to page size\n", @@ -908,8 +824,7 @@ void __init setup_per_cpu_areas(void) } if (rc < 0) - rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, - pcpu_populate_pte); + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4602cfe92a20..171ecc6d1792 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -335,6 +335,8 @@ config NUMA select GENERIC_ARCH_NUMA select OF_NUMA select ARCH_SUPPORTS_NUMA_BALANCING + select USE_PERCPU_NUMA_NODE_ID + select NEED_PER_CPU_EMBED_FIRST_CHUNK help Enable NUMA (Non-Uniform Memory Access) support. @@ -350,14 +352,6 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. -config USE_PERCPU_NUMA_NODE_ID - def_bool y - depends on NUMA - -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y - depends on NUMA - config RISCV_ISA_C bool "Emit compressed instructions when building Linux" default y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 66fc08646be5..1cab1b284f1a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -97,6 +97,9 @@ config SPARC64 select PCI_DOMAINS if PCI select ARCH_HAS_GIGANTIC_PAGE select HAVE_SOFTIRQ_ON_OWN_STACK + select HAVE_SETUP_PER_CPU_AREA + select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PER_CPU_PAGE_FIRST_CHUNK config ARCH_PROC_KCORE_TEXT def_bool y @@ -123,15 +126,6 @@ config AUDIT_ARCH bool default y -config HAVE_SETUP_PER_CPU_AREA - def_bool y if SPARC64 - -config NEED_PER_CPU_EMBED_FIRST_CHUNK - def_bool y if SPARC64 - -config NEED_PER_CPU_PAGE_FIRST_CHUNK - def_bool y if SPARC64 - config MMU bool default y |
