summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 14:57:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 14:57:54 -0700
commitbeace86e61e465dba204a268ab3f3377153a4973 (patch)
tree24f90cb26bf39eb7724326cdf3e8bffed7c05e50 /mm
parentcbbf0a759ff96c80dfc32192a2cc427b79447f74 (diff)
parentaf915c3c13b64d196d1c305016092f5da20942c4 (diff)
downloadlinux-beace86e61e465dba204a268ab3f3377153a4973.tar.gz
linux-beace86e61e465dba204a268ab3f3377153a4973.tar.bz2
linux-beace86e61e465dba204a268ab3f3377153a4973.zip
Merge tag 'mm-stable-2025-07-30-15-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "As usual, many cleanups. The below blurbiage describes 42 patchsets. 21 of those are partially or fully cleanup work. "cleans up", "cleanup", "maintainability", "rationalizes", etc. I never knew the MM code was so dirty. "mm: ksm: prevent KSM from breaking merging of new VMAs" (Lorenzo Stoakes) addresses an issue with KSM's PR_SET_MEMORY_MERGE mode: newly mapped VMAs were not eligible for merging with existing adjacent VMAs. "mm/damon: introduce DAMON_STAT for simple and practical access monitoring" (SeongJae Park) adds a new kernel module which simplifies the setup and usage of DAMON in production environments. "stop passing a writeback_control to swap/shmem writeout" (Christoph Hellwig) is a cleanup to the writeback code which removes a couple of pointers from struct writeback_control. "drivers/base/node.c: optimization and cleanups" (Donet Tom) contains largely uncorrelated cleanups to the NUMA node setup and management code. "mm: userfaultfd: assorted fixes and cleanups" (Tal Zussman) does some maintenance work on the userfaultfd code. "Readahead tweaks for larger folios" (Ryan Roberts) implements some tuneups for pagecache readahead when it is reading into order>0 folios. "selftests/mm: Tweaks to the cow test" (Mark Brown) provides some cleanups and consistency improvements to the selftests code. "Optimize mremap() for large folios" (Dev Jain) does that. A 37% reduction in execution time was measured in a memset+mremap+munmap microbenchmark. "Remove zero_user()" (Matthew Wilcox) expunges zero_user() in favor of the more modern memzero_page(). "mm/huge_memory: vmf_insert_folio_*() and vmf_insert_pfn_pud() fixes" (David Hildenbrand) addresses some warts which David noticed in the huge page code. These were not known to be causing any issues at this time. "mm/damon: use alloc_migrate_target() for DAMOS_MIGRATE_{HOT,COLD}" (SeongJae Park) provides some cleanup and consolidation work in DAMON. 
"use vm_flags_t consistently" (Lorenzo Stoakes) uses vm_flags_t in places where we were inappropriately using other types. "mm/memfd: Reserve hugetlb folios before allocation" (Vivek Kasireddy) increases the reliability of large page allocation in the memfd code. "mm: Remove pXX_devmap page table bit and pfn_t type" (Alistair Popple) removes several now-unneeded PFN_* flags. "mm/damon: decouple sysfs from core" (SeongJae Park) implements some cleanup and maintainability work in the DAMON sysfs layer. "madvise cleanup" (Lorenzo Stoakes) does quite a lot of cleanup/maintenance work in the madvise() code. "madvise anon_name cleanups" (Vlastimil Babka) provides additional cleanups on top of Lorenzo's effort. "Implement numa node notifier" (Oscar Salvador) creates a standalone notifier for NUMA node memory state changes. Previously these were lumped under the more general memory on/offline notifier. "Make MIGRATE_ISOLATE a standalone bit" (Zi Yan) cleans up the pageblock isolation code and fixes a potential issue which doesn't seem to cause any problems in practice. "selftests/damon: add python and drgn based DAMON sysfs functionality tests" (SeongJae Park) adds additional drgn- and python-based DAMON selftests which are more comprehensive than the existing selftest suite. "Misc rework on hugetlb faulting path" (Oscar Salvador) fixes a rather obscure deadlock in the hugetlb fault code and follows that fix with a series of cleanups. "cma: factor out allocation logic from __cma_declare_contiguous_nid" (Mike Rapoport) rationalizes and cleans up the highmem-specific code in the CMA allocator. "mm/migration: rework movable_ops page migration (part 1)" (David Hildenbrand) provides cleanups and future-preparedness to the migration code. "mm/damon: add trace events for auto-tuned monitoring intervals and DAMOS quota" (SeongJae Park) adds some tracepoints to some DAMON auto-tuning code. "mm/damon: fix misc bugs in DAMON modules" (SeongJae Park) does that. 
"mm/damon: misc cleanups" (SeongJae Park) also does what it claims. "mm: folio_pte_batch() improvements" (David Hildenbrand) cleans up the large folio PTE batching code. "mm/damon/vaddr: Allow interleaving in migrate_{hot,cold} actions" (SeongJae Park) facilitates dynamic alteration of DAMON's inter-node allocation policy. "Remove unmap_and_put_page()" (Vishal Moola) provides a couple of page->folio conversions. "mm: per-node proactive reclaim" (Davidlohr Bueso) implements a per-node control of proactive reclaim - beyond the current memcg-based implementation. "mm/damon: remove damon_callback" (SeongJae Park) replaces the damon_callback interface with a more general and powerful damon_call()+damos_walk() interface. "mm/mremap: permit mremap() move of multiple VMAs" (Lorenzo Stoakes) implements a number of mremap cleanups (of course) in preparation for adding new mremap() functionality: newly permit the remapping of multiple VMAs when the user is specifying MREMAP_FIXED. It still excludes some specialized situations where this cannot be performed reliably. "drop hugetlb_free_pgd_range()" (Anthony Yznaga) switches some sparc hugetlb code over to the generic version and removes the thus-unneeded hugetlb_free_pgd_range(). "mm/damon/sysfs: support periodic and automated stats update" (SeongJae Park) augments the present userspace-requested update of DAMON sysfs monitoring files. Automatic update is now provided, along with a tunable to control the update interval. "Some randome fixes and cleanups to swapfile" (Kemeng Shi) does what it claims. "mm: introduce snapshot_page" (Luiz Capitulino and David Hildenbrand) provides (and uses) a means by which debug-style functions can grab a copy of a pageframe and inspect it locklessly without tripping over the races inherent in operating on the live pageframe directly. "use per-vma locks for /proc/pid/maps reads" (Suren Baghdasaryan) addresses the large contention issues which can be triggered by reads from that procfs file. 
Latencies are reduced by more than half in some situations. The series also introduces several new selftests for the /proc/pid/maps interface. "__folio_split() clean up" (Zi Yan) cleans up __folio_split()! "Optimize mprotect() for large folios" (Dev Jain) provides some quite large (>3x) speedups to mprotect() when dealing with large folios. "selftests/mm: reuse FORCE_READ to replace "asm volatile("" : "+r" (XXX));" and some cleanup" (wang lian) does some cleanup work in the selftests code. "tools/testing: expand mremap testing" (Lorenzo Stoakes) extends the mremap() selftest in several ways, including adding more checking of Lorenzo's recently added "permit mremap() move of multiple VMAs" feature. "selftests/damon/sysfs.py: test all parameters" (SeongJae Park) extends the DAMON sysfs interface selftest so that it tests all possible user-requested parameters. Rather than the present minimal subset" * tag 'mm-stable-2025-07-30-15-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (370 commits) MAINTAINERS: add missing headers to mempory policy & migration section MAINTAINERS: add missing file to cgroup section MAINTAINERS: add MM MISC section, add missing files to MISC and CORE MAINTAINERS: add missing zsmalloc file MAINTAINERS: add missing files to page alloc section MAINTAINERS: add missing shrinker files MAINTAINERS: move memremap.[ch] to hotplug section MAINTAINERS: add missing mm_slot.h file THP section MAINTAINERS: add missing interval_tree.c to memory mapping section MAINTAINERS: add missing percpu-internal.h file to per-cpu section mm/page_alloc: remove trace_mm_alloc_contig_migrate_range_info() selftests/damon: introduce _common.sh to host shared function selftests/damon/sysfs.py: test runtime reduction of DAMON parameters selftests/damon/sysfs.py: test non-default parameters runtime commit selftests/damon/sysfs.py: generalize DAMON context commit assertion selftests/damon/sysfs.py: generalize monitoring attributes commit assertion 
selftests/damon/sysfs.py: generalize DAMOS schemes commit assertion selftests/damon/sysfs.py: test DAMOS filters commitment selftests/damon/sysfs.py: generalize DAMOS scheme commit assertion selftests/damon/sysfs.py: test DAMOS destinations commitment ...
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig28
-rw-r--r--mm/balloon_compaction.c21
-rw-r--r--mm/cma.c341
-rw-r--r--mm/cma_debug.c10
-rw-r--r--mm/compaction.c44
-rw-r--r--mm/damon/Kconfig16
-rw-r--r--mm/damon/Makefile1
-rw-r--r--mm/damon/core.c189
-rw-r--r--mm/damon/lru_sort.c75
-rw-r--r--mm/damon/ops-common.c274
-rw-r--r--mm/damon/ops-common.h5
-rw-r--r--mm/damon/paddr.c277
-rw-r--r--mm/damon/reclaim.c71
-rw-r--r--mm/damon/stat.c264
-rw-r--r--mm/damon/sysfs-schemes.c504
-rw-r--r--mm/damon/sysfs.c171
-rw-r--r--mm/damon/tests/core-kunit.h4
-rw-r--r--mm/damon/tests/vaddr-kunit.h2
-rw-r--r--mm/damon/vaddr.c241
-rw-r--r--mm/debug.c44
-rw-r--r--mm/debug_vm_pgtable.c113
-rw-r--r--mm/execmem.c8
-rw-r--r--mm/filemap.c68
-rw-r--r--mm/gup.c245
-rw-r--r--mm/hmm.c14
-rw-r--r--mm/huge_memory.c489
-rw-r--r--mm/hugetlb.c173
-rw-r--r--mm/hugetlb_vmemmap.c2
-rw-r--r--mm/internal.h150
-rw-r--r--mm/kasan/kasan_test_c.c8
-rw-r--r--mm/khugepaged.c23
-rw-r--r--mm/ksm.c55
-rw-r--r--mm/list_lru.c34
-rw-r--r--mm/maccess.c1
-rw-r--r--mm/madvise.c824
-rw-r--r--mm/mapping_dirty_helpers.c6
-rw-r--r--mm/memcontrol.c82
-rw-r--r--mm/memfd.c38
-rw-r--r--mm/memory-failure.c4
-rw-r--r--mm/memory-tiers.c19
-rw-r--r--mm/memory.c153
-rw-r--r--mm/memory_hotplug.c197
-rw-r--r--mm/mempolicy.c31
-rw-r--r--mm/mempool.c34
-rw-r--r--mm/memremap.c34
-rw-r--r--mm/migrate.c283
-rw-r--r--mm/migrate_device.c2
-rw-r--r--mm/mlock.c4
-rw-r--r--mm/mm_init.c26
-rw-r--r--mm/mmap.c28
-rw-r--r--mm/mmap_lock.c93
-rw-r--r--mm/mprotect.c305
-rw-r--r--mm/mremap.c625
-rw-r--r--mm/nommu.c12
-rw-r--r--mm/page-writeback.c4
-rw-r--r--mm/page_alloc.c394
-rw-r--r--mm/page_ext.c17
-rw-r--r--mm/page_io.c71
-rw-r--r--mm/page_isolation.c112
-rw-r--r--mm/page_owner.c4
-rw-r--r--mm/page_vma_mapped.c5
-rw-r--r--mm/pagewalk.c90
-rw-r--r--mm/percpu-stats.c1
-rw-r--r--mm/percpu.c2
-rw-r--r--mm/pgtable-generic.c7
-rw-r--r--mm/ptdump.c5
-rw-r--r--mm/readahead.c36
-rw-r--r--mm/rmap.c26
-rw-r--r--mm/secretmem.c16
-rw-r--r--mm/shmem.c112
-rw-r--r--mm/show_mem.c2
-rw-r--r--mm/slub.c61
-rw-r--r--mm/swap.c33
-rw-r--r--mm/swap.h9
-rw-r--r--mm/swapfile.c70
-rw-r--r--mm/userfaultfd.c92
-rw-r--r--mm/util.c116
-rw-r--r--mm/vma.c159
-rw-r--r--mm/vma.h29
-rw-r--r--mm/vma_exec.c2
-rw-r--r--mm/vmpressure.c2
-rw-r--r--mm/vmscan.c466
-rw-r--r--mm/vmstat.c454
-rw-r--r--mm/zpdesc.h15
-rw-r--r--mm/zsmalloc.c33
-rw-r--r--mm/zswap.c5
86 files changed, 5526 insertions, 3689 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 781be3240e21..d5d4eca947a6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -934,6 +934,13 @@ config ARCH_SUPPORTS_PUD_PFNMAP
depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
#
+# Architectures that always use weak definitions for percpu
+# variables in modules should set this.
+#
+config ARCH_MODULE_NEEDS_WEAK_PER_CPU
+ bool
+
+#
# UP and nommu archs use km based percpu allocator
#
config NEED_PER_CPU_KM
@@ -1005,8 +1012,8 @@ config ARCH_FORCE_MAX_ORDER
# the default page block order is MAX_PAGE_ORDER (10) as per
# include/linux/mmzone.h.
#
-config PAGE_BLOCK_ORDER
- int "Page Block Order"
+config PAGE_BLOCK_MAX_ORDER
+ int "Page Block Order Upper Limit"
range 1 10 if ARCH_FORCE_MAX_ORDER = 0
default 10 if ARCH_FORCE_MAX_ORDER = 0
range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
@@ -1014,15 +1021,16 @@ config PAGE_BLOCK_ORDER
help
The page block order refers to the power of two number of pages that
are physically contiguous and can have a migrate type associated to
- them. The maximum size of the page block order is limited by
- ARCH_FORCE_MAX_ORDER.
+ them. The maximum size of the page block order is at least limited by
+ ARCH_FORCE_MAX_ORDER/MAX_PAGE_ORDER.
- This config allows overriding the default page block order when the
- page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
- or MAX_PAGE_ORDER.
+ This config adds a new upper limit of default page block
+ order when the page block order is required to be smaller than
+ ARCH_FORCE_MAX_ORDER/MAX_PAGE_ORDER or other limits
+ (see include/linux/pageblock-flags.h for details).
Reducing pageblock order can negatively impact THP generation
- success rate. If your workloads uses THP heavily, please use this
+ success rate. If your workloads use THP heavily, please use this
option with caution.
Don't change if unsure.
@@ -1109,9 +1117,6 @@ config ARCH_HAS_CURRENT_STACK_POINTER
register alias named "current_stack_pointer", this config can be
selected.
-config ARCH_HAS_PTE_DEVMAP
- bool
-
config ARCH_HAS_ZONE_DMA_SET
bool
@@ -1129,7 +1134,6 @@ config ZONE_DEVICE
depends on MEMORY_HOTPLUG
depends on MEMORY_HOTREMOVE
depends on SPARSEMEM_VMEMMAP
- depends on ARCH_HAS_PTE_DEVMAP
select XARRAY_MULTI
help
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index d3e00731e262..2a4a649805c1 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -94,13 +94,8 @@ size_t balloon_page_list_dequeue(struct balloon_dev_info *b_dev_info,
if (!trylock_page(page))
continue;
- if (IS_ENABLED(CONFIG_BALLOON_COMPACTION) &&
- PageIsolated(page)) {
- /* raced with isolation */
- unlock_page(page);
- continue;
- }
- balloon_page_delete(page);
+ list_del(&page->lru);
+ balloon_page_finalize(page);
__count_vm_event(BALLOON_DEFLATE);
list_add(&page->lru, pages);
unlock_page(page);
@@ -211,6 +206,9 @@ static bool balloon_page_isolate(struct page *page, isolate_mode_t mode)
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
unsigned long flags;
+ if (!b_dev_info)
+ return false;
+
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
list_del(&page->lru);
b_dev_info->isolated_pages++;
@@ -224,6 +222,10 @@ static void balloon_page_putback(struct page *page)
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
unsigned long flags;
+ /* Isolated balloon pages cannot get deflated. */
+ if (WARN_ON_ONCE(!b_dev_info))
+ return;
+
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
list_add(&page->lru, &b_dev_info->pages);
b_dev_info->isolated_pages--;
@@ -239,6 +241,10 @@ static int balloon_page_migrate(struct page *newpage, struct page *page,
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ /* Isolated balloon pages cannot get deflated. */
+ if (WARN_ON_ONCE(!balloon))
+ return -EAGAIN;
+
return balloon->migratepage(balloon, newpage, page, mode);
}
@@ -247,6 +253,5 @@ const struct movable_operations balloon_mops = {
.isolate_page = balloon_page_isolate,
.putback_page = balloon_page_putback,
};
-EXPORT_SYMBOL_GPL(balloon_mops);
#endif /* CONFIG_BALLOON_COMPACTION */
diff --git a/mm/cma.c b/mm/cma.c
index 397567883a10..2ffa4befb99a 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
@@ -35,12 +36,6 @@
struct cma cma_areas[MAX_CMA_AREAS];
unsigned int cma_area_count;
-static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
- phys_addr_t size, phys_addr_t limit,
- phys_addr_t alignment, unsigned int order_per_bit,
- bool fixed, const char *name, struct cma **res_cma,
- int nid);
-
phys_addr_t cma_get_base(const struct cma *cma)
{
WARN_ON_ONCE(cma->nranges != 1);
@@ -358,6 +353,168 @@ static void __init list_insert_sorted(
}
}
+static int __init cma_fixed_reserve(phys_addr_t base, phys_addr_t size)
+{
+ if (IS_ENABLED(CONFIG_HIGHMEM)) {
+ phys_addr_t highmem_start = __pa(high_memory - 1) + 1;
+
+ /*
+ * If allocating at a fixed base the request region must not
+ * cross the low/high memory boundary.
+ */
+ if (base < highmem_start && base + size > highmem_start) {
+ pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
+ &base, &highmem_start);
+ return -EINVAL;
+ }
+ }
+
+ if (memblock_is_region_reserved(base, size) ||
+ memblock_reserve(base, size) < 0) {
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static phys_addr_t __init cma_alloc_mem(phys_addr_t base, phys_addr_t size,
+ phys_addr_t align, phys_addr_t limit, int nid)
+{
+ phys_addr_t addr = 0;
+
+ /*
+ * If there is enough memory, try a bottom-up allocation first.
+ * It will place the new cma area close to the start of the node
+ * and guarantee that the compaction is moving pages out of the
+ * cma area and not into it.
+ * Avoid using first 4GB to not interfere with constrained zones
+ * like DMA/DMA32.
+ */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ if (!memblock_bottom_up() && limit >= SZ_4G + size) {
+ memblock_set_bottom_up(true);
+ addr = memblock_alloc_range_nid(size, align, SZ_4G, limit,
+ nid, true);
+ memblock_set_bottom_up(false);
+ }
+#endif
+
+ /*
+ * On systems with HIGHMEM try allocating from there before consuming
+ * memory in lower zones.
+ */
+ if (!addr && IS_ENABLED(CONFIG_HIGHMEM)) {
+ phys_addr_t highmem = __pa(high_memory - 1) + 1;
+
+ /*
+ * All pages in the reserved area must come from the same zone.
+ * If the requested region crosses the low/high memory boundary,
+ * try allocating from high memory first and fall back to low
+ * memory in case of failure.
+ */
+ if (base < highmem && limit > highmem) {
+ addr = memblock_alloc_range_nid(size, align, highmem,
+ limit, nid, true);
+ limit = highmem;
+ }
+ }
+
+ if (!addr)
+ addr = memblock_alloc_range_nid(size, align, base, limit, nid,
+ true);
+
+ return addr;
+}
+
+static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
+ phys_addr_t size, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, const char *name, struct cma **res_cma