author     Linus Torvalds <torvalds@linux-foundation.org>  2023-06-28 10:28:11 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-06-28 10:28:11 -0700
commit     6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch)
tree       2c425707f78642625dbe2c824c7fded2021e3dc7 /mm
parent     6aeadf7896bff4ca230702daba8788455e6b866e (diff)
parent     acc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff)
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton:

 - Yosry Ahmed brought back some cgroup v1 stats in OOM logs

 - Yosry has also eliminated cgroup's atomic rstat flushing

 - Nhat Pham adds the new cachestat() syscall. It provides userspace
   with the ability to query pagecache status - a similar concept to
   mincore() but more powerful and with improved usability

 - Mel Gorman provides more optimizations for compaction, reducing the
   prevalence of page rescanning

 - Lorenzo Stoakes has done some maintenance work on the
   get_user_pages() interface

 - Liam Howlett continues with cleanups and maintenance work to the
   maple tree code. Peng Zhang also does some work on maple tree

 - Johannes Weiner has done some cleanup work on the compaction code

 - David Hildenbrand has contributed additional selftests for
   get_user_pages()

 - Thomas Gleixner has contributed some maintenance and optimization
   work for the vmalloc code

 - Baolin Wang has provided some compaction cleanups

 - SeongJae Park continues maintenance work on the DAMON code

 - Huang Ying has done some maintenance on the swap code's usage of
   device refcounting

 - Christoph Hellwig has some cleanups for the filemap/directio code

 - Ryan Roberts provides two patch series which yield some
   rationalization of the kernel's access to pte entries - use the
   provided APIs rather than open-coding accesses

 - Lorenzo Stoakes has some fixes to the interaction between pagecache
   and directio access to file mappings

 - John Hubbard has a series of fixes to the MM selftesting code

 - ZhangPeng continues the folio conversion campaign

 - Hugh Dickins has been working on the pagetable handling code, mainly
   with a view to reducing the load on the mmap_lock

 - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment
   from 128 to 8

 - Domenico Cerasuolo has improved the zswap reclaim mechanism by
   reorganizing the LRU management

 - Matthew Wilcox provides some fixups to make gfs2 work better with the
   buffer_head code

 - Vishal Moola also has done some folio conversion work

 - Matthew Wilcox has removed the remnants of the pagevec code - their
   functionality is migrated over to struct folio_batch

* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
  mm/hugetlb: remove hugetlb_set_page_subpool()
  mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
  hugetlb: revert use of page_cache_next_miss()
  Revert "page cache: fix page_cache_next/prev_miss off by one"
  mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
  mm: memcg: rename and document global_reclaim()
  mm: kill [add|del]_page_to_lru_list()
  mm: compaction: convert to use a folio in isolate_migratepages_block()
  mm: zswap: fix double invalidate with exclusive loads
  mm: remove unnecessary pagevec includes
  mm: remove references to pagevec
  mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
  mm: remove struct pagevec
  net: convert sunrpc from pagevec to folio_batch
  i915: convert i915_gpu_error to use a folio_batch
  pagevec: rename fbatch_count()
  mm: remove check_move_unevictable_pages()
  drm: convert drm_gem_put_pages() to use a folio_batch
  i915: convert shmem_sg_free_table() to use a folio_batch
  scatterlist: add sg_set_folio()
  ...
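The cachestat() syscall mentioned in the pull message is queried per file descriptor over a byte range of the file. The sketch below is a minimal userspace illustration, assuming the struct layout and x86_64 syscall number (451) from the 6.5 uapi as I recall them; the structures are redeclared locally purely for illustration (real builds should use the definitions in <linux/mman.h>), and the fallback syscall number is an assumption for systems with pre-6.5 headers.

/*
 * Minimal sketch of calling the new cachestat() syscall from userspace.
 * Struct layout and syscall number are restated here from the 6.5 uapi
 * and are assumptions if your installed headers differ.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_cachestat
#define __NR_cachestat 451		/* x86_64; assumption for older headers */
#endif

struct cachestat_range {
	uint64_t off;			/* byte offset into the file */
	uint64_t len;			/* 0 means "to end of file" */
};

struct cachestat {
	uint64_t nr_cache;		/* pages present in the page cache */
	uint64_t nr_dirty;		/* of those, dirty pages */
	uint64_t nr_writeback;		/* pages under writeback */
	uint64_t nr_evicted;		/* pages evicted from the cache */
	uint64_t nr_recently_evicted;	/* evicted and still considered recent */
};

int main(int argc, char **argv)
{
	struct cachestat_range range = { .off = 0, .len = 0 };
	struct cachestat cs;
	int fd = open(argc > 1 ? argv[1] : "/etc/hostname", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (syscall(__NR_cachestat, fd, &range, &cs, 0)) {
		perror("cachestat");	/* ENOSYS on pre-6.5 kernels */
		return 1;
	}
	printf("cached=%llu dirty=%llu writeback=%llu recently_evicted=%llu\n",
	       (unsigned long long)cs.nr_cache,
	       (unsigned long long)cs.nr_dirty,
	       (unsigned long long)cs.nr_writeback,
	       (unsigned long long)cs.nr_recently_evicted);
	close(fd);
	return 0;
}

Unlike mincore(), this works on an open file descriptor and a byte range rather than on a mapped address range, which is what makes it usable for files that are not mapped at all.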
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig | 16
-rw-r--r--  mm/Makefile | 4
-rw-r--r--  mm/backing-dev.c | 17
-rw-r--r--  mm/cma.c | 4
-rw-r--r--  mm/compaction.c | 334
-rw-r--r--  mm/damon/core-test.h | 24
-rw-r--r--  mm/damon/ops-common.c | 32
-rw-r--r--  mm/damon/ops-common.h | 4
-rw-r--r--  mm/damon/paddr.c | 6
-rw-r--r--  mm/damon/vaddr.c | 26
-rw-r--r--  mm/debug.c | 9
-rw-r--r--  mm/debug_page_alloc.c | 59
-rw-r--r--  mm/debug_vm_pgtable.c | 9
-rw-r--r--  mm/dmapool.c | 10
-rw-r--r--  mm/early_ioremap.c | 8
-rw-r--r--  mm/fadvise.c | 17
-rw-r--r--  mm/fail_page_alloc.c | 66
-rw-r--r--  mm/filemap.c | 450
-rw-r--r--  mm/frontswap.c | 10
-rw-r--r--  mm/gup.c | 406
-rw-r--r--  mm/gup_test.c | 27
-rw-r--r--  mm/highmem.c | 12
-rw-r--r--  mm/hmm.c | 6
-rw-r--r--  mm/huge_memory.c | 56
-rw-r--r--  mm/hugetlb.c | 126
-rw-r--r--  mm/hugetlb_vmemmap.c | 17
-rw-r--r--  mm/internal.h | 87
-rw-r--r--  mm/kasan/common.c | 2
-rw-r--r--  mm/kasan/generic.c | 76
-rw-r--r--  mm/kasan/init.c | 9
-rw-r--r--  mm/kasan/kasan.h | 159
-rw-r--r--  mm/kasan/report.c | 44
-rw-r--r--  mm/kasan/report_generic.c | 12
-rw-r--r--  mm/kasan/report_hw_tags.c | 2
-rw-r--r--  mm/kasan/report_sw_tags.c | 2
-rw-r--r--  mm/kasan/shadow.c | 46
-rw-r--r--  mm/kasan/sw_tags.c | 20
-rw-r--r--  mm/kasan/tags.c | 2
-rw-r--r--  mm/khugepaged.c | 125
-rw-r--r--  mm/kmsan/core.c | 6
-rw-r--r--  mm/kmsan/instrumentation.c | 2
-rw-r--r--  mm/ksm.c | 38
-rw-r--r--  mm/madvise.c | 150
-rw-r--r--  mm/mapping_dirty_helpers.c | 38
-rw-r--r--  mm/memblock.c | 33
-rw-r--r--  mm/memcontrol.c | 253
-rw-r--r--  mm/memory-failure.c | 45
-rw-r--r--  mm/memory-tiers.c | 3
-rw-r--r--  mm/memory.c | 341
-rw-r--r--  mm/memory_hotplug.c | 42
-rw-r--r--  mm/mempolicy.c | 28
-rw-r--r--  mm/migrate.c | 382
-rw-r--r--  mm/migrate_device.c | 46
-rw-r--r--  mm/mincore.c | 11
-rw-r--r--  mm/mlock.c | 10
-rw-r--r--  mm/mm_init.c | 154
-rw-r--r--  mm/mmap.c | 222
-rw-r--r--  mm/mprotect.c | 87
-rw-r--r--  mm/mremap.c | 32
-rw-r--r--  mm/oom_kill.c | 8
-rw-r--r--  mm/page-writeback.c | 6
-rw-r--r--  mm/page_alloc.c | 873
-rw-r--r--  mm/page_isolation.c | 33
-rw-r--r--  mm/page_owner.c | 2
-rw-r--r--  mm/page_table_check.c | 6
-rw-r--r--  mm/page_vma_mapped.c | 114
-rw-r--r--  mm/pagewalk.c | 33
-rw-r--r--  mm/percpu-internal.h | 11
-rw-r--r--  mm/pgtable-generic.c | 58
-rw-r--r--  mm/process_vm_access.c | 2
-rw-r--r--  mm/ptdump.c | 2
-rw-r--r--  mm/readahead.c | 1
-rw-r--r--  mm/rmap.c | 36
-rw-r--r--  mm/secretmem.c | 4
-rw-r--r--  mm/shmem.c | 7
-rw-r--r--  mm/show_mem.c | 429
-rw-r--r--  mm/slab.c | 6
-rw-r--r--  mm/slab.h | 5
-rw-r--r--  mm/slab_common.c | 41
-rw-r--r--  mm/sparse-vmemmap.c | 8
-rw-r--r--  mm/sparse.c | 10
-rw-r--r--  mm/swap.c | 20
-rw-r--r--  mm/swap_state.c | 87
-rw-r--r--  mm/swapfile.c | 109
-rw-r--r--  mm/truncate.c | 27
-rw-r--r--  mm/userfaultfd.c | 12
-rw-r--r--  mm/vmalloc.c | 130
-rw-r--r--  mm/vmscan.c | 197
-rw-r--r--  mm/vmstat.c | 15
-rw-r--r--  mm/workingset.c | 158
-rw-r--r--  mm/z3fold.c | 249
-rw-r--r--  mm/zbud.c | 167
-rw-r--r--  mm/zpool.c | 48
-rw-r--r--  mm/zsmalloc.c | 408
-rw-r--r--  mm/zswap.c | 239
95 files changed, 3779 insertions, 4046 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 7672a22647b4..12f32f8d26bf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -46,6 +46,22 @@ config ZSWAP_DEFAULT_ON
The selection made here can be overridden by using the kernel
command line 'zswap.enabled=' option.
+config ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON
+ bool "Invalidate zswap entries when pages are loaded"
+ depends on ZSWAP
+ help
+ If selected, exclusive loads for zswap will be enabled at boot,
+ otherwise it will be disabled.
+
+ If exclusive loads are enabled, when a page is loaded from zswap,
+ the zswap entry is invalidated at once, as opposed to leaving it
+ in zswap until the swap entry is freed.
+
+ This avoids having two copies of the same page in memory
+ (compressed and uncompressed) after faulting in a page from zswap.
+ The cost is that if the page was never dirtied and needs to be
+ swapped out again, it will be re-compressed.
+
choice
prompt "Default compressor"
depends on ZSWAP
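The new help text above captures the whole trade-off: an exclusive load hands the page back and drops the compressed copy at once, accepting a possible re-compression later in exchange for never holding two copies of the page. The toy C program below is an illustration of that policy only, not kernel code: every identifier in it is invented, "decompression" is a memcpy(), and invalidating the entry is a free(). (The same patch series also exposes a zswap module parameter for this, exclusive_loads, to override the built-in default; that name comes from the series rather than from this hunk.)

/*
 * Toy illustration of "exclusive loads" -- not kernel code; all names
 * here are made up for the sketch.
 */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct toy_zswap_entry {
	void *compressed;	/* stand-in for the compressed copy */
	size_t len;
};

static bool exclusive_loads = true;	/* what the new Kconfig default would control */

/* Bring a page back in: copy the data out and, if loads are exclusive,
 * drop the compressed copy at once so only one copy stays in memory. */
static void toy_zswap_load(struct toy_zswap_entry *e, char *page)
{
	memcpy(page, e->compressed, e->len);	/* pretend decompression */
	if (exclusive_loads) {
		free(e->compressed);		/* invalidate the entry now */
		e->compressed = NULL;
		e->len = 0;
	}
	/* If the page is never dirtied and is swapped out again, it now
	 * has to be re-compressed -- the cost named in the help text. */
}

int main(void)
{
	char page[4096];
	struct toy_zswap_entry e = { .len = sizeof(page) };

	e.compressed = malloc(e.len);
	memset(e.compressed, 0xaa, e.len);
	toy_zswap_load(&e, page);	/* compressed copy is gone afterwards */
	return 0;
}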
diff --git a/mm/Makefile b/mm/Makefile
index e29afc890cde..678530a07326 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -51,7 +51,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o percpu.o slab_common.o \
- compaction.o \
+ compaction.o show_mem.o\
interval_tree.o list_lru.o workingset.o \
debug.o gup.o mmap_lock.o $(mmu-y)
@@ -89,6 +89,7 @@ obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_KMSAN) += kmsan/
obj-$(CONFIG_FAILSLAB) += failslab.o
+obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
@@ -123,6 +124,7 @@ obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
+obj-$(CONFIG_DEBUG_PAGEALLOC) += debug_page_alloc.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_DAMON) += damon/
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7da9727fcdf3..3ffc3cfa7a14 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -20,7 +20,6 @@
struct backing_dev_info noop_backing_dev_info;
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
-static struct class *bdi_class;
static const char *bdi_unknown_name = "(unknown)";
/*
@@ -345,13 +344,19 @@ static struct attribute *bdi_dev_attrs[] = {
};
ATTRIBUTE_GROUPS(bdi_dev);
+static const struct class bdi_class = {
+ .name = "bdi",
+ .dev_groups = bdi_dev_groups,
+};
+
static __init int bdi_class_init(void)
{
- bdi_class = class_create("bdi");
- if (IS_ERR(bdi_class))
- return PTR_ERR(bdi_class);
+ int ret;
+
+ ret = class_register(&bdi_class);
+ if (ret)
+ return ret;
- bdi_class->dev_groups = bdi_dev_groups;
bdi_debug_init();
return 0;
@@ -1001,7 +1006,7 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
return 0;
vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
- dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
+ dev = device_create(&bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
if (IS_ERR(dev))
return PTR_ERR(dev);
diff --git a/mm/cma.c b/mm/cma.c
index 6268d6620254..a4cfe995e11e 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -483,8 +483,8 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
if (ret != -EBUSY)
break;
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
+ pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
+ __func__, pfn, pfn_to_page(pfn));
trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
count, align);
diff --git a/mm/compaction.c b/mm/compaction.c
index c8bcdea15f5f..dbc9f86b1934 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -229,6 +229,33 @@ static void reset_cached_positions(struct zone *zone)
pageblock_start_pfn(zone_end_pfn(zone) - 1);
}
+#ifdef CONFIG_SPARSEMEM
+/*
+ * If the PFN falls into an offline section, return the start PFN of the
+ * next online section. If the PFN falls into an online section or if
+ * there is no next online section, return 0.
+ */
+static unsigned long skip_offline_sections(unsigned long start_pfn)
+{
+ unsigned long start_nr = pfn_to_section_nr(start_pfn);
+
+ if (online_section_nr(start_nr))
+ return 0;
+
+ while (++start_nr <= __highest_present_section_nr) {
+ if (online_section_nr(start_nr))
+ return section_nr_to_pfn(start_nr);
+ }
+
+ return 0;
+}
+#else
+static unsigned long skip_offline_sections(unsigned long start_pfn)
+{
+ return 0;
+}
+#endif
+
/*
* Compound pages of >= pageblock_order should consistently be skipped until
* released. It is always pointless to compact pages of such order (if they are
@@ -392,18 +419,14 @@ void reset_isolation_suitable(pg_data_t *pgdat)
* Sets the pageblock skip bit if it was clear. Note that this is a hint as
* locks are not required for read/writers. Returns true if it was already set.
*/
-static bool test_and_set_skip(struct compact_control *cc, struct page *page,
- unsigned long pfn)
+static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
bool skip;
- /* Do no update if skip hint is being ignored */
+ /* Do not update if skip hint is being ignored */
if (cc->ignore_skip_hint)
return false;
- if (!pageblock_aligned(pfn))
- return false;
-
skip = get_pageblock_skip(page);
if (!skip && !cc->no_set_skip_hint)
set_pageblock_skip(page);
@@ -440,9 +463,6 @@ static void update_pageblock_skip(struct compact_control *cc,
if (cc->no_set_skip_hint)
return;
- if (!page)
- return;
-
set_pageblock_skip(page);
/* Update where async and sync compaction should restart */
@@ -470,8 +490,7 @@ static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
{
}
-static bool test_and_set_skip(struct compact_control *cc, struct page *page,
- unsigned long pfn)
+static bool test_and_set_skip(struct compact_control *cc, struct page *page)
{
return false;
}
@@ -745,8 +764,9 @@ isolate_freepages_range(struct compact_control *cc,
}
/* Similar to reclaim, but different enough that they don't share logic */
-static bool too_many_isolated(pg_data_t *pgdat)
+static bool too_many_isolated(struct compact_control *cc)
{
+ pg_data_t *pgdat = cc->zone->zone_pgdat;
bool too_many;
unsigned long active, inactive, isolated;
@@ -758,6 +778,17 @@ static bool too_many_isolated(pg_data_t *pgdat)
isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
node_page_state(pgdat, NR_ISOLATED_ANON);
+ /*
+ * Allow GFP_NOFS to isolate past the limit set for regular
+ * compaction runs. This prevents an ABBA deadlock when other
+ * compactors have already isolated to the limit, but are
+ * blocked on filesystem locks held by the GFP_NOFS thread.
+ */
+ if (cc->gfp_mask & __GFP_FS) {
+ inactive >>= 3;
+ active >>= 3;
+ }
+
too_many = isolated > (inactive + active) / 2;
if (!too_many)
wake_throttle_isolated(pgdat);
@@ -791,6 +822,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
struct lruvec *lruvec;
unsigned long flags = 0;
struct lruvec *locked = NULL;
+ struct folio *folio = NULL;
struct page *page = NULL, *valid_page = NULL;
struct address_space *mapping;
unsigned long start_pfn = low_pfn;
@@ -806,7 +838,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* list by either parallel reclaimers or compaction. If there are,
* delay for some time until fewer pages are isolated
*/
- while (unlikely(too_many_isolated(pgdat))) {
+ while (unlikely(too_many_isolated(cc))) {
/* stop isolation if there are still pages not migrated */
if (cc->nr_migratepages)
return -EAGAIN;
@@ -887,7 +919,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (!valid_page && pageblock_aligned(low_pfn)) {
if (!isolation_suitable(cc, page)) {
low_pfn = end_pfn;
- page = NULL;
+ folio = NULL;
goto isolate_abort;
}
valid_page = page;
@@ -919,7 +951,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* Hugepage was successfully isolated and placed
* on the cc->migratepages list.
*/
- low_pfn += compound_nr(page) - 1;
+ folio = page_folio(page);
+ low_pfn += folio_nr_pages(folio) - 1;
goto isolate_success_no_list;
}
@@ -987,8 +1020,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
locked = NULL;
}
- if (isolate_movable_page(page, mode))
+ if (isolate_movable_page(page, mode)) {
+ folio = page_folio(page);
goto isolate_success;
+ }
}
goto isolate_fail;
@@ -999,7 +1034,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
- if (unlikely(!get_page_unless_zero(page)))
+ folio = folio_get_nontail_page(page);
+ if (unlikely(!folio))
goto isolate_fail;
/*
@@ -1007,8 +1043,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
- mapping = page_mapping(page);
- if (!mapping && (page_count(page) - 1) > total_mapcount(page))
+ mapping = folio_mapping(folio);
+ if (!mapping && (folio_ref_count(folio) - 1) > folio_mapcount(folio))
goto isolate_fail_put;
/*
@@ -1019,11 +1055,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_fail_put;