diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 10:28:11 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 10:28:11 -0700 |
| commit | 6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch) | |
| tree | 2c425707f78642625dbe2c824c7fded2021e3dc7 /mm | |
| parent | 6aeadf7896bff4ca230702daba8788455e6b866e (diff) | |
| parent | acc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff) | |
| download | linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.gz linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.bz2 linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.zip | |
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton:
- Yosry Ahmed brought back some cgroup v1 stats in OOM logs
- Yosry has also eliminated cgroup's atomic rstat flushing
- Nhat Pham adds the new cachestat() syscall. It provides userspace
with the ability to query pagecache status - a similar concept to
mincore() but more powerful and with improved usability
- Mel Gorman provides more optimizations for compaction, reducing the
prevalence of page rescanning
- Lorenzo Stoakes has done some maintanance work on the
get_user_pages() interface
- Liam Howlett continues with cleanups and maintenance work to the
maple tree code. Peng Zhang also does some work on maple tree
- Johannes Weiner has done some cleanup work on the compaction code
- David Hildenbrand has contributed additional selftests for
get_user_pages()
- Thomas Gleixner has contributed some maintenance and optimization
work for the vmalloc code
- Baolin Wang has provided some compaction cleanups,
- SeongJae Park continues maintenance work on the DAMON code
- Huang Ying has done some maintenance on the swap code's usage of
device refcounting
- Christoph Hellwig has some cleanups for the filemap/directio code
- Ryan Roberts provides two patch series which yield some
rationalization of the kernel's access to pte entries - use the
provided APIs rather than open-coding accesses
- Lorenzo Stoakes has some fixes to the interaction between pagecache
and directio access to file mappings
- John Hubbard has a series of fixes to the MM selftesting code
- ZhangPeng continues the folio conversion campaign
- Hugh Dickins has been working on the pagetable handling code, mainly
with a view to reducing the load on the mmap_lock
- Catalin Marinas has reduced the arm64 kmalloc() minimum alignment
from 128 to 8
- Domenico Cerasuolo has improved the zswap reclaim mechanism by
reorganizing the LRU management
- Matthew Wilcox provides some fixups to make gfs2 work better with the
buffer_head code
- Vishal Moola also has done some folio conversion work
- Matthew Wilcox has removed the remnants of the pagevec code - their
functionality is migrated over to struct folio_batch
* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
mm/hugetlb: remove hugetlb_set_page_subpool()
mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
hugetlb: revert use of page_cache_next_miss()
Revert "page cache: fix page_cache_next/prev_miss off by one"
mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
mm: memcg: rename and document global_reclaim()
mm: kill [add|del]_page_to_lru_list()
mm: compaction: convert to use a folio in isolate_migratepages_block()
mm: zswap: fix double invalidate with exclusive loads
mm: remove unnecessary pagevec includes
mm: remove references to pagevec
mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
mm: remove struct pagevec
net: convert sunrpc from pagevec to folio_batch
i915: convert i915_gpu_error to use a folio_batch
pagevec: rename fbatch_count()
mm: remove check_move_unevictable_pages()
drm: convert drm_gem_put_pages() to use a folio_batch
i915: convert shmem_sg_free_table() to use a folio_batch
scatterlist: add sg_set_folio()
...
Diffstat (limited to 'mm')
95 files changed, 3779 insertions, 4046 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 7672a22647b4..12f32f8d26bf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -46,6 +46,22 @@ config ZSWAP_DEFAULT_ON The selection made here can be overridden by using the kernel command line 'zswap.enabled=' option. +config ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON + bool "Invalidate zswap entries when pages are loaded" + depends on ZSWAP + help + If selected, exclusive loads for zswap will be enabled at boot, + otherwise it will be disabled. + + If exclusive loads are enabled, when a page is loaded from zswap, + the zswap entry is invalidated at once, as opposed to leaving it + in zswap until the swap entry is freed. + + This avoids having two copies of the same page in memory + (compressed and uncompressed) after faulting in a page from zswap. + The cost is that if the page was never dirtied and needs to be + swapped out again, it will be re-compressed. + choice prompt "Default compressor" depends on ZSWAP diff --git a/mm/Makefile b/mm/Makefile index e29afc890cde..678530a07326 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -51,7 +51,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ - compaction.o \ + compaction.o show_mem.o\ interval_tree.o list_lru.o workingset.o \ debug.o gup.o mmap_lock.o $(mmu-y) @@ -89,6 +89,7 @@ obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_KFENCE) += kfence/ obj-$(CONFIG_KMSAN) += kmsan/ obj-$(CONFIG_FAILSLAB) += failslab.o +obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o obj-$(CONFIG_MEMTEST) += memtest.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_NUMA) += memory-tiers.o @@ -123,6 +124,7 @@ obj-$(CONFIG_SECRETMEM) += secretmem.o obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o +obj-$(CONFIG_DEBUG_PAGEALLOC) += debug_page_alloc.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_DAMON) += damon/ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7da9727fcdf3..3ffc3cfa7a14 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -20,7 +20,6 @@ struct backing_dev_info noop_backing_dev_info; EXPORT_SYMBOL_GPL(noop_backing_dev_info); -static struct class *bdi_class; static const char *bdi_unknown_name = "(unknown)"; /* @@ -345,13 +344,19 @@ static struct attribute *bdi_dev_attrs[] = { }; ATTRIBUTE_GROUPS(bdi_dev); +static const struct class bdi_class = { + .name = "bdi", + .dev_groups = bdi_dev_groups, +}; + static __init int bdi_class_init(void) { - bdi_class = class_create("bdi"); - if (IS_ERR(bdi_class)) - return PTR_ERR(bdi_class); + int ret; + + ret = class_register(&bdi_class); + if (ret) + return ret; - bdi_class->dev_groups = bdi_dev_groups; bdi_debug_init(); return 0; @@ -1001,7 +1006,7 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) return 0; vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); - dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); + dev = device_create(&bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -483,8 +483,8 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, if (ret != -EBUSY) break; - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); + pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n", + __func__, pfn, pfn_to_page(pfn)); trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn), count, align); diff --git a/mm/compaction.c b/mm/compaction.c index c8bcdea15f5f..dbc9f86b1934 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -229,6 +229,33 @@ static void reset_cached_positions(struct zone *zone) pageblock_start_pfn(zone_end_pfn(zone) - 1); } +#ifdef CONFIG_SPARSEMEM +/* + * If the PFN falls into an offline section, return the start PFN of the + * next online section. If the PFN falls into an online section or if + * there is no next online section, return 0. + */ +static unsigned long skip_offline_sections(unsigned long start_pfn) +{ + unsigned long start_nr = pfn_to_section_nr(start_pfn); + + if (online_section_nr(start_nr)) + return 0; + + while (++start_nr <= __highest_present_section_nr) { + if (online_section_nr(start_nr)) + return section_nr_to_pfn(start_nr); + } + + return 0; +} +#else +static unsigned long skip_offline_sections(unsigned long start_pfn) +{ + return 0; +} +#endif + /* * Compound pages of >= pageblock_order should consistently be skipped until * released. It is always pointless to compact pages of such order (if they are @@ -392,18 +419,14 @@ void reset_isolation_suitable(pg_data_t *pgdat) * Sets the pageblock skip bit if it was clear. Note that this is a hint as * locks are not required for read/writers. Returns true if it was already set. */ -static bool test_and_set_skip(struct compact_control *cc, struct page *page, - unsigned long pfn) +static bool test_and_set_skip(struct compact_control *cc, struct page *page) { bool skip; - /* Do no update if skip hint is being ignored */ + /* Do not update if skip hint is being ignored */ if (cc->ignore_skip_hint) return false; - if (!pageblock_aligned(pfn)) - return false; - skip = get_pageblock_skip(page); if (!skip && !cc->no_set_skip_hint) set_pageblock_skip(page); @@ -440,9 +463,6 @@ static void update_pageblock_skip(struct compact_control *cc, if (cc->no_set_skip_hint) return; - if (!page) - return; - set_pageblock_skip(page); /* Update where async and sync compaction should restart */ @@ -470,8 +490,7 @@ static void update_cached_migrate(struct compact_control *cc, unsigned long pfn) { } -static bool test_and_set_skip(struct compact_control *cc, struct page *page, - unsigned long pfn) +static bool test_and_set_skip(struct compact_control *cc, struct page *page) { return false; } @@ -745,8 +764,9 @@ isolate_freepages_range(struct compact_control *cc, } /* Similar to reclaim, but different enough that they don't share logic */ -static bool too_many_isolated(pg_data_t *pgdat) +static bool too_many_isolated(struct compact_control *cc) { + pg_data_t *pgdat = cc->zone->zone_pgdat; bool too_many; unsigned long active, inactive, isolated; @@ -758,6 +778,17 @@ static bool too_many_isolated(pg_data_t *pgdat) isolated = node_page_state(pgdat, NR_ISOLATED_FILE) + node_page_state(pgdat, NR_ISOLATED_ANON); + /* + * Allow GFP_NOFS to isolate past the limit set for regular + * compaction runs. This prevents an ABBA deadlock when other + * compactors have already isolated to the limit, but are + * blocked on filesystem locks held by the GFP_NOFS thread. + */ + if (cc->gfp_mask & __GFP_FS) { + inactive >>= 3; + active >>= 3; + } + too_many = isolated > (inactive + active) / 2; if (!too_many) wake_throttle_isolated(pgdat); @@ -791,6 +822,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, struct lruvec *lruvec; unsigned long flags = 0; struct lruvec *locked = NULL; + struct folio *folio = NULL; struct page *page = NULL, *valid_page = NULL; struct address_space *mapping; unsigned long start_pfn = low_pfn; @@ -806,7 +838,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * list by either parallel reclaimers or compaction. If there are, * delay for some time until fewer pages are isolated */ - while (unlikely(too_many_isolated(pgdat))) { + while (unlikely(too_many_isolated(cc))) { /* stop isolation if there are still pages not migrated */ if (cc->nr_migratepages) return -EAGAIN; @@ -887,7 +919,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (!valid_page && pageblock_aligned(low_pfn)) { if (!isolation_suitable(cc, page)) { low_pfn = end_pfn; - page = NULL; + folio = NULL; goto isolate_abort; } valid_page = page; @@ -919,7 +951,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * Hugepage was successfully isolated and placed * on the cc->migratepages list. */ - low_pfn += compound_nr(page) - 1; + folio = page_folio(page); + low_pfn += folio_nr_pages(folio) - 1; goto isolate_success_no_list; } @@ -987,8 +1020,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, locked = NULL; } - if (isolate_movable_page(page, mode)) + if (isolate_movable_page(page, mode)) { + folio = page_folio(page); goto isolate_success; + } } goto isolate_fail; @@ -999,7 +1034,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * sure the page is not being freed elsewhere -- the * page release code relies on it. */ - if (unlikely(!get_page_unless_zero(page))) + folio = folio_get_nontail_page(page); + if (unlikely(!folio)) goto isolate_fail; /* @@ -1007,8 +1043,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * so avoid taking lru_lock and isolating it unnecessarily in an * admittedly racy check. */ - mapping = page_mapping(page); - if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + mapping = folio_mapping(folio); + if (!mapping && (folio_ref_count(folio) - 1) > folio_mapcount(folio)) goto isolate_fail_put; /* @@ -1019,11 +1055,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail_put; |
