| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-06 14:08:17 -0700 |
| --- | --- | --- |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-06 14:08:17 -0700 |
| commit | 512b7931ad0561ffe14265f9ff554a3c081b476b | |
| tree | a94450d08468e094d2d92a495de4650faab09c1f /mm | |
| parent | fe91c4725aeed35023ba4f7a1e1adfebb6878c23 | |
| parent | 658f9ae761b5965893727dd4edcdad56e5a439bb | |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"257 patches.
Subsystems affected by this patch series: scripts, ocfs2, vfs, and
mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache,
gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc,
pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools,
memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm,
vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram,
cleanups, kfence, and damon)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (257 commits)
mm/damon: remove return value from before_terminate callback
mm/damon: fix a few spelling mistakes in comments and a pr_debug message
mm/damon: simplify stop mechanism
Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions
Docs/admin-guide/mm/damon/start: simplify the content
Docs/admin-guide/mm/damon/start: fix a wrong link
Docs/admin-guide/mm/damon/start: fix wrong example commands
mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on
mm/damon: remove unnecessary variable initialization
Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM
mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM)
selftests/damon: support watermarks
mm/damon/dbgfs: support watermarks
mm/damon/schemes: activate schemes based on a watermarks mechanism
tools/selftests/damon: update for regions prioritization of schemes
mm/damon/dbgfs: support prioritization weights
mm/damon/vaddr,paddr: support pageout prioritization
mm/damon/schemes: prioritize regions within the quotas
mm/damon/selftests: support schemes quotas
mm/damon/dbgfs: support quotas of schemes
...
Diffstat (limited to 'mm')
65 files changed, 3682 insertions, 1136 deletions
```diff
diff --git a/mm/Kconfig b/mm/Kconfig
index d16ba9249bc5..ae1f151c2924 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -123,15 +123,11 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	select MEMORY_ISOLATION
-	depends on SPARSEMEM || X86_64_ACPI_NUMA
+	depends on SPARSEMEM
 	depends on ARCH_ENABLE_MEMORY_HOTPLUG
-	depends on 64BIT || BROKEN
+	depends on 64BIT
 	select NUMA_KEEP_MEMINFO if NUMA
 
-config MEMORY_HOTPLUG_SPARSE
-	def_bool y
-	depends on SPARSEMEM && MEMORY_HOTPLUG
-
 config MEMORY_HOTPLUG_DEFAULT_ONLINE
 	bool "Online the newly added memory blocks by default"
 	depends on MEMORY_HOTPLUG
@@ -371,7 +367,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
 	select COMPACTION
 	select XARRAY_MULTI
 	help
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c878d995af06..1eead4761011 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -292,8 +292,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 
 	memset(wb, 0, sizeof(*wb));
 
-	if (wb != &bdi->wb)
-		bdi_get(bdi);
 	wb->bdi = bdi;
 	wb->last_old_flush = jiffies;
 	INIT_LIST_HEAD(&wb->b_dirty);
@@ -317,7 +315,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 
 	err = fprop_local_init_percpu(&wb->completions, gfp);
 	if (err)
-		goto out_put_bdi;
+		return err;
 
 	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&wb->stat[i], 0, gfp);
@@ -331,9 +329,6 @@ out_destroy_stat:
 	while (i--)
 		percpu_counter_destroy(&wb->stat[i]);
 	fprop_local_destroy_percpu(&wb->completions);
-out_put_bdi:
-	if (wb != &bdi->wb)
-		bdi_put(bdi);
 	return err;
 }
 
@@ -374,8 +369,6 @@ static void wb_exit(struct bdi_writeback *wb)
 		percpu_counter_destroy(&wb->stat[i]);
 
 	fprop_local_destroy_percpu(&wb->completions);
-	if (wb != &wb->bdi->wb)
-		bdi_put(wb->bdi);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -398,6 +391,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
 	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
+	struct backing_dev_info *bdi = wb->bdi;
 
 	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
@@ -417,6 +411,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
+	bdi_put(bdi);
 	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
 }
@@ -498,6 +493,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	INIT_LIST_HEAD(&wb->b_attached);
 	INIT_WORK(&wb->release_work, cgwb_release_workfn);
 	set_bit(WB_registered, &wb->state);
+	bdi_get(bdi);
 
 	/*
 	 * The root wb determines the registered state of the whole bdi and
@@ -529,6 +525,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	goto out_put;
 
 err_fprop_exit:
+	bdi_put(bdi);
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 err_ref_exit:
 	percpu_ref_exit(&wb->refcnt);
@@ -959,14 +956,14 @@ void bdi_unregister(struct backing_dev_info *bdi)
 		bdi->owner = NULL;
 	}
 }
+EXPORT_SYMBOL(bdi_unregister);
 
 static void release_bdi(struct kref *ref)
 {
 	struct backing_dev_info *bdi =
 			container_of(ref, struct backing_dev_info, refcnt);
 
-	if (test_bit(WB_registered, &bdi->wb.state))
-		bdi_unregister(bdi);
+	WARN_ON_ONCE(test_bit(WB_registered, &bdi->wb.state));
 	WARN_ON_ONCE(bdi->dev);
 	wb_exit(&bdi->wb);
 	kfree(bdi);
@@ -1058,51 +1055,3 @@ long congestion_wait(int sync, long timeout)
 	return ret;
 }
 EXPORT_SYMBOL(congestion_wait);
-
-/**
- * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
- * @sync: SYNC or ASYNC IO
- * @timeout: timeout in jiffies
- *
- * In the event of a congested backing_dev (any backing_dev) this waits
- * for up to @timeout jiffies for either a BDI to exit congestion of the
- * given @sync queue or a write to complete.
- *
- * The return value is 0 if the sleep is for the full timeout. Otherwise,
- * it is the number of jiffies that were still remaining when the function
- * returned. return_value == timeout implies the function did not sleep.
- */
-long wait_iff_congested(int sync, long timeout)
-{
-	long ret;
-	unsigned long start = jiffies;
-	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = &congestion_wqh[sync];
-
-	/*
-	 * If there is no congestion, yield if necessary instead
-	 * of sleeping on the congestion queue
-	 */
-	if (atomic_read(&nr_wb_congested[sync]) == 0) {
-		cond_resched();
-
-		/* In case we scheduled, work out time remaining */
-		ret = timeout - (jiffies - start);
-		if (ret < 0)
-			ret = 0;
-
-		goto out;
-	}
-
-	/* Sleep until uncongested or a write happens */
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	ret = io_schedule_timeout(timeout);
-	finish_wait(wqh, &wait);
-
-out:
-	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
-					jiffies_to_usecs(jiffies - start));
-
-	return ret;
-}
-EXPORT_SYMBOL(wait_iff_congested);
```
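Taken together, the mm/backing-dev.c hunks move bdi reference counting out of wb_init()/wb_exit() and into the cgroup-writeback paths: each cgwb now pins the bdi once in cgwb_create() and drops it in cgwb_release_workfn() (or on cgwb_create()'s error path), while the root wb embedded in the bdi never takes a self-reference. A minimal sketch of the resulting pairing, for orientation only — this is not code from the patch:

```c
#include <linux/backing-dev.h>

/* Sketch: a cgwb's reference on its bdi after this patch. */
static void cgwb_bdi_ref_sketch(struct backing_dev_info *bdi)
{
	bdi_get(bdi);	/* cgwb_create(): pin bdi for the cgwb's lifetime */

	/*
	 * ... the cgwb may now outlive bdi_unregister(); wb->bdi stays
	 * a valid pointer until the release work runs ...
	 */

	bdi_put(bdi);	/* cgwb_release_workfn(): drop the pin at release */
}
```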
```diff
diff --git a/mm/cma.c b/mm/cma.c
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -378,7 +378,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
 	return 0;
 
 free_mem:
-	memblock_free(base, size);
+	memblock_phys_free(base, size);
 err:
 	pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
 	return ret;
@@ -524,6 +524,25 @@ out:
 	return page;
 }
 
+bool cma_pages_valid(struct cma *cma, const struct page *pages,
+		     unsigned long count)
+{
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) {
+		pr_debug("%s(page %p, count %lu)\n", __func__,
+			 (void *)pages, count);
+		return false;
+	}
+
+	return true;
+}
+
 /**
  * cma_release() - release allocated pages
  * @cma: Contiguous memory region for which the allocation is performed.
@@ -539,16 +558,13 @@ bool cma_release(struct cma *cma, const struct page *pages,
 {
 	unsigned long pfn;
 
-	if (!cma || !pages)
+	if (!cma_pages_valid(cma, pages, count))
 		return false;
 
 	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
 
 	pfn = page_to_pfn(pages);
 
-	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-		return false;
-
 	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
 
 	free_contig_range(pfn, count);
```
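The new cma_pages_valid() helper factors the PFN-range check out of cma_release(), so callers can ask whether a buffer belongs to a CMA area before deciding how to free it. A hedged usage sketch follows; the caller and its fallback path are hypothetical, not part of this patch:

```c
#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical caller: release through CMA only when the pages really
 * came from this CMA area, otherwise fall back to the page allocator. */
static void example_free_buf(struct cma *cma, struct page *pages,
			     unsigned long count)
{
	if (cma_pages_valid(cma, pages, count))
		cma_release(cma, pages, count);
	else
		__free_pages(pages, get_order(count << PAGE_SHIFT));
}
```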
```diff
diff --git a/mm/compaction.c b/mm/compaction.c
index fbc60f964c38..6e446094ce90 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -761,6 +761,8 @@ isolate_freepages_range(struct compact_control *cc,
 /* Similar to reclaim, but different enough that they don't share logic */
 static bool too_many_isolated(pg_data_t *pgdat)
 {
+	bool too_many;
+
 	unsigned long active, inactive, isolated;
 
 	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
@@ -770,7 +772,11 @@ static bool too_many_isolated(pg_data_t *pgdat)
 	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
 			node_page_state(pgdat, NR_ISOLATED_ANON);
 
-	return isolated > (inactive + active) / 2;
+	too_many = isolated > (inactive + active) / 2;
+	if (!too_many)
+		wake_throttle_isolated(pgdat);
+
+	return too_many;
 }
 
 /**
@@ -822,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (cc->mode == MIGRATE_ASYNC)
 			return -EAGAIN;
 
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
 
 		if (fatal_signal_pending(current))
 			return -EINTR;
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index 37024798a97c..5bcf05851ad0 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -30,7 +30,15 @@ config DAMON_VADDR
 	select PAGE_IDLE_FLAG
 	help
 	  This builds the default data access monitoring primitives for DAMON
-	  that works for virtual address spaces.
+	  that work for virtual address spaces.
+
+config DAMON_PADDR
+	bool "Data access monitoring primitives for the physical address space"
+	depends on DAMON && MMU
+	select PAGE_IDLE_FLAG
+	help
+	  This builds the default data access monitoring primitives for DAMON
+	  that work for the physical address space.
 
 config DAMON_VADDR_KUNIT_TEST
 	bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS
@@ -46,7 +54,7 @@ config DAMON_VADDR_KUNIT_TEST
 
 config DAMON_DBGFS
 	bool "DAMON debugfs interface"
-	depends on DAMON_VADDR && DEBUG_FS
+	depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS
 	help
 	  This builds the debugfs interface for DAMON. The user space admins
 	  can use the interface for arbitrary data access monitoring.
@@ -65,4 +73,16 @@ config DAMON_DBGFS_KUNIT_TEST
 
 	  If unsure, say N.
 
+config DAMON_RECLAIM
+	bool "Build DAMON-based reclaim (DAMON_RECLAIM)"
+	depends on DAMON_PADDR
+	help
+	  This builds the DAMON-based reclamation subsystem. It finds pages
+	  that are not accessed for a long time (cold) using DAMON and
+	  reclaims them.
+
+	  This is suggested to be used as a proactive and lightweight
+	  reclamation under light memory pressure, while the traditional page
+	  scanning-based reclamation is used for heavy pressure.
+
 endmenu
```
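The DAMON_RECLAIM help text above describes the policy in prose: find regions that have stayed cold and page them out, throttled by a quota and gated by watermarks. Below is a hedged sketch of how such a policy can be expressed with the scheme helpers this series adds to mm/damon/core.c (shown further down); the numeric thresholds are illustrative, not the module's defaults, and the DAMOS_PAGEOUT / DAMOS_WMARK_FREE_MEM_RATE enum names come from the wider series rather than this hunk:

```c
#include <linux/damon.h>
#include <linux/kernel.h>
#include <linux/mm.h>

/* Sketch of a DAMON_RECLAIM-style scheme: page out any region that saw
 * zero accesses for at least 120 aggregation intervals, spending at most
 * 10ms/128MiB per second, active only while free memory is scarce. */
static struct damos *example_cold_pageout_scheme(void)
{
	struct damos_quota quota = {
		.ms = 10,			/* time quota per window */
		.sz = 128 * 1024 * 1024,	/* size quota per window */
		.reset_interval = 1000,		/* 1s quota windows */
		.weight_sz = 0,
		.weight_nr_accesses = 0,
		.weight_age = 1,		/* oldest regions first */
	};
	struct damos_watermarks wmarks = {
		.metric = DAMOS_WMARK_FREE_MEM_RATE,	/* assumed name */
		.interval = 5000000,	/* check every 5s (microseconds) */
		.high = 500,		/* deactivate above 50.0% free */
		.mid = 400,		/* activate below 40.0% free */
		.low = 200,		/* deactivate below 20.0% free */
	};

	/* any size, zero accesses, cold for >= 120 intervals */
	return damon_new_scheme(PAGE_SIZE, ULONG_MAX, 0, 0, 120, UINT_MAX,
				DAMOS_PAGEOUT, &quota, &wmarks);
}
```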
```diff
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index fed4be3bace3..f7d5ac377a2b 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_DAMON)		:= core.o
-obj-$(CONFIG_DAMON_VADDR)	+= vaddr.o
+obj-$(CONFIG_DAMON_VADDR)	+= prmtv-common.o vaddr.o
+obj-$(CONFIG_DAMON_PADDR)	+= prmtv-common.o paddr.o
 obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
+obj-$(CONFIG_DAMON_RECLAIM)	+= reclaim.o
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 30e9211f494a..c381b3c525d0 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -10,8 +10,10 @@
 #include <linux/damon.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/slab.h>
+#include <linux/string.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/damon.h>
@@ -45,6 +47,9 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end)
 	region->nr_accesses = 0;
 	INIT_LIST_HEAD(&region->list);
 
+	region->age = 0;
+	region->last_nr_accesses = 0;
+
 	return region;
 }
 
@@ -82,6 +87,74 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t)
 	damon_free_region(r);
 }
 
+struct damos *damon_new_scheme(
+		unsigned long min_sz_region, unsigned long max_sz_region,
+		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+		unsigned int min_age_region, unsigned int max_age_region,
+		enum damos_action action, struct damos_quota *quota,
+		struct damos_watermarks *wmarks)
+{
+	struct damos *scheme;
+
+	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+	if (!scheme)
+		return NULL;
+	scheme->min_sz_region = min_sz_region;
+	scheme->max_sz_region = max_sz_region;
+	scheme->min_nr_accesses = min_nr_accesses;
+	scheme->max_nr_accesses = max_nr_accesses;
+	scheme->min_age_region = min_age_region;
+	scheme->max_age_region = max_age_region;
+	scheme->action = action;
+	scheme->stat_count = 0;
+	scheme->stat_sz = 0;
+	INIT_LIST_HEAD(&scheme->list);
+
+	scheme->quota.ms = quota->ms;
+	scheme->quota.sz = quota->sz;
+	scheme->quota.reset_interval = quota->reset_interval;
+	scheme->quota.weight_sz = quota->weight_sz;
+	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
+	scheme->quota.weight_age = quota->weight_age;
+	scheme->quota.total_charged_sz = 0;
+	scheme->quota.total_charged_ns = 0;
+	scheme->quota.esz = 0;
+	scheme->quota.charged_sz = 0;
+	scheme->quota.charged_from = 0;
+	scheme->quota.charge_target_from = NULL;
+	scheme->quota.charge_addr_from = 0;
+
+	scheme->wmarks.metric = wmarks->metric;
+	scheme->wmarks.interval = wmarks->interval;
+	scheme->wmarks.high = wmarks->high;
+	scheme->wmarks.mid = wmarks->mid;
+	scheme->wmarks.low = wmarks->low;
+	scheme->wmarks.activated = true;
+
+	return scheme;
+}
+
+void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
+{
+	list_add_tail(&s->list, &ctx->schemes);
+}
+
+static void damon_del_scheme(struct damos *s)
+{
+	list_del(&s->list);
+}
+
+static void damon_free_scheme(struct damos *s)
+{
+	kfree(s);
+}
+
+void damon_destroy_scheme(struct damos *s)
+{
+	damon_del_scheme(s);
+	damon_free_scheme(s);
+}
+
 /*
  * Construct a damon_target struct
  *
@@ -107,6 +180,11 @@ void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 	list_add_tail(&t->list, &ctx->adaptive_targets);
 }
 
+bool damon_targets_empty(struct damon_ctx *ctx)
+{
+	return list_empty(&ctx->adaptive_targets);
+}
+
 static void damon_del_target(struct damon_target *t)
 {
 	list_del(&t->list);
@@ -153,6 +231,7 @@
 	ctx->max_nr_regions = 1000;
 
 	INIT_LIST_HEAD(&ctx->adaptive_targets);
+	INIT_LIST_HEAD(&ctx->schemes);
 
 	return ctx;
 }
@@ -172,7 +251,13 @@ static void damon_destroy_targets(struct damon_ctx *ctx)
 
 void damon_destroy_ctx(struct damon_ctx *ctx)
 {
+	struct damos *s, *next_s;
+
 	damon_destroy_targets(ctx);
+
+	damon_for_each_scheme_safe(s, next_s, ctx)
+		damon_destroy_scheme(s);
+
 	kfree(ctx);
 }
 
@@ -248,6 +333,30 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 }
 
 /**
+ * damon_set_schemes() - Set data access monitoring based operation schemes.
+ * @ctx:	monitoring context
+ * @schemes:	array of the schemes
+ * @nr_schemes:	number of entries in @schemes
+ *
+ * This function should not be called while the kdamond of the context is
+ * running.
+ *
+ * Return: 0 if success, or negative error code otherwise.
+ */
+int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
+			ssize_t nr_schemes)
+{
+	struct damos *s, *next;
+	ssize_t i;
+
+	damon_for_each_scheme_safe(s, next, ctx)
+		damon_destroy_scheme(s);
+	for (i = 0; i < nr_schemes; i++)
+		damon_add_scheme(ctx, schemes[i]);
+	return 0;
+}
+
+/**
  * damon_nr_running_ctxs() - Return number of currently running contexts.
  */
 int damon_nr_running_ctxs(void)
@@ -281,17 +390,6 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 	return sz;
 }
 
-static bool damon_kdamond_running(struct damon_ctx *ctx)
-{
-	bool running;
-
-	mutex_lock(&ctx->kdamond_lock);
-	running = ctx->kdamond != NULL;
-	mutex_unlock(&ctx->kdamond_lock);
-
-	return running;
-}
-
 static int kdamond_fn(void *data);
 
 /*
@@ -309,12 +407,11 @@ static int __damon_start(struct damon_ctx *ctx)
 	mutex_lock(&ctx->kdamond_lock);
 	if (!ctx->kdamond) {
 		err = 0;
-		ctx->kdamond_stop = false;
 		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 				nr_running_ctxs);
 		if (IS_ERR(ctx->kdamond)) {
 			err = PTR_ERR(ctx->kdamond);
-			ctx->kdamond = 0;
+			ctx->kdamond = NULL;
 		}
 	}
 	mutex_unlock(&ctx->kdamond_lock);
@@ -365,13 +462,15 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
  */
 static int __damon_stop(struct damon_c
```
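Finally, a hedged sketch of how an in-kernel user wires these pieces together, roughly mirroring what DAMON_RECLAIM does. It reuses the hypothetical example_cold_pageout_scheme() helper from the earlier sketch, trims error unwinding, and omits installing the monitoring primitives (e.g. DAMON_PADDR's), which a real caller must also do before starting:

```c
#include <linux/damon.h>
#include <linux/errno.h>

static struct damon_ctx *example_ctx;

static int example_start(void)
{
	struct damos *scheme;
	int err;

	example_ctx = damon_new_ctx();
	if (!example_ctx)
		return -ENOMEM;

	scheme = example_cold_pageout_scheme();	/* hypothetical, above */
	if (!scheme)
		return -ENOMEM;

	/* install the scheme, replacing any previously set ones */
	err = damon_set_schemes(example_ctx, &scheme, 1);
	if (err)
		return err;

	/* spawn a kdamond kthread to run monitoring and the scheme */
	return damon_start(&example_ctx, 1);
}
```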