summaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
commitfb46e22a9e3863e08aef8815df9f17d0f4b9aede (patch)
tree83e052911fa8d8d90bcf9de2796e17e19040613f /kernel/fork.c
parentd30e51aa7b1f6fa7dd78d4598d1e4c047fcc3fb9 (diff)
parent5e0a760b44417f7cadd79de2204d6247109558a0 (diff)
downloadlinux-fb46e22a9e3863e08aef8815df9f17d0f4b9aede.tar.gz
linux-fb46e22a9e3863e08aef8815df9f17d0f4b9aede.tar.bz2
linux-fb46e22a9e3863e08aef8815df9f17d0f4b9aede.zip
Merge tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Many singleton patches against the MM code. The patch series which are included in this merge do the following: - Peng Zhang has done some mapletree maintainance work in the series 'maple_tree: add mt_free_one() and mt_attr() helpers' 'Some cleanups of maple tree' - In the series 'mm: use memmap_on_memory semantics for dax/kmem' Vishal Verma has altered the interworking between memory-hotplug and dax/kmem so that newly added 'device memory' can more easily have its memmap placed within that newly added memory. - Matthew Wilcox continues folio-related work (including a few fixes) in the patch series 'Add folio_zero_tail() and folio_fill_tail()' 'Make folio_start_writeback return void' 'Fix fault handler's handling of poisoned tail pages' 'Convert aops->error_remove_page to ->error_remove_folio' 'Finish two folio conversions' 'More swap folio conversions' - Kefeng Wang has also contributed folio-related work in the series 'mm: cleanup and use more folio in page fault' - Jim Cromie has improved the kmemleak reporting output in the series 'tweak kmemleak report format'. - In the series 'stackdepot: allow evicting stack traces' Andrey Konovalov to permits clients (in this case KASAN) to cause eviction of no longer needed stack traces. - Charan Teja Kalla has fixed some accounting issues in the page allocator's atomic reserve calculations in the series 'mm: page_alloc: fixes for high atomic reserve caluculations'. - Dmitry Rokosov has added to the samples/ dorectory some sample code for a userspace memcg event listener application. See the series 'samples: introduce cgroup events listeners'. - Some mapletree maintanance work from Liam Howlett in the series 'maple_tree: iterator state changes'. - Nhat Pham has improved zswap's approach to writeback in the series 'workload-specific and memory pressure-driven zswap writeback'. - DAMON/DAMOS feature and maintenance work from SeongJae Park in the series 'mm/damon: let users feed and tame/auto-tune DAMOS' 'selftests/damon: add Python-written DAMON functionality tests' 'mm/damon: misc updates for 6.8' - Yosry Ahmed has improved memcg's stats flushing in the series 'mm: memcg: subtree stats flushing and thresholds'. - In the series 'Multi-size THP for anonymous memory' Ryan Roberts has added a runtime opt-in feature to transparent hugepages which improves performance by allocating larger chunks of memory during anonymous page faults. - Matthew Wilcox has also contributed some cleanup and maintenance work against eh buffer_head code int he series 'More buffer_head cleanups'. - Suren Baghdasaryan has done work on Andrea Arcangeli's series 'userfaultfd move option'. UFFDIO_MOVE permits userspace heap compaction algorithms to move userspace's pages around rather than UFFDIO_COPY'a alloc/copy/free. - Stefan Roesch has developed a 'KSM Advisor', in the series 'mm/ksm: Add ksm advisor'. This is a governor which tunes KSM's scanning aggressiveness in response to userspace's current needs. - Chengming Zhou has optimized zswap's temporary working memory use in the series 'mm/zswap: dstmem reuse optimizations and cleanups'. - Matthew Wilcox has performed some maintenance work on the writeback code, both code and within filesystems. The series is 'Clean up the writeback paths'. - Andrey Konovalov has optimized KASAN's handling of alloc and free stack traces for secondary-level allocators, in the series 'kasan: save mempool stack traces'. - Andrey also performed some KASAN maintenance work in the series 'kasan: assorted clean-ups'. - David Hildenbrand has gone to town on the rmap code. Cleanups, more pte batching, folio conversions and more. See the series 'mm/rmap: interface overhaul'. - Kinsey Ho has contributed some maintenance work on the MGLRU code in the series 'mm/mglru: Kconfig cleanup'. - Matthew Wilcox has contributed lruvec page accounting code cleanups in the series 'Remove some lruvec page accounting functions'" * tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (361 commits) mm, treewide: rename MAX_ORDER to MAX_PAGE_ORDER mm, treewide: introduce NR_PAGE_ORDERS selftests/mm: add separate UFFDIO_MOVE test for PMD splitting selftests/mm: skip test if application doesn't has root privileges selftests/mm: conform test to TAP format output selftests: mm: hugepage-mmap: conform to TAP format output selftests/mm: gup_test: conform test to TAP format output mm/selftests: hugepage-mremap: conform test to TAP format output mm/vmstat: move pgdemote_* out of CONFIG_NUMA_BALANCING mm: zsmalloc: return -ENOSPC rather than -EINVAL in zs_malloc while size is too large mm/memcontrol: remove __mod_lruvec_page_state() mm/khugepaged: use a folio more in collapse_file() slub: use a folio in __kmalloc_large_node slub: use folio APIs in free_large_kmalloc() slub: use alloc_pages_node() in alloc_slab_page() mm: remove inc/dec lruvec page state functions mm: ratelimit stat flush from workingset shrinker kasan: stop leaking stack trace handles mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE mm/mglru: add dummy pmd_dirty() ...
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c42
1 files changed, 30 insertions, 12 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 10917c3e1f03..56cf276432c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -650,7 +650,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
int retval;
unsigned long charge = 0;
LIST_HEAD(uf);
- VMA_ITERATOR(old_vmi, oldmm, 0);
VMA_ITERATOR(vmi, mm, 0);
uprobe_start_dup_mmap();
@@ -678,16 +677,22 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
khugepaged_fork(mm, oldmm);
- retval = vma_iter_bulk_alloc(&vmi, oldmm->map_count);
- if (retval)
+ /* Use __mt_dup() to efficiently build an identical maple tree. */
+ retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
+ if (unlikely(retval))
goto out;
mt_clear_in_rcu(vmi.mas.tree);
- for_each_vma(old_vmi, mpnt) {
+ for_each_vma(vmi, mpnt) {
struct file *file;
vma_start_write(mpnt);
if (mpnt->vm_flags & VM_DONTCOPY) {
+ retval = vma_iter_clear_gfp(&vmi, mpnt->vm_start,
+ mpnt->vm_end, GFP_KERNEL);
+ if (retval)
+ goto loop_out;
+
vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
continue;
}
@@ -749,9 +754,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (is_vm_hugetlb_page(tmp))
hugetlb_dup_vma_private(tmp);
- /* Link the vma into the MT */
- if (vma_iter_bulk_store(&vmi, tmp))
- goto fail_nomem_vmi_store;
+ /*
+ * Link the vma into the MT. After using __mt_dup(), memory
+ * allocation is not necessary here, so it cannot fail.
+ */
+ vma_iter_bulk_store(&vmi, tmp);
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
@@ -760,15 +767,28 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
- if (retval)
+ if (retval) {
+ mpnt = vma_next(&vmi);
goto loop_out;
+ }
}
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
loop_out:
vma_iter_free(&vmi);
- if (!retval)
+ if (!retval) {
mt_set_in_rcu(vmi.mas.tree);
+ } else if (mpnt) {
+ /*
+ * The entire maple tree has already been duplicated. If the
+ * mmap duplication fails, mark the failure point with
+ * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered,
+ * stop releasing VMAs that have not been duplicated after this
+ * point.
+ */
+ mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
+ mas_store(&vmi.mas, XA_ZERO_ENTRY);
+ }
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -778,8 +798,6 @@ fail_uprobe_end:
uprobe_end_dup_mmap();
return retval;
-fail_nomem_vmi_store:
- unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:
@@ -2928,7 +2946,7 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
- if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ if (IS_ENABLED(CONFIG_LRU_GEN_WALKS_MMU) && !(clone_flags & CLONE_VM)) {
/* lock the task to synchronize with memcg migration */
task_lock(p);
lru_gen_add_mm(p->mm);