summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 19:42:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 19:42:02 -0700
commit7fa8a8ee9400fe8ec188426e40e481717bc5e924 (patch)
treecc8fd6b4f936ec01e73238643757451e20478c07 /include
parent91ec4b0d11fe115581ce2835300558802ce55e6c (diff)
parent4d4b6d66db63ceed399f1fb1a4b24081d2590eb1 (diff)
downloadlinux-7fa8a8ee9400fe8ec188426e40e481717bc5e924.tar.gz
linux-7fa8a8ee9400fe8ec188426e40e481717bc5e924.tar.bz2
linux-7fa8a8ee9400fe8ec188426e40e481717bc5e924.zip
Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Nick Piggin's "shoot lazy tlbs" series, to improve the peformance of switching from a user process to a kernel thread. - More folio conversions from Kefeng Wang, Zhang Peng and Pankaj Raghav. - zsmalloc performance improvements from Sergey Senozhatsky. - Yue Zhao has found and fixed some data race issues around the alteration of memcg userspace tunables. - VFS rationalizations from Christoph Hellwig: - removal of most of the callers of write_one_page() - make __filemap_get_folio()'s return value more useful - Luis Chamberlain has changed tmpfs so it no longer requires swap backing. Use `mount -o noswap'. - Qi Zheng has made the slab shrinkers operate locklessly, providing some scalability benefits. - Keith Busch has improved dmapool's performance, making part of its operations O(1) rather than O(n). - Peter Xu adds the UFFD_FEATURE_WP_UNPOPULATED feature to userfaultd, permitting userspace to wr-protect anon memory unpopulated ptes. - Kirill Shutemov has changed MAX_ORDER's meaning to be inclusive rather than exclusive, and has fixed a bunch of errors which were caused by its unintuitive meaning. - Axel Rasmussen give userfaultfd the UFFDIO_CONTINUE_MODE_WP feature, which causes minor faults to install a write-protected pte. - Vlastimil Babka has done some maintenance work on vma_merge(): cleanups to the kernel code and improvements to our userspace test harness. - Cleanups to do_fault_around() by Lorenzo Stoakes. - Mike Rapoport has moved a lot of initialization code out of various mm/ files and into mm/mm_init.c. - Lorenzo Stoakes removd vmf_insert_mixed_prot(), which was added for DRM, but DRM doesn't use it any more. - Lorenzo has also coverted read_kcore() and vread() to use iterators and has thereby removed the use of bounce buffers in some cases. - Lorenzo has also contributed further cleanups of vma_merge(). - Chaitanya Prakash provides some fixes to the mmap selftesting code. - Matthew Wilcox changes xfs and afs so they no longer take sleeping locks in ->map_page(), a step towards RCUification of pagefaults. - Suren Baghdasaryan has improved mmap_lock scalability by switching to per-VMA locking. - Frederic Weisbecker has reworked the percpu cache draining so that it no longer causes latency glitches on cpu isolated workloads. - Mike Rapoport cleans up and corrects the ARCH_FORCE_MAX_ORDER Kconfig logic. - Liu Shixin has changed zswap's initialization so we no longer waste a chunk of memory if zswap is not being used. - Yosry Ahmed has improved the performance of memcg statistics flushing. - David Stevens has fixed several issues involving khugepaged, userfaultfd and shmem. - Christoph Hellwig has provided some cleanup work to zram's IO-related code paths. - David Hildenbrand has fixed up some issues in the selftest code's testing of our pte state changing. - Pankaj Raghav has made page_endio() unneeded and has removed it. - Peter Xu contributed some rationalizations of the userfaultfd selftests. - Yosry Ahmed has fixed an issue around memcg's page recalim accounting. - Chaitanya Prakash has fixed some arm-related issues in the selftests/mm code. - Longlong Xia has improved the way in which KSM handles hwpoisoned pages. - Peter Xu fixes a few issues with uffd-wp at fork() time. - Stefan Roesch has changed KSM so that it may now be used on a per-process and per-cgroup basis. * tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits) mm,unmap: avoid flushing TLB in batch if PTE is inaccessible shmem: restrict noswap option to initial user namespace mm/khugepaged: fix conflicting mods to collapse_file() sparse: remove unnecessary 0 values from rc mm: move 'mmap_min_addr' logic from callers into vm_unmapped_area() hugetlb: pte_alloc_huge() to replace huge pte_alloc_map() maple_tree: fix allocation in mas_sparse_area() mm: do not increment pgfault stats when page fault handler retries zsmalloc: allow only one active pool compaction context selftests/mm: add new selftests for KSM mm: add new KSM process and sysfs knobs mm: add new api to enable ksm per process mm: shrinkers: fix debugfs file permissions mm: don't check VMA write permissions if the PTE/PMD indicates write permissions migrate_pages_batch: fix statistics for longterm pin retry userfaultfd: use helper function range_in_vma() lib/show_mem.c: use for_each_populated_zone() simplify code mm: correct arg in reclaim_pages()/reclaim_clean_pages_from_list() fs/buffer: convert create_page_buffers to folio_create_buffers fs/buffer: add folio_create_empty_buffers helper ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgalloc.h4
-rw-r--r--include/drm/ttm/ttm_pool.h2
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/gfp.h7
-rw-r--r--include/linux/gfp_types.h30
-rw-r--r--include/linux/highmem.h62
-rw-r--r--include/linux/huge_mm.h41
-rw-r--r--include/linux/hugetlb.h46
-rw-r--r--include/linux/io-mapping.h20
-rw-r--r--include/linux/kmsan.h43
-rw-r--r--include/linux/ksm.h37
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/memcontrol.h10
-rw-r--r--include/linux/memfd.h4
-rw-r--r--include/linux/mm.h206
-rw-r--r--include/linux/mm_inline.h6
-rw-r--r--include/linux/mm_types.h46
-rw-r--r--include/linux/mmap_lock.h37
-rw-r--r--include/linux/mmzone.h34
-rw-r--r--include/linux/page-flags.h23
-rw-r--r--include/linux/page_ext.h2
-rw-r--r--include/linux/pageblock-flags.h4
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/pgtable.h9
-rw-r--r--include/linux/sched/coredump.h1
-rw-r--r--include/linux/sched/isolation.h12
-rw-r--r--include/linux/sched/mm.h28
-rw-r--r--include/linux/shmem_fs.h19
-rw-r--r--include/linux/slab.h5
-rw-r--r--include/linux/swap.h38
-rw-r--r--include/linux/uio.h2
-rw-r--r--include/linux/userfaultfd_k.h92
-rw-r--r--include/linux/vm_event_item.h6
-rw-r--r--include/linux/vmalloc.h7
-rw-r--r--include/linux/vmstat.h6
-rw-r--r--include/trace/events/cma.h58
-rw-r--r--include/trace/events/huge_memory.h5
-rw-r--r--include/trace/events/ksm.h251
-rw-r--r--include/trace/events/mmflags.h94
-rw-r--r--include/uapi/linux/prctl.h4
-rw-r--r--include/uapi/linux/userfaultfd.h17
42 files changed, 977 insertions, 366 deletions
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 977bea16cf1b..a7cf825befae 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -123,11 +123,11 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
- page = alloc_pages(gfp, 0);
+ page = alloc_page(gfp);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
- __free_pages(page, 0);
+ __free_page(page);
return NULL;
}
return (pmd_t *)page_address(page);
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index ef09b23d29e3..8ce14f9d202a 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -72,7 +72,7 @@ struct ttm_pool {
bool use_dma32;
struct {
- struct ttm_pool_type orders[MAX_ORDER];
+ struct ttm_pool_type orders[MAX_ORDER + 1];
} caching[TTM_NUM_CACHING_TYPES];
};
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 8f14dca5fed7..1520793c72da 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -196,11 +196,17 @@ void mark_buffer_write_io_error(struct buffer_head *bh);
void touch_buffer(struct buffer_head *bh);
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset);
+void folio_set_bh(struct buffer_head *bh, struct folio *folio,
+ unsigned long offset);
bool try_to_free_buffers(struct folio *);
+struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
+ bool retry);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry);
void create_empty_buffers(struct page *, unsigned long,
unsigned long b_state);
+void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
+ unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
void end_buffer_async_write(struct buffer_head *bh, int uptodate);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 3410aecffdb4..885f5395fcd0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -692,7 +692,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
*/
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
+void cgroup_rstat_flush_atomic(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
void cgroup_rstat_flush_release(void);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 65a78773dcca..ed8cb537c6a7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -319,7 +319,7 @@ extern void page_frag_free(void *addr);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
-void page_alloc_init(void);
+void page_alloc_init_cpuhp(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
@@ -361,9 +361,4 @@ extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
#endif
void free_contig_range(unsigned long pfn, unsigned long nr_pages);
-#ifdef CONFIG_CMA
-/* CMA stuff */
-extern void init_cma_reserved_pageblock(struct page *page);
-#endif
-
#endif /* __LINUX_GFP_H */
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 5088637fe5c2..6583a58670c5 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -47,16 +47,14 @@ typedef unsigned int __bitwise gfp_t;
#define ___GFP_ACCOUNT 0x400000u
#define ___GFP_ZEROTAGS 0x800000u
#ifdef CONFIG_KASAN_HW_TAGS
-#define ___GFP_SKIP_ZERO 0x1000000u
-#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u
-#define ___GFP_SKIP_KASAN_POISON 0x4000000u
+#define ___GFP_SKIP_ZERO 0x1000000u
+#define ___GFP_SKIP_KASAN 0x2000000u
#else
-#define ___GFP_SKIP_ZERO 0
-#define ___GFP_SKIP_KASAN_UNPOISON 0
-#define ___GFP_SKIP_KASAN_POISON 0
+#define ___GFP_SKIP_ZERO 0
+#define ___GFP_SKIP_KASAN 0
#endif
#ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP 0x8000000u
+#define ___GFP_NOLOCKDEP 0x4000000u
#else
#define ___GFP_NOLOCKDEP 0
#endif
@@ -234,25 +232,24 @@ typedef unsigned int __bitwise gfp_t;
* memory tags at the same time as zeroing memory has minimal additional
* performace impact.
*
- * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation.
- * Only effective in HW_TAGS mode.
- *
- * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation.
- * Typically, used for userspace pages. Only effective in HW_TAGS mode.
+ * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
+ * Used for userspace and vmalloc pages; the latter are unpoisoned by
+ * kasan_unpoison_vmalloc instead. For userspace pages, results in
+ * poisoning being skipped as well, see should_skip_kasan_poison for
+ * details. Only effective in HW_TAGS mode.
*/
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
-#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON)
-#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
+#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN)
/* Disable lockdep for GFP context tracking */
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
/* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/**
@@ -335,8 +332,7 @@ typedef unsigned int __bitwise gfp_t;
#define GFP_DMA __GFP_DMA
#define GFP_DMA32 __GFP_DMA32
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \
- __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON)
+#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN)
#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 8fc10089e19e..4de1dbcd3ef6 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -243,12 +243,10 @@ static inline void clear_highpage(struct page *page)
static inline void clear_highpage_kasan_tagged(struct page *page)
{
- u8 tag;
+ void *kaddr = kmap_local_page(page);
- tag = page_kasan_tag(page);
- page_kasan_tag_reset(page);
- clear_highpage(page);
- page_kasan_tag_set(page, tag);
+ clear_page(kasan_reset_tag(kaddr));
+ kunmap_local(kaddr);
}
#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
@@ -317,7 +315,29 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
#endif
+#ifndef __HAVE_ARCH_COPY_HIGHPAGE
+
+static inline void copy_highpage(struct page *to, struct page *from)
+{
+ char *vfrom, *vto;
+
+ vfrom = kmap_local_page(from);
+ vto = kmap_local_page(to);
+ copy_page(vto, vfrom);
+ kmsan_copy_page_meta(to, from);
+ kunmap_local(vto);
+ kunmap_local(vfrom);
+}
+
+#endif
+
#ifdef copy_mc_to_kernel
+/*
+ * If architecture supports machine check exception handling, define the
+ * #MC versions of copy_user_highpage and copy_highpage. They copy a memory
+ * page with #MC in source page (@from) handled, and return the number
+ * of bytes not copied if there was a #MC, otherwise 0 for success.
+ */
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
@@ -334,29 +354,35 @@ static inline int copy_mc_user_highpage(struct page *to, struct page *from,
return ret;
}
-#else
-static inline int copy_mc_user_highpage(struct page *to, struct page *from,
- unsigned long vaddr, struct vm_area_struct *vma)
-{
- copy_user_highpage(to, from, vaddr, vma);
- return 0;
-}
-#endif
-
-#ifndef __HAVE_ARCH_COPY_HIGHPAGE
-static inline void copy_highpage(struct page *to, struct page *from)
+static inline int copy_mc_highpage(struct page *to, struct page *from)
{
+ unsigned long ret;
char *vfrom, *vto;
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
- copy_page(vto, vfrom);
- kmsan_copy_page_meta(to, from);
+ ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
+ if (!ret)
+ kmsan_copy_page_meta(to, from);
kunmap_local(vto);
kunmap_local(vfrom);
+
+ return ret;
+}
+#else
+static inline int copy_mc_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ copy_user_highpage(to, from, vaddr, vma);
+ return 0;
}
+static inline int copy_mc_highpage(struct page *to, struct page *from)
+{
+ copy_highpage(to, from);
+ return 0;
+}
#endif
static inline void memcpy_page(struct page *dst_page, size_t dst_off,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 70bd867eba94..20284387b841 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,47 +39,12 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, pgprot_t newprot,
unsigned long cp_flags);
-vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
- pgprot_t pgprot, bool write);
-/**
- * vmf_insert_pfn_pmd - insert a pmd size pfn
- * @vmf: Structure describing the fault
- * @pfn: pfn to insert
- * @pgprot: page protection to use
- * @write: whether it's a write fault
- *
- * Insert a pmd size pfn. See vmf_insert_pfn() for additional info.
- *
- * Return: vm_fault_t value.
- */
-static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn,
- bool write)
-{
- return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
-}
-vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
- pgprot_t pgprot, bool write);
-
-/**
- * vmf_insert_pfn_pud - insert a pud size pfn
- * @vmf: Structure describing the fault
- * @pfn: pfn to insert
- * @pgprot: page protection to use
- * @write: whether it's a write fault
- *
- * Insert a pud size pfn. See vmf_insert_pfn() for additional info.
- *
- * Return: vm_fault_t value.
- */
-static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
- bool write)
-{
- return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
-}
+vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
- TRANSPARENT_HUGEPAGE_NEVER_DAX,
+ TRANSPARENT_HUGEPAGE_UNSUPPORTED,
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4056b05d81ed..6d041aa9f0fe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -150,13 +150,12 @@ unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
-int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- enum mcopy_atomic_mode mode,
- struct page **pagep,
- bool wp_copy);
+int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
@@ -184,6 +183,23 @@ extern struct list_head huge_boot_pages;
/* arch callbacks */
+#ifndef CONFIG_HIGHPTE
+/*
+ * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
+ * which may go down to the lowest PTE level in their huge_pte_offset() and
+ * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
+ */
+static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
+{
+ return pte_offset_kernel(pmd, address);
+}
+static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
+{
+ return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
+}
+#endif
+
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz);
/*
@@ -385,14 +401,12 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
}
#ifdef CONFIG_USERFAULTFD
-static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
- pte_t *dst_pte,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- enum mcopy_atomic_mode mode,
- struct page **pagep,
- bool wp_copy)
+static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ uffd_flags_t flags,
+ struct folio **foliop)
{
BUG();
return 0;
@@ -810,7 +824,7 @@ static inline unsigned huge_page_shift(struct hstate *h)
static inline bool hstate_is_gigantic(struct hstate *h)
{
- return huge_page_order(h) >= MAX_ORDER;
+ return huge_page_order(h) > MAX_ORDER;
}
static inline unsigned int pages_per_huge_page(const struct hstate *h)
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 09d4f17c8d3b..7376c1df9c90 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
BUG_ON(offset >= mapping->size);
phys_addr = mapping->base + offset;
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+ else
+ migrate_disable();
pagefault_disable();
return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
}
@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr)
{
kunmap_local_indexed((void __force *)vaddr);
pagefault_enable();
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ else
+ migrate_enable();
}
static inline void __iomem *
@@ -162,7 +168,10 @@ static inline void __iomem *
io_mapping_map_atomic_wc(struct io_mapping *mapping,
unsigned long offset)
{
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+ else
+ migrate_disable();
pagefault_disable();
return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
}
@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr)
{
io_mapping_unmap(vaddr);
pagefault_enable();
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ else
+ migrate_enable();
}
static inline void __iomem *
diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index 30b17647ce3c..e0c23a32cdf0 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -54,7 +54,8 @@ void __init kmsan_init_runtime(void);
* Freed pages are either returned to buddy allocator or held back to be used
* as metadata pages.
*/
-bool __init kmsan_memblock_free_pages(struct page *page, unsigned int order);
+bool __init __must_check kmsan_memblock_free_pages(struct page *page,
+ unsigned int order);
/**
* kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call.
@@ -137,9 +138,11 @@ void kmsan_kfree_large(const void *ptr);
* vmalloc metadata address range. Returns 0 on success, callers must check
* for non-zero return value.
*/
-int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages,
- unsigned int page_shift);
+int __must_check kmsan_vmap_pages_range_noflush(unsigned long start,
+ unsigned long end,
+ pgprot_t prot,
+ struct page **pages,
+ unsigned int page_shift);
/**
* kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
@@ -163,9 +166,9 @@ void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end);
* virtual memory. Returns 0 on success, callers must check for non-zero return
* value.
*/
-int kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int page_shift);
+int __must_check kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int page_shift);
/**
* kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
@@ -237,8 +240,8 @@ static inline void kmsan_init_runtime(void)
{
}
-static inline bool kmsan_memblock_free_pages(struct page *page,
- unsigned int order)
+static inline bool __must_check kmsan_memblock_free_pages(struct page *page,
+ unsigned int order)
{
return true;
}
@@ -251,10 +254,9 @@ static inline void kmsan_task_exit(struct task_struct *task)
{
}
-static inline int kmsan_alloc_page(struct page *page, unsigned int order,
- gfp_t flags)
+static inline void kmsan_alloc_page(struct page *page, unsigned int order,
+ gfp_t flags)
{
- return 0;
}
static inline void kmsan_free_page(struct page *page, unsigned int order)
@@ -283,11 +285,9 @@ static inline void kmsan_kfree_large(const void *ptr)
{
}
-static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
- unsigned long end,
- pgprot_t prot,
- struct page **pages,
- unsigned int page_shift)
+static inline int __must_check kmsan_vmap_pages_range_noflush(
+ unsigned long start, unsigned long end, pgprot_t prot,
+ struct page **pages, unsigned int page_shift)
{
return 0;
}
@@ -297,10 +297,11 @@ static inline void kmsan_vunmap_range_noflush(unsigned long start,
{
}
-static inline int kmsan_ioremap_page_range(unsigned long start,
- unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int page_shift)
+static inline int __must_check kmsan_ioremap_page_range(unsigned long start,
+ unsigned long end,
+ phys_addr_t phys_addr,
+ pgprot_t prot,
+ unsigned int page_shift)
{
return 0;
}
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b86..7a9b76fb6c3f 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,13 +18,26 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
+
+void ksm_add_vma(struct vm_area_struct *vma);
+int ksm_enable_merge_any(struct mm_struct *mm);
+
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
- return __ksm_enter(mm);
+ int ret;
+
+ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) {
+ ret = __ksm_enter(mm);
+ if (ret)
+ return ret;
+ }
+
+ if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
+ set_bit(MMF_VM_MERGE_ANY, &mm->flags);
+
return 0;
}
@@ -51,8 +64,21 @@ struct page *ksm_might_need_to_copy(struct page *page,
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
+#ifdef CONFIG_MEMORY_FAILURE
+void collect_procs_ksm(struct page *page, struct list_head *to_kill,
+ int force_early);
+#endif
+
+#ifdef CONFIG_PROC_FS
+long ksm_process_profit(struct mm_struct *);
+#endif /* CONFIG_PROC_FS */
+
#else /* !CONFIG_KSM */
+static inline void ksm_add_vma(struct vm_area_struct *vma)
+{
+}
+
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
return 0;
@@ -62,6 +88,13 @@ static inline void ksm_exit(struct mm_struct *mm)
{
}
+#ifdef CONFIG_MEMORY_FAILURE
+static inline void collect_procs_ksm(struct page *page,
+ struct list_head *to_kill, int force_early)
+{
+}
+#endif
+
#ifdef CONFIG_MMU