summaryrefslogtreecommitdiff
path: root/include/linux/swapops.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
commit98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree44683fc4a92efa614acdca2742a7ff19d26da1e3 /include/linux/swapops.h
parentdf202b452fe6c6d6f1351bad485e2367ef1e644e (diff)
parentf403f22f8ccb12860b2b62fec3173c6ccd45938b (diff)
downloadlinux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.gz
linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.bz2
linux-98931dd95fd489fcbfa97da563505a6f071d7c77.zip
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Almost all of MM here. A few things are still getting finished off, reviewed, etc. - Yang Shi has improved the behaviour of khugepaged collapsing of readonly file-backed transparent hugepages. - Johannes Weiner has arranged for zswap memory use to be tracked and managed on a per-cgroup basis. - Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for runtime enablement of the recent huge page vmemmap optimization feature. - Baolin Wang contributes a series to fix some issues around hugetlb pagetable invalidation. - Zhenwei Pi has fixed some interactions between hwpoisoned pages and virtualization. - Tong Tiangen has enabled the use of the presently x86-only page_table_check debugging feature on arm64 and riscv. - David Vernet has done some fixup work on the memcg selftests. - Peter Xu has taught userfaultfd to handle write protection faults against shmem- and hugetlbfs-backed files. - More DAMON development from SeongJae Park - adding online tuning of the feature and support for monitoring of fixed virtual address ranges. Also easier discovery of which monitoring operations are available. - Nadav Amit has done some optimization of TLB flushing during mprotect(). - Neil Brown continues to labor away at improving our swap-over-NFS support. - David Hildenbrand has some fixes to anon page COWing versus get_user_pages(). - Peng Liu fixed some errors in the core hugetlb code. - Joao Martins has reduced the amount of memory consumed by device-dax's compound devmaps. - Some cleanups of the arch-specific pagemap code from Anshuman Khandual. - Muchun Song has found and fixed some errors in the TLB flushing of transparent hugepages. - Roman Gushchin has done more work on the memcg selftests. ... and, of course, many smaller fixes and cleanups. Notably, the customary million cleanup serieses from Miaohe Lin" * tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits) mm: kfence: use PAGE_ALIGNED helper selftests: vm: add the "settings" file with timeout variable selftests: vm: add "test_hmm.sh" to TEST_FILES selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests selftests: vm: add migration to the .gitignore selftests/vm/pkeys: fix typo in comment ksm: fix typo in comment selftests: vm: add process_mrelease tests Revert "mm/vmscan: never demote for memcg reclaim" mm/kfence: print disabling or re-enabling message include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace" include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion" mm: fix a potential infinite loop in start_isolate_page_range() MAINTAINERS: add Muchun as co-maintainer for HugeTLB zram: fix Kconfig dependency warning mm/shmem: fix shmem folio swapoff hang cgroup: fix an error handling path in alloc_pagecache_max_30M() mm: damon: use HPAGE_PMD_SIZE tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate nodemask.h: fix compilation error with GCC12 ...
Diffstat (limited to 'include/linux/swapops.h')
-rw-r--r--include/linux/swapops.h124
1 files changed, 111 insertions, 13 deletions
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d356ab4047f7..fe220df499f1 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -13,10 +13,10 @@
* get good packing density in that tree, so the index should be dense in
* the low-order bits.
*
- * We arrange the `type' and `offset' fields so that `type' is at the seven
+ * We arrange the `type' and `offset' fields so that `type' is at the six
* high-order bits of the swp_entry_t and `offset' is right-aligned in the
* remaining bits. Although `type' itself needs only five bits, we allow for
- * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry().
+ * shmem/tmpfs to shift it all up a further one bit: see swp_to_radix_entry().
*
* swp_entry_t's are *never* stored anywhere in their arch-dependent format.
*/
@@ -26,6 +26,8 @@
/* Clear all flags but only keep swp_entry_t related information */
static inline pte_t pte_swp_clear_flags(pte_t pte)
{
+ if (pte_swp_exclusive(pte))
+ pte = pte_swp_clear_exclusive(pte);
if (pte_swp_soft_dirty(pte))
pte = pte_swp_clear_soft_dirty(pte);
if (pte_swp_uffd_wp(pte))
@@ -194,6 +196,7 @@ static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
static inline int is_migration_entry(swp_entry_t entry)
{
return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
+ swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE ||
swp_type(entry) == SWP_MIGRATION_WRITE);
}
@@ -202,11 +205,26 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
}
+static inline int is_readable_migration_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_READ);
+}
+
+static inline int is_readable_exclusive_migration_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE);
+}
+
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
return swp_entry(SWP_MIGRATION_READ, offset);
}
+static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, offset);
+}
+
static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
{
return swp_entry(SWP_MIGRATION_WRITE, offset);
@@ -224,6 +242,11 @@ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
return swp_entry(0, 0);
}
+static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
{
return swp_entry(0, 0);
@@ -244,9 +267,92 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
}
+static inline int is_readable_migration_entry(swp_entry_t entry)
+{
+ return 0;
+}
#endif
+typedef unsigned long pte_marker;
+
+#define PTE_MARKER_UFFD_WP BIT(0)
+#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP)
+
+#ifdef CONFIG_PTE_MARKER
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+ return swp_entry(SWP_PTE_MARKER, marker);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_PTE_MARKER;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+ return swp_offset(entry) & PTE_MARKER_MASK;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+ return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
+}
+
+#else /* CONFIG_PTE_MARKER */
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+ /* This should never be called if !CONFIG_PTE_MARKER */
+ WARN_ON_ONCE(1);
+ return swp_entry(0, 0);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+ return 0;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+ return false;
+}
+
+#endif /* CONFIG_PTE_MARKER */
+
+static inline pte_t make_pte_marker(pte_marker marker)
+{
+ return swp_entry_to_pte(make_pte_marker_entry(marker));
+}
+
+/*
+ * This is a special version to check pte_none() just to cover the case when
+ * the pte is a pte marker. It existed because in many cases the pte marker
+ * should be seen as a none pte; it's just that we have stored some information
+ * onto the none pte so it becomes not-none any more.
+ *
+ * It should be used when the pte is file-backed, ram-based and backing
+ * userspace pages, like shmem. It is not needed upon pgtables that do not
+ * support pte markers at all. For example, it's not needed on anonymous
+ * memory, kernel-only memory (including when the system is during-boot),
+ * non-ram based generic file-system. It's fine to be used even there, but the
+ * extra pte marker check will be pure overhead.
+ *
+ * For systems configured with !CONFIG_PTE_MARKER this will be automatically
+ * optimized to pte_none().
+ */
+static inline int pte_none_mostly(pte_t pte)
+{
+ return pte_none(pte) || is_pte_marker(pte);
+}
+
static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
{
struct page *p = pfn_to_page(swp_offset(entry));
@@ -274,7 +380,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
struct page_vma_mapped_walk;
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
+extern int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
struct page *page);
extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw,
@@ -304,10 +410,10 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
static inline int is_pmd_migration_entry(pmd_t pmd)
{
- return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
+ return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
}
#else
-static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
+static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
struct page *page)
{
BUILD_BUG();
@@ -387,18 +493,10 @@ static inline void num_poisoned_pages_inc(void)
}
#endif
-#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \
- defined(CONFIG_DEVICE_PRIVATE)
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
}
-#else
-static inline int non_swap_entry(swp_entry_t entry)
-{
- return 0;
-}
-#endif
#endif /* CONFIG_MMU */
#endif /* _LINUX_SWAPOPS_H */