diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
commit | 98931dd95fd489fcbfa97da563505a6f071d7c77 (patch) | |
tree | 44683fc4a92efa614acdca2742a7ff19d26da1e3 /include/linux/swapops.h | |
parent | df202b452fe6c6d6f1351bad485e2367ef1e644e (diff) | |
parent | f403f22f8ccb12860b2b62fec3173c6ccd45938b (diff) | |
download | linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.gz linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.bz2 linux-98931dd95fd489fcbfa97da563505a6f071d7c77.zip |
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
"Almost all of MM here. A few things are still getting finished off,
reviewed, etc.
- Yang Shi has improved the behaviour of khugepaged collapsing of
readonly file-backed transparent hugepages.
- Johannes Weiner has arranged for zswap memory use to be tracked and
managed on a per-cgroup basis.
- Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
runtime enablement of the recent huge page vmemmap optimization
feature.
- Baolin Wang contributes a series to fix some issues around hugetlb
pagetable invalidation.
- Zhenwei Pi has fixed some interactions between hwpoisoned pages and
virtualization.
- Tong Tiangen has enabled the use of the presently x86-only
page_table_check debugging feature on arm64 and riscv.
- David Vernet has done some fixup work on the memcg selftests.
- Peter Xu has taught userfaultfd to handle write protection faults
against shmem- and hugetlbfs-backed files.
- More DAMON development from SeongJae Park - adding online tuning of
the feature and support for monitoring of fixed virtual address
ranges. Also easier discovery of which monitoring operations are
available.
- Nadav Amit has done some optimization of TLB flushing during
mprotect().
- Neil Brown continues to labor away at improving our swap-over-NFS
support.
- David Hildenbrand has some fixes to anon page COWing versus
get_user_pages().
- Peng Liu fixed some errors in the core hugetlb code.
- Joao Martins has reduced the amount of memory consumed by
device-dax's compound devmaps.
- Some cleanups of the arch-specific pagemap code from Anshuman
Khandual.
- Muchun Song has found and fixed some errors in the TLB flushing of
transparent hugepages.
- Roman Gushchin has done more work on the memcg selftests.
... and, of course, many smaller fixes and cleanups. Notably, the
customary million cleanup serieses from Miaohe Lin"
* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
mm: kfence: use PAGE_ALIGNED helper
selftests: vm: add the "settings" file with timeout variable
selftests: vm: add "test_hmm.sh" to TEST_FILES
selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
selftests: vm: add migration to the .gitignore
selftests/vm/pkeys: fix typo in comment
ksm: fix typo in comment
selftests: vm: add process_mrelease tests
Revert "mm/vmscan: never demote for memcg reclaim"
mm/kfence: print disabling or re-enabling message
include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
mm: fix a potential infinite loop in start_isolate_page_range()
MAINTAINERS: add Muchun as co-maintainer for HugeTLB
zram: fix Kconfig dependency warning
mm/shmem: fix shmem folio swapoff hang
cgroup: fix an error handling path in alloc_pagecache_max_30M()
mm: damon: use HPAGE_PMD_SIZE
tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
nodemask.h: fix compilation error with GCC12
...
Diffstat (limited to 'include/linux/swapops.h')
-rw-r--r-- | include/linux/swapops.h | 124 |
1 files changed, 111 insertions, 13 deletions
diff --git a/include/linux/swapops.h b/include/linux/swapops.h index d356ab4047f7..fe220df499f1 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -13,10 +13,10 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the seven + * We arrange the `type' and `offset' fields so that `type' is at the six * high-order bits of the swp_entry_t and `offset' is right-aligned in the * remaining bits. Although `type' itself needs only five bits, we allow for - * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). + * shmem/tmpfs to shift it all up a further one bit: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ @@ -26,6 +26,8 @@ /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { + if (pte_swp_exclusive(pte)) + pte = pte_swp_clear_exclusive(pte); if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); if (pte_swp_uffd_wp(pte)) @@ -194,6 +196,7 @@ static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) static inline int is_migration_entry(swp_entry_t entry) { return unlikely(swp_type(entry) == SWP_MIGRATION_READ || + swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE || swp_type(entry) == SWP_MIGRATION_WRITE); } @@ -202,11 +205,26 @@ static inline int is_writable_migration_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline int is_readable_migration_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_MIGRATION_READ); +} + +static inline int is_readable_exclusive_migration_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE); +} + static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_READ, offset); } +static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) +{ + return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, offset); +} + static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_WRITE, offset); @@ -224,6 +242,11 @@ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) return swp_entry(0, 0); } +static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) +{ + return swp_entry(0, 0); +} + static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); @@ -244,9 +267,92 @@ static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; } +static inline int is_readable_migration_entry(swp_entry_t entry) +{ + return 0; +} #endif +typedef unsigned long pte_marker; + +#define PTE_MARKER_UFFD_WP BIT(0) +#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP) + +#ifdef CONFIG_PTE_MARKER + +static inline swp_entry_t make_pte_marker_entry(pte_marker marker) +{ + return swp_entry(SWP_PTE_MARKER, marker); +} + +static inline bool is_pte_marker_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_PTE_MARKER; +} + +static inline pte_marker pte_marker_get(swp_entry_t entry) +{ + return swp_offset(entry) & PTE_MARKER_MASK; +} + +static inline bool is_pte_marker(pte_t pte) +{ + return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); +} + +#else /* CONFIG_PTE_MARKER */ + +static inline swp_entry_t make_pte_marker_entry(pte_marker marker) +{ + /* This should never be called if !CONFIG_PTE_MARKER */ + WARN_ON_ONCE(1); + return swp_entry(0, 0); +} + +static inline bool is_pte_marker_entry(swp_entry_t entry) +{ + return false; +} + +static inline pte_marker pte_marker_get(swp_entry_t entry) +{ + return 0; +} + +static inline bool is_pte_marker(pte_t pte) +{ + return false; +} + +#endif /* CONFIG_PTE_MARKER */ + +static inline pte_t make_pte_marker(pte_marker marker) +{ + return swp_entry_to_pte(make_pte_marker_entry(marker)); +} + +/* + * This is a special version to check pte_none() just to cover the case when + * the pte is a pte marker. It existed because in many cases the pte marker + * should be seen as a none pte; it's just that we have stored some information + * onto the none pte so it becomes not-none any more. + * + * It should be used when the pte is file-backed, ram-based and backing + * userspace pages, like shmem. It is not needed upon pgtables that do not + * support pte markers at all. For example, it's not needed on anonymous + * memory, kernel-only memory (including when the system is during-boot), + * non-ram based generic file-system. It's fine to be used even there, but the + * extra pte marker check will be pure overhead. + * + * For systems configured with !CONFIG_PTE_MARKER this will be automatically + * optimized to pte_none(). + */ +static inline int pte_none_mostly(pte_t pte) +{ + return pte_none(pte) || is_pte_marker(pte); +} + static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -274,7 +380,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION -extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, +extern int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page); extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, @@ -304,10 +410,10 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) static inline int is_pmd_migration_entry(pmd_t pmd) { - return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); + return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); } #else -static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, +static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) { BUILD_BUG(); @@ -387,18 +493,10 @@ static inline void num_poisoned_pages_inc(void) } #endif -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \ - defined(CONFIG_DEVICE_PRIVATE) static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; } -#else -static inline int non_swap_entry(swp_entry_t entry) -{ - return 0; -} -#endif #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ |