/* SPDX-License-Identifier: GPL-2.0-or-later */
/* internal.h: mm/ internal definitions
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/tracepoint-defs.h>
struct folio_batch;
/*
* The set of flags that only affect watermark checking and reclaim
* behaviour. This is used by the MM to obey the caller constraints
* about IO, FS and watermark checking while ignoring placement
* hints such as HIGHMEM usage.
*/
#define GFP_RECLAIM_MASK	(__GFP_RECLAIM		| \
				 __GFP_HIGH		| \
				 __GFP_IO		| \
				 __GFP_FS		| \
				 __GFP_NOWARN		| \
				 __GFP_RETRY_MAYFAIL	| \
				 __GFP_NOFAIL		| \
				 __GFP_NORETRY		| \
				 __GFP_MEMALLOC		| \
				 __GFP_NOMEMALLOC	| \
				 __GFP_NOLOCKDEP)

/*
 * GFP flags permitted during early boot: every valid GFP bit except the
 * reclaim, IO and FS ones.
 */
#define GFP_BOOT_MASK		(__GFP_BITS_MASK & \
				 ~(__GFP_RECLAIM | __GFP_IO | __GFP_FS))

/* Flags controlling cpuset and node placement constraints of an allocation */
#define GFP_CONSTRAINT_MASK	(__GFP_HARDWALL | __GFP_THISNODE)

/*
 * Flags that must not be used with a slab allocator; this also covers every
 * bit outside __GFP_BITS_MASK.
 */
#define GFP_SLAB_BUG_MASK	(__GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK)
/*
 * WARN_ON_ONCE_GFP - one-shot warning that honours __GFP_NOWARN
 * @cond: condition to test; evaluated exactly once.
 * @gfp:  GFP flags to check; may be any expression (e.g. "a | b"), it is
 *        parenthesised before being masked and evaluated exactly once.
 *
 * Different from WARN_ON_ONCE(), no warning will be issued when @gfp
 * contains __GFP_NOWARN.  A hit that is suppressed this way does not set
 * the one-shot flag, so a later hit without __GFP_NOWARN still warns.
 *
 * Evaluates to the truth value of @cond, whether or not a warning was
 * issued.
 */
#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
	static bool __section(".data.once") __warned;			\
	int __ret_warn_once = !!(cond);					\
									\
	if (unlikely(!((gfp) & __GFP_NOWARN) &&				\
		     __ret_warn_once && !__warned)) {			\
		__warned = true;					\
		WARN_ON(1);						\
	}								\
	unlikely(__ret_warn_once);					\
})
void page_writeback_init(void);
/*
 * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages,
 * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit
 * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
 * leaves nr_pages_mapped at 0, but avoid surprise if it participates later.
 */
#define ENTIRELY_MAPPED 0x800000
/* Mask covering every bit below ENTIRELY_MAPPED (0x7fffff). */
#define FOLIO_PAGES_MAPPED (ENTIRELY_MAPPED - 1)
/*
* Flags passed to __show_mem() and show_free_areas() to suppress output in
* various contexts.
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
/*
 * Number of individual pages of @folio that have an elevated _mapcount.
 * The folio's entire_mapcount is excluded: only the bits below
 * ENTIRELY_MAPPED are returned.
 *
 * Debugging aid only - don't use this function outside of debugging code.
 */
static inline int folio_nr_pages_mapped(const struct folio *folio)
{
	int nr_mapped = atomic_read(&folio->_nr_pages_mapped);

	return nr_mapped & FOLIO_PAGES_MAPPED;
}
/*
 * Derive the first swap entry of @folio from @entry, an entry that lies
 * within the folio, by rounding the entry value down to a multiple of the
 * folio's page count.  folio->swap cannot be used here because there is no
 * guarantee that it has been initialized.
 *
 * Used for calling arch_swap_restore().
 */
static inline swp_entry_t folio_swap(swp_entry_t entry,
				     const struct folio *folio)
{
	return (swp_entry_t) {
		.val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)),
	};
}
/*
 * Return folio->mapping with the PAGE_MAPPING_FLAGS bits cleared, as an
 * untyped pointer.
 */
static inline void *folio_raw_mapping(const struct folio *folio)
{
	return (void *)((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
}
#ifdef CONFIG_MMU
/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;
/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
#define FPB_IGNORE_DIRTY ((__force fpb_t)BIT(0))
/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
#define FPB_IGNORE_SOFT_DIRTY ((__force fpb_t)BIT(1))
/*
 * Normalise @pte so that two PTEs of a batch can be compared: the result is
 * always made old and write-protected, and is additionally made clean when
 * FPB_IGNORE_DIRTY is set and has soft-dirty cleared when
 * FPB_IGNORE_SOFT_DIRTY is set in @flags.
 */
static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
	if (flags & FPB_IGNORE_DIRTY) {
		pte = pte_mkclean(pte);
	}

	if (likely(flags & FPB_IGNORE_SOFT_DIRTY)) {
		pte = pte_clear_soft_dirty(pte);
	}

	pte = pte_mkold(pte);

	return pte_wrprotect(pte);
}
/**
* folio_pte_batch - detect a PTE batch for a large folio
* @folio: The large folio to detect a PTE batch for.
* @addr: The user virtual address the first page is mapped at.
* @start_ptep: Page table pointer for the first entry.
* @pte: Page table entry for the first page.
* @max_nr: The maximum number of table entries to consider.
* @flags: Flags to modify the PTE batch semantics.
* @any_writable: Optional pointer to indicate whether any entry except the
* first one is writable.
* @any_young: Optional pointer to indicate whether any entry except the
* first one is young.
* @any_dirty: Optional pointer to indicate whether any entry except the
* first one is dirty.
*
* Detect a PTE batch: consecutive (present) PTEs that map consecutive
* pages of the same large folio.
*
* All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
* the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
* soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
*
* start_ptep must map any page of the folio. max_nr must be at least one and
* must be limited by the caller so scanning cannot exceed a single page table.
*
* Return: the number of table entries in the batch.
*/
static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
bool *any_writable, bool *any_young, bool *any_dirty)
{
unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
const pte_t *end_ptep = start_ptep + max_nr;
pte_t expected_pte, *ptep;
bool writable, young, dirty;
int nr;
if (any_writable)
*any_writable = false;
if (any_young)
*any_young = false;
if (any_dirty)
*any_dirty = false;
VM_WARN_ON_FOLIO(!pte_present(pte), folio);
VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
nr = pte_batch_hint(start_ptep, pte);
expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
ptep = start_ptep + nr;
while (ptep < end_ptep) {
pte = ptep_get(ptep);
if (any_writab
|