summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/bitops/instrumented-lock.h28
-rw-r--r--include/asm-generic/bitops/lock.h20
-rw-r--r--include/asm-generic/pgalloc.h7
-rw-r--r--include/linux/bootmem_info.h2
-rw-r--r--include/linux/buffer_head.h81
-rw-r--r--include/linux/cacheinfo.h1
-rw-r--r--include/linux/cgroup-defs.h5
-rw-r--r--include/linux/damon.h74
-rw-r--r--include/linux/dax.h10
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/gfp.h12
-rw-r--r--include/linux/hugetlb.h25
-rw-r--r--include/linux/hugetlb_cgroup.h11
-rw-r--r--include/linux/jbd2.h2
-rw-r--r--include/linux/memblock.h9
-rw-r--r--include/linux/memcontrol.h93
-rw-r--r--include/linux/memory-tiers.h41
-rw-r--r--include/linux/mempolicy.h58
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h176
-rw-r--r--include/linux/mm_inline.h20
-rw-r--r--include/linux/mm_types.h49
-rw-r--r--include/linux/mmu_notifier.h9
-rw-r--r--include/linux/mmzone.h29
-rw-r--r--include/linux/page-flags.h19
-rw-r--r--include/linux/pagemap.h33
-rw-r--r--include/linux/percpu_counter.h30
-rw-r--r--include/linux/rmap.h4
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/sched/coredump.h19
-rw-r--r--include/linux/sched/mm.h4
-rw-r--r--include/linux/sched/numa_balancing.h6
-rw-r--r--include/linux/shmem_fs.h16
-rw-r--r--include/linux/shrinker.h87
-rw-r--r--include/linux/userfaultfd_k.h28
-rw-r--r--include/linux/wait.h9
-rw-r--r--include/trace/events/damon.h45
-rw-r--r--include/trace/events/migrate.h24
-rw-r--r--include/trace/events/vmscan.h8
-rw-r--r--include/uapi/linux/fs.h59
-rw-r--r--include/uapi/linux/mempolicy.h2
-rw-r--r--include/uapi/linux/prctl.h3
-rw-r--r--include/uapi/linux/userfaultfd.h9
43 files changed, 837 insertions, 344 deletions
diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h
index eb64bd4f11f3..542d3727ee4e 100644
--- a/include/asm-generic/bitops/instrumented-lock.h
+++ b/include/asm-generic/bitops/instrumented-lock.h
@@ -58,27 +58,25 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
return arch_test_and_set_bit_lock(nr, addr);
}
-#if defined(arch_clear_bit_unlock_is_negative_byte)
/**
- * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
+ * xor_unlock_is_negative_byte - XOR a single byte in memory and test if
+ * it is negative, for unlock.
+ * @mask: Change the bits which are set in this mask.
+ * @addr: The address of the word containing the byte to change.
*
+ * Changes some of bits 0-6 in the word pointed to by @addr.
* This operation is atomic and provides release barrier semantics.
+ * Used to optimise some folio operations which are commonly paired
+ * with an unlock or end of writeback. Bit 7 is used as PG_waiters to
+ * indicate whether anybody is waiting for the unlock.
*
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
+ * Return: Whether the top bit of the byte is set.
*/
-static inline bool
-clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
kcsan_release();
- instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
- return arch_clear_bit_unlock_is_negative_byte(nr, addr);
+ instrument_atomic_write(addr, sizeof(long));
+ return arch_xor_unlock_is_negative_byte(mask, addr);
}
-/* Let everybody know we have it. */
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
-#endif
-
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 40913516e654..14d4ec8c5152 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -66,27 +66,15 @@ arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
raw_atomic_long_set_release((atomic_long_t *)p, old);
}
-/**
- * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
- *
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
- */
-#ifndef arch_clear_bit_unlock_is_negative_byte
-static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr,
- volatile unsigned long *p)
+#ifndef arch_xor_unlock_is_negative_byte
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
{
long old;
- unsigned long mask = BIT_MASK(nr);
- p += BIT_WORD(nr);
- old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
+ old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p);
return !!(old & BIT(7));
}
-#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte
#endif
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index c75d4a753849..879e5f8aa5e9 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -169,6 +169,8 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
ptdesc = pagetable_alloc(gfp, 0);
if (!ptdesc)
return NULL;
+
+ pagetable_pud_ctor(ptdesc);
return ptdesc_address(ptdesc);
}
@@ -190,8 +192,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
+ struct ptdesc *ptdesc = virt_to_ptdesc(pud);
+
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- pagetable_free(virt_to_ptdesc(pud));
+ pagetable_pud_dtor(ptdesc);
+ pagetable_free(ptdesc);
}
#ifndef __HAVE_ARCH_PUD_FREE
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index e1a3c9c9754c..cffa38a73618 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -60,7 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page,
static inline void free_bootmem_page(struct page *page)
{
- kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+ kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
free_reserved_page(page);
}
#endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 44e9de51eedf..5f23ee599889 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -198,13 +198,11 @@ void touch_buffer(struct buffer_head *bh);
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
unsigned long offset);
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
- bool retry);
+ gfp_t gfp);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry);
-void create_empty_buffers(struct page *, unsigned long,
- unsigned long b_state);
-void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
- unsigned long b_state);
+struct buffer_head *create_empty_buffers(struct folio *folio,
+ unsigned long blocksize, unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
void end_buffer_async_write(struct buffer_head *bh, int uptodate);
@@ -227,8 +225,8 @@ void __wait_on_buffer(struct buffer_head *);
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
unsigned size);
-struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp);
+struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
@@ -338,17 +336,38 @@ sb_breadahead(struct super_block *sb, sector_t block)
__breadahead(sb->s_bdev, block, sb->s_blocksize);
}
-static inline struct buffer_head *
-sb_getblk(struct super_block *sb, sector_t block)
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+ sector_t block, unsigned size)
{
- return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+ gfp_t gfp;
+
+ gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+ gfp |= __GFP_NOFAIL;
+
+ return bdev_getblk(bdev, block, size, gfp);
}
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+ sector_t block, unsigned size)
+{
+ gfp_t gfp;
-static inline struct buffer_head *
-sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp)
+ gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+ gfp |= __GFP_MOVABLE | __GFP_NOFAIL;
+
+ return bdev_getblk(bdev, block, size, gfp);
+}
+
+static inline struct buffer_head *sb_getblk(struct super_block *sb,
+ sector_t block)
{
- return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp);
+ return __getblk(sb->s_bdev, block, sb->s_blocksize);
+}
+
+static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb,
+ sector_t block, gfp_t gfp)
+{
+ return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp);
}
static inline struct buffer_head *
@@ -385,20 +404,6 @@ static inline void lock_buffer(struct buffer_head *bh)
__lock_buffer(bh);
}
-static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
- sector_t block,
- unsigned size)
-{
- return __getblk_gfp(bdev, block, size, 0);
-}
-
-static inline struct buffer_head *__getblk(struct block_device *bdev,
- sector_t block,
- unsigned size)
-{
- return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
-}
-
static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
{
if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
@@ -450,6 +455,28 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}
+/**
+ * get_nth_bh - Get a reference on the n'th buffer after this one.
+ * @bh: The buffer to start counting from.
+ * @count: How many buffers to skip.
+ *
+ * This is primarily useful for finding the nth buffer in a folio; in
+ * that case you pass the head buffer and the byte offset in the folio
+ * divided by the block size. It can be used for other purposes, but
+ * it will wrap at the end of the folio rather than returning NULL or
+ * proceeding to the next folio for you.
+ *
+ * Return: The requested buffer with an elevated refcount.
+ */
+static inline __must_check
+struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count)
+{
+ while (count--)
+ bh = bh->b_this_page;
+ get_bh(bh);
+ return bh;
+}
+
bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_BUFFER_HEAD
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index a5cfd44fab45..d504eb4b49ab 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -73,6 +73,7 @@ struct cacheinfo {
struct cpu_cacheinfo {
struct cacheinfo *info_list;
+ unsigned int per_cpu_data_slice_size;
unsigned int num_levels;
unsigned int num_leaves;
bool cpu_map_populated;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 265da00a1a8b..4a6b6b77ccb6 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -115,6 +115,11 @@ enum {
* Enable recursive subtree protection
*/
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
+
+ /*
+ * Enable hugetlb accounting for the memory controller.
+ */
+ CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
};
/* cftype->flags */
diff --git a/include/linux/damon.h b/include/linux/damon.h
index ae2664d1d5f1..ab2f17d9926b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -40,9 +40,24 @@ struct damon_addr_range {
* @ar: The address range of the region.
* @sampling_addr: Address of the sample for the next access check.
* @nr_accesses: Access frequency of this region.
+ * @nr_accesses_bp: @nr_accesses in basis points (0.01%), updated for
+ * each sampling interval.
* @list: List head for siblings.
* @age: Age of this region.
*
+ * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and is
+ * increased for every &damon_attrs->sample_interval if an access to the region
+ * during the last sampling interval is found. This field should not be
+ * updated by direct access but only through the helper function,
+ * damon_update_region_access_rate().
+ *
+ * @nr_accesses_bp is another representation of @nr_accesses in basis points
+ * (1 in 10,000) that is updated for every &damon_attrs->sample_interval in a
+ * manner similar to a moving sum. By the algorithm, this value becomes
+ * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can
+ * be used when the aggregation interval is too long for the caller to wait
+ * for it before getting the access monitoring results.
+ *
* @age is initially zero, increased for each aggregation interval, and reset
* to zero again if the access frequency is significantly changed. If two
* regions are merged into a new region, both @nr_accesses and @age of the new
@@ -52,6 +67,7 @@ struct damon_region {
struct damon_addr_range ar;
unsigned long sampling_addr;
unsigned int nr_accesses;
+ unsigned int nr_accesses_bp;
struct list_head list;
unsigned int age;
@@ -298,24 +314,24 @@ struct damos_access_pattern {
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
* @pattern: Access pattern of target regions.
* @action: &damo_action to be applied to the target regions.
+ * @apply_interval_us: The time between applying the @action.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @filters: Additional set of &struct damos_filter for &action.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
- * For each aggregation interval, DAMON finds regions which fit in the
+ * For each @apply_interval_us, DAMON finds regions which fit in the
* &pattern and applies &action to those. To avoid consuming too much
* CPU time or IO resources for the &action, &quota is used.
*
+ * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead.
+ *
* To do the work only when needed, schemes can be activated for specific
* system situations using &wmarks. If all schemes that registered to the
* monitoring context are inactive, DAMON stops monitoring either, and just
* repeatedly checks the watermarks.
*
- * If all schemes that registered to a &struct damon_ctx are inactive, DAMON
- * stops monitoring and just repeatedly checks the watermarks.
- *
* Before applying the &action to a memory region, &struct damon_operations
* implementation could check pages of the region and skip &action to respect
* &filters
@@ -327,6 +343,14 @@ struct damos_access_pattern {
struct damos {
struct damos_access_pattern pattern;
enum damos_action action;
+ unsigned long apply_interval_us;
+/* private: internal use only */
+ /*
+ * number of sample intervals that should be passed before applying
+ * @action
+ */
+ unsigned long next_apply_sis;
+/* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
struct list_head filters;
@@ -472,13 +496,14 @@ struct damon_callback {
* regions.
*
* For each @sample_interval, DAMON checks whether each region is accessed or
- * not. It aggregates and keeps the access information (number of accesses to
- * each region) for @aggr_interval time. DAMON also checks whether the target
- * memory regions need update (e.g., by ``mmap()`` calls from the application,
- * in case of virtual memory monitoring) and applies the changes for each
- * @ops_update_interval. All time intervals are in micro-seconds.
- * Please refer to &struct damon_operations and &struct damon_callback for more
- * detail.
+ * not during the last @sample_interval. If such access is found, DAMON
+ * aggregates the information by increasing &damon_region->nr_accesses for
+ * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON
+ * also checks whether the target memory regions need update (e.g., by
+ * ``mmap()`` calls from the application, in case of virtual memory monitoring)
+ * and applies the changes for each @ops_update_interval. All time intervals
+ * are in micro-seconds. Please refer to &struct damon_operations and &struct
+ * damon_callback for more detail.
*/
struct damon_attrs {
unsigned long sample_interval;
@@ -522,8 +547,18 @@ struct damon_ctx {
struct damon_attrs attrs;
/* private: internal use only */
- struct timespec64 last_aggregation;
- struct timespec64 last_ops_update;
+ /* number of sample intervals that passed since this context started */
+ unsigned long passed_sample_intervals;
+ /*
+ * number of sample intervals that should be passed before next
+ * aggregation
+ */
+ unsigned long next_aggregation_sis;
+ /*
+ * number of sample intervals that should be passed before next ops
+ * update
+ */
+ unsigned long next_ops_update_sis;
/* public: */
struct task_struct *kdamond;
@@ -608,6 +643,8 @@ void damon_add_region(struct damon_region *r, struct damon_target *t);
void damon_destroy_region(struct damon_region *r, struct damon_target *t);
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges);
+void damon_update_region_access_rate(struct damon_region *r, bool accessed,
+ struct damon_attrs *attrs);
struct damos_filter *damos_new_filter(enum damos_filter_type type,
bool matching);
@@ -615,7 +652,9 @@ void damos_add_filter(struct damos *s, struct damos_filter *f);
void damos_destroy_filter(struct damos_filter *f);
struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
- enum damos_action action, struct damos_quota *quota,
+ enum damos_action action,
+ unsigned long apply_interval_us,
+ struct damos_quota *quota,
struct damos_watermarks *wmarks);
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
@@ -642,6 +681,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
}
+static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs)
+{
+ /* {aggr,sample}_interval are unsigned long, hence could overflow */
+ return min(attrs->aggr_interval / attrs->sample_interval,
+ (unsigned long)UINT_MAX);
+}
+
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 22cd9902345d..b463502b16e1 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -159,8 +159,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-dax_entry_t dax_lock_page(struct page *page);
-void dax_unlock_page(struct page *page, dax_entry_t cookie);
+dax_entry_t dax_lock_folio(struct folio *folio);
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
unsigned long index, struct page **page);
void dax_unlock_mapping_entry(struct address_space *mapping,
@@ -182,14 +182,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
return -EOPNOTSUPP;
}
-static inline dax_entry_t dax_lock_page(struct page *page)
+static inline dax_entry_t dax_lock_folio(struct folio *folio)
{
- if (IS_DAX(page->mapping->host))
+ if (IS_DAX(folio->mapping->host))
return ~0UL;
return 0;
}
-static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
+static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
{
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c27c324ba58a..98b7a7a8c42e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -454,7 +454,7 @@ extern const struct address_space_operations empty_aops;
* It is also used to block modification of page cache contents through
* memory mappings.
* @gfp_mask: Memory allocation flags to use for allocating pages.
- * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
@@ -557,7 +557,7 @@ static inline int mapping_mapped(struct address_space *mapping)
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
+ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
@@ -1270,7 +1270,7 @@ struct super_block {
const struct dentry_operations *s_d_op; /* default d_op for dentries */
- struct shrinker s_shrink; /* per-sb shrinker handle */
+ struct shrinker *s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c83..de292a007138 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -8,6 +8,7 @@
#include <linux/topology.h>
struct vm_area_struct;
+struct mempolicy;
/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
-struct folio *folio_alloc(gfp_t gfp, unsigned order);
+struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid);
+struct folio *folio_alloc(gfp_t gfp, unsigned int order);
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
#else
@@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
+static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid)
+{
+ return alloc_pages(gfp, order);
+}
static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
@@ -320,11 +328,13 @@ extern void page_frag_free(void *addr);
#define free_page(addr) free_pages((addr), 0)
void page_alloc_init_cpuhp(void);
+int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
void page_alloc_init_late(void);
+void setup_pcp_cacheinfo(void);
/*
* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 47d25a5e1933..d3acecc5db4b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -30,7 +30,7 @@ void free_huge_folio(struct folio *folio);
#ifdef CONFIG_HUGETLB_PAGE
-#include <linux/mempolicy.h>
+#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>
@@ -280,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long cp_flags);
bool is_hugetlb_entry_migration(pte_t pte);
+bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -544,7 +545,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
}
struct hugetlbfs_inode_info {
- struct shared_policy policy;
struct inode vfs_inode;
unsigned int seals;
};
@@ -748,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
-struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
- unsigned long address);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -844,6 +842,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
return huge_page_size(h) / 512;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return filemap_lock_folio(mapping, idx << huge_page_order(h));
+}
+
#include <asm/hugetlb.h>
#ifndef is_hugepage_only_range
@@ -1040,6 +1044,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return NULL;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return NULL;
+}
+
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
@@ -1060,13 +1070,6 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
return NULL;
}
-static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
- struct vm_area_struct *vma,
- unsigned long address)
-{
- return NULL;
-}
-
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
return 0;
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 3d82d91f49ac..e5d64b8b59c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -22,13 +22,6 @@ struct resv_map;
struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB
-/*
- * Minimum page order trackable by hugetlb cgroup.
- * At least 3 pages are necessary for all the tracking information.
- * The second tail page contains all of the hugetlb-specific fields.
- */
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
-
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
@@ -68,8 +61,6 @@ static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
{
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
- if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
- return NULL;
if (rsvd)
return folio->_hugetlb_cgroup_rsvd;
else
@@ -91,8 +82,6 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg, bool rsvd)
{