summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev-defs.h8
-rw-r--r--include/linux/backing-dev.h50
-rw-r--r--include/linux/cma.h10
-rw-r--r--include/linux/damon.h87
-rw-r--r--include/linux/fault-inject.h2
-rw-r--r--include/linux/fs.h21
-rw-r--r--include/linux/gfp.h10
-rw-r--r--include/linux/highmem-internal.h10
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/kthread.h22
-rw-r--r--include/linux/list_lru.h17
-rw-r--r--include/linux/memcontrol.h46
-rw-r--r--include/linux/memory.h12
-rw-r--r--include/linux/memory_hotplug.h124
-rw-r--r--include/linux/migrate.h8
-rw-r--r--include/linux/mm.h11
-rw-r--r--include/linux/mmzone.h22
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/node.h25
-rw-r--r--include/linux/page-flags.h92
-rw-r--r--include/linux/pageblock-flags.h7
-rw-r--r--include/linux/pagemap.h7
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/sched/sysctl.h10
-rw-r--r--include/linux/shmem_fs.h1
-rw-r--r--include/linux/slab.h3
-rw-r--r--include/linux/swap.h6
-rw-r--r--include/linux/thread_info.h5
-rw-r--r--include/linux/uaccess.h2
-rw-r--r--include/linux/vm_event_item.h3
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/linux/xarray.h9
-rw-r--r--include/ras/ras_event.h1
-rw-r--r--include/trace/events/compaction.h26
-rw-r--r--include/trace/events/writeback.h28
-rw-r--r--include/uapi/linux/userfaultfd.h8
36 files changed, 386 insertions, 321 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 993c5628a726..e863c88df95f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -207,14 +207,6 @@ struct backing_dev_info {
#endif
};
-enum {
- BLK_RW_ASYNC = 0,
- BLK_RW_SYNC = 1,
-};
-
-void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
-void set_bdi_congested(struct backing_dev_info *bdi, int sync);
-
struct wb_lock_cookie {
bool locked;
unsigned long flags;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 483979c1b9f4..87ce24d238f3 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -135,13 +135,6 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb)
struct backing_dev_info *inode_to_bdi(struct inode *inode);
-static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
-{
- return wb->congested & cong_bits;
-}
-
-long congestion_wait(int sync, long timeout);
-
static inline bool mapping_can_writeback(struct address_space *mapping)
{
return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK;
@@ -162,7 +155,6 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
-int inode_congested(struct inode *inode, int cong_bits);
/**
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -390,50 +382,8 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}
-static inline int inode_congested(struct inode *inode, int cong_bits)
-{
- return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
-}
-
#endif /* CONFIG_CGROUP_WRITEBACK */
-static inline int inode_read_congested(struct inode *inode)
-{
- return inode_congested(inode, 1 << WB_sync_congested);
-}
-
-static inline int inode_write_congested(struct inode *inode)
-{
- return inode_congested(inode, 1 << WB_async_congested);
-}
-
-static inline int inode_rw_congested(struct inode *inode)
-{
- return inode_congested(inode, (1 << WB_sync_congested) |
- (1 << WB_async_congested));
-}
-
-static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
-{
- return wb_congested(&bdi->wb, cong_bits);
-}
-
-static inline int bdi_read_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, 1 << WB_sync_congested);
-}
-
-static inline int bdi_write_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, 1 << WB_async_congested);
-}
-
-static inline int bdi_rw_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, (1 << WB_sync_congested) |
- (1 << WB_async_congested));
-}
-
const char *bdi_dev_name(struct backing_dev_info *bdi);
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/cma.h b/include/linux/cma.h
index bd801023504b..90fd742fd1ef 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -20,6 +20,14 @@
#define CMA_MAX_NAME 64
+/*
+ * TODO: once the buddy -- especially pageblock merging and alloc_contig_range()
+ * -- can deal with only some pageblocks of a higher-order page being
+ * MIGRATE_CMA, we can use pageblock_nr_pages.
+ */
+#define CMA_MIN_ALIGNMENT_PAGES MAX_ORDER_NR_PAGES
+#define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES)
+
struct cma;
extern unsigned long totalcma_pages;
@@ -50,4 +58,6 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
+
+extern void cma_reserve_pages_on_error(struct cma *cma);
#endif
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5e1e3a128b77..f23cbfa4248d 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -60,19 +60,18 @@ struct damon_region {
/**
* struct damon_target - Represents a monitoring target.
- * @id: Unique identifier for this target.
+ * @pid: The PID of the virtual address space to monitor.
* @nr_regions: Number of monitoring target regions of this target.
* @regions_list: Head of the monitoring target regions of this target.
* @list: List head for siblings.
*
* Each monitoring context could have multiple targets. For example, a context
* for virtual memory address spaces could have multiple target processes. The
- * @id of each target should be unique among the targets of the context. For
- * example, in the virtual address monitoring context, it could be a pidfd or
- * an address of an mm_struct.
+ * @pid should be set for appropriate &struct damon_operations including the
+ * virtual address spaces monitoring operations.
*/
struct damon_target {
- unsigned long id;
+ struct pid *pid;
unsigned int nr_regions;
struct list_head regions_list;
struct list_head list;
@@ -88,6 +87,7 @@ struct damon_target {
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
* @DAMOS_STAT: Do nothing but count the stat.
+ * @NR_DAMOS_ACTIONS: Total number of DAMOS actions
*/
enum damos_action {
DAMOS_WILLNEED,
@@ -96,6 +96,7 @@ enum damos_action {
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
DAMOS_STAT, /* Do nothing but only record the stat */
+ NR_DAMOS_ACTIONS,
};
/**
@@ -121,9 +122,9 @@ enum damos_action {
* uses smaller one as the effective quota.
*
* For selecting regions within the quota, DAMON prioritizes current scheme's
- * target memory regions using the &struct damon_primitive->get_scheme_score.
+ * target memory regions using the &struct damon_operations->get_scheme_score.
* You could customize the prioritization logic by setting &weight_sz,
- * &weight_nr_accesses, and &weight_age, because monitoring primitives are
+ * &weight_nr_accesses, and &weight_age, because monitoring operations are
* encouraged to respect those.
*/
struct damos_quota {
@@ -158,10 +159,12 @@ struct damos_quota {
*
* @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme.
* @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000].
+ * @NR_DAMOS_WMARK_METRICS: Total number of DAMOS watermark metrics
*/
enum damos_wmark_metric {
DAMOS_WMARK_NONE,
DAMOS_WMARK_FREE_MEM_RATE,
+ NR_DAMOS_WMARK_METRICS,
};
/**
@@ -254,13 +257,26 @@ struct damos {
struct list_head list;
};
+/**
+ * enum damon_ops_id - Identifier for each monitoring operations implementation
+ *
+ * @DAMON_OPS_VADDR: Monitoring operations for virtual address spaces
+ * @DAMON_OPS_PADDR: Monitoring operations for the physical address space
+ */
+enum damon_ops_id {
+ DAMON_OPS_VADDR,
+ DAMON_OPS_PADDR,
+ NR_DAMON_OPS,
+};
+
struct damon_ctx;
/**
- * struct damon_primitive - Monitoring primitives for given use cases.
+ * struct damon_operations - Monitoring operations for given use cases.
*
- * @init: Initialize primitive-internal data structures.
- * @update: Update primitive-internal data structures.
+ * @id: Identifier of this operations set.
+ * @init: Initialize operations-related data structures.
+ * @update: Update operations-related data structures.
* @prepare_access_checks: Prepare next access check of target regions.
* @check_accesses: Check the accesses to target regions.
* @reset_aggregated: Reset aggregated accesses monitoring results.
@@ -270,18 +286,20 @@ struct damon_ctx;
* @cleanup: Clean up the context.
*
* DAMON can be extended for various address spaces and usages. For this,
- * users should register the low level primitives for their target address
- * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread
+ * users should register the low level operations for their target address
+ * space and usecase via the &damon_ctx.ops. Then, the monitoring thread
* (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
- * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * the monitoring, @update after each &damon_ctx.ops_update_interval, and
* @check_accesses, @target_valid and @prepare_access_checks after each
* &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each
* &damon_ctx.aggr_interval.
*
- * @init should initialize primitive-internal data structures. For example,
+ * Each &struct damon_operations instance having valid @id can be registered
+ * via damon_register_ops() and selected by damon_select_ops() later.
+ * @init should initialize operations-related data structures. For example,
* this could be used to construct proper monitoring target regions and link
* those to @damon_ctx.adaptive_targets.
- * @update should update the primitive-internal data structures. For example,
+ * @update should update the operations-related data structures. For example,
* this could be used to update monitoring target regions for current status.
* @prepare_access_checks should manipulate the monitoring regions to be
* prepared for the next access check.
@@ -301,7 +319,8 @@ struct damon_ctx;
* monitoring.
* @cleanup is called from @kdamond just before its termination.
*/
-struct damon_primitive {
+struct damon_operations {
+ enum damon_ops_id id;
void (*init)(struct damon_ctx *context);
void (*update)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
@@ -355,15 +374,15 @@ struct damon_callback {
*
* @sample_interval: The time between access samplings.
* @aggr_interval: The time between monitor results aggregations.
- * @primitive_update_interval: The time between monitoring primitive updates.
+ * @ops_update_interval: The time between monitoring operations updates.
*
* For each @sample_interval, DAMON checks whether each region is accessed or
* not. It aggregates and keeps the access information (number of accesses to
* each region) for @aggr_interval time. DAMON also checks whether the target
* memory regions need update (e.g., by ``mmap()`` calls from the application,
* in case of virtual memory monitoring) and applies the changes for each
- * @primitive_update_interval. All time intervals are in micro-seconds.
- * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * @ops_update_interval. All time intervals are in micro-seconds.
+ * Please refer to &struct damon_operations and &struct damon_callback for more
* detail.
*
* @kdamond: Kernel thread who does the monitoring.
@@ -375,7 +394,7 @@ struct damon_callback {
*
* Once started, the monitoring thread runs until explicitly required to be
* terminated or every monitoring target is invalid. The validity of the
- * targets is checked via the &damon_primitive.target_valid of @primitive. The
+ * targets is checked via the &damon_operations.target_valid of @ops. The
* termination can also be explicitly requested by writing non-zero to
* @kdamond_stop. The thread sets @kdamond to NULL when it terminates.
* Therefore, users can know whether the monitoring is ongoing or terminated by
@@ -385,7 +404,7 @@ struct damon_callback {
* Note that the monitoring thread protects only @kdamond and @kdamond_stop via
* @kdamond_lock. Accesses to other fields must be protected by themselves.
*
- * @primitive: Set of monitoring primitives for given use cases.
+ * @ops: Set of monitoring operations for given use cases.
* @callback: Set of callbacks for monitoring events notifications.
*
* @min_nr_regions: The minimum number of adaptive monitoring regions.
@@ -396,17 +415,17 @@ struct damon_callback {
struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
- unsigned long primitive_update_interval;
+ unsigned long ops_update_interval;
/* private: internal use only */
struct timespec64 last_aggregation;
- struct timespec64 last_primitive_update;
+ struct timespec64 last_ops_update;
/* public: */
struct task_struct *kdamond;
struct mutex kdamond_lock;
- struct damon_primitive primitive;
+ struct damon_operations ops;
struct damon_callback callback;
unsigned long min_nr_regions;
@@ -475,7 +494,7 @@ struct damos *damon_new_scheme(
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
-struct damon_target *damon_new_target(unsigned long id);
+struct damon_target *damon_new_target(void);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
bool damon_targets_empty(struct damon_ctx *ctx);
void damon_free_target(struct damon_target *t);
@@ -484,28 +503,18 @@ unsigned int damon_nr_regions(struct damon_target *t);
struct damon_ctx *damon_new_ctx(void);
void damon_destroy_ctx(struct damon_ctx *ctx);
-int damon_set_targets(struct damon_ctx *ctx,
- unsigned long *ids, ssize_t nr_ids);
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
- unsigned long aggr_int, unsigned long primitive_upd_int,
+ unsigned long aggr_int, unsigned long ops_upd_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
int damon_set_schemes(struct damon_ctx *ctx,
struct damos **schemes, ssize_t nr_schemes);
int damon_nr_running_ctxs(void);
+int damon_register_ops(struct damon_operations *ops);
+int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
-int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
#endif /* CONFIG_DAMON */
-#ifdef CONFIG_DAMON_VADDR
-bool damon_va_target_valid(void *t);
-void damon_va_set_primitives(struct damon_ctx *ctx);
-#endif /* CONFIG_DAMON_VADDR */
-
-#ifdef CONFIG_DAMON_PADDR
-bool damon_pa_target_valid(void *t);
-void damon_pa_set_primitives(struct damon_ctx *ctx);
-#endif /* CONFIG_DAMON_PADDR */
-
#endif /* _DAMON_H */
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index e525f6957c49..2d04f6448cde 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -64,6 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
struct kmem_cache;
+bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 27746a3da8fd..4fb6d5a50be7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -42,6 +42,7 @@
#include <linux/mount.h>
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
+#include <linux/slab.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -930,10 +931,15 @@ struct fown_struct {
* struct file_ra_state - Track a file's readahead state.
* @start: Where the most recent readahead started.
* @size: Number of pages read in the most recent readahead.
- * @async_size: Start next readahead when this many pages are left.
- * @ra_pages: Maximum size of a readahead request.
+ * @async_size: Numer of pages that were/are not needed immediately
+ * and so were/are genuinely "ahead". Start next readahead when
+ * the first of these pages is accessed.
+ * @ra_pages: Maximum size of a readahead request, copied from the bdi.
* @mmap_miss: How many mmap accesses missed in the page cache.
* @prev_pos: The last byte in the most recent read request.
+ *
+ * When this structure is passed to ->readahead(), the "most recent"
+ * readahead means the current readahead.
*/
struct file_ra_state {
pgoff_t start;
@@ -1435,6 +1441,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
+#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
/* Possible states of 'frozen' field */
enum {
@@ -3108,6 +3115,16 @@ extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_privs(struct file *);
+/*
+ * This must be used for allocating filesystems specific inodes to set
+ * up the inode reclaim context correctly.
+ */
+static inline void *
+alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp)
+{
+ return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
+}
+
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
static inline void insert_inode_hash(struct inode *inode)
{
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 80f63c862be5..20f6fbe12993 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -79,7 +79,7 @@ struct vm_area_struct;
* DOC: Page mobility and placement hints
*
* Page mobility and placement hints
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ---------------------------------
*
* These flags provide hints about how mobile the page is. Pages with similar
* mobility are placed within the same pageblocks to minimise problems due
@@ -112,7 +112,7 @@ struct vm_area_struct;
* DOC: Watermark modifiers
*
* Watermark modifiers -- controls access to emergency reserves
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ------------------------------------------------------------
*
* %__GFP_HIGH indicates that the caller is high-priority and that granting
* the request is necessary before the system can make forward progress.
@@ -144,7 +144,7 @@ struct vm_area_struct;
* DOC: Reclaim modifiers
*
* Reclaim modifiers
- * ~~~~~~~~~~~~~~~~~
+ * -----------------
* Please note that all the following flags are only applicable to sleepable
* allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
*
@@ -224,7 +224,7 @@ struct vm_area_struct;
* DOC: Action modifiers
*
* Action modifiers
- * ~~~~~~~~~~~~~~~~
+ * ----------------
*
* %__GFP_NOWARN suppresses allocation failure reports.
*
@@ -256,7 +256,7 @@ struct vm_area_struct;
* DOC: Useful GFP flag combinations
*
* Useful GFP flag combinations
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ----------------------------
*
* Useful GFP flag combinations that are commonly used. It is recommended
* that subsystems start with one of these combinations and then set/clear
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 0a0b2b09b1b8..a77be5630209 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -246,6 +246,16 @@ do { \
__kunmap_atomic(__addr); \
} while (0)
+/**
+ * kunmap_local - Unmap a page mapped via kmap_local_page().
+ * @__addr: An address within the page mapped
+ *
+ * @__addr can be any address within the mapped page. Commonly it is the
+ * address return from kmap_local_page(), but it can also include offsets.
+ *
+ * Unmapping should be done in the reverse order of the mapping. See
+ * kmap_local_page() for details.
+ */
#define kunmap_local(__addr) \
do { \
BUILD_BUG_ON(__same_type((__addr), struct page *)); \
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d1897a69c540..08357b4c7be7 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -754,7 +754,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { }
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
vm_flags_t flags)
{
- return entry;
+ return pte_mkhuge(entry);
}
#endif
@@ -1075,12 +1075,6 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
}
#endif /* CONFIG_HUGETLB_PAGE */
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
-extern bool hugetlb_free_vmemmap_enabled;
-#else
-#define hugetlb_free_vmemmap_enabled false
-#endif
-
static inline spinlock_t *huge_pte_lock(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 3df4ea04716f..de5d75bafd66 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -141,12 +141,6 @@ struct kthread_delayed_work {
struct timer_list timer;
};
-#define KTHREAD_WORKER_INIT(worker) { \
- .lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock), \
- .work_list = LIST_HEAD_INIT((worker).work_list), \
- .delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
- }
-
#define KTHREAD_WORK_INIT(work, fn) { \
.node = LIST_HEAD_INIT((work).node), \
.func = (fn), \
@@ -158,9 +152,6 @@ struct kthread_delayed_work {
TIMER_IRQSAFE), \
}
-#define DEFINE_KTHREAD_WORKER(worker) \
- struct kthread_worker worker = KTHREAD_WORKER_INIT(worker)
-
#define DEFINE_KTHREAD_WORK(work, fn) \
struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
@@ -168,19 +159,6 @@ struct kthread_delayed_work {
struct kthread_delayed_work dwork = \
KTHREAD_DELAYED_WORK_INIT(dwork, fn)
-/*
- * kthread_worker.lock needs its own lockdep class key when defined on
- * stack with lockdep enabled. Use the following macros in such cases.
- */
-#ifdef CONFIG_LOCKDEP
-# define KTHREAD_WORKER_INIT_ONSTACK(worker) \
- ({ kthread_init_worker(&worker); worker; })
-# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \
- struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
-#else
-# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
-#endif
-
extern void __kthread_init_worker(struct kthread_worker *worker,
const char *name, struct lock_class_key *key);
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 1b5fceb565df..b35968ee9fb5 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/shrinker.h>
+#include <linux/xarray.h>
struct mem_cgroup;
@@ -33,8 +34,8 @@ struct list_lru_one {
struct list_lru_memcg {
struct rcu_head rcu;
- /* array of per cgroup lists, indexed by memcg_cache_id */
- struct list_lru_one *lru[];
+ /* array of per cgroup per node lists, indexed by node id */
+ struct list_lru_one node[];
};
struct list_lru_node {
@@ -42,11 +43,7 @@ struct list_lru_node {
spinlock_t lock;
/* global list, used for the root cgroup in cgroup aware lrus */
struct list_lru_one lru;
-#ifdef CONFIG_MEMCG_KMEM
- /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
- struct list_lru_memcg __rcu *memcg_lrus;
-#endif
- long nr_items;
+ long nr_items;
} ____cacheline_aligned_in_smp;
struct list_lru {
@@ -55,6 +52,7 @@ struct list_lru {
struct list_head list;
int shrinker_id;
bool memcg_aware;
+ struct xarray xa;
#endif
};
@@ -69,8 +67,9 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
#define list_lru_init_memcg(lru, shrinker) \
__list_lru_init((lru), true, NULL, shrinker)
-int memcg_update_all_list_lrus(int num_memcgs);
-void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg);
+int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
+ gfp_t gfp);
+void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
/**
* list_lru_add: add an element to the lru list's tail
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0abbd685703b..a68dce3873fc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -34,6 +34,7 @@ enum memcg_stat_item {
MEMCG_SOCK,
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
+ MEMCG_KMEM,
MEMCG_NR_STAT,
};
@@ -523,6 +524,20 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
+static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+retry:
+ memcg = obj_cgroup_memcg(objcg);
+ if (unlikely(!css_tryget(&memcg->css)))
+ goto retry;
+ rcu_read_unlock();
+
+ return memcg;
+}
+
#ifdef CONFIG_MEMCG_KMEM
/*
* folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
@@ -841,9 +856,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
*/
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
- if (!memcg->memory.parent)
- return NULL;
- return mem_cgroup_from_counter(memcg->memory.parent, memory);
+ return mem_cgroup_from_css(memcg->css.parent);
}
static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
@@ -1672,18 +1685,6 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
extern struct static_key_false memcg_kmem_enabled_key;
-extern int memcg_nr_cache_ids;
-void memcg_get_cache_ids(void);
-void memcg_put_cache_ids(void);
-
-/*
- * Helper macro to loop through all memcg-specific caches. Callers must still
- * check if the cache is valid (it is either valid or NULL).
- * the slab_mutex must be held when looping through those caches
- */
-#define for_each_memcg_cache_index(_idx) \
- for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)
-
static inline bool memcg_kmem_enabled(void)
{
return static_branch_likely(&memcg_kmem_enabled_key);
@@ -1707,7 +1708,7 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
* A helper for accessing memcg's kmem_id, used for getting
* corresponding LRU lists.
*/
-static inline int memcg_cache_id(struct mem_cgroup *memcg)
+static inline int memcg_kmem_id(struct mem_cgroup *memcg)
{
return memcg ? memcg->kmemcg_id : -1;
}
@@ -1740,27 +1741,16 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}
-#define for_each_memcg_cache_index(_idx) \
- for (; NULL; )
-
static inline bool memcg_kmem_enabled(void)
{
return false;
}
-static inline int memcg_cache_id(struct mem_cgroup *memcg)
+static inline int memcg_kmem_id(struct mem_cgroup *memcg)
{
return -1;
}
-static inline void memcg_get_cache_ids(void)
-{
-}
-
-static inline void memcg_put_cache_ids(void)
-{
-}
-
static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
return NULL;
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 88eb587b5143..aa619464a1df 100644
--- a/include/linux/memory.h
+++ b/include/