author    Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 15:58:05 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-10-02 15:58:05 -0700
commit    24d9e8b3c9c8a6f72c8b4c196a703e144928d919 (patch)
tree      81d9a41265b30c776a2a70a517fddb5e5da62ed0
parent    07fdad3a93756b872da7b53647715c48d0f4a2d0 (diff)
parent    ca74b8cadaad4b179f77f1f4dc3d288be9a580f1 (diff)
Merge tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - A new layer for caching objects for allocation and free via percpu
   arrays called sheaves. The aim is to combine the good parts of SLAB
   (lower-overhead and simpler percpu caching, compared to SLUB) without
   the past issues with arrays for freeing remote NUMA node objects and
   their flushing. It also allows more efficient kfree_rcu(), and cheaper
   object preallocations for cases where the exact number of objects is
   unknown, but an upper bound is. Currently VMAs and maple nodes are
   using this new caching, with a plan to enable it for all caches and
   remove the complex SLUB fastpath based on cpu (partial) slabs and
   this_cpu_cmpxchg_double() (Vlastimil Babka, with Liam Howlett and
   Pedro Falcato for the maple tree changes)

 - Re-entrant kmalloc_nolock(), which allows opportunistic allocations
   from NMI and tracing/kprobe contexts. Building on prior page
   allocator and memcg changes, it will result in removing BPF-specific
   caches on top of slab (Alexei Starovoitov)

 - Various fixes and cleanups (Kuan-Wei Chiu, Matthew Wilcox, Suren
   Baghdasaryan, Ye Liu)

* tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (40 commits)
  slab: Introduce kmalloc_nolock() and kfree_nolock().
  slab: Reuse first bit for OBJEXTS_ALLOC_FAIL
  slab: Make slub local_(try)lock more precise for LOCKDEP
  mm: Introduce alloc_frozen_pages_nolock()
  mm: Allow GFP_ACCOUNT to be used in alloc_pages_nolock().
  locking/local_lock: Introduce local_lock_is_locked().
  maple_tree: Convert forking to use the sheaf interface
  maple_tree: Add single node allocation support to maple state
  maple_tree: Prefilled sheaf conversion and testing
  tools/testing: Add support for prefilled slab sheafs
  maple_tree: Replace mt_free_one() with kfree()
  maple_tree: Use kfree_rcu in ma_free_rcu
  testing/radix-tree/maple: Hack around kfree_rcu not existing
  tools/testing: include maple-shim.c in maple.c
  maple_tree: use percpu sheaves for maple_node_cache
  mm, vma: use percpu sheaves for vm_area_struct cache
  tools/testing: Add support for changes to slab for sheaves
  slab: allow NUMA restricted allocations to use percpu sheaves
  tools/testing/vma: Implement vm_refcnt reset
  slab: skip percpu sheaves for remote object freeing
  ...
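As an illustration of the new interfaces (not taken from the merge itself), below is a minimal, hypothetical sketch of how a cache could opt into percpu sheaves and how the non-blocking allocation entry points are intended to be called. It relies only on the declarations this merge adds to include/linux/slab.h (sheaf_capacity, kmem_cache_prefill_sheaf(), kmem_cache_alloc_from_sheaf(), kmem_cache_return_sheaf(), kmem_cache_sheaf_size(), kmalloc_nolock()/kfree_nolock()); the "demo" cache, struct, and capacity value are invented, and the args-struct form of kmem_cache_create() is assumed from current mainline.

/*
 * Illustrative sketch only, assuming the slab declarations added by this
 * merge; the cache, struct, and capacity below are made up.
 */
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/slab.h>

struct demo_obj {
	unsigned long a, b;
};

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
	struct kmem_cache_args args = {
		/* A non-zero capacity enables percpu sheaves for this cache. */
		.sheaf_capacity = 32,
	};

	demo_cache = kmem_cache_create("demo_cache", sizeof(struct demo_obj),
				       &args, 0);
	return demo_cache ? 0 : -ENOMEM;
}

/*
 * Cheaply preallocate an upper bound of objects, then allocate from the
 * prefilled sheaf without blocking.
 */
static int demo_batch(unsigned int max_objs)
{
	struct slab_sheaf *sheaf;
	struct demo_obj *obj;

	sheaf = kmem_cache_prefill_sheaf(demo_cache, GFP_KERNEL, max_objs);
	if (!sheaf)
		return -ENOMEM;

	/* kmem_cache_sheaf_size() reports how many objects remain in the sheaf. */
	while (kmem_cache_sheaf_size(sheaf)) {
		obj = kmem_cache_alloc_from_sheaf(demo_cache, GFP_NOWAIT, sheaf);
		if (!obj)
			break;
		/* ... use obj ... */
	}

	/* Hand any unused objects back to the percpu sheaves / barn. */
	kmem_cache_return_sheaf(demo_cache, GFP_KERNEL, sheaf);
	return 0;
}

/* Opportunistic allocation from a context that must not take slab locks. */
static void demo_nolock(void)
{
	void *p = kmalloc_nolock(64, __GFP_ZERO, NUMA_NO_NODE);

	if (p)
		kfree_nolock(p);
}

The prefilled-sheaf pattern is the one the maple tree conversion in this series relies on when only an upper bound of needed nodes is known; see the lib/maple_tree.c changes in the diff below.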
-rw-r--r--  include/linux/gfp.h                      2
-rw-r--r--  include/linux/kasan.h                   13
-rw-r--r--  include/linux/local_lock.h               2
-rw-r--r--  include/linux/local_lock_internal.h     16
-rw-r--r--  include/linux/maple_tree.h               6
-rw-r--r--  include/linux/memcontrol.h              12
-rw-r--r--  include/linux/rtmutex.h                 10
-rw-r--r--  include/linux/slab.h                    51
-rw-r--r--  kernel/bpf/stream.c                      2
-rw-r--r--  kernel/bpf/syscall.c                     2
-rw-r--r--  kernel/locking/rtmutex_common.h          9
-rw-r--r--  lib/maple_tree.c                       667
-rw-r--r--  lib/test_maple_tree.c                  137
-rw-r--r--  mm/Kconfig                               1
-rw-r--r--  mm/internal.h                            4
-rw-r--r--  mm/kasan/common.c                        5
-rw-r--r--  mm/page_alloc.c                         55
-rw-r--r--  mm/slab.h                               20
-rw-r--r--  mm/slab_common.c                        37
-rw-r--r--  mm/slub.c                             2357
-rw-r--r--  mm/vma_init.c                            1
-rw-r--r--  tools/include/linux/slab.h             165
-rw-r--r--  tools/testing/radix-tree/maple.c       514
-rw-r--r--  tools/testing/shared/linux.c           120
-rw-r--r--  tools/testing/shared/maple-shared.h     11
-rw-r--r--  tools/testing/shared/maple-shim.c        7
-rw-r--r--  tools/testing/vma/vma_internal.h       259
27 files changed, 2901 insertions, 1584 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5ebf26fcdcfa..0ceb4e09306c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
}
#define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
-struct page *alloc_pages_nolock_noprof(int nid, unsigned int order);
+struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
#define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index fe5ce9215821..b4973e7c2940 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
}
bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
- bool still_accessible);
+ bool still_accessible, bool no_quarantine);
/**
* kasan_slab_free - Poison, initialize, and quarantine a slab object.
* @object: Object to be freed.
@@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
* @Return true if KASAN took ownership of the object; false otherwise.
*/
static __always_inline bool kasan_slab_free(struct kmem_cache *s,
- void *object, bool init,
- bool still_accessible)
+ void *object, bool init,
+ bool still_accessible,
+ bool no_quarantine)
{
if (kasan_enabled())
- return __kasan_slab_free(s, object, init, still_accessible);
+ return __kasan_slab_free(s, object, init, still_accessible,
+ no_quarantine);
return false;
}
@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
}
static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
- bool init, bool still_accessible)
+ bool init, bool still_accessible,
+ bool no_quarantine)
{
return false;
}
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index 2ba846419524..0d91d060e3e9 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -66,6 +66,8 @@
*/
#define local_trylock(lock) __local_trylock(this_cpu_ptr(lock))
+#define local_lock_is_locked(lock) __local_lock_is_locked(lock)
+
/**
* local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
* interrupts if acquired
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index d80b5306a2c0..a4dc479157b5 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -17,7 +17,10 @@ typedef struct {
/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */
typedef struct {
- local_lock_t llock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ struct task_struct *owner;
+#endif
u8 acquired;
} local_trylock_t;
@@ -31,7 +34,7 @@ typedef struct {
.owner = NULL,
# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \
- .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) },
+ LOCAL_LOCK_DEBUG_INIT(lockname)
static inline void local_lock_acquire(local_lock_t *l)
{
@@ -81,7 +84,7 @@ do { \
local_lock_debug_init(lock); \
} while (0)
-#define __local_trylock_init(lock) __local_lock_init(lock.llock)
+#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock)
#define __spinlock_nested_bh_init(lock) \
do { \
@@ -162,6 +165,9 @@ do { \
!!tl; \
})
+/* preemption or migration must be disabled before calling __local_lock_is_locked */
+#define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired)
+
#define __local_lock_release(lock) \
do { \
local_trylock_t *tl; \
@@ -282,4 +288,8 @@ do { \
__local_trylock(lock); \
})
+/* migration must be disabled before calling __local_lock_is_locked */
+#define __local_lock_is_locked(__lock) \
+ (rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current)
+
#endif /* CONFIG_PREEMPT_RT */
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index bafe143b1f78..51a64ff23b88 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -442,7 +442,9 @@ struct ma_state {
struct maple_enode *node; /* The node containing this entry */
unsigned long min; /* The minimum index of this node - implied pivot min */
unsigned long max; /* The maximum index of this node - implied pivot max */
- struct maple_alloc *alloc; /* Allocated nodes for this operation */
+ struct slab_sheaf *sheaf; /* Allocated nodes for this operation */
+ struct maple_node *alloc; /* A single allocated node for fast path writes */
+ unsigned long node_request; /* The number of nodes to allocate for this operation */
enum maple_status status; /* The status of the state (active, start, none, etc) */
unsigned char depth; /* depth of tree descent during write */
unsigned char offset;
@@ -490,7 +492,9 @@ struct ma_wr_state {
.status = ma_start, \
.min = 0, \
.max = ULONG_MAX, \
+ .sheaf = NULL, \
.alloc = NULL, \
+ .node_request = 0, \
.mas_flags = 0, \
.store_type = wr_invalid, \
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fb27e3d2fdac..9924f157aae0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -341,17 +341,25 @@ enum page_memcg_data_flags {
__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};
+#define __OBJEXTS_ALLOC_FAIL MEMCG_DATA_OBJEXTS
#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS
#else /* CONFIG_MEMCG */
+#define __OBJEXTS_ALLOC_FAIL (1UL << 0)
#define __FIRST_OBJEXT_FLAG (1UL << 0)
#endif /* CONFIG_MEMCG */
enum objext_flags {
- /* slabobj_ext vector failed to allocate */
- OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
+ /*
+ * Use bit 0 with zero other bits to signal that slabobj_ext vector
+ * failed to allocate. The same bit 0 with valid upper bits means
+ * MEMCG_DATA_OBJEXTS.
+ */
+ OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
+ /* slabobj_ext vector allocated with kmalloc_nolock() */
+ OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
/* the next bit after the last actual flag */
__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
};
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index fa9f1021541e..ede4c6bf6f22 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
return READ_ONCE(lock->owner) != NULL;
}
+#ifdef CONFIG_RT_MUTEXES
+#define RT_MUTEX_HAS_WAITERS 1UL
+
+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
+{
+ unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+ return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
+}
+#endif
extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
/**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d5a8ab98035c..561597dd2164 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -335,6 +335,37 @@ struct kmem_cache_args {
* %NULL means no constructor.
*/
void (*ctor)(void *);
+ /**
+ * @sheaf_capacity: Enable sheaves of given capacity for the cache.
+ *
+ * With a non-zero value, allocations from the cache go through caching
+ * arrays called sheaves. Each cpu has a main sheaf that's always
+ * present, and a spare sheaf that may be not present. When both become
+ * empty, there's an attempt to replace an empty sheaf with a full sheaf
+ * from the per-node barn.
+ *
+ * When no full sheaf is available, and gfp flags allow blocking, a
+ * sheaf is allocated and filled from slab(s) using bulk allocation.
+ * Otherwise the allocation falls back to the normal operation
+ * allocating a single object from a slab.
+ *
+ * Analogically when freeing and both percpu sheaves are full, the barn
+ * may replace it with an empty sheaf, unless it's over capacity. In
+ * that case a sheaf is bulk freed to slab pages.
+ *
+ * The sheaves do not enforce NUMA placement of objects, so allocations
+ * via kmem_cache_alloc_node() with a node specified other than
+ * NUMA_NO_NODE will bypass them.
+ *
+ * Bulk allocation and free operations also try to use the cpu sheaves
+ * and barn, but fallback to using slab pages directly.
+ *
+ * When slub_debug is enabled for the cache, the sheaf_capacity argument
+ * is ignored.
+ *
+ * %0 means no sheaves will be created.
+ */
+ unsigned int sheaf_capacity;
};
struct kmem_cache *__kmem_cache_create_args(const char *name,
@@ -470,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__))
void kfree(const void *objp);
+void kfree_nolock(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
@@ -798,6 +830,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf **sheafp, unsigned int size);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+
+void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp,
+ struct slab_sheaf *sheaf) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_from_sheaf(...) \
+ alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__))
+
+unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf);
+
/*
* These macros allow declaring a kmem_buckets * parameter alongside size, which
* can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
@@ -910,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
+void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
+#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+
#define kmem_buckets_alloc(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index ab592db4a4bf..eb6c5a21c2ef 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -83,7 +83,7 @@ static struct bpf_stream_page *bpf_stream_page_replace(void)
struct bpf_stream_page *stream_page, *old_stream_page;
struct page *page;
- page = alloc_pages_nolock(NUMA_NO_NODE, 0);
+ page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
if (!page)
return NULL;
stream_page = page_address(page);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a48fa86f82a7..2a9456a3e730 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -583,7 +583,7 @@ static bool can_alloc_pages(void)
static struct page *__bpf_alloc_page(int nid)
{
if (!can_alloc_pages())
- return alloc_pages_nolock(nid, 0);
+ return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);
return alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 78dd3d8c6554..cf6ddd1b23a2 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -153,15 +153,6 @@ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
pi_tree.entry);
}
-#define RT_MUTEX_HAS_WAITERS 1UL
-
-static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
-{
- unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
-
- return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
/*
* Constants for rt mutex functions which have a selectable deadlock
* detection.
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index b4ee2d29d7a9..ab4c6c21a625 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -83,13 +83,9 @@
/*
* Maple state flags
- * * MA_STATE_BULK - Bulk insert mode
- * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
* * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
*/
-#define MA_STATE_BULK 1
-#define MA_STATE_REBALANCE 2
-#define MA_STATE_PREALLOC 4
+#define MA_STATE_PREALLOC 1
#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
@@ -176,26 +172,25 @@ static inline struct maple_node *mt_alloc_one(gfp_t gfp)
return kmem_cache_alloc(maple_node_cache, gfp);
}
-static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
+static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
- return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
+ kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}
-static inline void mt_free_one(struct maple_node *node)
+static void mt_return_sheaf(struct slab_sheaf *sheaf)
{
- kmem_cache_free(maple_node_cache, node);
+ kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
}
-static inline void mt_free_bulk(size_t size, void __rcu **nodes)
+static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
{
- kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
+ return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
}
-static void mt_free_rcu(struct rcu_head *head)
+static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
+ unsigned int size)
{
- struct maple_node *node = container_of(head, struct maple_node, rcu);
-
- kmem_cache_free(maple_node_cache, node);
+ return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
}
/*
@@ -208,7 +203,7 @@ static void mt_free_rcu(struct rcu_head *head)
static void ma_free_rcu(struct maple_node *node)
{
WARN_ON(node->parent != ma_parent_ptr(node));
- call_rcu(&node->rcu, mt_free_rcu);
+ kfree_rcu(node, rcu);
}
static void mt_set_height(struct maple_tree *mt, unsigned char height)
@@ -591,67 +586,6 @@ static __always_inline bool mte_dead_node(const struct maple_enode *enode)
}
/*
- * mas_allocated() - Get the number of nodes allocated in a maple state.
- * @mas: The maple state
- *
- * The ma_state alloc member is overloaded to hold a pointer to the first
- * allocated node or to the number of requested nodes to allocate. If bit 0 is
- * set, then the alloc contains the number of requested nodes. If there is an
- * allocated node, then the total allocated nodes is in that node.
- *
- * Return: The total number of nodes allocated
- */
-static inline unsigned long mas_allocated(const struct ma_state *mas)
-{
- if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
- return 0;
-
- return mas->alloc->total;
-}
-
-/*
- * mas_set_alloc_req() - Set the requested number of allocations.
- * @mas: the maple state
- * @count: the number of allocations.
- *
- * The requested number of allocations is either in the first allocated node,
- * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
- * no allocated node. Set the request either in the node or do the necessary
- * encoding to store in @mas->alloc directly.
- */
-static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
-{
- if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
- if (!count)
- mas->alloc = NULL;
- else
- mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
- return;
- }
-
- mas->alloc->request_count = count;
-}
-
-/*
- * mas_alloc_req() - get the requested number of allocations.
- * @mas: The maple state
- *
- * The alloc count is either stored directly in @mas, or in
- * @mas->alloc->request_count if there is at least one node allocated. Decode
- * the request count if it's stored directly in @mas->alloc.
- *
- * Return: The allocation request count.
- */
-static inline unsigned int mas_alloc_req(const struct ma_state *mas)
-{
- if ((unsigned long)mas->alloc & 0x1)
- return (unsigned long)(mas->alloc) >> 1;
- else if (mas->alloc)
- return mas->alloc->request_count;
- return 0;
-}
-
-/*
* ma_pivots() - Get a pointer to the maple node pivots.
* @node: the maple node
* @type: the node type
@@ -1032,24 +966,6 @@ static inline void mas_descend(struct ma_state *mas)
}
/*
- * mte_set_gap() - Set a maple node gap.
- * @mn: The encoded maple node
- * @gap: The offset of the gap to set
- * @val: The gap value
- */
-static inline void mte_set_gap(const struct maple_enode *mn,
- unsigned char gap, unsigned long val)
-{
- switch (mte_node_type(mn)) {
- default:
- break;
- case maple_arange_64:
- mte_to_node(mn)->ma64.gap[gap] = val;
- break;
- }
-}
-
-/*
* mas_ascend() - Walk up a level of the tree.
* @mas: The maple state
*
@@ -1152,79 +1068,24 @@ static int mas_ascend(struct ma_state *mas)
*
* Return: A pointer to a maple node.
*/
-static inline struct maple_node *mas_pop_node(struct ma_state *mas)
+static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
- struct maple_alloc *ret, *node = mas->alloc;
- unsigned long total = mas_allocated(mas);
- unsigned int req = mas_alloc_req(mas);
-
- /* nothing or a request pending. */
- if (WARN_ON(!total))
- return NULL;
+ struct maple_node *ret;
- if (total == 1) {
- /* single allocation in this ma_state */
+ if (mas->alloc) {
+ ret = mas->alloc;
mas->alloc = NULL;
- ret = node;
- goto single_node;
+ goto out;
}
- if (node->node_count == 1) {
- /* Single allocation in this node. */
- mas->alloc = node->slot[0];
- mas->alloc->total = node->total - 1;
- ret = node;
- goto new_head;
- }
- node->total--;
- ret = node->slot[--node->node_count];
- node->slot[node->node_count] = NULL;
+ if (WARN_ON_ONCE(!mas->sheaf))
+ return NULL;
-single_node:
-new_head:
- if (req) {
- req++;
- mas_set_alloc_req(mas, req);
- }
+ ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
+out:
memset(ret, 0, sizeof(*ret));
- return (struct maple_node *)ret;
-}
-
-/*
- * mas_push_node() - Push a node back on the maple state allocation.
- * @mas: The maple state
- * @used: The used maple node
- *
- * Stores the maple node back into @mas->alloc for reuse. Updates allocated and
- * requested node count as necessary.
- */
-static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
-{
- struct maple_alloc *reuse = (struct maple_alloc *)used;
- struct maple_alloc *head = mas->alloc;
- unsigned long count;
- unsigned int requested = mas_alloc_req(mas);
-
- count = mas_allocated(mas);
-
- reuse->request_count = 0;
- reuse->node_count = 0;
- if (count) {
- if (head->node_count < MAPLE_ALLOC_SLOTS) {
- head->slot[head->node_count++] = reuse;
- head->total++;
- goto done;
- }
- reuse->slot[0] = head;
- reuse->node_count = 1;
- }
-
- reuse->total = count + 1;
- mas->alloc = reuse;
-done:
- if (requested > 1)
- mas_set_alloc_req(mas, requested - 1);
+ return ret;
}
/*
@@ -1234,121 +1095,81 @@ done:
*/
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
- struct maple_alloc *node;
- unsigned long allocated = mas_allocated(mas);
- unsigned int requested = mas_alloc_req(mas);
- unsigned int count;
- void **slots = NULL;
- unsigned int max_req = 0;
-
- if (!requested)
+ if (!mas->node_request)
return;
- mas_set_alloc_req(mas, 0);
- if (mas->mas_flags & MA_STATE_PREALLOC) {
- if (allocated)
+ if (mas->node_request == 1) {
+ if (mas->sheaf)
+ goto use_sheaf;
+
+ if (mas->alloc)
return;
- WARN_ON(!allocated);
- }
- if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
- node = (struct maple_alloc *)mt_alloc_one(gfp);
- if (!node)
- goto nomem_one;
+ mas->alloc = mt_alloc_one(gfp);
+ if (!mas->alloc)
+ goto error;
- if (allocated) {
- node->slot[0] = mas->alloc;
- node->node_count = 1;
- } else {
- node->node_count = 0;
- }
+ mas->node_request = 0;
+ return;
+ }
- mas->alloc = node;
- node->total = ++allocated;
- node->request_count = 0;
- requested--;
+use_sheaf:
+ if (unlikely(mas->alloc)) {
+ kfree(mas->alloc);
+ mas->alloc = NULL;
}
- node = mas->alloc;
- while (requested) {
- max_req = MAPLE_ALLOC_SLOTS - node->node_count;
- slots = (void **)&node->slot[node->node_count];
- max_req = min(requested, max_req);
- count = mt_alloc_bulk(gfp, max_req, slots);
- if (!count)
- goto nomem_bulk;
+ if (mas->sheaf) {
+ unsigned long refill;
- if (node->node_count == 0) {
- node->slot[0]->node_count = 0;
- node->slot[0]->request_count = 0;
+ refill = mas->node_request;
+ if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
+ mas->node_request = 0;
+ return;
}
- node->node_count += count;
- allocated += count;
- /* find a non-full node*/
- do {
- node = node->slot[0];
- } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS));
- requested -= count;
- }
- mas->alloc->total = allocated;
- return;
+ if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
+ goto error;
-nomem_bulk:
- /* Clean up potential freed allocations on bulk failure */
- memset(slots, 0, max_req * sizeof(unsigned long));
- mas->alloc->total = allocated;
-nomem_one:
- mas_set_alloc_req(mas, requested);
- mas_set_err(mas, -ENOMEM);
-}
+ mas->node_request = 0;
+ return;
+ }
-/*
- * mas_free() - Free an encoded maple node
- * @mas: The maple state
- * @used: The encoded maple node to free.
- *
- * Uses rcu free if necessary, pushes @used back on the maple state allocations
- * otherwise.
- */
-static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
-{
- struct maple_node *tmp = mte_to_node(used);
+ mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
+ if (likely(mas->sheaf)) {
+ mas->node_request = 0;
+ return;
+ }
- if (mt_in_rcu(mas->tree))
- ma_free_rcu(tmp);
- else
- mas_push_node(mas, tmp);
+error:
+ mas_set_err(mas, -ENOMEM);
}
-/*
- * mas_node_count_gfp() - Check if enough nodes are allocated and request more
- * if there is not enough nodes.
- * @mas: The maple state
- * @count: The number of nodes needed
- * @gfp: the gfp flags
- */
-static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
+static inline void mas_empty_nodes(struct ma_state *mas)
{
- unsigned long allocated = mas_allocated(mas);
+ mas->node_request = 0;
+ if (mas->sheaf) {
+ mt_return_sheaf(mas->sheaf);
+ mas->sheaf = NULL;
+ }
- if (allocated < count) {
- mas_set_alloc_req(mas, count - allocated);
- mas_alloc_nodes(mas, gfp);
+ if (mas->alloc) {
+ kfree(mas->alloc);
+ mas->alloc = NULL;
}
}
/*
- * mas_node_count() - Check if enough nodes are allocated and request more if
- * there is not enough nodes.
+ * mas_free() - Free an encoded maple node
* @mas: The maple state
- * @count: The number of nodes needed
+ * @used: The encoded maple node to free.
*
- * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
+ * Uses rcu free if necessary, pushes @used back on the maple state allocations
+ * otherwise.
*/
-static void mas_node_count(struct ma_state *mas, int count)
+static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
- return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
+ ma_free_rcu(mte_to_node(used));
}
/*
@@ -1878,21 +1699,7 @@ static inline int mab_calc_split(struct ma_state *mas,
* end on a NULL entry, with the exception of the left-most leaf. The
* limitation means that the split of a node must be checked for this condition
* and be able to put more data in one direction or the other.
- */
- if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
- *mid_split = 0;
- split = b_end - mt_min_slots[bn->type];
-
- if (!ma_is_leaf(bn->type))
- return split;
-
- mas->mas_flags |= MA_STATE_REBALANCE;
- if (!bn->slot[split])
- split--;
- return split;
- }
-
- /*
+ *
* Although extremely rare, it is possible to enter what is known as the 3-way
* split scenario. The 3-way split comes about by means of a store of a range
* that overwrites the end and beginning of two full nodes. The result is a set
@@ -2040,27 +1847,6 @@ static inline void mab_mas_cp(struct maple