// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/gpu_buddy.h>
/**
* gpu_buddy_assert - assert a condition in the buddy allocator
* @condition: condition expected to be true
*
* When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
* a WARN_ON() and also calls kunit_fail_current_test() so that any running
* kunit test is properly marked as failed. The stringified condition is
* included in the failure message for easy identification.
*
* When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
* builds retain the same warning semantics as before.
*/
#if IS_ENABLED(CONFIG_KUNIT)
#include <kunit/test-bug.h>
#define gpu_buddy_assert(condition) do { \
if (WARN_ON(!(condition))) \
kunit_fail_current_test("gpu_buddy_assert(" #condition ")"); \
} while (0)
#else
#define gpu_buddy_assert(condition) WARN_ON(!(condition))
#endif
static struct kmem_cache *slab_blocks;
static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
return block->header & GPU_BUDDY_HEADER_STATE;
}
static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}
static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}
static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
{
u64 offset = gpu_buddy_block_offset(block);
if (!offset)
/*
* __ffs64(0) is undefined; offset 0 is maximally aligned, so return
* a value greater than any possible alignment.
*/
return 64 + 1;
return __ffs64(offset);
}
RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
struct gpu_buddy_block, rb,
unsigned int, subtree_max_alignment,
gpu_buddy_block_offset_alignment);
static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
struct gpu_buddy_block *parent,
unsigned int order,
u64 offset)
{
struct gpu_buddy_block *block;
BUG_ON(order > GPU_BUDDY_MAX_ORDER);
block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
if (!block)
return NULL;
block->header = offset;
block->header |= order;
block->parent = parent;
RB_CLEAR_NODE(&block->rb);
BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
return block;
}
static void gpu_block_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
kmem_cache_free(slab_blocks, block);
}
static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
return gpu_buddy_block_is_clear(block) ?
GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}
static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}
static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
return rbtree_get_free_block(rb_last(root));
}
static bool rbtree_is_empty(struct rb_root *root)
{
return RB_EMPTY_ROOT(root);
}
static void rbtree_insert(struct gpu_buddy *mm,
struct gpu_buddy_block *block,
enum gpu_buddy_free_tree tree)
{
struct rb_node **link, *parent = NULL;
unsigned int block_alignment, order;
struct gpu_buddy_block *node;
struct rb_root *root;
order = gpu_buddy_block_order(block);
block_alignment = gpu_buddy_block_offset_alignment(block);
root = &mm->free_trees[tree][order];
link = &root->rb_node;
while (*link) {
parent = *link;
node = rbtree_get_free_block(parent);
/*
* Manual augmentation update during insertion traversal. Required
* because rb_insert_augmented() only calls rotate callback during
* rotations. This ensures all ancestors on the insertion path have
* correct subtree_max_alignment values.
*/
if (node->subtree_max_alignment < block_alignment)
node->subtree_max_alignment = block_alignment;
if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
link = &parent->rb_left;
else
link = &parent->rb_right;
}
block->subtree_max_alignment = block_alignment;
rb_link_node(&block->rb, parent, link);
rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
}
static void rbtree_remove(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
unsigned int order = gpu_buddy_block_order(block);
enum gpu_buddy_free_tree tree;
struct rb_root *root;
tree = get_block_tree(block);
root = &mm->free_trees[tree][order];
rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
RB_CLEAR_NODE(&block->rb);
}
static void clear_reset(struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}
static void mark_cleared(struct gpu_buddy_block *block)
{
block->header |= GPU_BUDDY_HEADER_CLEAR;
}
static void mark_allocated(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_ALLOCATED;
rbtree_remove(mm, block);
}
static void mark_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
enum gpu_buddy_free_tree tree;
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_FREE;
tree = get_block_tree(block);
rbtree_insert(mm, block, tree);
}
static void mark_split(struct gpu_buddy *mm,
struct gpu_buddy_block *block)
{
block->header &= ~GPU_BUDDY_HEADER_STATE;
block->header |= GPU_BUDDY_SPLIT;
rbtree_remove(mm, block);
}
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
return s1 <= e2 && e1 >= s2;
}
static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
return s1 <= s2 && e1 >= e2;
}
static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
struct gpu_buddy_block *parent;
parent = block->parent;
if (!parent)
return NULL;
if (parent->left == block)
return parent->right;
return parent->left;
}
static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
struct gpu_buddy_block *block,
bool force_merge)
{
struct gpu_buddy_block *parent;
unsigned int order;
while ((parent = block->parent)) {
struct gpu_buddy_block *buddy;
buddy = __get_buddy(block);
if (!gpu_buddy_block_is_free(buddy))
break;
if (!force_merge) {
/*
* Check the block and its buddy clear state and exit
* the loop if they both have the dissimilar state.
*/
if (gpu_buddy_block_is_clear(block) !=
gpu_buddy_block_is_clear(buddy))
break;
if (gpu_buddy_block_is_clear(block))
mark_cleared(parent);
}
rbtree_remove(mm, buddy);
if (force_merge && gpu_buddy_block_is_clear(buddy))
mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
gpu_block_free(mm, block);
gpu_block_free(mm, buddy);
block = parent;
}
order = gpu_buddy_block_order(block);
mark_free(mm, block);
return order;
}
static int __force_merge(struct gpu_buddy *mm,
u64 start,
u64 end,
unsigned int min_order)
{
unsigned int tree, order;
int i;
if (!min_order)
return -ENOMEM;
if (min_order > mm->max_order)
return -EINVAL;
for_each_free_tree(tree) {
for (i = min_order - 1; i >= 0; i--) {
struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
while (iter) {
struct gpu_buddy_block *block, *buddy;
u64 block_start, block_end;
block = rbtree_get_free_block(iter);
iter = rb_prev(iter);
if (!block || !block->parent)
continue;
block_start = gpu_buddy_block_offset(block);
block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!
|