// SPDX-License-Identifier: GPL-2.0
/*
* Code for working with individual keys, and sorted sets of keys with in a
* btree node
*
* Copyright 2012 Google, Inc.
*/
#include "bcachefs.h"
#include "btree_cache.h"
#include "bset.h"
#include "eytzinger.h"
#include "trace.h"
#include "util.h"
#include <linux/unaligned.h>
#include <linux/console.h>
#include <linux/random.h>
#include <linux/prefetch.h>
static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
struct btree *);
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
{
unsigned n = ARRAY_SIZE(iter->data);
while (n && __btree_node_iter_set_end(iter, n - 1))
--n;
return n;
}
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
{
return bch2_bkey_to_bset_inlined(b, k);
}
/*
* There are never duplicate live keys in the btree - but including keys that
* have been flagged as deleted (and will be cleaned up later) we _will_ see
* duplicates.
*
* Thus the sort order is: usual key comparison first, but for keys that compare
* equal the deleted key(s) come first, and the (at most one) live version comes
* last.
*
* The main reason for this is insertion: to handle overwrites, we first iterate
* over keys that compare equal to our insert key, and then insert immediately
* prior to the first key greater than the key we're inserting - our insert
* position will be after all keys that compare equal to our insert key, which
* by the time we actually do the insert will all be deleted.
*/
void bch2_dump_bset(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned set)
{
struct bkey_packed *_k, *_n;
struct bkey uk, n;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
if (!i->u64s)
return;
for (_k = i->start;
_k < vstruct_last(i);
_k = _n) {
_n = bkey_p_next(_k);
if (!_k->u64s) {
printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set,
_k->_data - i->_data);
break;
}
k = bkey_disassemble(b, _k, &uk);
printbuf_reset(&buf);
if (c)
bch2_bkey_val_to_text(&buf, c, k);
else
bch2_bkey_to_text(&buf, k.k);
printk(KERN_ERR "block %u key %5zu: %s\n", set,
_k->_data - i->_data, buf.buf);
if (_n == vstruct_last(i))
continue;
n = bkey_unpack_key(b, _n);
if (bpos_lt(n.p, k.k->p)) {
printk(KERN_ERR "Key skipped backwards\n");
continue;
}
if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
printk(KERN_ERR "Duplicate keys\n");
}
printbuf_exit(&buf);
}
void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
{
console_lock();
for_each_bset(b, t)
bch2_dump_bset(c, b, bset(b, t), t - b->set);
console_unlock();
}
void bch2_dump_btree_node_iter(struct btree *b,
struct btree_node_iter *iter)
{
struct btree_node_iter_set *set;
struct printbuf buf = PRINTBUF;
printk(KERN_ERR "btree node iter with %u/%u sets:\n",
__btree_node_iter_used(iter), b->nsets);
btree_node_iter_for_each(iter, set) {
struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
struct bset_tree *t = bch2_bkey_to_bset(b, k);
struct bkey uk = bkey_unpack_key(b, k);
printbuf_reset(&buf);
bch2_bkey_to_text(&buf, &uk);
printk(KERN_ERR "set %zu key %u: %s\n",
t - b->set, set->k, buf.buf);
}
printbuf_exit(&buf);
}
struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
{
struct bkey_packed *k;
struct btree_nr_keys nr = {};
for_each_bset(b, t)
bset_tree_for_each_key(b, t, k)
if (!bkey_deleted(k))
btree_keys_account_key_add(&nr, t - b->set, k);
return nr;
}
void __bch2_verify_btree_nr_keys(struct btree *b)
{
struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
static void __bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
struct btree *b)
{
struct btree_node_iter iter = *_iter;
const struct bkey_packed *k, *n;
k = bch2_btree_node_iter_peek_all(&iter, b);
__bch2_btree_node_iter_advance(&iter, b);
n = bch2_btree_node_iter_peek_all(&iter, b);
bkey_unpack_key(b, k);
if (n &&
bkey_iter_cmp(b, k, n) > 0) {
struct btree_node_iter_set *set;
struct bkey ku = bkey_unpack_key(b, k);
struct bkey nu = bkey_unpack_key(b, n);
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
bch2_dump_btree_node(NULL, b);
bch2_bkey_to_text(&buf1, &ku);
bch2_bkey_to_text(&buf2, &nu);
printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
buf1.buf, buf2.buf);
printk(KERN_ERR "iter was:");
btree_node_iter_for_each(_iter, set) {
struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
struct bset_tree *t = bch2_bkey_to_bset(b, k2);
printk(" [%zi %zi]", t - b->set,
k2->_data - bset(b, t)->_data);
}
panic("\n");
}
}
void __bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b)
{
struct btree_node_iter_set *set, *s2;
struct bkey_packed *k, *p;
if (bch2_btree_node_iter_end(iter))
return;
/* Verify no duplicates: */
btree_node_iter_for_each(iter, set) {
BUG_ON(set->k > set->end);
btree_node_iter_for_each(iter, s2)
BUG_ON(set != s2 && set->end == s2->end);
}
/* Verify that set->end is correct: */
btree_node_iter_for_each(iter, set) {
for_each_bset(b, t)
if (set->end == t->end_offset) {
BUG_ON(set->k < btree_bkey_first_offset(t) ||
set->k >= t->end_offset);
goto found;
}
BUG();
found:
do {} while (0);
}
/* Verify iterator is sorted: */
btree_node_iter_for_each(iter, set)
BUG_ON(set != iter->data &&
btree_node_iter_cmp(b, set[-1], set[0]) > 0);
k = bch2_btree_node_iter_peek_all(iter, b);
for_each_bset(b, t) {
if (iter->data[0].end == t->end_offset)
continue;
p = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t));
BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
}
}
static void __bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
struct bkey_packed *insert, unsigned clobber_u64s)
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
struct bkey_packed *next = (void *) ((u64 *) where-&g
|