summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2024-02-11 22:48:05 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2024-07-14 19:00:13 -0400
commit fb23d57a6dfc4e521c003dc542799f07d22d269e (patch)
tree 0a814ccf2c3c06614f1b2ed0a48950f3a57783e4
parent 4c4a7d48bd59380fa4fc75f2cd341e9de09adbf7 (diff)
download linux-fb23d57a6dfc4e521c003dc542799f07d22d269e.tar.gz
linux-fb23d57a6dfc4e521c003dc542799f07d22d269e.tar.bz2
linux-fb23d57a6dfc4e521c003dc542799f07d22d269e.zip
bcachefs: Convert gc to new accounting
Rewrite fsck/gc for the new accounting scheme.

This adds a second set of in-memory accounting counters for gc to use; as with other parts of gc, we run all triggers in TRIGGER_GC mode, then compare what we calculated to the existing in-memory accounting at the end.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/alloc_background.c34
-rw-r--r--fs/bcachefs/alloc_foreground.c16
-rw-r--r--fs/bcachefs/bcachefs.h4
-rw-r--r--fs/bcachefs/btree_gc.c133
-rw-r--r--fs/bcachefs/btree_trans_commit.c4
-rw-r--r--fs/bcachefs/buckets.c185
-rw-r--r--fs/bcachefs/buckets.h16
-rw-r--r--fs/bcachefs/buckets_types.h7
-rw-r--r--fs/bcachefs/disk_accounting.c191
-rw-r--r--fs/bcachefs/disk_accounting.h86
-rw-r--r--fs/bcachefs/ec.c97
-rw-r--r--fs/bcachefs/inode.c43
-rw-r--r--fs/bcachefs/recovery.c3
-rw-r--r--fs/bcachefs/replicas.c86
-rw-r--r--fs/bcachefs/replicas.h1
-rw-r--r--fs/bcachefs/super.c9
-rw-r--r--fs/bcachefs/util.h4
17 files changed, 343 insertions, 576 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 3df1099750af..9bb0dbe134d5 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -774,7 +774,7 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s
};
s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented };
- return bch2_disk_accounting_mod(trans, &acc, d, 3);
+ return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc);
}
int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
@@ -894,7 +894,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
old_a->cached_sectors) {
ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
- -((s64) old_a->cached_sectors));
+ -((s64) old_a->cached_sectors),
+ flags & BTREE_TRIGGER_gc);
if (ret)
goto err;
}
@@ -973,35 +974,6 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
bch2_gc_gens_async(c);
}
-
- if ((flags & BTREE_TRIGGER_gc) &&
- (flags & BTREE_TRIGGER_bucket_invalidate)) {
- struct bch_alloc_v4 new_a_convert;
- const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
-
- percpu_down_read(&c->mark_lock);
- struct bucket *g = gc_bucket(ca, new.k->p.offset);
- if (unlikely(!g)) {
- percpu_up_read(&c->mark_lock);
- goto invalid_bucket;
- }
- g->gen_valid = 1;
-
- bucket_lock(g);
-
- g->gen_valid = 1;
- g->gen = new_a->gen;
- g->data_type = new_a->data_type;
- g->stripe = new_a->stripe;
- g->stripe_redundancy = new_a->stripe_redundancy;
- g->dirty_sectors = new_a->dirty_sectors;
- g->cached_sectors = new_a->cached_sectors;
-
- bucket_unlock(g);
- percpu_up_read(&c->mark_lock);
-
- bch2_dev_usage_update(c, ca, old_a, new_a);
- }
err:
printbuf_exit(&buf);
bch2_dev_put(ca);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 991e07a79064..cabf866c7956 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1708,15 +1708,13 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "capacity\t%llu\n", c->capacity);
prt_printf(out, "reserved\t%llu\n", c->reserved);
- percpu_down_read(&c->mark_lock);
- prt_printf(out, "hidden\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
- prt_printf(out, "btree\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
- prt_printf(out, "data\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.data));
- prt_printf(out, "cached\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
- prt_printf(out, "reserved\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
- prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
- prt_printf(out, "nr_inodes\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
- percpu_up_read(&c->mark_lock);
+ prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden));
+ prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree));
+ prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->usage->data));
+ prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->usage->cached));
+ prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->usage->reserved));
+ prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
+ prt_printf(out, "nr_inodes\t%llu\n", percpu_u64_get(&c->usage->nr_inodes));
prt_newline(out);
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index b9f5327ab033..33605fa8e70f 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -546,7 +546,6 @@ struct bch_dev {
struct rw_semaphore bucket_lock;
struct bch_dev_usage __percpu *usage;
- struct bch_dev_usage __percpu *usage_gc;
/* Allocator: */
u64 new_fs_bucket_idx;
@@ -741,7 +740,7 @@ struct bch_fs {
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
- struct bch_accounting_mem accounting;
+ struct bch_accounting_mem accounting[2];
struct bch_replicas_cpu replicas;
struct bch_replicas_cpu replicas_gc;
@@ -890,7 +889,6 @@ struct bch_fs {
seqcount_t usage_lock;
struct bch_fs_usage_base __percpu *usage;
- struct bch_fs_usage __percpu *usage_gc;
u64 __percpu *online_reserved;
struct io_clock io_clock[2];
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index c79258e3e69c..0fe869cff8be 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -20,6 +20,7 @@
#include "buckets.h"
#include "clock.h"
#include "debug.h"
+#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -735,132 +736,25 @@ static int bch2_mark_superblocks(struct bch_fs *c)
static void bch2_gc_free(struct bch_fs *c)
{
+ bch2_accounting_free(&c->accounting[1]);
+
genradix_free(&c->reflink_gc_table);
genradix_free(&c->gc_stripes);
for_each_member_device(c, ca) {
kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
ca->buckets_gc = NULL;
-
- free_percpu(ca->usage_gc);
- ca->usage_gc = NULL;
- }
-
- free_percpu(c->usage_gc);
- c->usage_gc = NULL;
-}
-
-static int bch2_gc_done(struct bch_fs *c)
-{
- struct bch_dev *ca = NULL;
- struct printbuf buf = PRINTBUF;
- unsigned i;
- int ret = 0;
-
- percpu_down_write(&c->mark_lock);
-
-#define copy_field(_err, _f, _msg, ...) \
- if (fsck_err_on(dst->_f != src->_f, c, _err, \
- _msg ": got %llu, should be %llu" , ##__VA_ARGS__, \
- dst->_f, src->_f)) \
- dst->_f = src->_f
-#define copy_dev_field(_err, _f, _msg, ...) \
- copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__)
-#define copy_fs_field(_err, _f, _msg, ...) \
- copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)
-
- __for_each_member_device(c, ca) {
- /* XXX */
- struct bch_dev_usage *dst = this_cpu_ptr(ca->usage);
- struct bch_dev_usage *src = (void *)
- bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
- dev_usage_u64s());
-
- for (i = 0; i < BCH_DATA_NR; i++) {
- copy_dev_field(dev_usage_buckets_wrong,
- d[i].buckets, "%s buckets", bch2_data_type_str(i));
- copy_dev_field(dev_usage_sectors_wrong,
- d[i].sectors, "%s sectors", bch2_data_type_str(i));
- copy_dev_field(dev_usage_fragmented_wrong,
- d[i].fragmented, "%s fragmented", bch2_data_type_str(i));
- }
}
-
- {
-#if 0
- unsigned nr = fs_usage_u64s(c);
- /* XX: */
- struct bch_fs_usage *dst = this_cpu_ptr(c->usage);
- struct bch_fs_usage *src = (void *)
- bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
-
- copy_fs_field(fs_usage_hidden_wrong,
- b.hidden, "hidden");
- copy_fs_field(fs_usage_btree_wrong,
- b.btree, "btree");
-
- copy_fs_field(fs_usage_data_wrong,
- b.data, "data");
- copy_fs_field(fs_usage_cached_wrong,
- b.cached, "cached");
- copy_fs_field(fs_usage_reserved_wrong,
- b.reserved, "reserved");
- copy_fs_field(fs_usage_nr_inodes_wrong,
- b.nr_inodes,"nr_inodes");
-
- for (i = 0; i < BCH_REPLICAS_MAX; i++)
- copy_fs_field(fs_usage_persistent_reserved_wrong,
- persistent_reserved[i],
- "persistent_reserved[%i]", i);
-
- for (i = 0; i < c->replicas.nr; i++) {
- struct bch_replicas_entry_v1 *e =
- cpu_replicas_entry(&c->replicas, i);
-
- printbuf_reset(&buf);
- bch2_replicas_entry_to_text(&buf, e);
-
- copy_fs_field(fs_usage_replicas_wrong,
- replicas[i], "%s", buf.buf);
- }
-#endif
- }
-
-#undef copy_fs_field
-#undef copy_dev_field
-#undef copy_stripe_field
-#undef copy_field
-fsck_err:
- bch2_dev_put(ca);
- bch_err_fn(c, ret);
- percpu_up_write(&c->mark_lock);
- printbuf_exit(&buf);
- return ret;
}
static int bch2_gc_start(struct bch_fs *c)
{
- BUG_ON(c->usage_gc);
-
- c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
- sizeof(u64), GFP_KERNEL);
- if (!c->usage_gc) {
- bch_err(c, "error allocating c->usage_gc");
- return -BCH_ERR_ENOMEM_gc_start;
- }
-
for_each_member_device(c, ca) {
- BUG_ON(ca->usage_gc);
-
- ca->usage_gc = alloc_percpu(struct bch_dev_usage);
- if (!ca->usage_gc) {
- bch_err(c, "error allocating ca->usage_gc");
+ int ret = bch2_dev_usage_init(ca, true);
+ if (ret) {
bch2_dev_put(ca);
- return -BCH_ERR_ENOMEM_gc_start;
+ return ret;
}
-
- this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
- ca->mi.nbuckets - ca->mi.first_bucket);
}
return 0;
@@ -908,6 +802,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
gc.data_type = old->data_type;
gc.dirty_sectors = old->dirty_sectors;
}
+ percpu_up_read(&c->mark_lock);
/*
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
@@ -916,9 +811,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
alloc_data_type_set(&gc, gc.data_type);
if (gc.data_type != old_gc.data_type ||
- gc.dirty_sectors != old_gc.dirty_sectors)
- bch2_dev_usage_update(c, ca, &old_gc, &gc);
- percpu_up_read(&c->mark_lock);
+ gc.dirty_sectors != old_gc.dirty_sectors) {
+ ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
+ if (ret)
+ return ret;
+ }
gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
@@ -1235,7 +1132,9 @@ int bch2_check_allocations(struct bch_fs *c)
gc_pos_set(c, gc_phase(GC_PHASE_start));
ret = bch2_mark_superblocks(c);
- BUG_ON(ret);
+ bch_err_msg(c, ret, "marking superblocks");
+ if (ret)
+ goto out;
ret = bch2_gc_btrees(c);
if (ret)
@@ -1246,7 +1145,7 @@ int bch2_check_allocations(struct bch_fs *c)
bch2_journal_block(&c->journal);
out:
ret = bch2_gc_alloc_done(c) ?:
- bch2_gc_done(c) ?:
+ bch2_accounting_gc_done(c) ?:
bch2_gc_stripes_done(c) ?:
bch2_gc_reflink_done(c);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 92305c12cb75..30e24725eb12 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -724,7 +724,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
a->k.version = journal_pos_to_bversion(&trans->journal_res,
(u64 *) entry - (u64 *) trans->journal_entries);
BUG_ON(bversion_zero(a->k.version));
- ret = bch2_accounting_mem_mod(trans, accounting_i_to_s_c(a));
+ ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false);
if (ret)
goto revert_fs_usage;
}
@@ -812,7 +812,7 @@ revert_fs_usage:
struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);
bch2_accounting_neg(a);
- bch2_accounting_mem_mod(trans, a.c);
+ bch2_accounting_mem_mod_locked(trans, a.c, false);
bch2_accounting_neg(a);
}
percpu_up_read(&c->mark_lock);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 12faf2ffda1c..e3bf7ed5c073 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -84,96 +84,6 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
}
}
-void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
- const struct bch_alloc_v4 *old,
- const struct bch_alloc_v4 *new)
-{
- struct bch_fs_usage *fs_usage;
- struct bch_dev_usage *u;
-
- preempt_disable();
- fs_usage = this_cpu_ptr(c->usage_gc);
-
- if (data_type_is_hidden(old->data_type))
- fs_usage->b.hidden -= ca->mi.bucket_size;
- if (data_type_is_hidden(new->data_type))
- fs_usage->b.hidden += ca->mi.bucket_size;
-
- u = this_cpu_ptr(ca->usage_gc);
-
- u->d[old->data_type].buckets--;
- u->d[new->data_type].buckets++;
-
- u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old);
- u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new);
-
- u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old);
- u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new);
-
- u->d[BCH_DATA_cached].sectors -= old->cached_sectors;
- u->d[BCH_DATA_cached].sectors += new->cached_sectors;
-
- unsigned old_unstriped = bch2_bucket_sectors_unstriped(*old);
- u->d[BCH_DATA_unstriped].buckets -= old_unstriped != 0;
- u->d[BCH_DATA_unstriped].sectors -= old_unstriped;
-
- unsigned new_unstriped = bch2_bucket_sectors_unstriped(*new);
- u->d[BCH_DATA_unstriped].buckets += new_unstriped != 0;
- u->d[BCH_DATA_unstriped].sectors += new_unstriped;
-
- preempt_enable();
-}
-
-int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
- struct bch_replicas_entry_v1 *r, s64 sectors)
-{
- struct bch_fs_usage *fs_usage;
- int idx, ret = 0;
- struct printbuf buf = PRINTBUF;
-
- percpu_down_read(&c->mark_lock);
-
- idx = bch2_replicas_entry_idx(c, r);
- if (idx < 0 &&
- fsck_err(c, ptr_to_missing_replicas_entry,
- "no replicas entry\n while marking %s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- percpu_up_read(&c->mark_lock);
- ret = bch2_mark_replicas(c, r);
- percpu_down_read(&c->mark_lock);
-
- if (ret)
- goto err;
- idx = bch2_replicas_entry_idx(c, r);
- }
- if (idx < 0) {
- ret = -1;
- goto err;
- }
-
- preempt_disable();
- fs_usage = this_cpu_ptr(c->usage_gc);
- fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
- fs_usage->replicas[idx] += sectors;
- preempt_enable();
-err:
-fsck_err:
- percpu_up_read(&c->mark_lock);
- printbuf_exit(&buf);
- return ret;
-}
-
-static inline int update_cached_sectors(struct bch_fs *c,
- struct bkey_s_c k,
- unsigned dev, s64 sectors)
-{
- struct bch_replicas_padded r;
-
- bch2_replicas_entry_cached(&r.e, dev);
-
- return bch2_update_replicas(c, k, &r.e, sectors);
-}
-
static int bch2_check_fix_ptr(struct btree_trans *trans,
struct bkey_s_c k,
struct extent_ptr_decoded p,
@@ -574,8 +484,6 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
bool warn = false;
percpu_down_read(&c->mark_lock);
- preempt_disable();
- struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
s64 added = src->btree + src->data + src->reserved;
@@ -603,13 +511,9 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
this_cpu_sub(*c->online_reserved, added);
}
- dst->hidden += src->hidden;
- dst->btree += src->btree;
- dst->data += src->data;
- dst->cached += src->cached;
- dst->reserved += src->reserved;
- dst->nr_inodes += src->nr_inodes;
-
+ preempt_disable();
+ struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
+ acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
preempt_enable();
percpu_up_read(&c->mark_lock);
@@ -691,13 +595,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new);
- if (!ret) {
- alloc_to_bucket(g, new);
- bch2_dev_usage_update(c, ca, &old, &new);
- }
+ alloc_to_bucket(g, new);
bucket_unlock(g);
err_unlock:
percpu_up_read(&c->mark_lock);
+
+ if (!ret)
+ ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
}
err:
bch2_dev_put(ca);
@@ -742,7 +646,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
};
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = data_type;
- ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1);
+ ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -751,8 +655,6 @@ err:
if (flags & BTREE_TRIGGER_gc) {
struct bch_fs *c = trans->c;
- BUG_ON(!(flags & BTREE_TRIGGER_gc));
-
struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
if (!m) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu",
@@ -775,11 +677,16 @@ err:
m->block_sectors[p.ec.block] += sectors;
- struct bch_replicas_padded r = m->r;
+ struct disk_accounting_pos acc = {
+ .type = BCH_DISK_ACCOUNTING_replicas,
+ };
+ memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
mutex_unlock(&c->ec_stripes_heap_lock);
- r.e.data_type = data_type;
- bch2_update_replicas(c, k, &r.e, sectors);
+ acc.replicas.data_type = data_type;
+ int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true);
+ if (ret)
+ return ret;
}
return 0;
@@ -791,7 +698,6 @@ static int __trigger_extent(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
bool gc = flags & BTREE_TRIGGER_gc;
- struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -818,11 +724,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (p.ptr.cached) {
if (!stale) {
- ret = !gc
- ? bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors)
- : update_cached_sectors(c, k, p.ptr.dev, disk_sectors);
- bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
- bch2_err_str(ret));
+ ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc);
if (ret)
return ret;
}
@@ -844,16 +746,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (acc.replicas.nr_devs) {
- ret = !gc
- ? bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1)
- : bch2_update_replicas(c, k, &acc.replicas, replicas_sectors);
- if (unlikely(ret && gc)) {
- struct printbuf buf = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
- printbuf_exit(&buf);
- }
+ ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc);
if (ret)
return ret;
}
@@ -906,36 +799,18 @@ static int __trigger_reservation(struct btree_trans *trans,
enum btree_id btree_id, unsigned level, struct bkey_s_c k,
enum btree_iter_update_trigger_flags flags)
{
- struct bch_fs *c = trans->c;
- unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
- s64 sectors = (s64) k.k->size;
+ if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
+ s64 sectors = k.k->size;
- if (flags & BTREE_TRIGGER_overwrite)
- sectors = -sectors;
+ if (flags & BTREE_TRIGGER_overwrite)
+ sectors = -sectors;
- if (flags & BTREE_TRIGGER_transactional) {
struct disk_accounting_pos acc = {
.type = BCH_DISK_ACCOUNTING_persistent_reserved,
- .persistent_reserved.nr_replicas = replicas,
+ .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
};
- return bch2_disk_accounting_mod(trans, &acc, &sectors, 1);
- }
-
- if (flags & BTREE_TRIGGER_gc) {
- sectors *= replicas;
-
- percpu_down_read(&c->mark_lock);
- preempt_disable();
-
- struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);
-
- replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
- fs_usage->b.reserved += sectors;
- fs_usage->persistent_reserved[replicas - 1] += sectors;
-
- preempt_enable();
- percpu_up_read(&c->mark_lock);
+ return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, flags & BTREE_TRIGGER_gc);
}
return 0;
@@ -989,10 +864,13 @@ err:
return ret;
}
-static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca,
u64 b, enum bch_data_type data_type, unsigned sectors,
enum btree_iter_update_trigger_flags flags)
{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, b);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
@@ -1019,9 +897,10 @@ static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
g->data_type = data_type;
g->dirty_sectors += sectors;
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
- bch2_dev_usage_update(c, ca, &old, &new);
+ bucket_unlock(g);
percpu_up_read(&c->mark_lock);
- return 0;
+ ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
+ return ret;
err:
bucket_unlock(g);
err_unlock:
@@ -1045,7 +924,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
return 0;
if (flags & BTREE_TRIGGER_gc)
- return bch2_mark_metadata_bucket(trans->c, ca, b, type, sectors, flags);
+ return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags);
else if (flags & BTREE_TRIGGER_transactional)
return commit_do(trans, NULL, NULL, 0,
__bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 42ff3e9df587..fc6359f84e82 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -273,16 +273,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca,
/* Filesystem usage: */
-static inline unsigned __fs_usage_u64s(unsigned nr_replicas)
-{
- return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas;
-}
-
-static inline unsigned fs_usage_u64s(struct bch_fs *c)
-{
- return __fs_usage_u64s(READ_ONCE(c->replicas.nr));
-}
-
static inline unsigned dev_usage_u64s(void)
{
return sizeof(struct bch_dev_usage) / sizeof(u64);
@@ -291,12 +281,6 @@ static inline unsigned dev_usage_u64s(void)
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
-void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *,
- const struct bch_alloc_v4 *,
- const struct bch_alloc_v4 *);
-int bch2_update_replicas(struct bch_fs *, struct bkey_s_c,
- struct bch_replicas_entry_v1 *, s64);
-
int bch2_bucket_ref_update(struct btree_trans *, struct bch_dev *,
struct bkey_s_c, const struct bch_extent_ptr *,
s64, enum bch_data_type, u8, u8, u32 *);
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 7ad15f809348..c9698cdf866f 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -57,13 +57,6 @@ struct bch_fs_usage_base {
u64 nr_inodes;
};
-struct bch_fs_usage {
- /* all fields are in units of 512 byte sectors: */
- struct bch_fs_usage_base b;
- u64 persistent_reserved[BCH_REPLICAS_MAX];
- u64 replicas[];
-};
-
struct bch_fs_usage_short {
u64 capacity;
u64 used;
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index f5b5d896979e..e8dfd67eab8a 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -66,9 +66,20 @@ static const char * const disk_accounting_type_strs[] = {
NULL
};
+static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_pos *pos,
+ s64 *d, unsigned nr)
+{
+ struct bkey_i_accounting *acc = bkey_accounting_init(k);
+
+ acc->k.p = disk_accounting_pos_to_bpos(pos);
+ set_bkey_val_u64s(&acc->k, sizeof(struct bch_accounting) / sizeof(u64) + nr);
+
+ memcpy_u64s_small(acc->v.d, d, nr);
+}
+
int bch2_disk_accounting_mod(struct btree_trans *trans,
struct disk_accounting_pos *k,
- s64 *d, unsigned nr)
+ s64 *d, unsigned nr, bool gc)
{
/* Normalize: */
switch (k->type) {
@@ -79,21 +90,18 @@ int bch2_disk_accounting_mod(struct btree_trans *trans,
BUG_ON(nr > BCH_ACCOUNTING_MAX_COUNTERS);
- struct {
- __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS);
- } k_i;
- struct bkey_i_accounting *acc = bkey_accounting_init(&k_i.k);
-
- acc->k.p = disk_accounting_pos_to_bpos(k);
- set_bkey_val_u64s(&acc->k, sizeof(struct bch_accounting) / sizeof(u64) + nr);
+ struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;
- memcpy_u64s_small(acc->v.d, d, nr);
+ accounting_key_init(&k_i.k, k, d, nr);
- return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &acc->k_i);
+ return likely(!gc)
+ ? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k)
+ : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
}
int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
- unsigned dev, s64 sectors)
+ unsigned dev, s64 sectors,
+ bool gc)
{
struct disk_accounting_pos acc = {
.type = BCH_DISK_ACCOUNTING_replicas,
@@ -101,7 +109,7 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
bch2_replicas_entry_cached(&acc.replicas, dev);
- return bch2_disk_accounting_mod(trans, &acc, &sectors, 1);
+ return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
}
int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -199,7 +207,7 @@ int bch2_accounting_update_sb(struct btree_trans *trans)
return 0;
}
-static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a)
+static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc)
{
struct bch_replicas_padded r;
@@ -207,7 +215,7 @@ static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_
!bch2_replicas_marked_locked(c, &r.e))
return -BCH_ERR_btree_insert_need_mark_replicas;
- struct bch_accounting_mem *acc = &c->accounting;
+ struct bch_accounting_mem *acc = &c->accounting[gc];
unsigned new_nr_counters = acc->nr_counters + bch2_accounting_counters(a.k);
u64 __percpu *new_counters = __alloc_percpu_gfp(new_nr_counters * sizeof(u64),
@@ -243,11 +251,11 @@ static int __bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_
return 0;
}
-int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a)
+int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc)
{
percpu_up_read(&c->mark_lock);
percpu_down_write(&c->mark_lock);
- int ret = __bch2_accounting_mem_mod_slowpath(c, a);
+ int ret = __bch2_accounting_mem_mod_slowpath(c, a, gc);
percpu_up_write(&c->mark_lock);
percpu_down_read(&c->mark_lock);
return ret;
@@ -263,7 +271,7 @@ int bch2_accounting_mem_mod_slowpath(struct bch_fs *c, struct bkey_s_c_accountin
*/
int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
{
- struct bch_accounting_mem *acc = &c->accounting;
+ struct bch_accounting_mem *acc = &c->accounting[0];
int ret = 0;
darray_init(usage);
@@ -296,6 +304,129 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
return ret;
}
+/* Ensures all counters in @src exist in @dst: */
+static int copy_counters(struct bch_accounting_mem *dst,
+ struct bch_accounting_mem *src)
+{
+ unsigned orig_dst_k_nr = dst->k.nr;
+ unsigned dst_counters = dst->nr_counters;
+
+ darray_for_each(src->k, i)
+ if (eytzinger0_find(dst->k.data, orig_dst_k_nr, sizeof(dst->k.data[0]),
+ accounting_pos_cmp, &i->pos) >= orig_dst_k_nr) {
+ if (darray_push(&dst->k, ((struct accounting_pos_offset) {
+ .pos = i->pos,
+ .offset = dst_counters,
+ .nr_counters = i->nr_counters })))
+ goto err;
+
+ dst_counters += i->nr_counters;
+ }
+
+ if (dst->k.nr == orig_dst_k_nr)
+ return 0;
+
+ u64 __percpu *new_counters = __alloc_percpu_gfp(dst_counters * sizeof(u64),
+ sizeof(u64), GFP_KERNEL);
+ if (!new_counters)
+ goto err;
+
+ preempt_disable();
+ memcpy(this_cpu_ptr(new_counters),
+ bch2_acc_percpu_u64s(dst->v, dst->nr_counters),
+ dst->nr_counters * sizeof(u64));
+ preempt_enable();
+
+ free_percpu(dst->v);
+ dst->v = new_counters;
+ dst->nr_counters = dst_counters;
+
+ eytzinger0_sort(dst->k.data, dst->k.nr, sizeof(dst->k.data[0]), accounting_pos_cmp, NULL);
+
+ return 0;
+err:
+ dst->k.nr = orig_dst_k_nr;
+ return -BCH_ERR_ENOMEM_disk_accounting;
+}
+
+int bch2_accounting_gc_done(struct bch_fs *c)
+{
+ struct bch_accounting_mem *dst = &c->accounting[0];
+ struct bch_accounting_mem *src = &c->accounting[1];
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ percpu_down_write(&c->mark_lock);
+
+ ret = copy_counters(dst, src) ?:
+ copy_counters(src, dst);
+ if (ret)
+ goto err;
+
+ BUG_ON(dst->k.nr != src->k.nr);
+
+ for (unsigned i = 0; i < src->k.nr; i++) {
+ BUG_ON(src->k.data[i].nr_counters != dst->k.data[i].nr_counters);
+ BUG_ON(!bpos_eq(dst->k.data[i].pos, src->k.data[i].pos));
+
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, src->k.data[i].pos);
+
+ unsigned nr = src->k.data[i].nr_counters;
+ u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS];
+ u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS];
+
+ bch2_accounting_mem_read_counters(c, i, dst_v, nr, false);
+ bch2_accounting_mem_read_counters(c, i, src_v, nr, true);
+
+ if (memcmp(dst_v, src_v, nr * sizeof(u64))) {
+ printbuf_reset(&buf);
+ prt_str(&buf, "accounting mismatch for ");
+ bch2_accounting_key_to_text(&buf, &acc_k);
+
+ prt_str(&buf, ": got");
+ for (unsigned j = 0; j < nr; j++)
+ prt_printf(&buf, " %llu", dst_v[j]);
+
+ prt_str(&buf, " should be");
+ for (unsigned j = 0; j < nr; j++)
+ prt_printf(&buf, " %llu", src_v[j