// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "disk_groups.h"
#include "inode.h"
#include "io.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"
#include <linux/ioprio.h>
#include <linux/kthread.h>
/*
 * NOTE(review): going by the name, a per-device budget (in sectors) for move
 * I/O that may be in flight at once. The code that consumes this value is not
 * in this part of the file -- confirm against its users.
 */
#define SECTORS_IN_FLIGHT_PER_DEVICE 2048
/*
 * State carried by one move I/O: a read bio, a migrate write, sector counts
 * for each, and trailing inline bio_vec storage.
 *
 * NOTE(review): the code that allocates and drives this struct is outside the
 * visible part of the file; the per-field notes marked "presumably" are
 * inferred from names and types only and should be confirmed against users.
 */
struct moving_io {
	/* presumably linked onto moving_context.reads -- confirm */
	struct list_head	list;
	struct closure		cl;
	/* presumably set once the read side has finished -- confirm */
	bool			read_completed;
	unsigned		read_sectors;
	unsigned		write_sectors;

	struct bch_read_bio	rbio;

	struct migrate_write	write;
	/*
	 * Must be last since it is variable size.
	 *
	 * Declared as a C99 flexible array member rather than a GNU
	 * zero-length array: sizeof(struct moving_io) is unchanged, but the
	 * compiler can now diagnose the member not being last and bounds
	 * checkers know the array has no fixed length.
	 */
	struct bio_vec		bi_inline_vecs[];
};
/*
 * Context shared across move I/Os: a closure to wait on, a pointer to the
 * move statistics, a list head, counters of in-flight read/write sectors and
 * a wait queue.
 */
struct moving_context {
/* Closure for waiting on all reads and writes to complete */
struct closure cl;
/*
 * Move statistics. bch2_migrate_index_update() bumps ->stats->keys_raced
 * through m->ctxt (presumably a pointer to this struct -- confirm).
 */
struct bch_move_stats *stats;
/* NOTE(review): presumably the moving_io entries linked via moving_io.list -- confirm */
struct list_head reads;
/* in flight sectors: */
atomic_t read_sectors;
atomic_t write_sectors;
/* NOTE(review): presumably waited on while in-flight I/O drains; waiters are not visible here -- confirm */
wait_queue_head_t wait;
};
static int bch2_migrate_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct btree_trans trans;
struct btree_iter *iter;
struct migrate_write *m =
container_of(op, struct migrate_write, op);
struct keylist *keys = &op->insert_keys;
struct bkey_buf _new, _insert;
int ret = 0;
bch2_bkey_buf_init(&_new);
bch2_bkey_buf_init(&_insert);
bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, m->btree_id,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
struct bkey_s_c k;
struct bkey_i *insert;
struct bkey_i_extent *new;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool did_work = false;
bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_reset(&trans, 0);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
new = bkey_i_to_extent(bch2_keylist_front(keys));
if (bversion_cmp(k.k->version, new->k.version) ||
!bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
goto nomatch;
bkey_reassemble(_insert.k, k);
insert = _insert.k;
bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
new = bkey_i_to_extent(_new.k);
bch2_cut_front(iter->pos, &new->k_i);
bch2_cut_front(iter->pos, insert);
bch2_cut_back(new->k.p, insert);
bch2_cut_back(insert->k.p, &new->k_i);
if (m->data_cmd == DATA_REWRITE) {
struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
bch2_bkey_has_device(bkey_i_to_s_c(insert),
m->data_opts.rewrite_dev);
if (!old_ptr)
goto nomatch;
if (old_ptr->cached)
extent_for_each_ptr(extent_i_to_s(new), new_ptr)
new_ptr->cached = true;
bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
}
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
* data to
*/
continue;
}
bch2_extent_ptr_decoded_append(insert, &p);
did_work = true;
}
if (!did_work)
goto nomatch;
bch2_bkey_narrow_crcs(insert,
(struct bch_extent_crc_unpacked) { 0 });
bch2_extent_normalize(c, bkey_i_to_s(insert));
bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
op->opts.background_target,
op->opts.data_replicas);
ret = bch2_sum_sector_overwrites(&trans, iter, insert,
&extending,
&should_check_enospc,
&i_sectors_delta,
&disk_sectors_delta);
if (ret)
goto err;
if (disk_sectors_delta > (s64) op->res.sectors) {
ret = bch2_disk_reservation_add(c, &op->res,
disk_sectors_delta - op->res.sectors,
!should_check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
goto out;
}
bch2_trans_update(&trans, iter, insert, 0);
ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
BTREE_INSERT_NOFAIL|
m->data_opts.btree_insert_flags);
err:
if (!ret)
atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR)
ret = 0;
if (ret)
break;
next:
while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
bch2_keylist_pop_front(keys);
if (bch2_keylist_empty(keys))
goto out;
}
continue;
nomatch:
if (m->ctxt) {
BUG_ON(k.k->p.offset <= iter->pos.offset);
atomic64_inc(&m->ctxt->stats->keys_raced);
atomic64_add(k.k->p.offset - iter->pos.offset,
&m-><