diff options
| author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-09-10 18:05:17 -0400 |
|---|---|---|
| committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:10:12 -0400 |
| commit | 1809b8cba756d32bd6e976ed4ee64efdf66c6d94 (patch) | |
| tree | f25287cf7f337aa8b8a093a91e0478a909833b8c | |
| parent | cbf57db53f311b09de2c17b514e104d421d72871 (diff) | |
| download | linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.tar.gz linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.tar.bz2 linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.zip | |
bcachefs: Break up io.c
More reorganization, this splits up io.c into
- io_read.c
- io_misc.c - fallocate, fpunch, truncate
- io_write.c
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
34 files changed, 1751 insertions, 1692 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0a4d2fed66c1..9c00dabb26ac 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -46,7 +46,9 @@ bcachefs-y := \ fs-io-pagecache.o \ fsck.o \ inode.o \ - io.o \ + io_read.o \ + io_misc.o \ + io_write.o \ journal.o \ journal_io.o \ journal_reclaim.o \ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index e02749ddc362..8e1888a89011 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -25,7 +25,7 @@ #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "io.h" +#include "io_write.h" #include "journal.h" #include "movinggc.h" #include "nocow_locking.h" diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 0edbb73a5ec8..00f53cb5d44b 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -14,7 +14,7 @@ #include "debug.h" #include "error.h" #include "extents.h" -#include "io.h" +#include "io_write.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" #include "recovery.h" diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index cd99bbb00a5a..7e03dd76fb38 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -7,7 +7,7 @@ #include "btree_locking.h" #include "checksum.h" #include "extents.h" -#include "io_types.h" +#include "io_write_types.h" struct bch_fs; struct btree_write; diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 6b17f7cc5860..f1651807c2b7 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -3,7 +3,6 @@ #include "checksum.h" #include "compress.h" #include "extents.h" -#include "io.h" #include "super-io.h" #include <linux/lz4.h> diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 81518f20d37d..29576c4c109d 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -9,7 +9,7 @@ #include "ec.h" #include "error.h" #include "extents.h" -#include "io.h" +#include "io_write.h" #include "keylist.h" #include "move.h" #include "nocow_locking.h" diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 49e9055cbb52..7ca1f98d7e94 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -4,7 +4,7 @@ #define _BCACHEFS_DATA_UPDATE_H #include "bkey_buf.h" -#include "io_types.h" +#include "io_write_types.h" struct moving_context; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ae47e1854b80..5f3e65f9069e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -19,7 +19,6 @@ #include "extents.h" #include "fsck.h" #include "inode.h" -#include "io.h" #include "super.h" #include <linux/console.h> diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 67a5453a36d9..40e72b96745a 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -11,10 +11,11 @@ #include "btree_update.h" #include "btree_write_buffer.h" #include "buckets.h" +#include "checksum.h" #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "io.h" +#include "io_read.h" #include "keylist.h" #include "recovery.h" #include "replicas.h" diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index dc906fc9176f..8d58f2cca260 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -61,3 +61,10 @@ int __bch2_err_class(int err) return -err; } + +const char *bch2_blk_status_to_str(blk_status_t status) +{ + if (status == BLK_STS_REMOVED) + return "device removed"; + return blk_status_to_str(status); +} diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index f7fa87442e98..379d9d7ed333 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -249,4 +249,8 @@ static inline long bch2_err_class(long err) return err < 0 ? __bch2_err_class(err) : err; } +#define BLK_STS_REMOVED ((__force blk_status_t)128) + +const char *bch2_blk_status_to_str(blk_status_t); + #endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 39009cf0c448..2a5af8872613 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "error.h" -#include "io.h" #include "super.h" #define FSCK_ERR_RATELIMIT_NR 10 diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index dc22182d532f..2034d635c718 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -8,7 +8,8 @@ #include "fs-io-buffered.h" #include "fs-io-direct.h" #include "fs-io-pagecache.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include <linux/backing-dev.h> #include <linux/pagemap.h> diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 2b29abd24d56..219bc1124477 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -7,7 +7,8 @@ #include "fs-io.h" #include "fs-io-direct.h" #include "fs-io-pagecache.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include <linux/kthread.h> #include <linux/pagemap.h> diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index ceab12fb8a8f..0b0b3b0d6c7d 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -16,7 +17,7 @@ #include "fsck.h" #include "inode.h" #include "journal.h" -#include "io.h" +#include "io_misc.h" #include "keylist.h" #include "quota.h" #include "reflink.h" diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index bb5b709fa8cf..bc6e8439d40b 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -6,7 +6,7 @@ #include "buckets.h" #include "fs.h" -#include "io_types.h" +#include "io_write_types.h" #include "quota.h" #include <linux/uio.h> diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 08f810992a1b..0648874d54f3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -19,7 +19,7 @@ #include "fs-io-pagecache.h" #include "fsck.h" #include "inode.h" -#include "io.h" +#include "io_read.h" #include "journal.h" #include "keylist.h" #include "quota.h" diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h deleted file mode 100644 index 831e3f1b7e41..000000000000 --- a/fs/bcachefs/io.h +++ /dev/null @@ -1,202 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_IO_H -#define _BCACHEFS_IO_H - -#include "checksum.h" -#include "bkey_buf.h" -#include "io_types.h" - -#define to_wbio(_bio) \ - container_of((_bio), struct bch_write_bio, bio) - -#define to_rbio(_bio) \ - container_of((_bio), struct bch_read_bio, bio) - -void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); - -#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -void bch2_latency_acct(struct bch_dev *, u64, int); -#else -static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} -#endif - -void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, - enum bch_data_type, const struct bkey_i *, bool); - -#define BLK_STS_REMOVED ((__force blk_status_t)128) - -const char *bch2_blk_status_to_str(blk_status_t); - -#define BCH_WRITE_FLAGS() \ - x(ALLOC_NOWAIT) \ - x(CACHED) \ - x(DATA_ENCODED) \ - x(PAGES_STABLE) \ - x(PAGES_OWNED) \ - x(ONLY_SPECIFIED_DEVS) \ - x(WROTE_DATA_INLINE) \ - x(FROM_INTERNAL) \ - x(CHECK_ENOSPC) \ - x(SYNC) \ - x(MOVE) \ - x(IN_WORKER) \ - x(DONE) \ - x(IO_ERROR) \ - x(CONVERT_UNWRITTEN) - -enum __bch_write_flags { -#define x(f) __BCH_WRITE_##f, - BCH_WRITE_FLAGS() -#undef x -}; - -enum bch_write_flags { -#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f), - BCH_WRITE_FLAGS() -#undef x -}; - -static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -{ - return op->watermark == BCH_WATERMARK_copygc - ? op->c->copygc_wq - : op->c->btree_update_wq; -} - -int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, - struct bkey_i *, bool *, s64 *, s64 *); -int bch2_extent_update(struct btree_trans *, subvol_inum, - struct btree_iter *, struct bkey_i *, - struct disk_reservation *, u64, s64 *, bool); -int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - unsigned, struct bch_io_opts, s64 *, - struct write_point_specifier); - -int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, - subvol_inum, u64, s64 *); -int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); - -static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, - struct bch_io_opts opts) -{ - op->c = c; - op->end_io = NULL; - op->flags = 0; - op->written = 0; - op->error = 0; - op->csum_type = bch2_data_checksum_type(c, opts); - op->compression_opt = opts.compression; - op->nr_replicas = 0; - op->nr_replicas_required = c->opts.data_replicas_required; - op->watermark = BCH_WATERMARK_normal; - op->incompressible = 0; - op->open_buckets.nr = 0; - op->devs_have.nr = 0; - op->target = 0; - op->opts = opts; - op->subvol = 0; - op->pos = POS_MAX; - op->version = ZERO_VERSION; - op->write_point = (struct write_point_specifier) { 0 }; - op->res = (struct disk_reservation) { 0 }; - op->new_i_size = U64_MAX; - op->i_sectors_delta = 0; - op->devs_need_flush = NULL; -} - -void bch2_write(struct closure *); - -void bch2_write_point_do_index_updates(struct work_struct *); - -static inline struct bch_write_bio *wbio_init(struct bio *bio) -{ - struct bch_write_bio *wbio = to_wbio(bio); - - memset(&wbio->wbio, 0, sizeof(wbio->wbio)); - return wbio; -} - -void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); - -struct bch_devs_mask; -struct cache_promote_op; -struct extent_ptr_decoded; - -int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, - struct bkey_buf *); - -static inline int bch2_read_indirect_extent(struct btree_trans *trans, - enum btree_id *data_btree, - unsigned *offset_into_extent, - struct bkey_buf *k) -{ - if (k->k->k.type != KEY_TYPE_reflink_p) - return 0; - - *data_btree = BTREE_ID_reflink; - return __bch2_read_indirect_extent(trans, offset_into_extent, k); -} - -enum bch_read_flags { - BCH_READ_RETRY_IF_STALE = 1 << 0, - BCH_READ_MAY_PROMOTE = 1 << 1, - BCH_READ_USER_MAPPED = 1 << 2, - BCH_READ_NODECODE = 1 << 3, - BCH_READ_LAST_FRAGMENT = 1 << 4, - - /* internal: */ - BCH_READ_MUST_BOUNCE = 1 << 5, - BCH_READ_MUST_CLONE = 1 << 6, - BCH_READ_IN_RETRY = 1 << 7, -}; - -int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, - struct bvec_iter, struct bpos, enum btree_id, - struct bkey_s_c, unsigned, - struct bch_io_failures *, unsigned); - -static inline void bch2_read_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, struct bpos read_pos, - enum btree_id data_btree, struct bkey_s_c k, - unsigned offset_into_extent, unsigned flags) -{ - __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, - data_btree, k, offset_into_extent, NULL, flags); -} - -void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, - subvol_inum, struct bch_io_failures *, unsigned flags); - -static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - subvol_inum inum) -{ - struct bch_io_failures failed = { .nr = 0 }; - - BUG_ON(rbio->_state); - - rbio->c = c; - rbio->start_time = local_clock(); - rbio->subvol = inum.subvol; - - __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, - BCH_READ_RETRY_IF_STALE| - BCH_READ_MAY_PROMOTE| - BCH_READ_USER_MAPPED); -} - -static inline struct bch_read_bio *rbio_init(struct bio *bio, - struct bch_io_opts opts) -{ - struct bch_read_bio *rbio = to_rbio(bio); - - rbio->_state = 0; - rbio->promote = NULL; - rbio->opts = opts; - return rbio; -} - -void bch2_fs_io_exit(struct bch_fs *); -int bch2_fs_io_init(struct bch_fs *); - -#endif /* _BCACHEFS_IO_H */ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c new file mode 100644 index 000000000000..c04e5dacfc8d --- /dev/null +++ b/fs/bcachefs/io_misc.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * io_misc.c - fallocate, fpunch, truncate: + */ + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "bkey_buf.h" +#include "btree_update.h" +#include "buckets.h" +#include "clock.h" +#include "extents.h" +#include "io_misc.h" +#include "io_write.h" +#include "subvolume.h" + +/* Overwrites whatever was present with zeroes: */ +int bch2_extent_fallocate(struct btree_trans *trans, + subvol_inum inum, + struct btree_iter *iter, + unsigned sectors, + struct bch_io_opts opts, + s64 *i_sectors_delta, + struct write_point_specifier write_point) +{ + struct bch_fs *c = trans->c; + struct disk_reservation disk_res = { 0 }; + struct closure cl; + struct open_buckets open_buckets = { 0 }; + struct bkey_s_c k; + struct bkey_buf old, new; + unsigned sectors_allocated = 0; + bool have_reservation = false; + bool unwritten = opts.nocow && + c->sb.version >= bcachefs_metadata_version_unwritten_extents; + int ret; + + bch2_bkey_buf_init(&old); + bch2_bkey_buf_init(&new); + closure_init_stack(&cl); + + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + return ret; + + sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); + + if (!have_reservation) { + unsigned new_replicas = + max(0, (int) opts.data_replicas - + (int) bch2_bkey_nr_ptrs_fully_allocated(k)); + /* + * Get a disk reservation before (in the nocow case) calling + * into the allocator: + */ + ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); + if (unlikely(ret)) + goto err; + + bch2_bkey_buf_reassemble(&old, c, k); + } + + if (have_reservation) { + if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) + goto err; + + bch2_key_resize(&new.k->k, sectors); + } else if (!unwritten) { + struct bkey_i_reservation *reservation; + + bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); + reservation = bkey_reservation_init(new.k); + reservation->k.p = iter->pos; + bch2_key_resize(&reservation->k, sectors); + reservation->v.nr_replicas = opts.data_replicas; + } else { + struct bkey_i_extent *e; + struct bch_devs_list devs_have; + struct write_point *wp; + struct bch_extent_ptr *ptr; + + devs_have.nr = 0; + + bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); + + e = bkey_extent_init(new.k); + e->k.p = iter->pos; + + ret = bch2_alloc_sectors_start_trans(trans, + opts.foreground_target, + false, + write_point, + &devs_have, + opts.data_replicas, + opts.data_replicas, + BCH_WATERMARK_normal, 0, &cl, &wp); + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) + ret = -BCH_ERR_transaction_restart_nested; + if (ret) + goto err; + + sectors = min(sectors, wp->sectors_free); + sectors_allocated = sectors; + + bch2_key_resize(&e->k, sectors); + + bch2_open_bucket_get(c, wp, &open_buckets); + bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); + bch2_alloc_sectors_done(c, wp); + + extent_for_each_ptr(extent_i_to_s(e), ptr) + ptr->unwritten = true; + } + + have_reservation = true; + + ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, + 0, i_sectors_delta, true); +err: + if (!ret && sectors_allocated) + bch2_increment_clock(c, sectors_allocated, WRITE); + + bch2_open_buckets_put(c, &open_buckets); + bch2_disk_reservation_put(c, &disk_res); + bch2_bkey_buf_exit(&new, c); + bch2_bkey_buf_exit(&old, c); + + if (closure_nr_remaining(&cl) != 1) { + bch2_trans_unlock(trans); + closure_sync(&cl); + } + + return ret; +} + +/* + * Returns -BCH_ERR_transacton_restart if we had to drop locks: + */ +int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, + subvol_inum inum, u64 end, + s64 *i_sectors_delta) +{ + struct bch_fs *c = trans->c; + unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); + struct bpos end_pos = POS(inum.inum, end); + struct bkey_s_c k; + int ret = 0, ret2 = 0; + u32 snapshot; + + while (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(c, 0); + struct bkey_i delete; + + if (ret) + ret2 = ret; + + bch2_trans_begin(trans); + + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + continue; + + bch2_btree_iter_set_snapshot(iter, snapshot); + + /* + * peek_upto() doesn't have ideal semantics for extents: + */ + k = bch2_btree_iter_peek_upto(iter, end_pos); + if (!k.k) + break; + + ret = bkey_err(k); + if (ret) + continue; + + bkey_init(&delete.k); + delete.k.p = iter->pos; + + /* create the biggest key we can */ + bch2_key_resize(&delete.k, max_sectors); + bch2_cut_back(end_pos, &delete); + + ret = bch2_extent_update(trans, inum, iter, &delete, + &disk_res, 0, i_sectors_delta, false); + bch2_disk_reservation_put(c, &disk_res); + } + + return ret ?: ret2; +} + +int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, + s64 *i_sectors_delta) +{ + struct btree_trans trans; + struct btree_iter iter; + int ret; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + POS(inum.inum, start), + BTREE_ITER_INTENT); + + ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); + + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + + return ret; +} diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h new file mode 100644 index 000000000000..46e9ce3251d6 --- /dev/null +++ b/fs/bcachefs/io_misc.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_IO_MISC_H +#define _BCACHEFS_IO_MISC_H + +int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, + unsigned, struct bch_io_opts, s64 *, + struct write_point_specifier); +int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, + subvol_inum, u64, s64 *); +int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); + +#endif /* _BCACHEFS_IO_MISC_H */ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c new file mode 100644 index 000000000000..cd62bf730396 --- /dev/null +++ b/fs/bcachefs/io_read.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Some low level IO code, and hacks for various block layer limitations + * + * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> + * Copyright 2012 Google, Inc. + */ + +#include "bcachefs.h" +#include "alloc_background.h" +#include "alloc_foreground.h" +#include "btree_update.h" +#include "buckets.h" +#include "checksum.h" +#include "clock.h" +#include "compress.h" +#include "data_update.h" +#include "disk_groups.h" +#include "ec.h" +#include "error.h" +#include "io_read.h" +#include "io_misc.h" +#include "io_write.h" +#include "subvolume.h" +#include "trace.h" + +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT + +static bool bch2_target_congested(struct bch_fs *c, u16 target) +{ + const struct bch_devs_mask *devs; + unsigned d, nr = 0, total = 0; + u64 now = local_clock(), last; + s64 congested; + struct bch_dev *ca; + + if (!target) + return false; + + rcu_read_lock(); + devs = bch2_target_to_mask(c, target) ?: + &c->rw_devs[BCH_DATA_user]; + + for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { + ca = rcu_dereference(c->devs[d]); + if (!ca) + continue; + + congested = atomic_read(&ca->congested); + last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + total += max(congested, 0LL); + nr++; + } + rcu_read_unlock(); + + return bch2_rand_range(nr * CONGESTED_MAX) < total; +} + +#else + +static bool bch2_target_congested(struct bch_fs *c, u16 target) +{ + return false; +} + +#endif + +/* Cache promotion on read */ + +struct promote_op { + struct rcu_head rcu; + u64 start_time; + + struct rhash_head hash; + struct bpos pos; + + struct data_update write; + struct bio_vec bi_inline_vecs[0]; /* must be last */ +}; + +static const struct rhashtable_params bch_promote_params = { + .head_offset = offsetof(struct promote_op, hash), + .key_offset = offsetof(struct promote_op, pos), + .key_len = sizeof(struct bpos), +}; + +static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, + struct bpos pos, + struct bch_io_opts opts, + unsigned flags) +{ + if (!(flags & BCH_READ_MAY_PROMOTE)) + return false; + + if (!opts.promote_target) + return false; + + if (bch2_bkey_has_target(c, k, opts.promote_target)) + return false; + + if (bkey_extent_is_unwritten(k)) + return false; + + if (bch2_target_congested(c, opts.promote_target)) { + /* XXX trace this */ + return false; + } + + if (rhashtable_lookup_fast(&c->promote_table, &pos, + bch_promote_params)) + return false; + + return true; +} + +static void promote_free(struct bch_fs *c, struct promote_op *op) +{ + int ret; + + bch2_data_update_exit(&op->write); + + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, + bch_promote_params); + BUG_ON(ret); + bch2_write_ref_put(c, BCH_WRITE_REF_promote); + kfree_rcu(op, rcu); +} + +static void promote_done(struct bch_write_op *wop) +{ + struct promote_op *op = + container_of(wop, struct promote_op, write.op); + struct bch_fs *c = op->write.op.c; + + bch2_time_stats_update(&c->times[BCH_TIME_data_promote], + op->start_time); + promote_free(c, op); +} + +static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) +{ + struct bio *bio = &op->write.op.wbio.bio; + + trace_and_count(op->write.op.c, read_promote, &rbio->bio); + + /* we now own pages: */ + BUG_ON(!rbio->bounce); + BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); + + memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, + sizeof(struct bio_vec) * rbio->bio.bi_vcnt); + swap(bio->bi_vcnt, rbio->bio.bi_vcnt); + + bch2_data_update_read_done(&op->write, rbio->pick.crc); +} + +static struct promote_op *__promote_alloc(struct btree_trans *trans, + enum btree_id btree_id, + struct bkey_s_c k, + struct bpos pos, + struct extent_ptr_decoded *pick, + struct bch_io_opts opts, + unsigned sectors, + struct bch_read_bio |
