summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2023-09-10 18:05:17 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:10:12 -0400
commit 1809b8cba756d32bd6e976ed4ee64efdf66c6d94 (patch)
tree f25287cf7f337aa8b8a093a91e0478a909833b8c
parent cbf57db53f311b09de2c17b514e104d421d72871 (diff)
download linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.tar.gz
linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.tar.bz2
linux-1809b8cba756d32bd6e976ed4ee64efdf66c6d94.zip
bcachefs: Break up io.c
More reorganization, this splits up io.c into
 - io_read.c
 - io_misc.c - fallocate, fpunch, truncate
 - io_write.c

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/Makefile 4
-rw-r--r-- fs/bcachefs/alloc_foreground.c 2
-rw-r--r-- fs/bcachefs/btree_io.c 2
-rw-r--r-- fs/bcachefs/btree_io.h 2
-rw-r--r-- fs/bcachefs/compress.c 1
-rw-r--r-- fs/bcachefs/data_update.c 2
-rw-r--r-- fs/bcachefs/data_update.h 2
-rw-r--r-- fs/bcachefs/debug.c 1
-rw-r--r-- fs/bcachefs/ec.c 3
-rw-r--r-- fs/bcachefs/errcode.c 7
-rw-r--r-- fs/bcachefs/errcode.h 4
-rw-r--r-- fs/bcachefs/error.c 1
-rw-r--r-- fs/bcachefs/fs-io-buffered.c 3
-rw-r--r-- fs/bcachefs/fs-io-direct.c 3
-rw-r--r-- fs/bcachefs/fs-io.c 3
-rw-r--r-- fs/bcachefs/fs-io.h 2
-rw-r--r-- fs/bcachefs/fs.c 2
-rw-r--r-- fs/bcachefs/io.h 202
-rw-r--r-- fs/bcachefs/io_misc.c 215
-rw-r--r-- fs/bcachefs/io_misc.h 12
-rw-r--r-- fs/bcachefs/io_read.c 1207
-rw-r--r-- fs/bcachefs/io_read.h 158
-rw-r--r-- fs/bcachefs/io_write.c (renamed from fs/bcachefs/io.c) 1389
-rw-r--r-- fs/bcachefs/io_write.h 110
-rw-r--r-- fs/bcachefs/io_write_types.h (renamed from fs/bcachefs/io_types.h) 75
-rw-r--r-- fs/bcachefs/journal_io.c 1
-rw-r--r-- fs/bcachefs/migrate.c 2
-rw-r--r-- fs/bcachefs/move.c 3
-rw-r--r-- fs/bcachefs/move.h 1
-rw-r--r-- fs/bcachefs/movinggc.c 8
-rw-r--r-- fs/bcachefs/rebalance.c 2
-rw-r--r-- fs/bcachefs/reflink.c 4
-rw-r--r-- fs/bcachefs/super-io.c 1
-rw-r--r-- fs/bcachefs/super.c 9
34 files changed, 1751 insertions, 1692 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 0a4d2fed66c1..9c00dabb26ac 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -46,7 +46,9 @@ bcachefs-y := \
fs-io-pagecache.o \
fsck.o \
inode.o \
- io.o \
+ io_read.o \
+ io_misc.o \
+ io_write.o \
journal.o \
journal_io.o \
journal_reclaim.o \
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index e02749ddc362..8e1888a89011 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -25,7 +25,7 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "io.h"
+#include "io_write.h"
#include "journal.h"
#include "movinggc.h"
#include "nocow_locking.h"
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 0edbb73a5ec8..00f53cb5d44b 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -14,7 +14,7 @@
#include "debug.h"
#include "error.h"
#include "extents.h"
-#include "io.h"
+#include "io_write.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "recovery.h"
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index cd99bbb00a5a..7e03dd76fb38 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -7,7 +7,7 @@
#include "btree_locking.h"
#include "checksum.h"
#include "extents.h"
-#include "io_types.h"
+#include "io_write_types.h"
struct bch_fs;
struct btree_write;
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 6b17f7cc5860..f1651807c2b7 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -3,7 +3,6 @@
#include "checksum.h"
#include "compress.h"
#include "extents.h"
-#include "io.h"
#include "super-io.h"
#include <linux/lz4.h>
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 81518f20d37d..29576c4c109d 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -9,7 +9,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
-#include "io.h"
+#include "io_write.h"
#include "keylist.h"
#include "move.h"
#include "nocow_locking.h"
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 49e9055cbb52..7ca1f98d7e94 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -4,7 +4,7 @@
#define _BCACHEFS_DATA_UPDATE_H
#include "bkey_buf.h"
-#include "io_types.h"
+#include "io_write_types.h"
struct moving_context;
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index ae47e1854b80..5f3e65f9069e 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -19,7 +19,6 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
-#include "io.h"
#include "super.h"
#include <linux/console.h>
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 67a5453a36d9..40e72b96745a 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -11,10 +11,11 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
+#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "io.h"
+#include "io_read.h"
#include "keylist.h"
#include "recovery.h"
#include "replicas.h"
diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c
index dc906fc9176f..8d58f2cca260 100644
--- a/fs/bcachefs/errcode.c
+++ b/fs/bcachefs/errcode.c
@@ -61,3 +61,10 @@ int __bch2_err_class(int err)
return -err;
}
+
+const char *bch2_blk_status_to_str(blk_status_t status)
+{
+ if (status == BLK_STS_REMOVED)
+ return "device removed";
+ return blk_status_to_str(status);
+}
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index f7fa87442e98..379d9d7ed333 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -249,4 +249,8 @@ static inline long bch2_err_class(long err)
return err < 0 ? __bch2_err_class(err) : err;
}
+#define BLK_STS_REMOVED ((__force blk_status_t)128)
+
+const char *bch2_blk_status_to_str(blk_status_t);
+
#endif /* _BCACHFES_ERRCODE_H */
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 39009cf0c448..2a5af8872613 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
-#include "io.h"
#include "super.h"
#define FSCK_ERR_RATELIMIT_NR 10
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index dc22182d532f..2034d635c718 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -8,7 +8,8 @@
#include "fs-io-buffered.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include <linux/backing-dev.h>
#include <linux/pagemap.h>
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 2b29abd24d56..219bc1124477 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -7,7 +7,8 @@
#include "fs-io.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
-#include "io.h"
+#include "io_read.h"
+#include "io_write.h"
#include <linux/kthread.h>
#include <linux/pagemap.h>
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index ceab12fb8a8f..0b0b3b0d6c7d 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@@ -16,7 +17,7 @@
#include "fsck.h"
#include "inode.h"
#include "journal.h"
-#include "io.h"
+#include "io_misc.h"
#include "keylist.h"
#include "quota.h"
#include "reflink.h"
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index bb5b709fa8cf..bc6e8439d40b 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -6,7 +6,7 @@
#include "buckets.h"
#include "fs.h"
-#include "io_types.h"
+#include "io_write_types.h"
#include "quota.h"
#include <linux/uio.h>
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 08f810992a1b..0648874d54f3 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -19,7 +19,7 @@
#include "fs-io-pagecache.h"
#include "fsck.h"
#include "inode.h"
-#include "io.h"
+#include "io_read.h"
#include "journal.h"
#include "keylist.h"
#include "quota.h"
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
deleted file mode 100644
index 831e3f1b7e41..000000000000
--- a/fs/bcachefs/io.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_IO_H
-#define _BCACHEFS_IO_H
-
-#include "checksum.h"
-#include "bkey_buf.h"
-#include "io_types.h"
-
-#define to_wbio(_bio) \
- container_of((_bio), struct bch_write_bio, bio)
-
-#define to_rbio(_bio) \
- container_of((_bio), struct bch_read_bio, bio)
-
-void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
-void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
-
-#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-void bch2_latency_acct(struct bch_dev *, u64, int);
-#else
-static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
-#endif
-
-void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
- enum bch_data_type, const struct bkey_i *, bool);
-
-#define BLK_STS_REMOVED ((__force blk_status_t)128)
-
-const char *bch2_blk_status_to_str(blk_status_t);
-
-#define BCH_WRITE_FLAGS() \
- x(ALLOC_NOWAIT) \
- x(CACHED) \
- x(DATA_ENCODED) \
- x(PAGES_STABLE) \
- x(PAGES_OWNED) \
- x(ONLY_SPECIFIED_DEVS) \
- x(WROTE_DATA_INLINE) \
- x(FROM_INTERNAL) \
- x(CHECK_ENOSPC) \
- x(SYNC) \
- x(MOVE) \
- x(IN_WORKER) \
- x(DONE) \
- x(IO_ERROR) \
- x(CONVERT_UNWRITTEN)
-
-enum __bch_write_flags {
-#define x(f) __BCH_WRITE_##f,
- BCH_WRITE_FLAGS()
-#undef x
-};
-
-enum bch_write_flags {
-#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
- BCH_WRITE_FLAGS()
-#undef x
-};
-
-static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
-{
- return op->watermark == BCH_WATERMARK_copygc
- ? op->c->copygc_wq
- : op->c->btree_update_wq;
-}
-
-int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, bool *, s64 *, s64 *);
-int bch2_extent_update(struct btree_trans *, subvol_inum,
- struct btree_iter *, struct bkey_i *,
- struct disk_reservation *, u64, s64 *, bool);
-int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
- unsigned, struct bch_io_opts, s64 *,
- struct write_point_specifier);
-
-int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
- subvol_inum, u64, s64 *);
-int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
-
-static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
- struct bch_io_opts opts)
-{
- op->c = c;
- op->end_io = NULL;
- op->flags = 0;
- op->written = 0;
- op->error = 0;
- op->csum_type = bch2_data_checksum_type(c, opts);
- op->compression_opt = opts.compression;
- op->nr_replicas = 0;
- op->nr_replicas_required = c->opts.data_replicas_required;
- op->watermark = BCH_WATERMARK_normal;
- op->incompressible = 0;
- op->open_buckets.nr = 0;
- op->devs_have.nr = 0;
- op->target = 0;
- op->opts = opts;
- op->subvol = 0;
- op->pos = POS_MAX;
- op->version = ZERO_VERSION;
- op->write_point = (struct write_point_specifier) { 0 };
- op->res = (struct disk_reservation) { 0 };
- op->new_i_size = U64_MAX;
- op->i_sectors_delta = 0;
- op->devs_need_flush = NULL;
-}
-
-void bch2_write(struct closure *);
-
-void bch2_write_point_do_index_updates(struct work_struct *);
-
-static inline struct bch_write_bio *wbio_init(struct bio *bio)
-{
- struct bch_write_bio *wbio = to_wbio(bio);
-
- memset(&wbio->wbio, 0, sizeof(wbio->wbio));
- return wbio;
-}
-
-void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
-
-struct bch_devs_mask;
-struct cache_promote_op;
-struct extent_ptr_decoded;
-
-int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
- struct bkey_buf *);
-
-static inline int bch2_read_indirect_extent(struct btree_trans *trans,
- enum btree_id *data_btree,
- unsigned *offset_into_extent,
- struct bkey_buf *k)
-{
- if (k->k->k.type != KEY_TYPE_reflink_p)
- return 0;
-
- *data_btree = BTREE_ID_reflink;
- return __bch2_read_indirect_extent(trans, offset_into_extent, k);
-}
-
-enum bch_read_flags {
- BCH_READ_RETRY_IF_STALE = 1 << 0,
- BCH_READ_MAY_PROMOTE = 1 << 1,
- BCH_READ_USER_MAPPED = 1 << 2,
- BCH_READ_NODECODE = 1 << 3,
- BCH_READ_LAST_FRAGMENT = 1 << 4,
-
- /* internal: */
- BCH_READ_MUST_BOUNCE = 1 << 5,
- BCH_READ_MUST_CLONE = 1 << 6,
- BCH_READ_IN_RETRY = 1 << 7,
-};
-
-int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
- struct bvec_iter, struct bpos, enum btree_id,
- struct bkey_s_c, unsigned,
- struct bch_io_failures *, unsigned);
-
-static inline void bch2_read_extent(struct btree_trans *trans,
- struct bch_read_bio *rbio, struct bpos read_pos,
- enum btree_id data_btree, struct bkey_s_c k,
- unsigned offset_into_extent, unsigned flags)
-{
- __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
- data_btree, k, offset_into_extent, NULL, flags);
-}
-
-void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
- subvol_inum, struct bch_io_failures *, unsigned flags);
-
-static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
- subvol_inum inum)
-{
- struct bch_io_failures failed = { .nr = 0 };
-
- BUG_ON(rbio->_state);
-
- rbio->c = c;
- rbio->start_time = local_clock();
- rbio->subvol = inum.subvol;
-
- __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
- BCH_READ_RETRY_IF_STALE|
- BCH_READ_MAY_PROMOTE|
- BCH_READ_USER_MAPPED);
-}
-
-static inline struct bch_read_bio *rbio_init(struct bio *bio,
- struct bch_io_opts opts)
-{
- struct bch_read_bio *rbio = to_rbio(bio);
-
- rbio->_state = 0;
- rbio->promote = NULL;
- rbio->opts = opts;
- return rbio;
-}
-
-void bch2_fs_io_exit(struct bch_fs *);
-int bch2_fs_io_init(struct bch_fs *);
-
-#endif /* _BCACHEFS_IO_H */
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
new file mode 100644
index 000000000000..c04e5dacfc8d
--- /dev/null
+++ b/fs/bcachefs/io_misc.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * io_misc.c - fallocate, fpunch, truncate:
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "extents.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "subvolume.h"
+
+/* Overwrites whatever was present with zeroes: */
+int bch2_extent_fallocate(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ unsigned sectors,
+ struct bch_io_opts opts,
+ s64 *i_sectors_delta,
+ struct write_point_specifier write_point)
+{
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = { 0 };
+ struct closure cl;
+ struct open_buckets open_buckets = { 0 };
+ struct bkey_s_c k;
+ struct bkey_buf old, new;
+ unsigned sectors_allocated = 0;
+ bool have_reservation = false;
+ bool unwritten = opts.nocow &&
+ c->sb.version >= bcachefs_metadata_version_unwritten_extents;
+ int ret;
+
+ bch2_bkey_buf_init(&old);
+ bch2_bkey_buf_init(&new);
+ closure_init_stack(&cl);
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
+
+ if (!have_reservation) {
+ unsigned new_replicas =
+ max(0, (int) opts.data_replicas -
+ (int) bch2_bkey_nr_ptrs_fully_allocated(k));
+ /*
+ * Get a disk reservation before (in the nocow case) calling
+ * into the allocator:
+ */
+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
+ if (unlikely(ret))
+ goto err;
+
+ bch2_bkey_buf_reassemble(&old, c, k);
+ }
+
+ if (have_reservation) {
+ if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
+ goto err;
+
+ bch2_key_resize(&new.k->k, sectors);
+ } else if (!unwritten) {
+ struct bkey_i_reservation *reservation;
+
+ bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
+ reservation = bkey_reservation_init(new.k);
+ reservation->k.p = iter->pos;
+ bch2_key_resize(&reservation->k, sectors);
+ reservation->v.nr_replicas = opts.data_replicas;
+ } else {
+ struct bkey_i_extent *e;
+ struct bch_devs_list devs_have;
+ struct write_point *wp;
+ struct bch_extent_ptr *ptr;
+
+ devs_have.nr = 0;
+
+ bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
+
+ e = bkey_extent_init(new.k);
+ e->k.p = iter->pos;
+
+ ret = bch2_alloc_sectors_start_trans(trans,
+ opts.foreground_target,
+ false,
+ write_point,
+ &devs_have,
+ opts.data_replicas,
+ opts.data_replicas,
+ BCH_WATERMARK_normal, 0, &cl, &wp);
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ ret = -BCH_ERR_transaction_restart_nested;
+ if (ret)
+ goto err;
+
+ sectors = min(sectors, wp->sectors_free);
+ sectors_allocated = sectors;
+
+ bch2_key_resize(&e->k, sectors);
+
+ bch2_open_bucket_get(c, wp, &open_buckets);
+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
+ bch2_alloc_sectors_done(c, wp);
+
+ extent_for_each_ptr(extent_i_to_s(e), ptr)
+ ptr->unwritten = true;
+ }
+
+ have_reservation = true;
+
+ ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
+ 0, i_sectors_delta, true);
+err:
+ if (!ret && sectors_allocated)
+ bch2_increment_clock(c, sectors_allocated, WRITE);
+
+ bch2_open_buckets_put(c, &open_buckets);
+ bch2_disk_reservation_put(c, &disk_res);
+ bch2_bkey_buf_exit(&new, c);
+ bch2_bkey_buf_exit(&old, c);
+
+ if (closure_nr_remaining(&cl) != 1) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns -BCH_ERR_transaction_restart if we had to drop locks:
+ */
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+ subvol_inum inum, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bpos end_pos = POS(inum.inum, end);
+ struct bkey_s_c k;
+ int ret = 0, ret2 = 0;
+ u32 snapshot;
+
+ while (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete;
+
+ if (ret)
+ ret2 = ret;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(iter, snapshot);
+
+ /*
+ * peek_upto() doesn't have ideal semantics for extents:
+ */
+ k = bch2_btree_iter_peek_upto(iter, end_pos);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k);
+ if (ret)
+ continue;
+
+ bkey_init(&delete.k);
+ delete.k.p = iter->pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end_pos, &delete);
+
+ ret = bch2_extent_update(trans, inum, iter, &delete,
+ &disk_res, 0, i_sectors_delta, false);
+ bch2_disk_reservation_put(c, &disk_res);
+ }
+
+ return ret ?: ret2;
+}
+
+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, start),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta);
+
+ bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_exit(&trans);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+
+ return ret;
+}
diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
new file mode 100644
index 000000000000..46e9ce3251d6
--- /dev/null
+++ b/fs/bcachefs/io_misc.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_MISC_H
+#define _BCACHEFS_IO_MISC_H
+
+int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
+ unsigned, struct bch_io_opts, s64 *,
+ struct write_point_specifier);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+ subvol_inum, u64, s64 *);
+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
+
+#endif /* _BCACHEFS_IO_MISC_H */
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
new file mode 100644
index 000000000000..cd62bf730396
--- /dev/null
+++ b/fs/bcachefs/io_read.c
@@ -0,0 +1,1207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Some low level IO code, and hacks for various block layer limitations
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "data_update.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io_read.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "subvolume.h"
+#include "trace.h"
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ const struct bch_devs_mask *devs;
+ unsigned d, nr = 0, total = 0;
+ u64 now = local_clock(), last;
+ s64 congested;
+ struct bch_dev *ca;
+
+ if (!target)
+ return false;
+
+ rcu_read_lock();
+ devs = bch2_target_to_mask(c, target) ?:
+ &c->rw_devs[BCH_DATA_user];
+
+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
+ ca = rcu_dereference(c->devs[d]);
+ if (!ca)
+ continue;
+
+ congested = atomic_read(&ca->congested);
+ last = READ_ONCE(ca->congested_last);
+ if (time_after64(now, last))
+ congested -= (now - last) >> 12;
+
+ total += max(congested, 0LL);
+ nr++;
+ }
+ rcu_read_unlock();
+
+ return bch2_rand_range(nr * CONGESTED_MAX) < total;
+}
+
+#else
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ return false;
+}
+
+#endif
+
+/* Cache promotion on read */
+
+struct promote_op {
+ struct rcu_head rcu;
+ u64 start_time;
+
+ struct rhash_head hash;
+ struct bpos pos;
+
+ struct data_update write;
+ struct bio_vec bi_inline_vecs[0]; /* must be last */
+};
+
+static const struct rhashtable_params bch_promote_params = {
+ .head_offset = offsetof(struct promote_op, hash),
+ .key_offset = offsetof(struct promote_op, pos),
+ .key_len = sizeof(struct bpos),
+};
+
+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
+ struct bpos pos,
+ struct bch_io_opts opts,
+ unsigned flags)
+{
+ if (!(flags & BCH_READ_MAY_PROMOTE))
+ return false;
+
+ if (!opts.promote_target)
+ return false;
+
+ if (bch2_bkey_has_target(c, k, opts.promote_target))
+ return false;
+
+ if (bkey_extent_is_unwritten(k))
+ return false;
+
+ if (bch2_target_congested(c, opts.promote_target)) {
+ /* XXX trace this */
+ return false;
+ }
+
+ if (rhashtable_lookup_fast(&c->promote_table, &pos,
+ bch_promote_params))
+ return false;
+
+ return true;
+}
+
+static void promote_free(struct bch_fs *c, struct promote_op *op)
+{
+ int ret;
+
+ bch2_data_update_exit(&op->write);
+
+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ kfree_rcu(op, rcu);
+}
+
+static void promote_done(struct bch_write_op *wop)
+{
+ struct promote_op *op =
+ container_of(wop, struct promote_op, write.op);
+ struct bch_fs *c = op->write.op.c;
+
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
+ op->start_time);
+ promote_free(c, op);
+}
+
+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+{
+ struct bio *bio = &op->write.op.wbio.bio;
+
+ trace_and_count(op->write.op.c, read_promote, &rbio->bio);
+
+ /* we now own pages: */
+ BUG_ON(!rbio->bounce);
+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
+
+ bch2_data_update_read_done(&op->write, rbio->pick.crc);
+}
+
+static struct promote_op *__promote_alloc(struct btree_trans *trans,
+ enum btree_id btree_id,
+ struct bkey_s_c k,
+ struct bpos pos,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned sectors,
+ struct bch_read_bio