author	Linus Torvalds <torvalds@linux-foundation.org>	2023-02-20 12:54:27 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-02-20 12:54:27 -0800
commit	885ce48739189fac6645ff42d736ee0de0b5917d (patch)
tree	d69300909ff5b4a0291a7311cd25cf90801a650a /fs/btrfs
parent	274978f173276c5720a3cd8d0b6047d2c0d3a684 (diff)
parent	964a54e5e1a0d70cd80bd5a0885a1938463625b1 (diff)
Merge tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "The usual mix of performance improvements and new features.

  The core change is reworking how checksums are processed, with
  followup cleanups and simplifications. There are two minor changes in
  block layer and iomap code.

  Features:

   - block group allocation class heuristics:
       - pack files by size (up to 128k, up to 8M, more) to avoid
         fragmentation in block groups, assuming that file size and
         life time is correlated, in particular this may help during
         balance
       - with tracepoints and extensible in the future

  Performance:

   - send: cache directory utimes and only emit the command when
     necessary
       - speedup up to 10x
       - smaller final stream produced (no redundant utimes commands
         issued)
       - compatibility not affected

   - fiemap: skip backref checks for shared leaves
       - speedup 3x on sample filesystem with all leaves shared (e.g.
         on snapshots)

   - micro optimized b-tree key lookup, speedup in metadata operations
     (sample benchmark: fs_mark +10% of files/sec)

  Core changes:

   - change where checksumming is done in the io path:
       - checksum and read repair does verification at lower layer
       - cascaded cleanups and simplifications

   - raid56 refactoring and cleanups

  Fixes:

   - sysfs: make sure that a run-time change of a feature is correctly
     tracked by the feature files

   - scrub: better reporting of tree block errors

  Other:

   - locally enable -Wmaybe-uninitialized after fixing all warnings

   - misc cleanups, spelling fixes

  Other code:

   - block: export bio_split_rw

   - iomap: remove IOMAP_F_ZONE_APPEND"

* tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (109 commits)
  btrfs: make kobj_type structures constant
  btrfs: remove the bdev argument to btrfs_rmap_block
  btrfs: don't rely on unchanging ->bi_bdev for zone append remaps
  btrfs: never return true for reads in btrfs_use_zone_append
  btrfs: pass a btrfs_bio to btrfs_use_append
  btrfs: set bbio->file_offset in alloc_new_bio
  btrfs: use file_offset to limit bios size in calc_bio_boundaries
  btrfs: do unsigned integer division in the extent buffer binary search loop
  btrfs: eliminate extra call when doing binary search on extent buffer
  btrfs: raid56: handle endio in scrub_rbio
  btrfs: raid56: handle endio in recover_rbio
  btrfs: raid56: handle endio in rmw_rbio
  btrfs: raid56: submit the read bios from scrub_assemble_read_bios
  btrfs: raid56: fold rmw_read_wait_recover into rmw_read_bios
  btrfs: raid56: fold recover_assemble_read_bios into recover_rbio
  btrfs: raid56: add a bio_list_put helper
  btrfs: raid56: wait for I/O completion in submit_read_bios
  btrfs: raid56: simplify code flow in rmw_rbio
  btrfs: raid56: simplify error handling and code flow in raid56_parity_write
  btrfs: replace btrfs_wait_tree_block_writeback by wait_on_extent_buffer_writeback
  ...
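The block group size-class heuristic described above can be made concrete with a minimal userspace sketch. The names below (size_class, classify_extent_size) are illustrative, not the kernel's identifiers; only the two thresholds (128K and 8M) come from the commit message, and the real implementation lives in fs/btrfs/block-group.c in the diff below.

#include <stdint.h>
#include <stdio.h>

/* Illustrative bucketing: small, medium, and large allocations go to
 * different block groups, so freeing many short-lived small files does
 * not fragment block groups that also hold large, long-lived extents. */
enum size_class { SZ_CLASS_SMALL, SZ_CLASS_MEDIUM, SZ_CLASS_LARGE };

static enum size_class classify_extent_size(uint64_t size)
{
	if (size <= 128 * 1024)		/* up to 128K */
		return SZ_CLASS_SMALL;
	if (size <= 8 * 1024 * 1024)	/* up to 8M */
		return SZ_CLASS_MEDIUM;
	return SZ_CLASS_LARGE;		/* everything larger */
}

int main(void)
{
	const uint64_t samples[] = { 4096, 1048576, 1073741824ULL };

	for (int i = 0; i < 3; i++)
		printf("%llu bytes -> class %d\n",
		       (unsigned long long)samples[i],
		       (int)classify_extent_size(samples[i]));
	return 0;
}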
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Makefile                  |    6
-rw-r--r--  fs/btrfs/backref.c                 |   33
-rw-r--r--  fs/btrfs/bio.c                     |  557
-rw-r--r--  fs/btrfs/bio.h                     |   67
-rw-r--r--  fs/btrfs/block-group.c             |  273
-rw-r--r--  fs/btrfs/block-group.h             |   24
-rw-r--r--  fs/btrfs/btrfs_inode.h             |   22
-rw-r--r--  fs/btrfs/compression.c             |  276
-rw-r--r--  fs/btrfs/compression.h             |    3
-rw-r--r--  fs/btrfs/ctree.c                   |   62
-rw-r--r--  fs/btrfs/ctree.h                   |   15
-rw-r--r--  fs/btrfs/defrag.c                  |    4
-rw-r--r--  fs/btrfs/delayed-ref.c             |   24
-rw-r--r--  fs/btrfs/delayed-ref.h             |    2
-rw-r--r--  fs/btrfs/discard.c                 |   41
-rw-r--r--  fs/btrfs/disk-io.c                 |  225
-rw-r--r--  fs/btrfs/disk-io.h                 |   14
-rw-r--r--  fs/btrfs/extent-io-tree.c          |   10
-rw-r--r--  fs/btrfs/extent-io-tree.h          |    1
-rw-r--r--  fs/btrfs/extent-tree.c             |  181
-rw-r--r--  fs/btrfs/extent-tree.h             |   81
-rw-r--r--  fs/btrfs/extent_io.c               |  582
-rw-r--r--  fs/btrfs/extent_io.h               |   36
-rw-r--r--  fs/btrfs/file-item.c               |   72
-rw-r--r--  fs/btrfs/file-item.h               |    8
-rw-r--r--  fs/btrfs/file.c                    |    2
-rw-r--r--  fs/btrfs/free-space-tree.c         |    2
-rw-r--r--  fs/btrfs/fs.c                      |    4
-rw-r--r--  fs/btrfs/fs.h                      |   11
-rw-r--r--  fs/btrfs/inode.c                   |  641
-rw-r--r--  fs/btrfs/ioctl.c                   |    2
-rw-r--r--  fs/btrfs/lru_cache.c               |  166
-rw-r--r--  fs/btrfs/lru_cache.h               |   80
-rw-r--r--  fs/btrfs/lzo.c                     |    2
-rw-r--r--  fs/btrfs/messages.c                |   30
-rw-r--r--  fs/btrfs/messages.h                |   34
-rw-r--r--  fs/btrfs/ordered-data.c            |   25
-rw-r--r--  fs/btrfs/ordered-data.h            |    3
-rw-r--r--  fs/btrfs/qgroup.c                  |    2
-rw-r--r--  fs/btrfs/raid56.c                  |  334
-rw-r--r--  fs/btrfs/raid56.h                  |    4
-rw-r--r--  fs/btrfs/relocation.c              |    2
-rw-r--r--  fs/btrfs/scrub.c                   |   51
-rw-r--r--  fs/btrfs/send.c                    |  684
-rw-r--r--  fs/btrfs/super.c                   |    3
-rw-r--r--  fs/btrfs/sysfs.c                   |   41
-rw-r--r--  fs/btrfs/sysfs.h                   |    3
-rw-r--r--  fs/btrfs/tests/extent-map-tests.c  |    2
-rw-r--r--  fs/btrfs/transaction.c             |   34
-rw-r--r--  fs/btrfs/transaction.h             |   31
-rw-r--r--  fs/btrfs/tree-log.c                |   87
-rw-r--r--  fs/btrfs/tree-log.h                |    9
-rw-r--r--  fs/btrfs/volumes.c                 |  116
-rw-r--r--  fs/btrfs/volumes.h                 |   18
-rw-r--r--  fs/btrfs/zoned.c                   |  146
-rw-r--r--  fs/btrfs/zoned.h                   |   20
56 files changed, 2344 insertions, 2864 deletions
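A reader aid for the read-repair rework in fs/btrfs/bio.c below: when a sector fails its checksum, the new code retries the other mirrors in a ring via next_repair_mirror()/prev_repair_mirror(), stopping when it would arrive back at the mirror that failed. A standalone sketch of that wrap-around arithmetic, assuming (as in the hunk below) that mirrors are numbered 1..num_copies:

#include <stdio.h>

/* Mirrors are numbered 1..num_copies; stepping past the last one wraps
 * back to mirror 1, so each remaining copy is visited exactly once. */
static int next_mirror(int num_copies, int cur_mirror)
{
	if (cur_mirror == num_copies)
		return 1;
	return cur_mirror + 1;
}

int main(void)
{
	const int num_copies = 3;
	const int failed = 2;	/* the mirror whose read failed */

	/* Walk the ring until we would return to the failed mirror:
	 * with 3 copies and mirror 2 failed, this tries 3, then 1. */
	for (int m = next_mirror(num_copies, failed); m != failed;
	     m = next_mirror(num_copies, m))
		printf("retry read from mirror %d\n", m);
	return 0;
}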
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 555c962fdad6..90d53209755b 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -11,7 +11,8 @@ condflags := \
$(call cc-option, -Wunused-but-set-variable) \
$(call cc-option, -Wunused-const-variable) \
$(call cc-option, -Wpacked-not-aligned) \
- $(call cc-option, -Wstringop-truncation)
+ $(call cc-option, -Wstringop-truncation) \
+ $(call cc-option, -Wmaybe-uninitialized)
subdir-ccflags-y += $(condflags)
# The following turn off the warnings enabled by -Wextra
subdir-ccflags-y += -Wno-missing-field-initializers
@@ -31,7 +32,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
- subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o
+ subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
+ lru_cache.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 46851511b661..90e40d5ceccd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1252,8 +1252,12 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
struct btrfs_root *root,
u64 bytenr, int level, bool *is_shared)
{
+ const struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_backref_shared_cache_entry *entry;
+ if (!current->journal_info)
+ lockdep_assert_held(&fs_info->commit_root_sem);
+
if (!ctx->use_path_cache)
return false;
@@ -1288,7 +1292,7 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
* could be a snapshot sharing this extent buffer.
*/
if (entry->is_shared &&
- entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
+ entry->gen != btrfs_get_last_root_drop_gen(fs_info))
return false;
*is_shared = entry->is_shared;
@@ -1318,9 +1322,13 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx
struct btrfs_root *root,
u64 bytenr, int level, bool is_shared)
{
+ const struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
+ if (!current->journal_info)
+ lockdep_assert_held(&fs_info->commit_root_sem);
+
if (!ctx->use_path_cache)
return;
@@ -1336,7 +1344,7 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx
ASSERT(level >= 0);
if (is_shared)
- gen = btrfs_get_last_root_drop_gen(root->fs_info);
+ gen = btrfs_get_last_root_drop_gen(fs_info);
else
gen = btrfs_root_last_snapshot(&root->root_item);
@@ -1864,6 +1872,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
.have_delayed_delete_refs = false,
};
int level;
+ bool leaf_cached;
+ bool leaf_is_shared;
for (int i = 0; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) {
if (ctx->prev_extents_cache[i].bytenr == bytenr)
@@ -1885,6 +1895,23 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
walk_ctx.time_seq = elem.seq;
}
+ ctx->use_path_cache = true;
+
+ /*
+ * We may have previously determined that the current leaf is shared.
+ * If it is, then we have a data extent that is shared due to a shared
+ * subtree (caused by snapshotting) and we don't need to check for data
+ * backrefs. If the leaf is not shared, then we must do backref walking
+ * to determine if the data extent is shared through reflinks.
+ */
+ leaf_cached = lookup_backref_shared_cache(ctx, root,
+ ctx->curr_leaf_bytenr, 0,
+ &leaf_is_shared);
+ if (leaf_cached && leaf_is_shared) {
+ ret = 1;
+ goto out_trans;
+ }
+
walk_ctx.ignore_extent_item_pos = true;
walk_ctx.trans = trans;
walk_ctx.fs_info = fs_info;
@@ -1893,7 +1920,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
- ctx->use_path_cache = true;
while (1) {
bool is_shared;
bool cached;
@@ -1964,6 +1990,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
ctx->prev_extents_cache_slot = slot;
}
+out_trans:
if (trans) {
btrfs_put_tree_mod_seq(fs_info, &elem);
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 8affc88b0e0a..d8b90f95b157 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -14,19 +14,31 @@
#include "dev-replace.h"
#include "rcu-string.h"
#include "zoned.h"
+#include "file-item.h"
static struct bio_set btrfs_bioset;
+static struct bio_set btrfs_clone_bioset;
+static struct bio_set btrfs_repair_bioset;
+static mempool_t btrfs_failed_bio_pool;
+
+struct btrfs_failed_bio {
+ struct btrfs_bio *bbio;
+ int num_copies;
+ atomic_t repair_count;
+};
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-static inline void btrfs_bio_init(struct btrfs_bio *bbio,
- btrfs_bio_end_io_t end_io, void *private)
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,
+ btrfs_bio_end_io_t end_io, void *private)
{
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
+ bbio->inode = inode;
bbio->end_io = end_io;
bbio->private = private;
+ atomic_set(&bbio->pending_ios, 1);
}
/*
@@ -37,32 +49,235 @@ static inline void btrfs_bio_init(struct btrfs_bio *bbio,
* a mempool.
*/
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
+ struct btrfs_inode *inode,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
- btrfs_bio_init(btrfs_bio(bio), end_io, private);
+ btrfs_bio_init(btrfs_bio(bio), inode, end_io, private);
return bio;
}
-struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
- btrfs_bio_end_io_t end_io, void *private)
+static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
+ struct bio *orig, u64 map_length,
+ bool use_append)
{
+ struct btrfs_bio *orig_bbio = btrfs_bio(orig);
struct bio *bio;
- struct btrfs_bio *bbio;
- ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
+ if (use_append) {
+ unsigned int nr_segs;
+
+ bio = bio_split_rw(orig, &fs_info->limits, &nr_segs,
+ &btrfs_clone_bioset, map_length);
+ } else {
+ bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
+ &btrfs_clone_bioset);
+ }
+ btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
- bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
- bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, end_io, private);
+ btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
+ if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED))
+ orig_bbio->file_offset += map_length;
- bio_trim(bio, offset >> 9, size >> 9);
- bbio->iter = bio->bi_iter;
+ atomic_inc(&orig_bbio->pending_ios);
return bio;
}
+static void btrfs_orig_write_end_io(struct bio *bio);
+
+static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
+ struct btrfs_bio *orig_bbio)
+{
+ /*
+ * For writes we tolerate nr_mirrors - 1 write failures, so we can't
+ * just blindly propagate a write failure here. Instead increment the
+ * error count in the original I/O context so that it is guaranteed to
+ * be larger than the error tolerance.
+ */
+ if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
+ struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
+ struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
+
+ atomic_add(orig_bioc->max_errors, &orig_bioc->error);
+ } else {
+ orig_bbio->bio.bi_status = bbio->bio.bi_status;
+ }
+}
+
+static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
+{
+ if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
+ struct btrfs_bio *orig_bbio = bbio->private;
+
+ if (bbio->bio.bi_status)
+ btrfs_bbio_propagate_error(bbio, orig_bbio);
+ bio_put(&bbio->bio);
+ bbio = orig_bbio;
+ }
+
+ if (atomic_dec_and_test(&bbio->pending_ios))
+ bbio->end_io(bbio);
+}
+
+static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+{
+ if (cur_mirror == fbio->num_copies)
+ return cur_mirror + 1 - fbio->num_copies;
+ return cur_mirror + 1;
+}
+
+static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+{
+ if (cur_mirror == 1)
+ return fbio->num_copies;
+ return cur_mirror - 1;
+}
+
+static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
+{
+ if (atomic_dec_and_test(&fbio->repair_count)) {
+ btrfs_orig_bbio_end_io(fbio->bbio);
+ mempool_free(fbio, &btrfs_failed_bio_pool);
+ }
+}
+
+static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
+ struct btrfs_device *dev)
+{
+ struct btrfs_failed_bio *fbio = repair_bbio->private;
+ struct btrfs_inode *inode = repair_bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+ int mirror = repair_bbio->mirror_num;
+
+ if (repair_bbio->bio.bi_status ||
+ !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
+ bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
+ repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
+
+ mirror = next_repair_mirror(fbio, mirror);
+ if (mirror == fbio->bbio->mirror_num) {
+ btrfs_debug(fs_info, "no mirror left");
+ fbio->bbio->bio.bi_status = BLK_STS_IOERR;
+ goto done;
+ }
+
+ btrfs_submit_bio(&repair_bbio->bio, mirror);
+ return;
+ }
+
+ do {
+ mirror = prev_repair_mirror(fbio, mirror);
+ btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
+ repair_bbio->file_offset, fs_info->sectorsize,
+ repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
+ bv->bv_page, bv->bv_offset, mirror);
+ } while (mirror != fbio->bbio->mirror_num);
+
+done:
+ btrfs_repair_done(fbio);
+ bio_put(&repair_bbio->bio);
+}
+
+/*
+ * Try to kick off a repair read to the next available mirror for a bad sector.
+ *
+ * This primarily tries to recover good data to serve the actual read request,
+ * but also tries to write the good data back to the bad mirror(s) when a
+ * read succeeded to restore the redundancy.
+ */
+static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
+ u32 bio_offset,
+ struct bio_vec *bv,
+ struct btrfs_failed_bio *fbio)
+{
+ struct btrfs_inode *inode = failed_bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 sectorsize = fs_info->sectorsize;
+ const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
+ struct btrfs_bio *repair_bbio;
+ struct bio *repair_bio;
+ int num_copies;
+ int mirror;
+
+ btrfs_debug(fs_info, "repair read error: read error at %llu",
+ failed_bbio->file_offset + bio_offset);
+
+ num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
+ if (num_copies == 1) {
+ btrfs_debug(fs_info, "no copy to repair from");
+ failed_bbio->bio.bi_status = BLK_STS_IOERR;
+ return fbio;
+ }
+
+ if (!fbio) {
+ fbio = mempool_alloc(&btrfs_failed_bio_pool, GFP_NOFS);
+ fbio->bbio = failed_bbio;
+ fbio->num_copies = num_copies;
+ atomic_set(&fbio->repair_count, 1);
+ }
+
+ atomic_inc(&fbio->repair_count);
+
+ repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
+ &btrfs_repair_bioset);
+ repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
+ bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+
+ repair_bbio = btrfs_bio(repair_bio);
+ btrfs_bio_init(repair_bbio, failed_bbio->inode, NULL, fbio);
+ repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+
+ mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
+ btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
+ btrfs_submit_bio(repair_bio, mirror);
+ return fbio;
+}
+
+static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *dev)
+{
+ struct btrfs_inode *inode = bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u32 sectorsize = fs_info->sectorsize;
+ struct bvec_iter *iter = &bbio->saved_iter;
+ blk_status_t status = bbio->bio.bi_status;
+ struct btrfs_failed_bio *fbio = NULL;
+ u32 offset = 0;
+
+ /*
+ * Hand off repair bios to the repair code as there is no upper level
+ * submitter for them.
+ */
+ if (bbio->bio.bi_pool == &btrfs_repair_bioset) {
+ btrfs_end_repair_bio(bbio, dev);
+ return;
+ }
+
+ /* Clear the I/O error. A failed repair will reset it. */
+ bbio->bio.bi_status = BLK_STS_OK;
+
+ while (iter->bi_size) {
+ struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
+
+ bv.bv_len = min(bv.bv_len, sectorsize);
+ if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv))
+ fbio = repair_one_sector(bbio, offset, &bv, fbio);
+
+ bio_advance_iter_single(&bbio->bio, iter, sectorsize);
+ offset += sectorsize;
+ }
+
+ if (bbio->csum != bbio->csum_inline)
+ kfree(bbio->csum);
+
+ if (fbio)
+ btrfs_repair_done(fbio);
+ else
+ btrfs_orig_bbio_end_io(bbio);
+}
+
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
@@ -90,24 +305,31 @@ static void btrfs_end_bio_work(struct work_struct *work)
{
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
- bbio->end_io(bbio);
+ /* Metadata reads are checked and repaired by the submitter. */
+ if (bbio->bio.bi_opf & REQ_META)
+ bbio->end_io(bbio);
+ else
+ btrfs_check_read_bio(bbio, bbio->bio.bi_private);
}
static void btrfs_simple_end_io(struct bio *bio)
{
- struct btrfs_fs_info *fs_info = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_device *dev = bio->bi_private;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
- btrfs_log_dev_io_error(bio, bbio->device);
+ btrfs_log_dev_io_error(bio, dev);
if (bio_op(bio) == REQ_OP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- bbio->end_io(bbio);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ btrfs_record_physical_zoned(bbio);
+ btrfs_orig_bbio_end_io(bbio);
}
}
@@ -118,7 +340,10 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
- bbio->end_io(bbio);
+ if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
+ btrfs_check_read_bio(bbio, NULL);
+ else
+ btrfs_orig_bbio_end_io(bbio);
btrfs_put_bioc(bioc);
}
@@ -145,7 +370,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
else
bio->bi_status = BLK_STS_OK;
- bbio->end_io(bbio);
+ btrfs_orig_bbio_end_io(bbio);
btrfs_put_bioc(bioc);
}
@@ -181,16 +406,10 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
*/
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 zone_start = round_down(physical, dev->fs_info->zone_size);
- if (btrfs_dev_is_sequential(dev, physical)) {
- u64 zone_start = round_down(physical,
- dev->fs_info->zone_size);
-
- bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
- } else {
- bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
- bio->bi_opf |= REQ_OP_WRITE;
- }
+ ASSERT(btrfs_dev_is_sequential(dev, physical));
+ bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
}
btrfs_debug_in_rcu(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -224,41 +443,21 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
-void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
+static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
+ struct btrfs_io_stripe *smap, int mirror_num)
{
- u64 logical = bio->bi_iter.bi_sector << 9;
- u64 length = bio->bi_iter.bi_size;
- u64 map_length = length;
- struct btrfs_io_context *bioc = NULL;
- struct btrfs_io_stripe smap;
- int ret;
-
- btrfs_bio_counter_inc_blocked(fs_info);
- ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- &bioc, &smap, &mirror_num, 1);
- if (ret) {
- btrfs_bio_counter_dec(fs_info);
- btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
- return;
- }
-
- if (map_length < length) {
- btrfs_crit(fs_info,
- "mapping failed logical %llu bio len %llu len %llu",
- logical, length, map_length);
- BUG();
- }
+ /* Do not leak our private flag into the block layer. */
+ bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
if (!bioc) {
- /* Single mirror read/write fast path */
+ /* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
- btrfs_bio(bio)->device = smap.dev;
- bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
- bio->bi_private = fs_info;
+ bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+ bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
- btrfs_submit_dev_bio(smap.dev, bio);
<