From 0e47b25cafb29338722f68e8c5a260aaf18ce92c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Dec 2022 15:15:52 -0500 Subject: btrfs: fix uninitialized variable warning in btrfs_cleanup_ordered_extents We can conditionally pass in a locked page, and then we'll use that page range to skip marking errors as that will happen in another layer. However this causes the compiler to complain because it doesn't understand we only use these values when we have the page. Make the compiler stop complaining by setting these values to 0. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 98a800b8bd43..77c2acc06891 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -228,7 +228,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, { unsigned long index = offset >> PAGE_SHIFT; unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT; - u64 page_start, page_end; + u64 page_start = 0, page_end = 0; struct page *page; if (locked_page) { -- cgit v1.2.3 From d31de3785047a24959eda835b0bafb1f8629f8a9 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Mon, 9 Jan 2023 21:08:31 +0100 Subject: btrfs: go to matching label when cleaning em in btrfs_submit_direct When btrfs_get_chunk_map fails to allocate a new em the cleanup does not need to be done so the goto target is out_err, which is consistent with current coding style. 
Signed-off-by: Peng Hao Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 77c2acc06891..7fa1db6a474a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8080,7 +8080,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, if (IS_ERR(em)) { status = errno_to_blk_status(PTR_ERR(em)); em = NULL; - goto out_err_em; + goto out_err; } ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio), logical, &geom); -- cgit v1.2.3 From ce394a7f39032bc2a85b070af608e3ae8b2cefda Mon Sep 17 00:00:00 2001 From: Yushan Zhou Date: Tue, 3 Jan 2023 13:11:37 +0800 Subject: btrfs: use PAGE_{ALIGN, ALIGNED, ALIGN_DOWN} macro The header file linux/mm.h provides PAGE_ALIGN, PAGE_ALIGNED, PAGE_ALIGN_DOWN macros. Use these macros to make code more concise. Signed-off-by: Yushan Zhou Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7fa1db6a474a..49a2e118f561 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10995,9 +10995,8 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, return 0; max_pages = sis->max - bsi->nr_pages; - first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; - next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, - PAGE_SIZE) >> PAGE_SHIFT; + first_ppage = PAGE_ALIGN(bsi->block_start) >> PAGE_SHIFT; + next_ppage = PAGE_ALIGN_DOWN(bsi->block_start + bsi->block_len) >> PAGE_SHIFT; if (first_ppage >= next_ppage) return 0; -- cgit v1.2.3 From 36d4556745fe60e0e3c8d9933c2610b1c641b2f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 08:12:43 +0100 Subject: btrfs: remove the wait argument to btrfs_start_ordered_extent Given that wait is always set to 1, so 
remove the argument. Last use of wait with 0 was in 0c304304feab ("Btrfs: remove csum_bytes_left"). Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 49a2e118f561..3c49742f0d45 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2969,7 +2969,7 @@ again: unlock_extent(&inode->io_tree, page_start, page_end, &cached_state); unlock_page(page); - btrfs_start_ordered_extent(ordered, 1); + btrfs_start_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); goto again; } @@ -4987,7 +4987,7 @@ again: unlock_extent(io_tree, block_start, block_end, &cached_state); unlock_page(page); put_page(page); - btrfs_start_ordered_extent(ordered, 1); + btrfs_start_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); goto again; } @@ -7392,7 +7392,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, */ if (writing || test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) - btrfs_start_ordered_extent(ordered, 1); + btrfs_start_ordered_extent(ordered); else ret = nowait ? -EAGAIN : -ENOTBLK; btrfs_put_ordered_extent(ordered); @@ -8552,7 +8552,7 @@ again: unlock_extent(io_tree, page_start, page_end, &cached_state); unlock_page(page); up_read(&BTRFS_I(inode)->i_mmap_lock); - btrfs_start_ordered_extent(ordered, 1); + btrfs_start_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); goto again; } -- cgit v1.2.3 From d0e5cb2be7703172d98699275d722c4081241144 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:00 +0100 Subject: btrfs: add a btrfs_inode pointer to struct btrfs_bio All btrfs_bio I/Os are associated with an inode. Add a pointer to that inode, which will allow to simplify a lot of calling conventions, and which will be needed in the I/O completion path in the future. 
This grows the btrfs_bio structure by a pointer, but that growth will be offset by the removal of the device pointer soon. Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c49742f0d45..0a85e42f114c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8097,7 +8097,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, * the allocation is backed by btrfs_bioset. */ bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len, - btrfs_end_dio_bio, dip); + BTRFS_I(inode), btrfs_end_dio_bio, + dip); btrfs_bio(bio)->file_offset = file_offset; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { @@ -10409,6 +10410,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, if (!bio) { bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, + inode, btrfs_encoded_read_endio, &priv); bio->bi_iter.bi_sector = -- cgit v1.2.3 From 5fa356531e33e7c7783ccd0d7938a070b5df8c22 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:01 +0100 Subject: btrfs: remove the direct I/O read checksum lookup optimization To prepare for pending changes drop the optimization to only look up csums once per bio that is submitted from the iomap layer. In the short run this does cause additional lookups for fragmented direct reads, but later in the series, the bio based lookup will be used on the entire bio submitted from iomap, restoring the old behavior in common code. 
Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a85e42f114c..863a5527853c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -100,9 +100,6 @@ struct btrfs_dio_private { */ refcount_t refs; - /* Array of checksums */ - u8 *csums; - /* This must be last */ struct bio bio; }; @@ -7907,7 +7904,6 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip) dip->file_offset + dip->bytes - 1, NULL); } - kfree(dip->csums); bio_endio(&dip->bio); } @@ -7990,7 +7986,6 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, u64 file_offset, int async_submit) { struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_dio_private *dip = btrfs_bio(bio)->private; blk_status_t ret; /* Save the original iter for read repair */ @@ -8017,8 +8012,11 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, return; } } else { - btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums, - file_offset - dip->file_offset); + ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL); + if (ret) { + btrfs_bio_end_io(btrfs_bio(bio), ret); + return; + } } map: btrfs_submit_bio(fs_info, bio, 0); @@ -8030,7 +8028,6 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, struct btrfs_dio_private *dip = container_of(dio_bio, struct btrfs_dio_private, bio); struct inode *inode = iter->inode; - const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const bool raid56 = (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK); @@ -8051,25 +8048,6 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, dip->file_offset = file_offset; dip->bytes = 
dio_bio->bi_iter.bi_size; refcount_set(&dip->refs, 1); - dip->csums = NULL; - - if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { - unsigned int nr_sectors = - (dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits); - - /* - * Load the csums up front to reduce csum tree searches and - * contention when submitting bios. - */ - status = BLK_STS_RESOURCE; - dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS); - if (!dip->csums) - goto out_err; - - status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums); - if (status != BLK_STS_OK) - goto out_err; - } start_sector = dio_bio->bi_iter.bi_sector; submit_len = dio_bio->bi_iter.bi_size; -- cgit v1.2.3 From 4ae2edf12d49fdbaea2dfda0bb2ec06501bd3493 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:02 +0100 Subject: btrfs: simplify parameters of btrfs_lookup_bio_sums The csums argument is always NULL now, so remove it and always allocate the csums array in the btrfs_bio. Also pass the btrfs_bio instead of inode + bio to document that this function requires a btrfs_bio and not just any bio. Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 863a5527853c..7c8f5349ed7a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2780,7 +2780,7 @@ void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, * Lookup bio sums does extra checks around whether we need to csum or * not, which is why we ignore skip_sum here. 
*/ - ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL); + ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -8012,7 +8012,7 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, return; } } else { - ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL); + ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -10279,7 +10279,7 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, blk_status_t ret; if (!priv->skip_csum) { - ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL); + ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); if (ret) return ret; } -- cgit v1.2.3 From 7276aa7d38255b40e578267c3634ebc05f5d5236 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:04 +0100 Subject: btrfs: save the bio iter for checksum validation in common code All callers of btrfs_submit_bio that want to validate checksums currently have to store a copy of the iter in the btrfs_bio. Move the assignment into common code. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c8f5349ed7a..c368a45bc079 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2773,9 +2773,6 @@ void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, return; } - /* Save the original iter for read repair */ - btrfs_bio(bio)->iter = bio->bi_iter; - /* * Lookup bio sums does extra checks around whether we need to csum or * not, which is why we ignore skip_sum here. 
@@ -7988,10 +7985,6 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, struct btrfs_fs_info *fs_info = inode->root->fs_info; blk_status_t ret; - /* Save the original iter for read repair */ - if (btrfs_op(bio) == BTRFS_MAP_READ) - btrfs_bio(bio)->iter = bio->bi_iter; - if (inode->flags & BTRFS_INODE_NODATASUM) goto map; -- cgit v1.2.3 From 1c2b3ee3b0ec4bc971e23fe18d4c92333a6ad18a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:05 +0100 Subject: btrfs: pre-load data checksum for reads in btrfs_submit_bio Instead of calling btrfs_lookup_bio_sums in every caller of btrfs_submit_bio that reads data, do the call once in btrfs_submit_bio. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c368a45bc079..598897b0d661 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2762,7 +2762,6 @@ void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, enum btrfs_compression_type compress_type) { struct btrfs_fs_info *fs_info = inode->root->fs_info; - blk_status_t ret; if (compress_type != BTRFS_COMPRESS_NONE) { /* @@ -2773,16 +2772,6 @@ void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, return; } - /* - * Lookup bio sums does extra checks around whether we need to csum or - * not, which is why we ignore skip_sum here. 
- */ - ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); - if (ret) { - btrfs_bio_end_io(btrfs_bio(bio), ret); - return; - } - btrfs_submit_bio(fs_info, bio, mirror_num); } @@ -8004,12 +7993,6 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, btrfs_bio_end_io(btrfs_bio(bio), ret); return; } - } else { - ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); - if (ret) { - btrfs_bio_end_io(btrfs_bio(bio), ret); - return; - } } map: btrfs_submit_bio(fs_info, bio, 0); @@ -10269,13 +10252,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, { struct btrfs_encoded_read_private *priv = btrfs_bio(bio)->private; struct btrfs_fs_info *fs_info = inode->root->fs_info; - blk_status_t ret; - - if (!priv->skip_csum) { - ret = btrfs_lookup_bio_sums(btrfs_bio(bio)); - if (ret) - return ret; - } atomic_inc(&priv->pending); btrfs_submit_bio(fs_info, bio, mirror_num); -- cgit v1.2.3 From e52190441bd6b268aed6ecc0efe3614c4222014e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:06 +0100 Subject: btrfs: add a btrfs_data_csum_ok helper Add a new checksumming helper that wraps btrfs_check_data_csum and does all the checks to if we're dealing with some form of nodatacsum I/O. This helper will be used by the new storage layer checksum validation and repair code. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 598897b0d661..ba90f90e3d87 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3495,6 +3495,44 @@ zeroit: return -EIO; } +/* + * Verify the checksum of a single data sector. 
+ * + * @bbio: btrfs_io_bio which contains the csum + * @dev: device the sector is on + * @bio_offset: offset to the beginning of the bio (in bytes) + * @bv: bio_vec to check + * + * Check if the checksum on a data block is valid. When a checksum mismatch is + * detected, report the error and fill the corrupted range with zero. + * + * Return %true if the sector is ok or had no checksum to start with, else %false. + */ +bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, + u32 bio_offset, struct bio_vec *bv) +{ + struct btrfs_inode *inode = bbio->inode; + u64 file_offset = bbio->file_offset + bio_offset; + u64 end = file_offset + bv->bv_len - 1; + + if (!bbio->csum) + return true; + + if (btrfs_is_data_reloc_root(inode->root) && + test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM, + 1, NULL)) { + /* Skip the range without csum for data reloc inode */ + clear_extent_bits(&inode->io_tree, file_offset, end, + EXTENT_NODATASUM); + return true; + } + + if (btrfs_check_data_csum(inode, bbio, bio_offset, bv->bv_page, + bv->bv_offset) < 0) + return false; + return true; +} + /* * When reads are done, we need to check csums to verify the data is correct. * if there's a match, we allow the bio to finish. If not, the code in -- cgit v1.2.3 From 7609afac677546b225d8327d726cc558d3666496 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:07 +0100 Subject: btrfs: handle checksum validation and repair at the storage layer Currently btrfs handles checksum validation and repair in the end I/O handler for the btrfs_bio. This leads to a lot of duplicate code plus issues with varying semantics or bugs, e.g. 
- the until recently broken repair for compressed extents - the fact that encoded reads validate the checksums but do not kick off read repair - the inconsistent checking of the BTRFS_FS_STATE_NO_CSUMS flag This commit revamps the checksum validation and repair code to instead work below the btrfs_submit_bio interfaces. In case of a checksum failure (or a plain old I/O error), the repair is now kicked off before the upper level ->end_io handler is invoked. Progress of an in-progress repair is tracked by a small structure that is allocated using a mempool for each original bio with failed sectors, which holds a reference to the original bio. This new structure is allocated using a mempool to guarantee forward progress even under memory pressure. The mempool will be replenished when the repair completes, just as the mempools backing the bios. There is one significant behavior change here: If repair fails or is impossible to start with, the whole bio will be failed to the upper layer. This is the behavior that all I/O submitters except for buffered I/O already emulated in their end_io handler. For buffered I/O this now means that a large readahead request can fail due to a single bad sector, but as readahead errors are ignored the following readpage if the sector is actually accessed will still be able to read. This also matches the I/O failure handling in other file systems. 
Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 81 +++----------------------------------------------------- 1 file changed, 4 insertions(+), 77 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ba90f90e3d87..237513508fcc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7941,39 +7941,6 @@ void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int btrfs_submit_bio(inode->root->fs_info, bio, mirror_num); } -static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, - struct btrfs_bio *bbio, - const bool uptodate) -{ - struct inode *inode = &dip->inode->vfs_inode; - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; - const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); - blk_status_t err = BLK_STS_OK; - struct bvec_iter iter; - struct bio_vec bv; - u32 offset; - - btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) { - u64 start = bbio->file_offset + offset; - - if (uptodate && - (!csum || !btrfs_check_data_csum(BTRFS_I(inode), bbio, offset, - bv.bv_page, bv.bv_offset))) { - btrfs_clean_io_failure(BTRFS_I(inode), start, - bv.bv_page, bv.bv_offset); - } else { - int ret; - - ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset, - bv.bv_page, bv.bv_offset, false); - if (ret) - err = errno_to_blk_status(ret); - } - } - - return err; -} - blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode, struct bio *bio, u64 dio_file_offset) @@ -7987,18 +7954,14 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio) struct bio *bio = &bbio->bio; blk_status_t err = bio->bi_status; - if (err) + if (err) { btrfs_warn(dip->inode->root->fs_info, "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); - - if (bio_op(bio) == 
REQ_OP_READ) - err = btrfs_check_read_dio_bio(dip, bbio, !err); - - if (err) dip->bio.bi_status = err; + } btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio); @@ -10282,7 +10245,6 @@ struct btrfs_encoded_read_private { wait_queue_head_t wait; atomic_t pending; blk_status_t status; - bool skip_csum; }; static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, @@ -10296,44 +10258,11 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, return BLK_STS_OK; } -static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio) -{ - const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK); - struct btrfs_encoded_read_private *priv = bbio->private; - struct btrfs_inode *inode = priv->inode; - struct btrfs_fs_info *fs_info = inode->root->fs_info; - u32 sectorsize = fs_info->sectorsize; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - u32 bio_offset = 0; - - if (priv->skip_csum || !uptodate) - return bbio->bio.bi_status; - - bio_for_each_segment_all(bvec, &bbio->bio, iter_all) { - unsigned int i, nr_sectors, pgoff; - - nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len); - pgoff = bvec->bv_offset; - for (i = 0; i < nr_sectors; i++) { - ASSERT(pgoff < PAGE_SIZE); - if (btrfs_check_data_csum(inode, bbio, bio_offset, - bvec->bv_page, pgoff)) - return BLK_STS_IOERR; - bio_offset += sectorsize; - pgoff += sectorsize; - } - } - return BLK_STS_OK; -} - static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) { struct btrfs_encoded_read_private *priv = bbio->private; - blk_status_t status; - status = btrfs_encoded_read_verify_csum(bbio); - if (status) { + if (bbio->bio.bi_status) { /* * The memory barrier implied by the atomic_dec_return() here * pairs with the memory barrier implied by the @@ -10342,11 +10271,10 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) * write is observed before the load of status in * btrfs_encoded_read_regular_fill_pages(). 
*/ - WRITE_ONCE(priv->status, status); + WRITE_ONCE(priv->status, bbio->bio.bi_status); } if (!atomic_dec_return(&priv->pending)) wake_up(&priv->wait); - btrfs_bio_free_csum(bbio); bio_put(&bbio->bio); } @@ -10359,7 +10287,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, .inode = inode, .file_offset = file_offset, .pending = ATOMIC_INIT(1), - .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM), }; unsigned long i = 0; u64 cur = 0; -- cgit v1.2.3 From 3d49d0d31237d11268959c7873c56aab166be07b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:10 +0100 Subject: btrfs: remove now unused checksumming helpers Remove the unused btrfs_verify_data_csum helper, and fold btrfs_check_data_csum into its only caller. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 124 +++++++------------------------------------------------ 1 file changed, 16 insertions(+), 108 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 237513508fcc..9987d16626f7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3456,45 +3456,6 @@ static u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums, u64 of return csums + offset_in_sectors * fs_info->csum_size; } -/* - * check_data_csum - verify checksum of one sector of uncompressed data - * @inode: inode - * @bbio: btrfs_bio which contains the csum - * @bio_offset: offset to the beginning of the bio (in bytes) - * @page: page where is the data to be verified - * @pgoff: offset inside the page - * - * The length of such check is always one sector size. - * - * When csum mismatch is detected, we will also report the error and fill the - * corrupted range with zero. 
(Thus it needs the extra parameters) - */ -int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio, - u32 bio_offset, struct page *page, u32 pgoff) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - u32 len = fs_info->sectorsize; - u8 *csum_expected; - u8 csum[BTRFS_CSUM_SIZE]; - - ASSERT(pgoff + len <= PAGE_SIZE); - - csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset); - - if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected)) - goto zeroit; - return 0; - -zeroit: - btrfs_print_data_csum_error(inode, bbio->file_offset + bio_offset, - csum, csum_expected, bbio->mirror_num); - if (bbio->device) - btrfs_dev_stat_inc_and_print(bbio->device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - memzero_page(page, pgoff, len); - return -EIO; -} - /* * Verify the checksum of a single data sector. * @@ -3512,8 +3473,13 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, u32 bio_offset, struct bio_vec *bv) { struct btrfs_inode *inode = bbio->inode; + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 file_offset = bbio->file_offset + bio_offset; u64 end = file_offset + bv->bv_len - 1; + u8 *csum_expected; + u8 csum[BTRFS_CSUM_SIZE]; + + ASSERT(bv->bv_len == fs_info->sectorsize); if (!bbio->csum) return true; @@ -3527,77 +3493,19 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, return true; } - if (btrfs_check_data_csum(inode, bbio, bio_offset, bv->bv_page, - bv->bv_offset) < 0) - return false; + csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset); + if (btrfs_check_sector_csum(fs_info, bv->bv_page, bv->bv_offset, csum, + csum_expected)) + goto zeroit; return true; -} - -/* - * When reads are done, we need to check csums to verify the data is correct. - * if there's a match, we allow the bio to finish. If not, the code in - * extent_io.c will try to find good copies for us. 
- * - * @bio_offset: offset to the beginning of the bio (in bytes) - * @start: file offset of the range start - * @end: file offset of the range end (inclusive) - * - * Return a bitmap where bit set means a csum mismatch, and bit not set means - * csum match. - */ -unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, - u32 bio_offset, struct page *page, - u64 start, u64 end) -{ - struct btrfs_inode *inode = BTRFS_I(page->mapping->host); - struct btrfs_root *root = inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; - struct extent_io_tree *io_tree = &inode->io_tree; - const u32 sectorsize = root->fs_info->sectorsize; - u32 pg_off; - unsigned int result = 0; - - /* - * This only happens for NODATASUM or compressed read. - * Normally this should be covered by above check for compressed read - * or the next check for NODATASUM. Just do a quicker exit here. - */ - if (bbio->csum == NULL) - return 0; - - if (inode->flags & BTRFS_INODE_NODATASUM) - return 0; - - if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))) - return 0; - - ASSERT(page_offset(page) <= start && - end <= page_offset(page) + PAGE_SIZE - 1); - for (pg_off = offset_in_page(start); - pg_off < offset_in_page(end); - pg_off += sectorsize, bio_offset += sectorsize) { - u64 file_offset = pg_off + page_offset(page); - int ret; - - if (btrfs_is_data_reloc_root(root) && - test_range_bit(io_tree, file_offset, - file_offset + sectorsize - 1, - EXTENT_NODATASUM, 1, NULL)) { - /* Skip the range without csum for data reloc inode */ - clear_extent_bits(io_tree, file_offset, - file_offset + sectorsize - 1, - EXTENT_NODATASUM); - continue; - } - ret = btrfs_check_data_csum(inode, bbio, bio_offset, page, pg_off); - if (ret < 0) { - const int nr_bit = (pg_off - offset_in_page(start)) >> - root->fs_info->sectorsize_bits; - result |= (1U << nr_bit); - } - } - return result; +zeroit: + btrfs_print_data_csum_error(inode, file_offset, csum, csum_expected, + bbio->mirror_num); + if (dev) + 
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); + memzero_bvec(bv); + return false; } /* -- cgit v1.2.3 From 0571b6357c5e414cd5db8e03150074a5ca1c5c12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:12 +0100 Subject: btrfs: remove the io_failure_record infrastructure struct io_failure_record and the io_failure_tree tree are unused now, so remove them. This in turn makes struct btrfs_inode smaller by 16 bytes. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9987d16626f7..8e1d61b731ed 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3249,8 +3249,6 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->disk_num_bytes); } - btrfs_free_io_failure_record(inode, start, end); - if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; logical_len = ordered_extent->truncated_len; @@ -5395,8 +5393,6 @@ void btrfs_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; - btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1); - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) goto no_delete; @@ -7839,16 +7835,6 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip) bio_endio(&dip->bio); } -void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) -{ - struct btrfs_dio_private *dip = btrfs_bio(bio)->private; - - BUG_ON(bio_op(bio) == REQ_OP_WRITE); - - refcount_inc(&dip->refs); - btrfs_submit_bio(inode->root->fs_info, bio, mirror_num); -} - blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode, struct bio *bio, u64 dio_file_offset) @@ -8714,7 +8700,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_log_commit = 0; 
spin_lock_init(&ei->lock); - spin_lock_init(&ei->io_failure_lock); ei->outstanding_extents = 0; if (sb->s_magic != BTRFS_TEST_MAGIC) btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv, @@ -8734,7 +8719,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->io_tree.inode = ei; extent_io_tree_init(fs_info, &ei->file_extent_tree, IO_TREE_INODE_FILE_EXTENT); - ei->io_failure_tree = RB_ROOT; atomic_set(&ei->sync_writers, 0); mutex_init(&ei->log_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); -- cgit v1.2.3 From deb6216fa0b6b66304fc81e19b509af1b8203f98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:15 +0100 Subject: btrfs: open code the submit_bio_start helpers The submit helpers are now trivial and can be called directly. Note that btree_csum_one_bio has to be moved up in the file a bit to avoid a forward declaration. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e1d61b731ed..cdb0f8cb0d4f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2532,19 +2532,6 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, } } -/* - * in order to insert checksums into the metadata in large chunks, - * we wait until bio submission time. All the pages in the bio are - * checksummed and sums are attached onto the ordered extent record. 
- * - * At IO completion time the cums attached on the ordered extent record - * are inserted into the btree - */ -blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio) -{ - return btrfs_csum_one_bio(inode, bio, (u64)-1, false); -} - /* * Split an extent_map at [start, start + len] * @@ -7835,13 +7822,6 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip) bio_endio(&dip->bio); } -blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode, - struct bio *bio, - u64 dio_file_offset) -{ - return btrfs_csum_one_bio(inode, bio, dio_file_offset, false); -} - static void btrfs_end_dio_bio(struct btrfs_bio *bbio) { struct btrfs_dio_private *dip = bbio->private; -- cgit v1.2.3 From f8c44673e5a5f5131773d4a6974fb8ea4db033f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:16 +0100 Subject: btrfs: simplify the btrfs_csum_one_bio calling convention To prepare for further bio submission changes btrfs_csum_one_bio should be able to take all its arguments from the btrfs_bio structure. It can always use the bbio->inode already, and once the compression code is updated to set ->file_offset that one can be used unconditionally as well instead of looking at the page mapping now that btrfs doesn't allow ordered extents to span discontiguous data ranges. The only slightly tricky bit is the one_ordered flag set by the compressed writes. Replace that one with the driver private bio flag, which gets cleared before the bio is handed off to the block layer so that we don't get in the way of driver use. Note: this leaves an argument and a flag to btrfs_wq_submit_bio unused. But that whole mechanism will be removed in its current form in the next patch.
Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cdb0f8cb0d4f..f542d539b831 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2736,7 +2736,7 @@ void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA)) return; - ret = btrfs_csum_one_bio(inode, bio, (u64)-1, false); + ret = btrfs_csum_one_bio(btrfs_bio(bio)); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; @@ -7863,7 +7863,7 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, * If we aren't doing async submit, calculate the csum of the * bio now. */ - ret = btrfs_csum_one_bio(inode, bio, file_offset, false); + ret = btrfs_csum_one_bio(btrfs_bio(bio)); if (ret) { btrfs_bio_end_io(btrfs_bio(bio), ret); return; -- cgit v1.2.3 From f8a53bb58ec7e2150f9b03f210675ba3e6d8b919 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:17 +0100 Subject: btrfs: handle checksum generation in the storage layer Instead of letting the callers of btrfs_submit_bio deal with checksumming the (meta)data in the bio and making decisions on when to offload the checksumming to the bio, leave that to btrfs_submit_bio. To do so, the existing btrfs_submit_bio function is split into an upper and a lower half, so that the lower half can be offloaded to a workqueue. Note that this changes the behavior for direct writes to raid56 volumes so that async checksum offloading is not skipped when more I/O is expected. This runs counter to the argument explaining why it was done, although I can't measure any effects of the change.
Commits later in this series will make sure the entire direct write is offloaded to the workqueue at once and thus make sure it is sent to the raid56 code from a single thread. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 67 ++------------------------------------------------------ 1 file changed, 2 insertions(+), 65 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f542d539b831..b9cd088ded82 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2721,27 +2721,6 @@ void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int } } - /* - * If we need to checksum, and the I/O is not issued by fsync and - * friends, that is ->sync_writers != 0, defer the submission to a - * workqueue to parallelize it. - * - * Csum items for reloc roots have already been cloned at this point, - * so they are handled as part of the no-checksum case.
- */ - if (!(inode->flags & BTRFS_INODE_NODATASUM) && - !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) && - !btrfs_is_data_reloc_root(inode->root)) { - if (!atomic_read(&inode->sync_writers) && - btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA)) - return; - - ret = btrfs_csum_one_bio(btrfs_bio(bio)); - if (ret) { - btrfs_bio_end_io(btrfs_bio(bio), ret); - return; - } - } btrfs_submit_bio(fs_info, bio, mirror_num); } @@ -7843,36 +7822,6 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio) btrfs_dio_private_put(dip); } -static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode, - u64 file_offset, int async_submit) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - blk_status_t ret; - - if (inode->flags & BTRFS_INODE_NODATASUM) - goto map; - - if (btrfs_op(bio) == BTRFS_MAP_WRITE) { - /* Check btrfs_submit_data_write_bio() for async submit rules */ - if (async_submit && !atomic_read(&inode->sync_writers) && - btrfs_wq_submit_bio(inode, bio, 0, file_offset, - WQ_SUBMIT_DATA_DIO)) - return; - - /* - * If we aren't doing async submit, calculate the csum of the - * bio now. 
- */ - ret = btrfs_csum_one_bio(btrfs_bio(bio)); - if (ret) { - btrfs_bio_end_io(btrfs_bio(bio), ret); - return; - } - } -map: - btrfs_submit_bio(fs_info, bio, 0); -} - static void btrfs_submit_direct(const struct iomap_iter *iter, struct bio *dio_bio, loff_t file_offset) { @@ -7880,11 +7829,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, container_of(dio_bio, struct btrfs_dio_private, bio); struct inode *inode = iter->inode; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - const bool raid56 = (btrfs_data_alloc_profile(fs_info) & - BTRFS_BLOCK_GROUP_RAID56_MASK); struct bio *bio; u64 start_sector; - int async_submit = 0; u64 submit_len; u64 clone_offset = 0; u64 clone_len; @@ -7951,19 +7897,10 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, * We transfer the initial reference to the last bio, so we * don't need to increment the reference count for the last one. */ - if (submit_len > 0) { + if (submit_len > 0) refcount_inc(&dip->refs); - /* - * If we are submitting more than one bio, submit them - * all asynchronously. The exception is RAID 5 or 6, as - * asynchronous checksums make it difficult to collect - * full stripe writes. - */ - if (!raid56) - async_submit = 1; - } - btrfs_submit_dio_bio(bio, BTRFS_I(inode), file_offset, async_submit); + btrfs_submit_bio(fs_info, bio, 0); dio_data->submitted += clone_len; clone_offset += clone_len; -- cgit v1.2.3 From 69ccf3f4244abc5f6d73ca5d8caf6b42a1db42c6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:18 +0100 Subject: btrfs: handle recording of zoned writes in the storage layer Move the code that splits the ordered extents and records the physical location for them to the storage layer so that the higher level consumers don't have to care about physical block numbers at all. This will also allow to eventually remove accounting for the zone append write sizes in the upper layer with a little bit more block layer work. 
Reviewed-by: Naohiro Aota Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b9cd088ded82..90e3fd7e10ea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2647,19 +2647,19 @@ out: return ret; } -static blk_status_t extract_ordered_extent(struct btrfs_inode *inode, - struct bio *bio, loff_t file_offset) +blk_status_t btrfs_extract_ordered_extent(struct btrfs_bio *bbio) { + u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; + u64 len = bbio->bio.bi_iter.bi_size; + struct btrfs_inode *inode = bbio->inode; struct btrfs_ordered_extent *ordered; - u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT; u64 file_len; - u64 len = bio->bi_iter.bi_size; u64 end = start + len; u64 ordered_end; u64 pre, post; int ret = 0; - ordered = btrfs_lookup_ordered_extent(inode, file_offset); + ordered = btrfs_lookup_ordered_extent(inode, bbio->file_offset); if (WARN_ON_ONCE(!ordered)) return BLK_STS_IOERR; @@ -2699,7 +2699,7 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode, ret = btrfs_split_ordered_extent(ordered, pre, post); if (ret) goto out; - ret = split_zoned_em(inode, file_offset, file_len, pre, post); + ret = split_zoned_em(inode, bbio->file_offset, file_len, pre, post); out: btrfs_put_ordered_extent(ordered); @@ -2709,19 +2709,7 @@ out: void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; - blk_status_t ret; - - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - ret = extract_ordered_extent(inode, bio, - page_offset(bio_first_bvec_all(bio)->bv_page)); - if (ret) { - btrfs_bio_end_io(btrfs_bio(bio), ret); - return; - } - } - - btrfs_submit_bio(fs_info, bio, mirror_num); + 
btrfs_submit_bio(inode->root->fs_info, bio, mirror_num); } void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, @@ -7816,8 +7804,6 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio) dip->bio.bi_status = err; } - btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio); - bio_put(bio); btrfs_dio_private_put(dip); } @@ -7876,15 +7862,6 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, dip); btrfs_bio(bio)->file_offset = file_offset; - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - status = extract_ordered_extent(BTRFS_I(inode), bio, - file_offset); - if (status) { - bio_put(bio); - goto out_err; - } - } - ASSERT(submit_len >= clone_len); submit_len -= clone_len; -- cgit v1.2.3 From 67d66982509043962cf15457051e1b840578a323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:21 +0100 Subject: btrfs: pass the iomap bio to btrfs_submit_bio Now that btrfs_submit_bio splits the bio when crossing stripe boundaries, there is no need for the higher level code to do that manually. For direct I/O this is really helpful, as btrfs_dio_submit_io can now simply take the bio allocated by iomap and send it on to btrfs_submit_bio instead of allocating clones. For that to work, the bio embedded into struct btrfs_dio_private needs to become a full btrfs_bio as expected by btrfs_submit_bio. With this change there is a single work item to offload the entire iomap bio so the heuristics to skip async processing for bios that were split aren't needed anymore either.
Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 161 +++++++++++-------------------------------------------- 1 file changed, 32 insertions(+), 129 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90e3fd7e10ea..4ac9b34ad377 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -84,24 +84,12 @@ struct btrfs_dio_data { }; struct btrfs_dio_private { - struct btrfs_inode *inode; - - /* - * Since DIO can use anonymous page, we cannot use page_offset() to - * grab the file offset, thus need a dedicated member for file offset. - */ + /* Range of I/O */ u64 file_offset; - /* Used for bio::bi_size */ u32 bytes; - /* - * References to this structure. There is one reference per in-flight - * bio plus one while we're still setting up. - */ - refcount_t refs; - /* This must be last */ - struct bio bio; + struct btrfs_bio bbio; }; static struct bio_set btrfs_dio_bioset; @@ -7767,132 +7755,47 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, return ret; } -static void btrfs_dio_private_put(struct btrfs_dio_private *dip) +static void btrfs_dio_end_io(struct btrfs_bio *bbio) { - /* - * This implies a barrier so that stores to dio_bio->bi_status before - * this and loads of dio_bio->bi_status after this are fully ordered. 
- */ - if (!refcount_dec_and_test(&dip->refs)) - return; - - if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) { - btrfs_mark_ordered_io_finished(dip->inode, NULL, - dip->file_offset, dip->bytes, - !dip->bio.bi_status); - } else { - unlock_extent(&dip->inode->io_tree, - dip->file_offset, - dip->file_offset + dip->bytes - 1, NULL); - } - - bio_endio(&dip->bio); -} - -static void btrfs_end_dio_bio(struct btrfs_bio *bbio) -{ - struct btrfs_dio_private *dip = bbio->private; + struct btrfs_dio_private *dip = + container_of(bbio, struct btrfs_dio_private, bbio); + struct btrfs_inode *inode = bbio->inode; struct bio *bio = &bbio->bio; - blk_status_t err = bio->bi_status; - if (err) { - btrfs_warn(dip->inode->root->fs_info, - "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", - btrfs_ino(dip->inode), bio_op(bio), - bio->bi_opf, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, err); - dip->bio.bi_status = err; + if (bio->bi_status) { + btrfs_warn(inode->root->fs_info, + "direct IO failed ino %llu op 0x%0x offset %#llx len %u err no %d", + btrfs_ino(inode), bio->bi_opf, + dip->file_offset, dip->bytes, bio->bi_status); } - bio_put(bio); - btrfs_dio_private_put(dip); + if (btrfs_op(bio) == BTRFS_MAP_WRITE) + btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset, + dip->bytes, !bio->bi_status); + else + unlock_extent(&inode->io_tree, dip->file_offset, + dip->file_offset + dip->bytes - 1, NULL); + + bbio->bio.bi_private = bbio->private; + iomap_dio_bio_end_io(bio); } -static void btrfs_submit_direct(const struct iomap_iter *iter, - struct bio *dio_bio, loff_t file_offset) +static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset) { + struct btrfs_bio *bbio = btrfs_bio(bio); struct btrfs_dio_private *dip = - container_of(dio_bio, struct btrfs_dio_private, bio); - struct inode *inode = iter->inode; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct bio *bio; - u64 start_sector; - u64 submit_len; - u64 
clone_offset = 0; - u64 clone_len; - u64 logical; - int ret; - blk_status_t status; - struct btrfs_io_geometry geom; + container_of(bbio, struct btrfs_dio_private, bbio); struct btrfs_dio_data *dio_data = iter->private; - struct extent_map *em = NULL; - dip->inode = BTRFS_I(inode); - dip->file_offset = file_offset; - dip->bytes = dio_bio->bi_iter.bi_size; - refcount_set(&dip->refs, 1); + btrfs_bio_init(bbio, BTRFS_I(iter->inode), btrfs_dio_end_io, bio->bi_private); + bbio->file_offset = file_offset; - start_sector = dio_bio->bi_iter.bi_sector; - submit_len = dio_bio->bi_iter.bi_size; - - do { - logical = start_sector << 9; - em = btrfs_get_chunk_map(fs_info, logical, submit_len); - if (IS_ERR(em)) { - status = errno_to_blk_status(PTR_ERR(em)); - em = NULL; - goto out_err; - } - ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio), - logical, &geom); - if (ret) { - status = errno_to_blk_status(ret); - goto out_err_em; - } - - clone_len = min(submit_len, geom.len); - ASSERT(clone_len <= UINT_MAX); - - /* - * This will never fail as it's passing GPF_NOFS and - * the allocation is backed by btrfs_bioset. - */ - bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len, - BTRFS_I(inode), btrfs_end_dio_bio, - dip); - btrfs_bio(bio)->file_offset = file_offset; - - ASSERT(submit_len >= clone_len); - submit_len -= clone_len; - - /* - * Increase the count before we submit the bio so we know - * the end IO handler won't happen before we increase the - * count. Otherwise, the dip might get freed before we're - * done setting it up. - * - * We transfer the initial reference to the last bio, so we - * don't need to increment the reference count for the last one. 
- */ - if (submit_len > 0) - refcount_inc(&dip->refs); - - btrfs_submit_bio(fs_info, bio, 0); - - dio_data->submitted += clone_len; - clone_offset += clone_len; - start_sector += clone_len >> 9; - file_offset += clone_len; - - free_extent_map(em); - } while (submit_len > 0); - return; + dip->file_offset = file_offset; + dip->bytes = bio->bi_iter.bi_size; -out_err_em: - free_extent_map(em); -out_err: - dio_bio->bi_status = status; - btrfs_dio_private_put(dip); + dio_data->submitted += bio->bi_iter.bi_size; + btrfs_submit_bio(btrfs_sb(iter->inode->i_sb), bio, 0); } static const struct iomap_ops btrfs_dio_iomap_ops = { @@ -7901,7 +7804,7 @@ static const struct iomap_ops btrfs_dio_iomap_ops = { }; static const struct iomap_dio_ops btrfs_dio_ops = { - .submit_io = btrfs_submit_direct, + .submit_io = btrfs_dio_submit_io, .bio_set = &btrfs_dio_bioset, }; @@ -8736,7 +8639,7 @@ int __init btrfs_init_cachep(void) goto fail; if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE, - offsetof(struct btrfs_dio_private, bio), + offsetof(struct btrfs_dio_private, bbio.bio), BIOSET_NEED_BVECS)) goto fail; -- cgit v1.2.3 From a34e4c3f884cc592f105d214d21baee9f9c6bae8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 21 Jan 2023 07:50:24 +0100 Subject: btrfs: remove stripe boundary calculation for encoded I/O Stop looking at the stripe boundary in btrfs_encoded_read_regular_fill_pages() now that btrfs_submit_bio can split bios. 
Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4ac9b34ad377..0fd26719d321 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9971,7 +9971,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 disk_io_size, struct page **pages) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_encoded_read_private priv = { .inode = inode, .file_offset = file_offset, @@ -9979,33 +9978,13 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, }; unsigned long i = 0; u64 cur = 0; - int ret; init_waitqueue_head(&priv.wait); - /* - * Submit bios for the extent, splitting due to bio or stripe limits as - * necessary. - */ + /* Submit bios for the extent, splitting due to bio limits as necessary. */ while (cur < disk_io_size) { - struct extent_map *em; - struct btrfs_io_geometry geom; struct bio *bio = NULL; - u64 remaining; + u64 remaining = disk_io_size - cur; - em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur, - disk_io_size - cur); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - } else { - ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ, - disk_bytenr + cur, &geom); - free_extent_map(em); - } - if (ret) { - WRITE_ONCE(priv.status, errno_to_blk_status(ret)); - break; - } - remaining = min(geom.len, disk_io_size - cur); while (bio || remaining) { size_t bytes = min_t(u64, remaining, PAGE_SIZE); -- cgit v1.2.3 From 48253076c3a93f795fcd84ffdc97c5e763709dee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:26 +0100 Subject: btrfs: open code submit_encoded_read_bio Open code the functionality in the only caller and remove the now superfluous error handling there. 
Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0fd26719d321..be122e4d4952 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9936,17 +9936,6 @@ struct btrfs_encoded_read_private { blk_status_t status; }; -static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, - struct bio *bio, int mirror_num) -{ - struct btrfs_encoded_read_private *priv = btrfs_bio(bio)->private; - struct btrfs_fs_info *fs_info = inode->root->fs_info; - - atomic_inc(&priv->pending); - btrfs_submit_bio(fs_info, bio, mirror_num); - return BLK_STS_OK; -} - static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) { struct btrfs_encoded_read_private *priv = bbio->private; @@ -9971,6 +9960,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 disk_io_size, struct page **pages) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_encoded_read_private priv = { .inode = inode, .file_offset = file_offset, @@ -9999,14 +9989,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, if (!bytes || bio_add_page(bio, pages[i], bytes, 0) < bytes) { - blk_status_t status; - - status = submit_encoded_read_bio(inode, bio, 0); - if (status) { - WRITE_ONCE(priv.status, status); - bio_put(bio); - goto out; - } + atomic_inc(&priv.pending); + btrfs_submit_bio(fs_info, bio, 0); bio = NULL; continue; } @@ -10017,7 +10001,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, } } -out: if (atomic_dec_return(&priv.pending)) io_wait_event(priv.wait, !atomic_read(&priv.pending)); /* See btrfs_encoded_read_endio() for ordering. 
*/ -- cgit v1.2.3 From 285599b6fe15d642df643fd4383ab3a278374e35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:27 +0100 Subject: btrfs: remove the fs_info argument to btrfs_submit_bio btrfs_submit_bio can derive it trivially from bbio->inode, so stop bothering in the callers. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index be122e4d4952..bd83633f8ad2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2697,14 +2697,12 @@ out: void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) { - btrfs_submit_bio(inode->root->fs_info, bio, mirror_num); + btrfs_submit_bio(bio, mirror_num); } void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, enum btrfs_compression_type compress_type) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; - if (compress_type != BTRFS_COMPRESS_NONE) { /* * btrfs_submit_compressed_read will handle completing the bio @@ -2714,7 +2712,7 @@ void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, return; } - btrfs_submit_bio(fs_info, bio, mirror_num); + btrfs_submit_bio(bio, mirror_num); } /* @@ -7795,7 +7793,7 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, dip->bytes = bio->bi_iter.bi_size; dio_data->submitted += bio->bi_iter.bi_size; - btrfs_submit_bio(btrfs_sb(iter->inode->i_sb), bio, 0); + btrfs_submit_bio(bio, 0); } static const struct iomap_ops btrfs_dio_iomap_ops = { @@ -9960,7 +9958,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 disk_io_size, struct page **pages) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; struct 
btrfs_encoded_read_private priv = { .inode = inode, .file_offset = file_offset, @@ -9990,7 +9987,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, if (!bytes || bio_add_page(bio, pages[i], bytes, 0) < bytes) { atomic_inc(&priv.pending); - btrfs_submit_bio(fs_info, bio, 0); + btrfs_submit_bio(bio, 0); bio = NULL; continue; } -- cgit v1.2.3 From 35a8d7da3ca87d8612fa86a21fab4e07a70d35cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:28 +0100 Subject: btrfs: remove now spurious bio submission helpers Call btrfs_submit_bio and btrfs_submit_compressed_read directly from submit_one_bio now that all additional functionality has moved into btrfs_submit_bio. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bd83633f8ad2..74b45b2a3bee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2695,26 +2695,6 @@ out: return errno_to_blk_status(ret); } -void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) -{ - btrfs_submit_bio(bio, mirror_num); -} - -void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio, - int mirror_num, enum btrfs_compression_type compress_type) -{ - if (compress_type != BTRFS_COMPRESS_NONE) { - /* - * btrfs_submit_compressed_read will handle completing the bio - * if there were any errors, so just return here. - */ - btrfs_submit_compressed_read(&inode->vfs_inode, bio, mirror_num); - return; - } - - btrfs_submit_bio(bio, mirror_num); -} - /* * given a list of ordered sums record them in the inode. This happens * at IO completion time based on sums calculated at bio submission time. 
-- cgit v1.2.3 From d5e4377d505189c30df50d54f9944d7fb8d528bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 21 Jan 2023 07:50:30 +0100 Subject: btrfs: split zone append bios in btrfs_submit_bio The current btrfs zoned device support is a little cumbersome in the data I/O path as it requires the callers to not issue I/O larger than the supported ZONE_APPEND size of the underlying device. This leads to a lot of extra accounting. Instead change btrfs_submit_bio so that it can take write bios of arbitrary size and form from the upper layers, and just split them internally to the ZONE_APPEND queue limits. Then remove all the upper layer warts catering to limited write sizes on zoned devices, including the extra refcount in the compressed_bio. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 74b45b2a3bee..2fd518afc4f3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7678,10 +7678,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, iomap->offset = start; iomap->bdev = fs_info->fs_devices->latest_dev->bdev; iomap->length = len; - - if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start)) - iomap->flags |= IOMAP_F_ZONE_APPEND; - free_extent_map(em); return 0; -- cgit v1.2.3 From 04f0847c4552b898ec5867a6b36f1e953330beae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 08:37:23 +0100 Subject: btrfs: don't rely on unchanging ->bi_bdev for zone append remaps btrfs_record_physical_zoned relies on a bio->bi_bdev sampled in the bio_end_io handler to find the reverse map for remapping the zone append write, but stacked block device drivers can and usually do change bi_bdev when sending on the bio to a lower device. This can happen e.g.
with the nvme-multipath driver when a NVMe SSD sets the shared namespace bit. But there is no real need for the bdev in btrfs_record_physical_zoned, as it is only passed to btrfs_rmap_block, which uses it to pick the mapping to report if there are multiple reverse mappings. As zone writes can only do simple non-mirror writes right now, and anything more complex will use the stripe tree there is no chance of the multiple mappings case actually happening. Instead open code the subset of btrfs_rmap_block in btrfs_record_physical_zoned, which also removes a memory