diff options
| author | Christoph Hellwig <hch@lst.de> | 2023-01-21 07:50:07 +0100 |
|---|---|---|
| committer | David Sterba <dsterba@suse.com> | 2023-02-15 19:38:51 +0100 |
| commit | 7609afac677546b225d8327d726cc558d3666496 (patch) | |
| tree | 03387fff02a0211bc38f33474d66f06f0216ed20 /fs/btrfs/extent_io.c | |
| parent | e52190441bd6b268aed6ecc0efe3614c4222014e (diff) | |
| download | linux-7609afac677546b225d8327d726cc558d3666496.tar.gz linux-7609afac677546b225d8327d726cc558d3666496.tar.bz2 linux-7609afac677546b225d8327d726cc558d3666496.zip | |
btrfs: handle checksum validation and repair at the storage layer
Currently btrfs handles checksum validation and repair in the end I/O
handler for the btrfs_bio. This leads to a lot of duplicate code
plus issues with varying semantics or bugs, e.g.
- the until recently broken repair for compressed extents
- the fact that encoded reads validate the checksums but do not kick
of read repair
- the inconsistent checking of the BTRFS_FS_STATE_NO_CSUMS flag
This commit revamps the checksum validation and repair code to instead
work below the btrfs_submit_bio interfaces.
In case of a checksum failure (or a plain old I/O error), the repair
is now kicked off before the upper level ->end_io handler is invoked.
Progress of an in-progress repair is tracked by a small structure
that is allocated using a mempool for each original bio with failed
sectors, which holds a reference to the original bio. This new
structure is allocated using a mempool to guarantee forward progress
even under memory pressure. The mempool will be replenished when
the repair completes, just as the mempools backing the bios.
There is one significant behavior change here: If repair fails or
is impossible to start with, the whole bio will be failed to the
upper layer. This is the behavior that all I/O submitters except
for buffered I/O already emulated in their end_io handler. For
buffered I/O this now means that a large readahead request can
fail due to a single bad sector, but as readahead errors are ignored
the following readpage if the sector is actually accessed will
still be able to read. This also matches the I/O failure handling
in other file systems.
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
| -rw-r--r-- | fs/btrfs/extent_io.c | 124 |
1 files changed, 8 insertions, 116 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1e28ae62d0ce..88def7fd8598 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -803,79 +803,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) btrfs_subpage_end_reader(fs_info, page, start, len); } -static void end_sector_io(struct page *page, u64 offset, bool uptodate) -{ - struct btrfs_inode *inode = BTRFS_I(page->mapping->host); - const u32 sectorsize = inode->root->fs_info->sectorsize; - - end_page_read(page, uptodate, offset, sectorsize); - unlock_extent(&inode->io_tree, offset, offset + sectorsize - 1, NULL); -} - -static void submit_data_read_repair(struct inode *inode, - struct btrfs_bio *failed_bbio, - u32 bio_offset, const struct bio_vec *bvec, - unsigned int error_bitmap) -{ - const unsigned int pgoff = bvec->bv_offset; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct page *page = bvec->bv_page; - const u64 start = page_offset(bvec->bv_page) + bvec->bv_offset; - const u64 end = start + bvec->bv_len - 1; - const u32 sectorsize = fs_info->sectorsize; - const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits; - int i; - - BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE); - - /* This repair is only for data */ - ASSERT(is_data_inode(inode)); - - /* We're here because we had some read errors or csum mismatch */ - ASSERT(error_bitmap); - - /* - * We only get called on buffered IO, thus page must be mapped and bio - * must not be cloned. - */ - ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED)); - - /* Iterate through all the sectors in the range */ - for (i = 0; i < nr_bits; i++) { - const unsigned int offset = i * sectorsize; - bool uptodate = false; - int ret; - - if (!(error_bitmap & (1U << i))) { - /* - * This sector has no error, just end the page read - * and unlock the range. - */ - uptodate = true; - goto next; - } - - ret = btrfs_repair_one_sector(BTRFS_I(inode), failed_bbio, - bio_offset + offset, page, pgoff + offset, - true); - if (!ret) { - /* - * We have submitted the read repair, the page release - * will be handled by the endio function of the - * submitted repair bio. - * Thus we don't need to do any thing here. - */ - continue; - } - /* - * Continue on failed repair, otherwise the remaining sectors - * will not be properly unlocked. - */ -next: - end_sector_io(page, start + offset, uptodate); - } -} - /* lots and lots of room for performance fixes in the end_bio funcs */ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) @@ -1093,8 +1020,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const u32 sectorsize = fs_info->sectorsize; - unsigned int error_bitmap = (unsigned int)-1; - bool repair = false; u64 start; u64 end; u32 len; @@ -1126,25 +1051,14 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) len = bvec->bv_len; mirror = bbio->mirror_num; - if (likely(uptodate)) { - if (is_data_inode(inode)) { - error_bitmap = btrfs_verify_data_csum(bbio, - bio_offset, page, start, end); - if (error_bitmap) - uptodate = false; - } else { - if (btrfs_validate_metadata_buffer(bbio, - page, start, end, mirror)) - uptodate = false; - } - } + if (uptodate && !is_data_inode(inode) && + btrfs_validate_metadata_buffer(bbio, page, start, end, mirror)) + uptodate = false; if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_SHIFT; - btrfs_clean_io_failure(BTRFS_I(inode), start, page, 0); - /* * Zero out the remaining part if this range straddles * i_size. @@ -1161,19 +1075,7 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) zero_user_segment(page, zero_start, offset_in_page(end) + 1); } - } else if (is_data_inode(inode)) { - /* - * Only try to repair bios that actually made it to a - * device. If the bio failed to be submitted mirror - * is 0 and we need to fail it without retrying. - * - * This also includes the high level bios for compressed - * extents - these never make it to a device and repair - * is already handled on the lower compressed bio. - */ - if (mirror > 0) - repair = true; - } else { + } else if (!is_data_inode(inode)) { struct extent_buffer *eb; eb = find_extent_buffer_readpage(fs_info, page, start); @@ -1182,19 +1084,10 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) atomic_dec(&eb->io_pages); } - if (repair) { - /* - * submit_data_read_repair() will handle all the good - * and bad sectors, we just continue to the next bvec. - */ - submit_data_read_repair(inode, bbio, bio_offset, bvec, - error_bitmap); - } else { - /* Update page status and unlock */ - end_page_read(page, uptodate, start, len); - endio_readpage_release_extent(&processed, BTRFS_I(inode), - start, end, PageUptodate(page)); - } + /* Update page status and unlock. */ + end_page_read(page, uptodate, start, len); + endio_readpage_release_extent(&processed, BTRFS_I(inode), + start, end, PageUptodate(page)); ASSERT(bio_offset + len > bio_offset); bio_offset += len; @@ -1202,7 +1095,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) } /* Release the last extent */ endio_readpage_release_extent(&processed, NULL, 0, 0, false); - btrfs_bio_free_csum(bbio); bio_put(bio); } |
