diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-14 08:55:43 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-14 08:55:43 -0700 |
| commit | 0d28544117fa9dcd0d202aeb4459bb15f42bb7de (patch) | |
| tree | 8761202e32bca24dc7425a1188b6df75cf5225a9 | |
| parent | fa4bff165070dc40a3de35b78e4f8da8e8d85ec5 (diff) | |
| parent | 2777e654371dd4207a3a7f4fb5fa39550053a080 (diff) | |
| download | linux-0d28544117fa9dcd0d202aeb4459bb15f42bb7de.tar.gz linux-0d28544117fa9dcd0d202aeb4459bb15f42bb7de.tar.bz2 linux-0d28544117fa9dcd0d202aeb4459bb15f42bb7de.zip | |
Merge tag 'f2fs-for-v5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"Another round of various bug fixes came in. Damien improved SMR drive
support a bit, and Chao replaced BUG_ON() with reporting errors to
user since we've not hit from users but did hit from crafted images.
We've found a disk layout bug in large_nat_bits feature which supports
very large NAT entries enabled at mkfs. If the feature is enabled, it
will give a notice to run fsck to correct the on-disk layout.
Enhancements:
- reduce memory consumption for SMR drive
- better discard handling for multiple partitions
- tracepoints for f2fs_file_write_iter/f2fs_filemap_fault
- allow to change CP_CHKSUM_OFFSET
- detect wrong layout of large_nat_bitmap feature
- enhance checking valid data indices
Bug fixes:
- Multiple partition support for SMR drive
- deadlock problem in f2fs_balance_fs_bg
- add boundary checks to fix abnormal behaviors on fuzzed images
- inline_xattr space calculations
- replace f2fs_bug_on with errors
In addition, this series contains various memory boundary check and
sanity check of on-disk consistency"
* tag 'f2fs-for-v5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits)
f2fs: fix to avoid accessing xattr across the boundary
f2fs: fix to avoid potential race on sbi->unusable_block_count access/update
f2fs: add tracepoint for f2fs_filemap_fault()
f2fs: introduce DATA_GENERIC_ENHANCE
f2fs: fix to handle error in f2fs_disable_checkpoint()
f2fs: remove redundant check in f2fs_file_write_iter()
f2fs: fix to be aware of readonly device in write_checkpoint()
f2fs: fix to skip recovery on readonly device
f2fs: fix to consider multiple device for readonly check
f2fs: relocate chksum_offset for large_nat_bitmap feature
f2fs: allow unfixed f2fs_checkpoint.checksum_offset
f2fs: Replace spaces with tab
f2fs: insert space before the open parenthesis '('
f2fs: allow address pointer number of dnode aligning to specified size
f2fs: introduce f2fs_read_single_page() for cleanup
f2fs: mark is_extension_exist() inline
f2fs: fix to set FI_UPDATE_WRITE correctly
f2fs: fix to avoid panic in f2fs_inplace_write_data()
f2fs: fix to do sanity check on valid block count of segment
f2fs: fix to do sanity check on valid node/block count
...
| -rw-r--r-- | fs/f2fs/acl.c | 4 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 108 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 285 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 127 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 76 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 16 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 17 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 12 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 43 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 37 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 71 | ||||
| -rw-r--r-- | fs/f2fs/segment.h | 16 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 70 | ||||
| -rw-r--r-- | fs/f2fs/xattr.c | 36 | ||||
| -rw-r--r-- | fs/f2fs/xattr.h | 2 | ||||
| -rw-r--r-- | include/linux/f2fs_fs.h | 11 | ||||
| -rw-r--r-- | include/trace/events/f2fs.h | 57 |
18 files changed, 688 insertions, 302 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 63e599524085..217b290ae3a5 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -285,7 +285,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { - switch(pa->e_tag) { + switch (pa->e_tag) { case ACL_USER_OBJ: pa->e_perm &= (mode >> 6) | ~S_IRWXO; mode &= (pa->e_perm << 6) | ~S_IRWXU; @@ -326,7 +326,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) } *mode_p = (*mode_p & ~S_IRWXUGO) | mode; - return not_equiv; + return not_equiv; } static int f2fs_acl_create(struct inode *dir, umode_t *mode, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a98e1b02279e..ed70b68b2b38 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -66,7 +66,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_meta = is_meta, + .is_por = !is_meta, }; int err; @@ -130,6 +130,30 @@ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } +static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, + int type) +{ + struct seg_entry *se; + unsigned int segno, offset; + bool exist; + + if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + return true; + + segno = GET_SEGNO(sbi, blkaddr); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + + exist = f2fs_test_bit(offset, se->cur_valid_map); + if (!exist && type == DATA_GENERIC_ENHANCE) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + } + return exist; +} + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -151,15 +175,22 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return false; break; case META_POR: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; case DATA_GENERIC: + case DATA_GENERIC_ENHANCE: + case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) { - if (type == DATA_GENERIC) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); - WARN_ON(1); - } + blkaddr < MAIN_BLKADDR(sbi))) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); return false; + } else { + return __is_bitmap_valid(sbi, blkaddr, type); } break; case META_GENERIC: @@ -189,7 +220,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, - .is_meta = (type != META_POR), + .is_por = (type == META_POR), }; struct blk_plug plug; @@ -644,6 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (bdev_read_only(sbi->sb->s_bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, "write access " + "unavailable, skipping orphan cleanup"); + return 0; + } + if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; @@ -758,13 +795,27 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) } } +static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, + struct f2fs_checkpoint *ckpt) +{ + unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); + __u32 chksum; + + chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + if (chksum_ofs < CP_CHKSUM_OFFSET) { + chksum_ofs += sizeof(chksum); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); + } + return chksum; +} + static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, struct f2fs_checkpoint **cp_block, struct page **cp_page, unsigned long long *version) { - unsigned long blk_size = sbi->blocksize; size_t crc_offset = 0; - __u32 crc = 0; + __u32 crc; *cp_page = f2fs_get_meta_page(sbi, cp_addr); if (IS_ERR(*cp_page)) @@ -773,15 +824,27 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset > (blk_size - sizeof(__le32))) { + if (crc_offset < CP_MIN_CHKSUM_OFFSET || + crc_offset > CP_CHKSUM_OFFSET) { f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = cur_cp_crc(*cp_block); - if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { + if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) { + if (crc_offset != CP_MIN_CHKSUM_OFFSET) { + f2fs_put_page(*cp_page, 1); + f2fs_msg(sbi->sb, KERN_WARNING, + "layout of large_nat_bitmap is deprecated, " + "run fsck to repair, chksum_offset: %zu", + crc_offset); + return -EINVAL; + } + } + + crc = f2fs_checkpoint_chksum(sbi, *cp_block); + if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; @@ -1009,13 +1072,11 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - if (is_dir) - F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->cp_task = current; filemap_fdatawrite(inode->i_mapping); - if (is_dir) - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. */ @@ -1391,7 +1452,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset)); + crc32 = f2fs_checkpoint_chksum(sbi, ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1475,7 +1536,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + + spin_lock(&sbi->stat_lock); sbi->unusable_block_count = 0; + spin_unlock(&sbi->stat_lock); + __set_cp_next_pack(sbi); /* @@ -1500,6 +1565,9 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) + return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (cpc->reason != CP_PAUSE) return 0; @@ -1516,10 +1584,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) err = -EIO; goto out; } - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 64040e998439..eda4181d2092 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -218,12 +218,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, struct block_device *bdev = sbi->sb->s_bdev; int i; - for (i = 0; i < sbi->s_ndevs; i++) { - if (FDEV(i).start_blk <= blk_addr && - FDEV(i).end_blk >= blk_addr) { - blk_addr -= FDEV(i).start_blk; - bdev = FDEV(i).bdev; - break; + if (f2fs_is_multi_device(sbi)) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } } } if (bio) { @@ -237,6 +239,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; @@ -420,7 +425,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); + __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, @@ -448,7 +453,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + fio->is_por ? META_POR : (__is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFAULT; trace_f2fs_submit_page_bio(page, fio); @@ -498,9 +504,7 @@ next: spin_unlock(&io->io_lock); } - if (__is_valid_data_blkaddr(fio->old_blkaddr)) - verify_block_addr(fio, fio->old_blkaddr); - verify_block_addr(fio, fio->new_blkaddr); + verify_fio_blkaddr(fio); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -557,9 +561,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) - return ERR_PTR(-EFAULT); - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); @@ -587,8 +588,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio; + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -600,8 +603,8 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - __submit_bio(F2FS_I_SB(inode), bio, DATA); + inc_page_count(sbi, F2FS_RD_DATA); + __submit_bio(sbi, bio, DATA); return 0; } @@ -729,6 +732,11 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto put_err; + } goto got_it; } @@ -742,6 +750,13 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, err = -ENOENT; goto put_err; } + if (dn.data_blkaddr != NEW_ADDR && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + dn.data_blkaddr, + DATA_GENERIC_ENHANCE)) { + err = -EFAULT; + goto put_err; + } got_it: if (PageUptodate(page)) { unlock_page(page); @@ -1084,12 +1099,12 @@ next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto sync_out; } - if (is_valid_data_blkaddr(sbi, blkaddr)) { + if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { @@ -1499,6 +1514,118 @@ out: return ret; } +static int f2fs_read_single_page(struct inode *inode, struct page *page, + unsigned nr_pages, + struct f2fs_map_blocks *map, + struct bio **bio_ret, + sector_t *last_block_in_bio, + bool is_readahead) +{ + struct bio *bio = *bio_ret; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + int ret = 0; + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; + /* + * Map blocks using the previous result first. + */ + if ((map->m_flags & F2FS_MAP_MAPPED) && + block_in_file > map->m_lblk && + block_in_file < (map->m_lblk + map->m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map->m_lblk = block_in_file; + map->m_len = last_block - block_in_file; + + ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); + if (ret) + goto out; +got_it: + if ((map->m_flags & F2FS_MAP_MAPPED)) { + block_nr = map->m_pblk + block_in_file - map->m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC_ENHANCE_READ)) { + ret = -EFAULT; + goto out; + } + } else { +zero_out: + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + goto out; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (*last_block_in_bio != block_nr - 1 || + !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { +submit_and_realloc: + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + if (bio == NULL) { + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + goto out; + } + } + + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = block_nr; + goto out; +confused: + if (bio) { + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + unlock_page(page); +out: + *bio_ret = bio; + return ret; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1515,13 +1642,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping, struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - sector_t block_in_file; - sector_t last_block; - sector_t last_block_in_file; - sector_t block_nr; struct f2fs_map_blocks map; + int ret = 0; map.m_pblk = 0; map.m_lblk = 0; @@ -1544,98 +1666,13 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - block_in_file = (sector_t)page->index; - last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> - blkbits; - if (last_block > last_block_in_file) - last_block = last_block_in_file; - - /* just zeroing out page which is beyond EOF */ - if (block_in_file >= last_block) - goto zero_out; - /* - * Map blocks using the previous result first. - */ - if ((map.m_flags & F2FS_MAP_MAPPED) && - block_in_file > map.m_lblk && - block_in_file < (map.m_lblk + map.m_len)) - goto got_it; - - /* - * Then do more f2fs_map_blocks() calls until we are - * done with this page. - */ - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; -got_it: - if ((map.m_flags & F2FS_MAP_MAPPED)) { - block_nr = map.m_pblk + block_in_file - map.m_lblk; - SetPageMappedToDisk(page); - - if (!PageUptodate(page) && !cleancache_get_page(page)) { - SetPageUptodate(page); - goto confused; - } - - if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, - DATA_GENERIC)) - goto set_error_page; - } else { -zero_out: + ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, + &last_block_in_bio, is_readahead); + if (ret) { + SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); unlock_page(page); - goto next_page; } - - /* - * This page will go to BIO. Do we need to send this - * BIO off first? - */ - if (bio && (last_block_in_bio != block_nr - 1 || - !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { -submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); - if (IS_ERR(bio)) { - bio = NULL; - goto set_error_page; - } - } - - /* - * If the page is under writeback, we need to wait for - * its completion to see the correct decrypted data. - */ - f2fs_wait_on_block_writeback(inode, block_nr); - - if (bio_add_page(bio, page, blocksize, 0) < blocksize) - goto submit_and_realloc; - - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - ClearPageError(page); - last_block_in_bio = block_nr; - goto next_page; -set_error_page: - SetPageError(page); - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); - goto next_page; -confused: - if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - unlock_page(page); next_page: if (pages) put_page(page); @@ -1643,7 +1680,7 @@ next_page: BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); - return 0; + return pages ? 0 : ret; } static int f2fs_read_data_page(struct file *file, struct page *page) @@ -1813,7 +1850,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) + DATA_GENERIC_ENHANCE)) return -EFAULT; ipu_force = true; @@ -1840,7 +1877,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) { + DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto out_writepage; } @@ -1848,7 +1885,8 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && + if (ipu_force || + (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) @@ -1866,9 +1904,10 @@ got_it: true); if (PageWriteback(page)) end_page_writeback(page); + } else { + set_inode_flag(inode, FI_UPDATE_WRITE); } trace_f2fs_do_write_data_page(fio->page, IPU); - set_inode_flag(inode, FI_UPDATE_WRITE); return err; } @@ -2030,7 +2069,8 @@ out: } unlock_page(page); - if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2491,6 +2531,11 @@ repeat: zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto fail; + } err = f2fs_submit_page_read(inode, page, blkaddr); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bacf5c2a8850..06b89a9862ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -210,7 +210,14 @@ enum { META_SSA, META_MAX, META_POR, - DATA_GENERIC, + DATA_GENERIC, /* check range only */ + DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */ + DATA_GENERIC_ENHANCE_READ, /* + * strong check on range and segment + * bitmap but no warning due to race + * condition of read on truncated area + * by extent_cache + */ META_GENERIC, }; @@ -1041,7 +1048,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ - bool is_meta; /* indicate borrow meta inode mapping or not */ + bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ @@ -1068,8 +1075,8 @@ struct f2fs_dev_info { block_t start_blk; block_t end_blk; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_blkz; /* Total number of zones */ - u8 *blkz_type; /* Array of zones type */ + unsigned int nr_blkz; /* Total number of zones */ + unsigned long *blkz_seq; /* Bitmap indicating sequential zones */ #endif }; @@ -1366,6 +1373,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } #endif +/* + * Test if the mounted volume is a multi-device volume. + * - For a single regular disk volume, sbi->s_ndevs is 0. + * - For a single zoned disk volume, sbi->s_ndevs is 1. + * - For a multi-device volume, sbi->s_ndevs is always 2 or more. + */ +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) +{ + return sbi->s_ndevs > 1; +} + /* For write statistics. Suppose sector size is 512 bytes, * and the return value is in kbytes. s is of struct f2fs_sb_info. */ @@ -1777,6 +1795,7 @@ enospc: return -ENOSPC; } +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1785,13 +1804,21 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); + if (unlikely(inode->i_blocks < sectors)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, count, false, true); } @@ -1889,7 +1916,11 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { offset = (flag == SIT_BITMAP) ? le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + /* + * if large_nat_bitmap feature is enabled, leave checksum + * protection for all nat/sit bitmaps. + */ + return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); } if (__cp_payload(sbi) > 0) { @@ -2008,7 +2039,6 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_block_count); f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); sbi->total_valid_node_count--; sbi->total_valid_block_count--; @@ -2018,10 +2048,19 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (is_inode) + if (is_inode) { dquot_free_inode(inode); - else + } else { + if (unlikely(inode->i_blocks == 0)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return; + } f2fs_i_blocks_write(inode, 1, false, true); + } } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -2545,7 +2584,14 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); + unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode); + return ALIGN_DOWN(addrs, 1); +} + +static inline unsigned int addrs_per_block(struct inode *inode) +{ + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2558,7 +2604,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page) static inline int inline_xattr_size(struct inode *inode) { - return get_inline_xattr_addrs(inode) * sizeof(__le32); + if (f2fs_has_inline_xattr(inode)) + return get_inline_xattr_addrs(inode) * sizeof(__le32); + return 0; } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2800,12 +2848,10 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) -#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \ - (!is_read_io((fio)->op) || (fio)->is_meta)) +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -2824,15 +2870,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) return true; } -static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr) -{ - if (!__is_valid_data_blkaddr(blkaddr)) - return false; - verify_blkaddr(sbi, blkaddr, DATA_GENERIC); - return true; -} - static inline void f2fs_set_page_private(struct page *page, unsigned long data) { @@ -3530,16 +3567,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED -static inline int get_blkz_type(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkaddr) +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) { unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; - int i; - for (i = 0; i < sbi->s_ndevs; i++) - if (FDEV(i).bdev == bdev) - return FDEV(i).blkz_type[zno]; - return -EINVAL; + return test_bit(zno, FDEV(devi).blkz_seq); } #endif @@ -3548,9 +3581,23 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) return f2fs_sb_has_blkzoned(sbi); } +static inline bool f2fs_bdev_support_discard(struct block_device *bdev) +{ + return blk_queue_discard(bdev_get_queue(bdev)) || + bdev_is_zoned(bdev) |
