diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-fs-f2fs | 80 | ||||
-rw-r--r-- | Documentation/filesystems/f2fs.rst | 2 | ||||
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 37 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 24 | ||||
-rw-r--r-- | fs/f2fs/data.c | 624 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 64 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 4 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 60 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 128 | ||||
-rw-r--r-- | fs/f2fs/file.c | 173 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 22 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 2 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 14 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 78 | ||||
-rw-r--r-- | fs/f2fs/iostat.c | 186 | ||||
-rw-r--r-- | fs/f2fs/iostat.h | 19 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 5 | ||||
-rw-r--r-- | fs/f2fs/node.c | 9 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 225 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 41 | ||||
-rw-r--r-- | fs/f2fs/super.c | 63 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 49 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 2 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 2 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 104 |
26 files changed, 1090 insertions, 928 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 9e3756625a81..94132745ecbe 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -49,16 +49,23 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> Description: Controls the in-place-update policy. updates in f2fs. User can set: - ==== ================= - 0x01 F2FS_IPU_FORCE - 0x02 F2FS_IPU_SSR - 0x04 F2FS_IPU_UTIL - 0x08 F2FS_IPU_SSR_UTIL - 0x10 F2FS_IPU_FSYNC - 0x20 F2FS_IPU_ASYNC - 0x40 F2FS_IPU_NOCACHE - 0x80 F2FS_IPU_HONOR_OPU_WRITE - ==== ================= + ===== =============== =================================================== + value policy description + 0x00 DISABLE disable IPU(=default option in LFS mode) + 0x01 FORCE all the time + 0x02 SSR if SSR mode is activated + 0x04 UTIL if FS utilization is over threashold + 0x08 SSR_UTIL if SSR mode is activated and FS utilization is over + threashold + 0x10 FSYNC activated in fsync path only for high performance + flash storages. IPU will be triggered only if the + # of dirty pages over min_fsync_blocks. + (=default option) + 0x20 ASYNC do IPU given by asynchronous write requests + 0x40 NOCACHE disable IPU bio cache + 0x80 HONOR_OPU_WRITE use OPU write prior to IPU write if inode has + FI_OPU_WRITE flag + ===== =============== =================================================== Refer segment.h for details. @@ -669,3 +676,56 @@ Contact: "Ping Xiong" <xiongping1@xiaomi.com> Description: When DATA SEPARATION is on, it controls the age threshold to indicate the data blocks as warm. By default it was initialized as 2621440 blocks (equals to 10GB). + +What: /sys/fs/f2fs/<disk>/fault_rate +Date: May 2016 +Contact: "Sheng Yong" <shengyong@oppo.com> +Contact: "Chao Yu" <chao@kernel.org> +Description: Enable fault injection in all supported types with + specified injection rate. + +What: /sys/fs/f2fs/<disk>/fault_type +Date: May 2016 +Contact: "Sheng Yong" <shengyong@oppo.com> +Contact: "Chao Yu" <chao@kernel.org> +Description: Support configuring fault injection type, should be + enabled with fault_injection option, fault type value + is shown below, it supports single or combined type. + + =================== =========== + Type_Name Type_Value + =================== =========== + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 (obsolete) + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 + FAULT_SLAB_ALLOC 0x000008000 + FAULT_DQUOT_INIT 0x000010000 + FAULT_LOCK_OP 0x000020000 + FAULT_BLKADDR 0x000040000 + =================== =========== + +What: /sys/fs/f2fs/<disk>/discard_io_aware_gran +Date: January 2023 +Contact: "Yangtao Li" <frank.li@vivo.com> +Description: Controls background discard granularity of inner discard thread + when is not in idle. Inner thread will not issue discards with size that + is smaller than granularity. The unit size is one block(4KB), now only + support configuring in range of [0, 512]. + Default: 512 + +What: /sys/fs/f2fs/<disk>/last_age_weight +Date: January 2023 +Contact: "Ping Xiong" <xiongping1@xiaomi.com> +Description: When DATA SEPARATION is on, it controls the weight of last data block age. diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 220f3e0d3f55..2055e72871fe 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -158,7 +158,7 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. -barrier If this option is set, cache_flush commands are allowed to be +barrier If this option is set, cache_flush commands are allowed to be issued. fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance diff --git a/MAINTAINERS b/MAINTAINERS index edd3d562beee..b0db911207ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7795,6 +7795,7 @@ M: Chao Yu <chao@kernel.org> L: linux-f2fs-devel@lists.sourceforge.net S: Maintained W: https://f2fs.wiki.kernel.org/ +Q: https://patchwork.kernel.org/project/f2fs/list/ B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git F: Documentation/ABI/testing/sysfs-fs-f2fs diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5a5515d83a1b..c3e058e0a018 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -70,7 +70,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_por = !is_meta, + .is_por = !is_meta ? 1 : 0, }; int err; @@ -171,10 +171,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (time_to_inject(sbi, FAULT_BLKADDR)) { - f2fs_show_injection_info(sbi, FAULT_BLKADDR); + if (time_to_inject(sbi, FAULT_BLKADDR)) return false; - } switch (type) { case META_NAT: @@ -239,8 +237,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, - .in_list = false, - .is_por = (type == META_POR), + .in_list = 0, + .is_por = (type == META_POR) ? 1 : 0, }; struct blk_plug plug; int err; @@ -625,7 +623,6 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } @@ -798,7 +795,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) */ head = &im->ino_list; - /* loop for each orphan inode entry and write them in Jornal block */ + /* loop for each orphan inode entry and write them in journal block */ list_for_each_entry(orphan, head, list) { if (!page) { page = f2fs_grab_meta_page(sbi, start_blk++); @@ -1128,7 +1125,7 @@ retry: } else { /* * We should submit bio, since it exists several - * wribacking dentry pages in the freeing inode. + * writebacking dentry pages in the freeing inode. */ f2fs_submit_merged_write(sbi, DATA); cond_resched(); @@ -1476,20 +1473,18 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { - ckpt->cur_node_segno[i] = - cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); - ckpt->cur_node_blkoff[i] = - cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); - ckpt->alloc_type[i + CURSEG_HOT_NODE] = - curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); + struct curseg_info *curseg = CURSEG_I(sbi, i + CURSEG_HOT_NODE); + + ckpt->cur_node_segno[i] = cpu_to_le32(curseg->segno); + ckpt->cur_node_blkoff[i] = cpu_to_le16(curseg->next_blkoff); + ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg->alloc_type; } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { - ckpt->cur_data_segno[i] = - cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); - ckpt->cur_data_blkoff[i] = - cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); - ckpt->alloc_type[i + CURSEG_HOT_DATA] = - curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); + struct curseg_info *curseg = CURSEG_I(sbi, i + CURSEG_HOT_DATA); + + ckpt->cur_data_segno[i] = cpu_to_le32(curseg->segno); + ckpt->cur_data_blkoff[i] = cpu_to_le16(curseg->next_blkoff); + ckpt->alloc_type[i + CURSEG_HOT_DATA] = curseg->alloc_type; } /* 2 cp + n data seg summary + orphan inode blocks */ diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 2532f369cb10..b40dec3d7f79 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -241,7 +241,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) unsigned int size = LZ4_MEM_COMPRESS; #ifdef CONFIG_F2FS_FS_LZ4HC - if (F2FS_I(cc->inode)->i_compress_flag >> COMPRESS_LEVEL_OFFSET) + if (F2FS_I(cc->inode)->i_compress_level) size = LZ4HC_MEM_COMPRESS; #endif @@ -267,8 +267,7 @@ static void lz4_destroy_compress_ctx(struct compress_ctx *cc) #ifdef CONFIG_F2FS_FS_LZ4HC static int lz4hc_compress_pages(struct compress_ctx *cc) { - unsigned char level = F2FS_I(cc->inode)->i_compress_flag >> - COMPRESS_LEVEL_OFFSET; + unsigned char level = F2FS_I(cc->inode)->i_compress_level; int len; if (level) @@ -340,8 +339,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) zstd_cstream *stream; void *workspace; unsigned int workspace_size; - unsigned char level = F2FS_I(cc->inode)->i_compress_flag >> - COMPRESS_LEVEL_OFFSET; + unsigned char level = F2FS_I(cc->inode)->i_compress_level; if (!level) level = F2FS_ZSTD_DEFAULT_CLEVEL; @@ -564,7 +562,7 @@ module_param(num_compress_pages, uint, 0444); MODULE_PARM_DESC(num_compress_pages, "Number of intermediate compress pages to preallocate"); -int f2fs_init_compress_mempool(void) +int __init f2fs_init_compress_mempool(void) { compress_page_pool = mempool_create_page_pool(num_compress_pages, 0); return compress_page_pool ? 0 : -ENOMEM; @@ -690,9 +688,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) vm_unmap_ram(cc->cbuf, cc->nr_cpages); vm_unmap_ram(cc->rbuf, cc->cluster_size); - for (i = 0; i < cc->nr_cpages; i++) { - if (i < new_nr_cpages) - continue; + for (i = new_nr_cpages; i < cc->nr_cpages; i++) { f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -1070,7 +1066,7 @@ retry: if (ret) goto out; if (bio) - f2fs_submit_bio(sbi, bio, DATA); + f2fs_submit_read_bio(sbi, bio, DATA); ret = f2fs_init_compress_ctx(cc); if (ret) @@ -1215,10 +1211,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = false, + .submitted = 0, .io_type = io_type, .io_wbc = wbc, - .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode), + .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? + 1 : 0, }; struct dnode_of_data dn; struct node_info ni; @@ -1228,7 +1225,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - /* we should bypass data pages to proceed the kworkder jobs */ + /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { mapping_set_error(cc->rpages[0]->mapping, -EIO); goto out_free; @@ -1813,6 +1810,7 @@ unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) const struct address_space_operations f2fs_compress_aops = { .release_folio = f2fs_release_folio, .invalidate_folio = f2fs_invalidate_folio, + .migrate_folio = filemap_migrate_folio, }; struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 41addc605350..06b552a0aba2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -292,13 +292,11 @@ static void f2fs_read_end_io(struct bio *bio) struct bio_post_read_ctx *ctx; bool intask = in_task(); - iostat_update_and_unbind_ctx(bio, 0); + iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; - if (time_to_inject(sbi, FAULT_READ_IO)) { - f2fs_show_injection_info(sbi, FAULT_READ_IO); + if (time_to_inject(sbi, FAULT_READ_IO)) bio->bi_status = BLK_STS_IOERR; - } if (bio->bi_status) { f2fs_finish_read_bio(bio, intask); @@ -332,13 +330,11 @@ static void f2fs_write_end_io(struct bio *bio) struct bio_vec *bvec; struct bvec_iter_all iter_all; - iostat_update_and_unbind_ctx(bio, 1); + iostat_update_and_unbind_ctx(bio); sbi = bio->bi_private; - if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(sbi, FAULT_WRITE_IO); + if (time_to_inject(sbi, FAULT_WRITE_IO)) bio->bi_status = BLK_STS_IOERR; - } bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; @@ -507,65 +503,66 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, return fscrypt_mergeable_bio(bio, inode, next_idx); } -static inline void __submit_bio(struct f2fs_sb_info *sbi, - struct bio *bio, enum page_type type) +void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, + enum page_type type) { - if (!is_read_io(bio_op(bio))) { - unsigned int start; + WARN_ON_ONCE(!is_read_io(bio_op(bio))); + trace_f2fs_submit_read_bio(sbi->sb, type, bio); - if (type != DATA && type != NODE) - goto submit_io; + iostat_update_submit_ctx(bio, type); + submit_bio(bio); +} - if (f2fs_lfs_mode(sbi) && current->plug) - blk_finish_plug(current->plug); +static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio) +{ + unsigned int start = + (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi); - if (!F2FS_IO_ALIGNED(sbi)) - goto submit_io; + if (start == 0) + return; - start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; - start %= F2FS_IO_SIZE(sbi); + /* fill dummy pages */ + for (; start < F2FS_IO_SIZE(sbi); start++) { + struct page *page = + mempool_alloc(sbi->write_io_dummy, + GFP_NOIO | __GFP_NOFAIL); + f2fs_bug_on(sbi, !page); - if (start == 0) - goto submit_io; + lock_page(page); - /* fill dummy pages */ - for (; start < F2FS_IO_SIZE(sbi); start++) { - struct page *page = - mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_NOFAIL); - f2fs_bug_on(sbi, !page); + zero_user_segment(page, 0, PAGE_SIZE); + set_page_private_dummy(page); - lock_page(page); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + f2fs_bug_on(sbi, 1); + } +} - zero_user_segment(page, 0, PAGE_SIZE); - set_page_private_dummy(page); +static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, + enum page_type type) +{ + WARN_ON_ONCE(is_read_io(bio_op(bio))); + + if (type == DATA || type == NODE) { + if (f2fs_lfs_mode(sbi) && current->plug) + blk_finish_plug(current->plug); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) - f2fs_bug_on(sbi, 1); + if (F2FS_IO_ALIGNED(sbi)) { + f2fs_align_write_bio(sbi, bio); + /* + * In the NODE case, we lose next block address chain. + * So, we need to do checkpoint in f2fs_sync_file. + */ + if (type == NODE) + set_sbi_flag(sbi, SBI_NEED_CP); } - /* - * In the NODE case, we lose next block address chain. So, we - * need to do checkpoint in f2fs_sync_file. - */ - if (type == NODE) - set_sbi_flag(sbi, SBI_NEED_CP); } -submit_io: - if (is_read_io(bio_op(bio))) - trace_f2fs_submit_read_bio(sbi->sb, type, bio); - else - trace_f2fs_submit_write_bio(sbi->sb, type, bio); + trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); } -void f2fs_submit_bio(struct f2fs_sb_info *sbi, - struct bio *bio, enum page_type type) -{ - __submit_bio(sbi, bio, type); -} - static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -573,12 +570,13 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; - if (is_read_io(fio->op)) + if (is_read_io(fio->op)) { trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); - else + f2fs_submit_read_bio(io->sbi, io->bio, fio->type); + } else { trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); - - __submit_bio(io->sbi, io->bio, fio->type); + f2fs_submit_write_bio(io->sbi, io->bio, fio->type); + } io->bio = NULL; } @@ -655,6 +653,9 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, f2fs_down_write(&io->io_rwsem); + if (!io->bio) + goto unlock_out; + /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; @@ -663,6 +664,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); +unlock_out: f2fs_up_write(&io->io_rwsem); } @@ -741,12 +743,15 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? __read_io_type(page) : WB_DATA_TYPE(fio->page)); - __submit_bio(fio->sbi, bio, fio->type); + if (is_read_io(bio_op(bio))) + f2fs_submit_read_bio(fio->sbi, bio, fio->type); + else + f2fs_submit_write_bio(fio->sbi, bio, fio->type); return 0; } @@ -848,7 +853,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, /* page can't be merged into bio; submit the bio */ del_bio_entry(be); - __submit_bio(sbi, *bio, DATA); + f2fs_submit_write_bio(sbi, *bio, DATA); break; } f2fs_up_write(&io->bio_list_lock); @@ -911,7 +916,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, } if (found) - __submit_bio(sbi, target, DATA); + f2fs_submit_write_bio(sbi, target, DATA); if (bio && *bio) { bio_put(*bio); *bio = NULL; @@ -948,7 +953,7 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, WB_DATA_TYPE(page)); @@ -991,7 +996,7 @@ next: bio_page = fio->page; /* set submitted = true as a return value */ - fio->submitted = true; + fio->submitted = 1; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -1007,7 +1012,7 @@ alloc_new: (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = true; + fio->retry = 1; goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_VECS); @@ -1022,7 +1027,7 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; @@ -1107,7 +1112,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); - __submit_bio(sbi, bio, DATA); + f2fs_submit_read_bio(sbi, bio, DATA); return 0; } @@ -1207,19 +1212,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) -{ - struct extent_info ei = {0, }; - struct inode *inode = dn->inode; - - if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { - dn->data_blkaddr = ei.blk + index - ei.fofs; - return 0; - } - - return f2fs_reserve_block(dn, index); -} - struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs) @@ -1227,15 +1219,14 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei = {0, }; int err; page = f2fs_grab_cache_page(mapping, index, for_write); if (!page) return ERR_PTR(-ENOMEM); - if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { - dn.data_blkaddr = ei.blk + index - ei.fofs; + if (f2fs_lookup_read_extent_cache_block(inode, index, + &dn.data_blkaddr)) { if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; @@ -1432,13 +1423,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; dn->data_blkaddr = f2fs_data_blkaddr(dn); - if (dn->data_blkaddr != NULL_ADDR) - goto alloc; - - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) - return err; + if (dn->data_blkaddr == NULL_ADDR) { + err = inc_valid_block_count(sbi, dn->inode, &count); + if (unlikely(err)) + return err; + } -alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, @@ -1452,19 +1442,91 @@ alloc: return 0; } -void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag) { - if (flag == F2FS_GET_BLOCK_PRE_AIO) { - if (lock) - f2fs_down_read(&sbi->node_change); - else - f2fs_up_read(&sbi->node_change); + if (flag == F2FS_GET_BLOCK_PRE_AIO) + f2fs_down_read(&sbi->node_change); + else + f2fs_lock_op(sbi); +} + +static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag) +{ + if (flag == F2FS_GET_BLOCK_PRE_AIO) + f2fs_up_read(&sbi->node_change); + else + f2fs_unlock_op(sbi); +} + +int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + int err = 0; + + f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); + if (!f2fs_lookup_read_extent_cache_block(dn->inode, index, + &dn->data_blkaddr)) + err = f2fs_reserve_block(dn, index); + f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO); + + return err; +} + +static int f2fs_map_no_dnode(struct inode *inode, + struct f2fs_map_blocks *map, struct dnode_of_data *dn, + pgoff_t pgoff) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * There is one exceptional case that read_node_page() may return + * -ENOENT due to filesystem has been shutdown or cp_error, return + * -EIO in that case. + */ + if (map->m_may_create && + (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi))) + return -EIO; + + if (map->m_next_pgofs) + *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff); + if (map->m_next_extent) + *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff); + return 0; +} + +static bool f2fs_map_blocks_cached(struct inode *inode, + struct f2fs_map_blocks *map, int flag) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int maxblocks = map->m_len; + pgoff_t pgoff = (pgoff_t)map->m_lblk; + struct extent_info ei = {}; + + if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei)) + return false; + + map->m_pblk = ei.blk + pgoff - ei.fofs; + map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff); + map->m_flags = F2FS_MAP_MAPPED; + if (map->m_next_extent) + *map->m_next_extent = pgoff + map->m_len; + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + + if (f2fs_allow_multi_device_dio(sbi, flag)) { + int bidx = f2fs_target_device_index(sbi, map->m_pblk); + struct f2fs_dev_info *dev = &sbi->devs[bidx]; + + map->m_bdev = dev->bdev; + map->m_pblk -= dev->start_blk; + map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); } else { - if (lock) - f2fs_lock_op(sbi); - else - f2fs_unlock_op(sbi); + map->m_bdev = inode->i_sb->s_bdev; } + return true; } /* @@ -1472,8 +1534,7 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) * maps continuous logical blocks to physical blocks, and return such * info via f2fs_map_blocks structure. */ -int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, - int create, int flag) +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) { unsigned int maxblocks = map->m_len; struct dnode_of_data dn; @@ -1483,14 +1544,17 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; - struct extent_info ei = {0, }; block_t blkaddr; unsigned int start_pgofs; int bidx = 0; + bool is_hole; if (!maxblocks) return 0; + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) + goto out; + map->m_bdev = inode->i_sb->s_bdev; map->m_multidev_dio = f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag); @@ -1502,42 +1566,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; - if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) { - if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && - map->m_may_create) - goto next_dnode; - - map->m_pblk = ei.blk + pgofs - ei.fofs; - map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); - map->m_flags = F2FS_MAP_MAPPED; - if (map->m_next_extent) - *map->m_next_extent = pgofs + map->m_len; - - /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) - f2fs_wait_on_block_writeback_range(inode, - map->m_pblk, map->m_len); - - if (map->m_multidev_dio) { - block_t blk_addr = map->m_pblk; - - bidx = f2fs_target_device_index(sbi, map->m_pblk); - - map->m_bdev = FDEV(bidx).bdev; - map->m_pblk -= FDEV(bidx).start_blk; - map->m_len = min(map->m_len, - FDEV(bidx).end_blk + 1 - map->m_pblk); - - if (map->m_may_create) - f2fs_update_device_state(sbi, inode->i_ino, - blk_addr, map->m_len); - } - goto out; - } - next_dnode: if (map->m_may_create) - f2fs_do_map_lock(sbi, flag, true); + f2fs_map_lock(sbi, flag); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1545,29 +1576,8 @@ next_dnode: if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; - - if (err == -ENOENT) { - /* - * There is one exceptional case that read_node_page() - * may return -ENOENT due to filesystem has been - * shutdown or cp_error, so force to convert error - * number to EIO for such case. - */ - if (map->m_may_create && - (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || - f2fs_cp_error(sbi))) { - err = -EIO; - goto unlock_out; - } - - err = 0; - if (map->m_next_pgofs) - *map->m_next_pgofs = - f2fs_get_next_page_offset(&dn, pgofs); - if (map->m_next_extent) - *map->m_next_extent = - f2fs_get_next_page_offset(&dn, pgofs); - } + if (err == -ENOENT) + err = f2fs_map_no_dnode(inode, map, &dn, pgofs); goto unlock_out; } @@ -1578,78 +1588,76 @@ next_dnode: next_block: blkaddr = f2fs_data_blkaddr(&dn); - - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { + is_hole = !__is_valid_data_blkaddr(blkaddr); + if (!is_hole && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto sync_out; } - if (__is_valid_data_blkaddr(blkaddr)) { - /* use out-place-update for driect IO under LFS mode */ - if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && - map->m_may_create) { + /* use out-place-update for direct IO under LFS mod |