diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 10:36:31 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 10:36:31 -0700 |
| commit | 4fc29c1aa375353ffe7c8fa171bf941b71ce29ef (patch) | |
| tree | 5d4fa5f8c95831655cb89f1a37c0ea5daf847fbd | |
| parent | 0e6acf0204da5b8705722a5f6806a4f55ed379d6 (diff) | |
| parent | 5302fb000def84100740a84d7f176c0e167b2141 (diff) | |
| download | linux-4fc29c1aa375353ffe7c8fa171bf941b71ce29ef.tar.gz linux-4fc29c1aa375353ffe7c8fa171bf941b71ce29ef.tar.bz2 linux-4fc29c1aa375353ffe7c8fa171bf941b71ce29ef.zip | |
Merge tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"The major change in this version is mitigating cpu overheads on write
paths by replacing redundant inode page updates with mark_inode_dirty
calls. And we tried to reduce lock contentions as well to improve
filesystem scalability. Other feature is setting F2FS automatically
when detecting host-managed SMR.
Enhancements:
- ioctl to move a range of data between files
- inject orphan inode errors
- avoid flush commands congestion
- support lazytime
Bug fixes:
- return proper results for some dentry operations
- fix deadlock in add_link failure
- disable extent_cache for fcollapse/finsert"
* tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (68 commits)
f2fs: clean up coding style and redundancy
f2fs: get victim segment again after new cp
f2fs: handle error case with f2fs_bug_on
f2fs: avoid data race when deciding checkpoin in f2fs_sync_file
f2fs: support an ioctl to move a range of data blocks
f2fs: fix to report error number of f2fs_find_entry
f2fs: avoid memory allocation failure due to a long length
f2fs: reset default idle interval value
f2fs: use blk_plug in all the possible paths
f2fs: fix to avoid data update racing between GC and DIO
f2fs: add maximum prefree segments
f2fs: disable extent_cache for fcollapse/finsert inodes
f2fs: refactor __exchange_data_block for speed up
f2fs: fix ERR_PTR returned by bio
f2fs: avoid mark_inode_dirty
f2fs: move i_size_write in f2fs_write_end
f2fs: fix to avoid redundant discard during fstrim
f2fs: avoid mismatching block range for discard
f2fs: fix incorrect f_bfree calculation in ->statfs
f2fs: use percpu_rw_semaphore
...
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 7 | ||||
| -rw-r--r-- | fs/f2fs/acl.c | 9 | ||||
| -rw-r--r-- | fs/f2fs/acl.h | 2 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 80 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 311 | ||||
| -rw-r--r-- | fs/f2fs/debug.c | 5 | ||||
| -rw-r--r-- | fs/f2fs/dir.c | 123 | ||||
| -rw-r--r-- | fs/f2fs/extent_cache.c | 50 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 286 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 499 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 52 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 92 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 53 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 147 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 144 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 14 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 23 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 59 | ||||
| -rw-r--r-- | fs/f2fs/segment.h | 22 | ||||
| -rw-r--r-- | fs/f2fs/shrinker.c | 5 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 134 | ||||
| -rw-r--r-- | fs/f2fs/xattr.c | 20 |
22 files changed, 1387 insertions, 750 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e1c9f0849da6..ecd808088362 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -109,7 +109,9 @@ background_gc=%s Turn on/off cleaning operations, namely garbage disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) -discard Issue discard/TRIM commands when a segment is cleaned. +discard/nodiscard Enable/disable real-time discard in f2fs, if discard is + enabled, f2fs will issue discard/TRIM commands when a + segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while for node from the end of main area. @@ -151,6 +153,9 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +mode=%s Control block allocation mode which supports "adaptive" + and "lfs". In "lfs" mode, there should be no random + writes towards main area. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a31c7e859af6..4dcc9e28dc5c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -201,7 +201,6 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { - struct f2fs_inode_info *fi = F2FS_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -214,7 +213,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; - set_acl_inode(fi, inode->i_mode); + set_acl_inode(inode, inode->i_mode); if (error == 0) acl = NULL; } @@ -233,7 +232,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -244,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return error; } @@ -385,6 +384,8 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; + f2fs_mark_inode_dirty_sync(inode); + if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 997ca8edb6cb..b2334d11dae8 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -37,7 +37,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int f2fs_set_acl(struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); #else diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 124b4a3017b5..f94d01e7d001 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -48,7 +48,8 @@ repeat: goto repeat; } f2fs_wait_on_page_writeback(page, META, true); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); return page; } @@ -266,6 +267,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff, written; /* collect a number of dirty meta pages and write together */ @@ -278,7 +280,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); diff = nr_pages_to_write(sbi, META, wbc); + blk_start_plug(&plug); written = sync_meta_pages(sbi, META, wbc->nr_to_write); + blk_finish_plug(&plug); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -366,9 +370,10 @@ static int f2fs_set_meta_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, META); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); @@ -510,10 +515,11 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(sbi, ino, ORPHAN_INO); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); + update_inode_page(inode); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -761,28 +767,25 @@ fail_no_cp: static void __add_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (is_inode_flag_set(fi, flag)) + if (is_inode_flag_set(inode, flag)) return; - set_inode_flag(fi, flag); - list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); + set_inode_flag(inode, flag); + list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), flag)) + if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag)) return; - list_del_init(&fi->dirty_list); - clear_inode_flag(fi, flag); + list_del_init(&F2FS_I(inode)->dirty_list); + clear_inode_flag(inode, flag); stat_dec_dirty_inode(F2FS_I_SB(inode), type); } @@ -795,13 +798,12 @@ void update_dirty_page(struct inode *inode, struct page *page) !S_ISLNK(inode->i_mode)) return; - if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) { - spin_lock(&sbi->inode_lock[type]); + spin_lock(&sbi->inode_lock[type]); + if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) __add_dirty_inode(inode, type); - spin_unlock(&sbi->inode_lock[type]); - } - inode_inc_dirty_pages(inode); + spin_unlock(&sbi->inode_lock[type]); + SetPagePrivate(page); f2fs_trace_pid(page); } @@ -864,6 +866,34 @@ retry: goto retry; } +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[DIRTY_META]; + struct inode *inode; + struct f2fs_inode_info *fi; + s64 total = get_pages(sbi, F2FS_DIRTY_IMETA); + + while (total--) { + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 0; + } + fi = list_entry(head->next, struct f2fs_inode_info, + gdirty_list); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + if (inode) { + update_inode_page(inode); + iput(inode); + } + }; + return 0; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -890,6 +920,14 @@ retry_flush_dents: goto retry_flush_dents; } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + f2fs_unlock_all(sbi); + err = f2fs_sync_inode_meta(sbi); + if (err) + goto out; + goto retry_flush_dents; + } + /* * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. @@ -914,6 +952,8 @@ out: static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); + + build_free_nids(sbi); f2fs_unlock_all(sbi); } @@ -954,7 +994,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * This avoids to conduct wrong roll-forward operations and uses * metapages, so should be called prior to sync_meta_pages below. */ - if (discard_next_dnode(sbi, discard_blk)) + if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk)) invalidate = true; /* Flush all the NAT/SIT pages */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ded224518978..e2624275d828 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,8 @@ #include <linux/bio.h> #include <linux/prefetch.h> #include <linux/uio.h> +#include <linux/mm.h> +#include <linux/memcontrol.h> #include <linux/cleancache.h> #include "f2fs.h" @@ -45,7 +47,8 @@ static void f2fs_read_end_io(struct bio *bio) struct page *page = bvec->bv_page; if (!bio->bi_error) { - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); @@ -97,10 +100,15 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, return bio; } -static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio) +static inline void __submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) { - if (!is_read_io(bio_op(bio))) + if (!is_read_io(bio_op(bio))) { atomic_inc(&sbi->nr_wb_bios); + if (f2fs_sb_mounted_hmsmr(sbi->sb) && + current->plug && (type == DATA || type == NODE)) + blk_finish_plug(current->plug); + } submit_bio(bio); } @@ -118,7 +126,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) bio_set_op_attrs(io->bio, fio->op, fio->op_flags); - __submit_bio(io->sbi, io->bio); + __submit_bio(io->sbi, io->bio, fio->type); io->bio = NULL; } @@ -240,7 +248,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio->bi_rw = fio->op_flags; bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio); + __submit_bio(fio->sbi, bio, fio->type); return 0; } @@ -326,7 +334,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (!count) return 0; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) return -ENOSPC; @@ -348,9 +356,6 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (set_page_dirty(dn->node_page)) dn->node_changed = true; - - mark_inode_dirty(dn->inode); - sync_inode_page(dn); return 0; } @@ -446,7 +451,8 @@ got_it: */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); return page; } @@ -505,14 +511,14 @@ repeat: /* wait for read completion */ lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } @@ -557,7 +563,8 @@ struct page *get_new_data_page(struct inode *inode, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { f2fs_put_page(page, 1); @@ -569,11 +576,8 @@ struct page *get_new_data_page(struct inode *inode, } got_it: if (new_i_size && i_size_read(inode) < - ((loff_t)(index + 1) << PAGE_SHIFT)) { - i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - /* Only the directory inode sets new_i_size */ - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); - } + ((loff_t)(index + 1) << PAGE_SHIFT)) + f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); return page; } @@ -586,7 +590,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) pgoff_t fofs; blkcnt_t count = 1; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); @@ -611,7 +615,7 @@ alloc: fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - i_size_write(dn->inode, + f2fs_i_size_write(dn->inode, ((loff_t)(fofs + 1) << PAGE_SHIFT)); return 0; } @@ -660,7 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; + int mode = create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -723,8 +727,7 @@ next_block: } else { err = __allocate_data_block(&dn); if (!err) { - set_inode_flag(F2FS_I(inode), - FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); allocated = true; } } @@ -795,8 +798,6 @@ skip: else if (dn.ofs_in_node < end_offset) goto next_block; - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); if (create) { @@ -807,8 +808,6 @@ skip: goto next_dnode; sync_out: - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); unlock_out: if (create) { @@ -968,6 +967,37 @@ out: return ret; } +struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct fscrypt_ctx *ctx = NULL; + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + } + + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + if (!bio) { + if (ctx) + fscrypt_release_ctx(ctx); + return ERR_PTR(-ENOMEM); + } + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + + return bio; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -986,7 +1016,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block; sector_t last_block_in_file; sector_t block_nr; - struct block_device *bdev = inode->i_sb->s_bdev; struct f2fs_map_blocks map; map.m_pblk = 0; @@ -1047,7 +1076,8 @@ got_it: } } else { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); goto next_page; } @@ -1058,35 +1088,15 @@ got_it: */ if (bio && (last_block_in_bio != block_nr - 1)) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; - - if (f2fs_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { - - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback( - F2FS_I_SB(inode), block_nr); - } - - bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + bio = f2fs_grab_bio(inode, block_nr, nr_pages); + if (IS_ERR(bio)) { + bio = NULL; goto set_error_page; } - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); - bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; bio_set_op_attrs(bio, REQ_OP_READ, 0); } @@ -1102,7 +1112,7 @@ set_error_page: goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } unlock_page(page); @@ -1112,7 +1122,7 @@ next_page: } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1201,14 +1211,14 @@ retry_encrypt: !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { rewrite_data_page(fio); - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); @@ -1223,6 +1233,7 @@ static int f2fs_write_data_page(struct page *page, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; + loff_t psize = (page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -1260,20 +1271,18 @@ write: available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + mapping_set_error(page->mapping, -EIO); + goto out; + } + /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; err = do_write_data_page(&fio); goto done; } - /* we should bypass data pages to proceed the kworkder jobs */ - if (unlikely(f2fs_cp_error(sbi))) { - SetPageError(page); - goto out; - } - if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) @@ -1285,6 +1294,8 @@ write: err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) err = do_write_data_page(&fio); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) @@ -1311,16 +1322,8 @@ out: redirty_out: redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - -static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = mapping->a_ops->writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; + unlock_page(page); + return err; } /* @@ -1329,8 +1332,7 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) + struct writeback_control *wbc) { int ret = 0; int done = 0; @@ -1343,10 +1345,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int cycled; int range_whole = 0; int tag; - int step = 0; pagevec_init(&pvec, 0); -next: + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1401,9 +1402,6 @@ continue_unlock: goto continue_unlock; } - if (step == is_cold_data(page)) - goto continue_unlock; - if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, @@ -1416,16 +1414,11 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = (*writepage)(page, wbc, data); + ret = mapping->a_ops->writepage(page, wbc); if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); - ret = 0; - } else { - done_index = page->index + 1; - done = 1; - break; - } + done_index = page->index + 1; + done = 1; + break; } if (--wbc->nr_to_write <= 0 && @@ -1438,11 +1431,6 @@ continue_unlock: cond_resched(); } - if (step < 1) { - step++; - goto next; - } - if (!cycled && !done) { cycled = 1; index = 0; @@ -1460,9 +1448,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool locked = false; + struct blk_plug plug; int ret; - long diff; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) @@ -1478,7 +1465,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, goto skip_write; /* skip writing during file defragment */ - if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG)) + if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; /* during POR, we don't need to trigger writepage at all. */ @@ -1487,20 +1474,16 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); - diff = nr_pages_to_write(sbi, DATA, wbc); - - if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) { - mutex_lock(&sbi->writepages); - locked = true; - } - ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); - if (locked) - mutex_unlock(&sbi->writepages); + blk_start_plug(&plug); + ret = f2fs_write_cache_pages(mapping, wbc); + blk_finish_plug(&plug); + /* + * if some pages were truncated, we cannot guarantee its mapping->host + * to detect pending bios. + */ + f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); - - wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return ret; skip_write: @@ -1558,7 +1541,7 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA) { read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_inline_node(ipage); } else { @@ -1668,39 +1651,35 @@ repeat: if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); } else { - struct f2fs_io_info fio = { - .sbi = sbi, - .type = DATA, - .op = REQ_OP_READ, - .op_flags = READ_SYNC, - .old_blkaddr = blkaddr, - .new_blkaddr = blkaddr, - .page = page, - .encrypted_page = NULL, - }; - err = f2fs_submit_page_bio(&fio); - if (err) - goto fail; + struct bio *bio; - lock_page(page); - if (unlikely(!PageUptodate(page))) { - err = -EIO; + bio = f2fs_grab_bio(inode, blkaddr, 1); + if (IS_ERR(bio)) { + err = PTR_ERR(bio); goto fail; } + bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + bio_put(bio); + err = -EFAULT; + goto fail; + } + + __submit_bio(sbi, bio, DATA); + + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } - - /* avoid symlink page */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - err = fscrypt_decrypt_page(page); - if (err) - goto fail; + if (unlikely(!PageUptodate(page))) { + err = -EIO; + goto fail; } } out_update: - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); out_clear: clear_cold_data(page); return 0; @@ -1721,13 +1700,11 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); + f2fs_put_page(page, 1); - if (pos + copied > i_size_read(inode)) { - i_size_write(inode, pos + copied); - mark_inode_dirty(inode); - } + if (pos + copied > i_size_read(inode)) + f2fs_i_size_write(inode, pos + copied); - f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } @@ -1752,6 +1729,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; + int rw = iov_iter_rw(iter); int err; err = check_direct_IO(inode, iter, offset); @@ -1760,18 +1738,23 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; + if (test_opt(F2FS_I_SB(inode), LFS)) + return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); + down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - if (iov_iter_rw(iter) == WRITE) { + up_read(&F2FS_I(inode)->dio_rwsem[rw]); + + if (rw == WRITE) { if (err > 0) - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); else if (err < 0) f2fs_write_failed(mapping, offset + count); } - trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; } @@ -1818,6 +1801,35 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } +/* + * This was copied from __set_page_dirty_buffers which gives higher performance + * in very high speed storages. (e.g., pmem) + */ +void f2fs_set_page_dirty_nobuffers(struct page *page) +{ + struct address_space *mapping = page->mapping; + unsigned long flags; + + if (unlikely(!mapping)) + return; + + spin_lock(&mapping->private_lock); + lock_page_memcg(page); + SetPageDirty(page); + spin_unlock(&mapping->private_lock); + + spin_lock_irqsave(&mapping->tree_l |
