diff options
-rw-r--r-- | Documentation/admin-guide/ext4.rst | 10 | ||||
-rw-r--r-- | fs/ext4/bitmap.c | 8 | ||||
-rw-r--r-- | fs/ext4/dir.c | 14 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 31 | ||||
-rw-r--r-- | fs/ext4/extents.c | 823 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 240 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 28 | ||||
-rw-r--r-- | fs/ext4/fast_commit.c | 47 | ||||
-rw-r--r-- | fs/ext4/file.c | 20 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 35 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 7 | ||||
-rw-r--r-- | fs/ext4/inline.c | 46 | ||||
-rw-r--r-- | fs/ext4/inode.c | 292 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 25 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 7 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 90 | ||||
-rw-r--r-- | fs/ext4/namei.c | 16 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 16 | ||||
-rw-r--r-- | fs/ext4/resize.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 65 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 31 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 7 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 21 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 97 | ||||
-rw-r--r-- | include/linux/jbd2.h | 4 |
25 files changed, 989 insertions, 994 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 5740d85439ff..2418b0c2d3df 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted: that ext4's inode table readahead algorithm will pre-read into the buffer cache. The default value is 32 blocks. - nouser_xattr - Disables Extended User Attributes. See the attr(5) manual page for - more information about extended attributes. - - noacl - This option disables POSIX Access Control List support. If ACL support - is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL - is enabled by default on mount. See the acl(5) manual page for more - information about acl. - bsddf (*) Make 'df' act like BSD. diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index cd725bebe69e..2a135075468d 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -18,15 +18,17 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars) int ext4_inode_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) + struct buffer_head *bh) { __u32 hi; __u32 provided, calculated; struct ext4_sb_info *sbi = EXT4_SB(sb); + int sz; if (!ext4_has_metadata_csum(sb)) return 1; + sz = EXT4_INODES_PER_GROUP(sb) >> 3; provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { @@ -40,14 +42,16 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, void ext4_inode_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) + struct buffer_head *bh) { __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); + int sz; if (!ext4_has_metadata_csum(sb)) return; + sz = EXT4_INODES_PER_GROUP(sb) >> 3; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 13196afe55ce..ef6a3c8f3a9a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -280,12 +280,20 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct fscrypt_str de_name = FSTR_INIT(de->name, de->name_len); + u32 hash; + u32 minor_hash; + + if (IS_CASEFOLDED(inode)) { + hash = EXT4_DIRENT_HASH(de); + minor_hash = EXT4_DIRENT_MINOR_HASH(de); + } else { + hash = 0; + minor_hash = 0; + } /* Directory is encrypted */ err = fscrypt_fname_disk_to_usr(inode, - EXT4_DIRENT_HASH(de), - EXT4_DIRENT_MINOR_HASH(de), - &de_name, &fstr); + hash, minor_hash, &de_name, &fstr); de_name = fstr; fstr.len = save_len; if (err) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ecc15e5f1eba..44b0d418143c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1058,6 +1058,7 @@ struct ext4_inode_info { /* Number of ongoing updates on this inode */ atomic_t i_fc_updates; + atomic_t i_unwritten; /* Nr. of inflight conversions pending */ /* Fast commit wait queue for this inode */ wait_queue_head_t i_fc_wait; @@ -1106,6 +1107,10 @@ struct ext4_inode_info { /* mballoc */ atomic_t i_prealloc_active; + + /* allocation reservation info for delalloc */ + /* In case of bigalloc, this refer to clusters rather than blocks */ + unsigned int i_reserved_data_blocks; struct rb_root i_prealloc_node; rwlock_t i_prealloc_lock; @@ -1122,10 +1127,6 @@ struct ext4_inode_info { /* ialloc */ ext4_group_t i_last_alloc_group; - /* allocation reservation info for delalloc */ - /* In case of bigalloc, this refer to clusters rather than blocks */ - unsigned int i_reserved_data_blocks; - /* pending cluster reservations for bigalloc file systems */ struct ext4_pending_tree i_pending_tree; @@ -1149,7 +1150,6 @@ struct ext4_inode_info { */ struct list_head i_rsv_conversion_list; struct work_struct i_rsv_conversion_work; - atomic_t i_unwritten; /* Nr. of inflight conversions pending */ spinlock_t i_block_reservation_lock; @@ -2338,9 +2338,9 @@ struct ext4_dir_entry_2 { ((struct ext4_dir_entry_hash *) \ (((void *)(entry)) + \ ((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND))) -#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(de)->hash) +#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash) #define EXT4_DIRENT_MINOR_HASH(entry) \ - le32_to_cpu(EXT4_DIRENT_HASHES(de)->minor_hash) + le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash) static inline bool ext4_hash_in_dirent(const struct inode *inode) { @@ -2462,6 +2462,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_TEA_UNSIGNED 5 #define DX_HASH_SIPHASH 6 +#define DX_HASH_LAST DX_HASH_SIPHASH static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, const void *address, unsigned int length) @@ -2695,10 +2696,10 @@ struct mmpd_data { extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); void ext4_inode_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); + struct buffer_head *bh); int ext4_inode_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); + struct buffer_head *bh); void ext4_block_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh); @@ -3712,11 +3713,12 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path); -extern int ext4_ext_insert_extent(handle_t *, struct inode *, - struct ext4_ext_path **, - struct ext4_extent *, int); +extern struct ext4_ext_path *ext4_ext_insert_extent( + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int gb_flags); extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, - struct ext4_ext_path **, + struct ext4_ext_path *, int flags); extern void ext4_free_ext_path(struct ext4_ext_path *); extern int ext4_ext_check_inode(struct inode *inode); @@ -3853,6 +3855,9 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh) return buffer_uptodate(bh); } +extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, + loff_t pos, unsigned len, + get_block_t *get_block); #endif /* __KERNEL__ */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e067f2dd0335..34e25eee6521 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -84,12 +84,11 @@ static void ext4_extent_block_csum_set(struct inode *inode, et->et_checksum = ext4_extent_block_csum(inode, eh); } -static int ext4_split_extent_at(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags); +static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, int flags); static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { @@ -106,21 +105,27 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) return 0; } +static inline void ext4_ext_path_brelse(struct ext4_ext_path *path) +{ + brelse(path->p_bh); + path->p_bh = NULL; +} + static void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth, i; - if (!path) + if (IS_ERR_OR_NULL(path)) return; depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) { - brelse(path->p_bh); - path->p_bh = NULL; - } + for (i = 0; i <= depth; i++, path++) + ext4_ext_path_brelse(path); } void ext4_free_ext_path(struct ext4_ext_path *path) { + if (IS_ERR_OR_NULL(path)) + return; ext4_ext_drop_refs(path); kfree(path); } @@ -323,19 +328,18 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) return size; } -static inline int +static inline struct ext4_ext_path * ext4_force_split_extent_at(handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, ext4_lblk_t lblk, + struct ext4_ext_path *path, ext4_lblk_t lblk, int nofail) { - struct ext4_ext_path *path = *ppath; int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; if (nofail) flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; - return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? + return ext4_split_extent_at(handle, inode, path, lblk, unwritten ? EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, flags); } @@ -635,8 +639,7 @@ int ext4_ext_precache(struct inode *inode) */ if ((i == depth) || path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) { - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; continue; } @@ -689,7 +692,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) struct ext4_extent *ex; int i; - if (!path) + if (IS_ERR_OR_NULL(path)) return; eh = path[depth].p_hdr; @@ -881,11 +884,10 @@ void ext4_ext_tree_init(handle_t *handle, struct inode *inode) struct ext4_ext_path * ext4_find_extent(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_path **orig_path, int flags) + struct ext4_ext_path *path, int flags) { struct ext4_extent_header *eh; struct buffer_head *bh; - struct ext4_ext_path *path = orig_path ? *orig_path : NULL; short int depth, i, ppos = 0; int ret; gfp_t gfp_flags = GFP_NOFS; @@ -906,7 +908,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, ext4_ext_drop_refs(path); if (depth > path[0].p_maxdepth) { kfree(path); - *orig_path = path = NULL; + path = NULL; } } if (!path) { @@ -961,8 +963,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, err: ext4_free_ext_path(path); - if (orig_path) - *orig_path = NULL; return ERR_PTR(ret); } @@ -1395,15 +1395,15 @@ out: * finds empty index and adds new leaf. * if no free index is found, then it requests in-depth growing. */ -static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - unsigned int mb_flags, - unsigned int gb_flags, - struct ext4_ext_path **ppath, - struct ext4_extent *newext) +static struct ext4_ext_path * +ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, + unsigned int mb_flags, unsigned int gb_flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { - struct ext4_ext_path *path = *ppath; struct ext4_ext_path *curp; int depth, i, err = 0; + ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block); repeat: i = depth = ext_depth(inode); @@ -1422,42 +1422,38 @@ repeat: * entry: create all needed subtree and add new leaf */ err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); if (err) - goto out; + goto errout; /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) - err = PTR_ERR(path); - } else { - /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, mb_flags); - if (err) - goto out; + path = ext4_find_extent(inode, ee_block, path, gb_flags); + return path; + } - /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } + /* tree is full, time to grow in depth */ + err = ext4_ext_grow_indepth(handle, inode, mb_flags); + if (err) + goto errout; - /* - * only first (depth 0 -> 1) produces free space; - * in all other cases we have to split the grown tree - */ - depth = ext_depth(inode); - if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { - /* now we need to split */ - goto repeat; - } + /* refill path */ + path = ext4_find_extent(inode, ee_block, path, gb_flags); + if (IS_ERR(path)) + return path; + + /* + * only first (depth 0 -> 1) produces free space; + * in all other cases we have to split the grown tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need to split */ + goto repeat; } -out: - return err; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } /* @@ -1749,12 +1745,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, break; err = ext4_ext_get_access(handle, inode, path + k); if (err) - break; + goto clean; path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) - break; + goto clean; } + return 0; + +clean: + /* + * The path[k].p_bh is either unmodified or with no verified bit + * set (see ext4_ext_get_access()). So just clear the verified bit + * of the successfully modified extents buffers, which will force + * these extents to be checked to avoid using inconsistent data. + */ + while (++k < depth) + clear_buffer_verified(path[k].p_bh); return err; } @@ -1876,7 +1883,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); path[0].p_hdr->eh_max = cpu_to_le16(max_root); - brelse(path[1].p_bh); + ext4_ext_path_brelse(path + 1); ext4_free_blocks(handle, inode, NULL, blk, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); } @@ -1964,16 +1971,15 @@ out: * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ -int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_extent *newext, int gb_flags) +struct ext4_ext_path * +ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int gb_flags) { - struct ext4_ext_path *path = *ppath; struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ - struct ext4_ext_path *npath = NULL; - int depth, len, err; + int depth, len, err = 0; ext4_lblk_t next; int mb_flags = 0, unwritten; @@ -1981,14 +1987,16 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto errout; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto errout; } /* try to insert block into found extent and return */ @@ -2026,7 +2034,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + depth); if (err) - return err; + goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); @@ -2051,7 +2059,7 @@ prepend: err = ext4_ext_get_access(handle, inode, path + depth); if (err) - return err; + goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_block = newext->ee_block; @@ -2076,21 +2084,26 @@ prepend: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { + struct ext4_ext_path *npath; + ext_debug(inode, "next leaf block - %u\n", next); - BUG_ON(npath != NULL); npath = ext4_find_extent(inode, next, NULL, gb_flags); - if (IS_ERR(npath)) - return PTR_ERR(npath); + if (IS_ERR(npath)) { + err = PTR_ERR(npath); + goto errout; + } BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { ext_debug(inode, "next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); + ext4_free_ext_path(path); path = npath; goto has_space; } ext_debug(inode, "next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext4_free_ext_path(npath); } /* @@ -2099,10 +2112,10 @@ prepend: */ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) mb_flags |= EXT4_MB_USE_RESERVED; - err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, - ppath, newext); - if (err) - goto cleanup; + path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, + path, newext); + if (IS_ERR(path)) + return path; depth = ext_depth(inode); eh = path[depth].p_hdr; @@ -2111,7 +2124,7 @@ has_space: err = ext4_ext_get_access(handle, inode, path + depth); if (err) - goto cleanup; + goto errout; if (!nearex) { /* there is no extent in this leaf, create first one */ @@ -2169,17 +2182,20 @@ merge: if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(handle, inode, path, nearex); - /* time to correct all indexes above */ err = ext4_ext_correct_indexes(handle, inode, path); if (err) - goto cleanup; + goto errout; err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + goto errout; -cleanup: - ext4_free_ext_path(npath); - return err; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } static int ext4_fill_es_cache_info(struct inode *inode, @@ -2279,27 +2295,26 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, { int err; ext4_fsblk_t leaf; + int k = depth - 1; /* free index block */ - depth--; - path = path + depth; - leaf = ext4_idx_pblock(path->p_idx); - if (unlikely(path->p_hdr->eh_entries == 0)) { - EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); + leaf = ext4_idx_pblock(path[k].p_idx); + if (unlikely(path[k].p_hdr->eh_entries == 0)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k); return -EFSCORRUPTED; } - err = ext4_ext_get_access(handle, inode, path); + err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; - if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { - int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; + if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) { + int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx; len *= sizeof(struct ext4_extent_idx); - memmove(path->p_idx, path->p_idx + 1, len); + memmove(path[k].p_idx, path[k].p_idx + 1, len); } - le16_add_cpu(&path->p_hdr->eh_entries, -1); - err = ext4_ext_dirty(handle, inode, path); + le16_add_cpu(&path[k].p_hdr->eh_entries, -1); + err = ext4_ext_dirty(handle, inode, path + k); if (err) return err; ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); @@ -2308,18 +2323,29 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); - while (--depth >= 0) { - if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + while (--k >= 0) { + if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr)) break; - path--; - err = ext4_ext_get_access(handle, inode, path); + err = ext4_ext_get_access(handle, inode, path + k); if (err) - break; - path->p_idx->ei_block = (path+1)->p_idx->ei_block; - err = ext4_ext_dirty(handle, inode, path); + goto clean; + path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path + k); if (err) - break; + goto clean; } + return 0; + +clean: + /* + * The path[k].p_bh is either unmodified or with no verified bit + * set (see ext4_ext_get_access()). So just clear the verified bit + * of the successfully modified extents buffers, which will force + * these extents to be checked to avoid using inconsistent data. + */ + while (++k < depth) + clear_buffer_verified(path[k].p_bh); + return err; } @@ -2872,11 +2898,12 @@ again: * fail removing space due to ENOSPC so try to use * reserved block if that happens. */ - err = ext4_force_split_extent_at(handle, inode, &path, - end + 1, 1); - if (err < 0) + path = ext4_force_split_extent_at(handle, inode, path, + end + 1, 1); + if (IS_ERR(path)) { + err = PTR_ERR(path); goto out; - + } } else if (sbi->s_cluster_ratio > 1 && end >= ex_end && partial.state == initial) { /* @@ -2934,8 +2961,7 @@ again: err = ext4_ext_rm_leaf(handle, inode, path, &partial, start, end); /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; continue; } @@ -2997,8 +3023,7 @@ again: err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; ext_debug(inode, "return to level %d\n", i); } @@ -3113,7 +3138,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) return; ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, - EXTENT_STATUS_WRITTEN); + EXTENT_STATUS_WRITTEN, 0); } /* FIXME!! we need to try to merge to left or right after zero-out */ @@ -3147,16 +3172,14 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * a> the extent are splitted into two extent. * b> split is not needed, and just mark the extent. * - * return 0 on success. + * Return an extent path pointer on success, or an error pointer on failure. */ -static int ext4_split_extent_at(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags) +static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, int flags) { - struct ext4_ext_path *path = *ppath; ext4_fsblk_t newblock; ext4_lblk_t ee_block; struct ext4_extent *ex, newex, orig_ex, zero_ex; @@ -3226,10 +3249,31 @@ static int ext4_split_extent_at(handle_t *handle, if (split_flag & EXT4_EXT_MARK_UNWRIT2) ext4_ext_mark_unwritten(ex2); - err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); - if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) + path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (!IS_ERR(path)) goto out; + err = PTR_ERR(path); + if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) + return path; + + /* + * Get a new path to try to zeroout or fix the extent length. + * Using EXT4_EX_NOFAIL guarantees that ext4_find_extent() + * will not return -ENOMEM, otherwise -ENOMEM will cause a + * retry in do_writepages(), and a WARN_ON may be triggered + * in ext4_da_update_reserve_space() due to an incorrect + * ee_len causing the i_reserved_data_blocks exception. + */ + path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL); + if (IS_ERR(path)) { + EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld", + split, PTR_ERR(path)); + return path; + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { @@ -3280,14 +3324,17 @@ fix_extent_len: * and err is a non-zero error code. */ ext4_ext_dirty(handle, inode, path + path->p_depth); - return err; out: + if (err) { + ext4_free_ext_path(path); + path = ERR_PTR(err); + } ext4_ext_show_leaf(inode, path); - return err; + return path; } /* - * ext4_split_extents() splits an extent and mark extent which is covered + * ext4_split_extent() splits an extent and mark extent which is covered * by @map as split_flags indicates * * It may result in splitting the extent into multiple extents (up to three) @@ -3297,21 +3344,18 @@ out: * c> Splits in three extents: Somone is splitting in middle of the extent * */ -static int ext4_split_extent(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_map_blocks *map, - int split_flag, - int flags) +static struct ext4_ext_path *ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, int flags, + unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len, depth; - int err = 0; int unwritten; int split_flag1, flags1; - int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3327,28 +3371,27 @@ static int ext4_split_extent(handle_t *handle, EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; - err = ext4_split_extent_at(handle, inode, ppath, + path = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); - if (err) - goto out; - } else { - allocated = ee_len - (map->m_lblk - ee_block); - } - /* - * Update path is required because previous ext4_split_extent_at() may - * result in split of original leaf or extent zeroout. - */ - path = ext4_find_extent(inode, map->m_lblk, ppath, flags); - if (IS_ERR(path)) - return PTR_ERR(path); - depth = ext_depth(inode); - ex = path[depth].p_ext; - if (!ex) { - EXT4_ERROR_INODE(inode, "unexpected hole at %lu", - (unsigned long) map->m_lblk); - return -EFSCORRUPTED; + if (IS_ERR(path)) + return path; + /* + * Update path is required because previous ext4_split_extent_at + * may result in split of original leaf or extent zeroout. + */ + path = ext4_find_extent(inode, map->m_lblk, path, flags); + if (IS_ERR(path)) + return path; + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) map->m_lblk); + ext4_free_ext_path(path); + return ERR_PTR(-EFSCORRUPTED); + } + unwritten = ext4_ext_is_unwritten(ex); } - unwritten = ext4_ext_is_unwritten(ex); if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; @@ -3357,15 +3400,20 @@ static int ext4_split_extent(handle_t *handle, split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | EXT4_EXT_MARK_UNWRIT2); } - err = ext4_split_extent_at(handle, inode, ppath, + path = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); - if (err) - goto out; + if (IS_ERR(path)) + return path; } + if (allocated) { + if (map->m_lblk + map->m_len > ee_block + ee_len) + *allocated = ee_len - (map->m_lblk - ee_block); + else + *allocated = map->m_len; + } ext4_ext_show_leaf(inode, path); -out: - return err ? err : allocated; + return path; } /* @@ -3388,13 +3436,11 @@ out: * that are allocated and initialized. * It is guaranteed to be >= map->m_len. */ -static int ext4_ext_convert_to_initialized(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path **ppath, - int flags) +static struct ext4_ext_path * +ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, struct ext4_ext_path *path, + int flags, unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; @@ -3404,7 +3450,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, unsigned int ee_len, depth, map_len = map->m_len; int err = 0; int split_flag = EXT4_EXT_DATA_VALID2; - int allocated = 0; unsigned int max_zeroout = 0; ext_debug(inode, "logical block %llu, max_blocks %u\n", @@ -3445,6 +3490,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - L2: we only attempt to merge with an extent stored in the * same extent tre |