diff options
-rw-r--r-- | Documentation/admin-guide/ext4.rst | 2 | ||||
-rw-r--r-- | Documentation/filesystems/fscrypt.rst | 6 | ||||
-rw-r--r-- | fs/ext4/Kconfig | 6 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 5 | ||||
-rw-r--r-- | fs/ext4/dir.c | 10 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 81 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 5 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 25 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 22 | ||||
-rw-r--r-- | fs/ext4/extents.c | 205 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 6 | ||||
-rw-r--r-- | fs/ext4/file.c | 203 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 6 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 26 | ||||
-rw-r--r-- | fs/ext4/inline.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 53 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 4 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 6 | ||||
-rw-r--r-- | fs/ext4/namei.c | 20 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 19 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 42 | ||||
-rw-r--r-- | fs/ext4/resize.c | 10 | ||||
-rw-r--r-- | fs/ext4/super.c | 122 | ||||
-rw-r--r-- | fs/ext4/sysfs.c | 88 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 6 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 2 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 4 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 119 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 4 | ||||
-rw-r--r-- | include/linux/jbd2.h | 1 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 27 |
32 files changed, 709 insertions, 432 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 9bc93f0ce0c9..9443fcef1876 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -92,6 +92,8 @@ Currently Available * efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force the ordering) * Case-insensitive file name lookups +* file-based encryption support (fscrypt) +* file-based verity support (fsverity) [1] Filesystems with a block size of 1k may see a limit imposed by the directory hash tree having a maximum depth of two. diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 01e909245fcd..bd9932344804 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -1016,9 +1016,9 @@ astute users may notice some differences in behavior: - Direct I/O is not supported on encrypted files. Attempts to use direct I/O on such files will fall back to buffered I/O. -- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, - FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported - on encrypted files and will fail with EOPNOTSUPP. +- The fallocate operations FALLOC_FL_COLLAPSE_RANGE and + FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will + fail with EOPNOTSUPP. - Online defragmentation of encrypted files is not supported. The EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 2de970cfc33c..2a592e38cdfe 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -4,12 +4,7 @@ # kernels after the removal of ext3 driver. config EXT3_FS tristate "The Extended 3 (ext3) filesystem" - # These must match EXT4_FS selects... select EXT4_FS - select JBD2 - select CRC16 - select CRYPTO - select CRYPTO_CRC32C help This config option is here only for backward compatibility. ext3 filesystem is now handled by the ext4 driver. @@ -33,7 +28,6 @@ config EXT3_FS_SECURITY config EXT4_FS tristate "The Extended 4 (ext4) filesystem" - # Please update EXT3_FS selects when changing these select JBD2 select CRC16 select CRYPTO diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0b202e00d93f..5f993a411251 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -371,7 +371,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, if (buffer_verified(bh)) goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh))) { + desc, bh) || + ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) { ext4_unlock_group(sb, block_group); ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); ext4_mark_group_bitmap_corrupted(sb, block_group, @@ -505,7 +506,9 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, if (!desc) return -EFSCORRUPTED; wait_on_buffer(bh); + ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO); if (!buffer_uptodate(bh)) { + ext4_set_errno(sb, EIO); ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, (unsigned long long) bh->b_blocknr); diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 4e093277c8bf..1f340743c9a8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -462,7 +462,6 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, new_fn->name_len = ent_name->len; new_fn->file_type = dirent->file_type; memcpy(new_fn->name, ent_name->name, ent_name->len); - new_fn->name[ent_name->len] = 0; while (*p) { parent = *p; @@ -672,9 +671,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { + if (!inode || !IS_CASEFOLDED(inode) || + !EXT4_SB(inode->i_sb)->s_encoding) { if (len != name->len) return -1; return memcmp(str, name->name, len); @@ -687,10 +688,11 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) { const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode) || !um) + if (!inode || !IS_CASEFOLDED(inode) || !um) return 0; norm = kmalloc(PATH_MAX, GFP_ATOMIC); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f8578caba40d..9a2ee2428ecc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1052,8 +1052,6 @@ struct ext4_inode_info { /* allocation reservation info for delalloc */ /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - ext4_lblk_t i_da_metadata_calc_last_lblock; - int i_da_metadata_calc_len; /* pending cluster reservations for bigalloc file systems */ struct ext4_pending_tree i_pending_tree; @@ -1343,7 +1341,8 @@ struct ext4_super_block { __u8 s_lastcheck_hi; __u8 s_first_error_time_hi; __u8 s_last_error_time_hi; - __u8 s_pad[2]; + __u8 s_first_error_errcode; + __u8 s_last_error_errcode; __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ __le32 s_reserved[95]; /* Padding to the end of the block */ @@ -1556,6 +1555,9 @@ struct ext4_sb_info { /* Barrier between changing inodes' journal flags and writepages ops. */ struct percpu_rw_semaphore s_journal_flag_rwsem; struct dax_device *s_daxdev; +#ifdef CONFIG_EXT4_DEBUG + unsigned long s_simulate_fail; +#endif }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1575,6 +1577,66 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) } /* + * Simulate_fail codes + */ +#define EXT4_SIM_BBITMAP_EIO 1 +#define EXT4_SIM_BBITMAP_CRC 2 +#define EXT4_SIM_IBITMAP_EIO 3 +#define EXT4_SIM_IBITMAP_CRC 4 +#define EXT4_SIM_INODE_EIO 5 +#define EXT4_SIM_INODE_CRC 6 +#define EXT4_SIM_DIRBLOCK_EIO 7 +#define EXT4_SIM_DIRBLOCK_CRC 8 + +static inline bool ext4_simulate_fail(struct super_block *sb, + unsigned long code) +{ +#ifdef CONFIG_EXT4_DEBUG + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (unlikely(sbi->s_simulate_fail == code)) { + sbi->s_simulate_fail = 0; + return true; + } +#endif + return false; +} + +static inline void ext4_simulate_fail_bh(struct super_block *sb, + struct buffer_head *bh, + unsigned long code) +{ + if (!IS_ERR(bh) && ext4_simulate_fail(sb, code)) + clear_buffer_uptodate(bh); +} + +/* + * Error number codes for s_{first,last}_error_errno + * + * Linux errno numbers are architecture specific, so we need to translate + * them into something which is architecture independent. We don't define + * codes for all errno's; just the ones which are most likely to be the cause + * of an ext4_error() call. + */ +#define EXT4_ERR_UNKNOWN 1 +#define EXT4_ERR_EIO 2 +#define EXT4_ERR_ENOMEM 3 +#define EXT4_ERR_EFSBADCRC 4 +#define EXT4_ERR_EFSCORRUPTED 5 +#define EXT4_ERR_ENOSPC 6 +#define EXT4_ERR_ENOKEY 7 +#define EXT4_ERR_EROFS 8 +#define EXT4_ERR_EFBIG 9 +#define EXT4_ERR_EEXIST 10 +#define EXT4_ERR_ERANGE 11 +#define EXT4_ERR_EOVERFLOW 12 +#define EXT4_ERR_EBUSY 13 +#define EXT4_ERR_ENOTDIR 14 +#define EXT4_ERR_ENOTEMPTY 15 +#define EXT4_ERR_ESHUTDOWN 16 +#define EXT4_ERR_EFAULT 17 + +/* * Inode dynamic state flags */ enum { @@ -2628,7 +2690,6 @@ extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, @@ -2679,8 +2740,6 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb, extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); extern void ext4_superblock_csum_set(struct super_block *sb); -extern void *ext4_kvmalloc(size_t size, gfp_t flags); -extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup); extern const char *ext4_decode_error(struct super_block *sb, int errno, @@ -2688,6 +2747,7 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno, extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); +extern void ext4_set_errno(struct super_block *sb, int err); extern __printf(4, 5) void __ext4_error(struct super_block *, const char *, unsigned int, @@ -3254,7 +3314,6 @@ struct ext4_extent; #define EXT_MAX_BLOCKS 0xffffffff extern int ext4_ext_tree_init(handle_t *handle, struct inode *); -extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); @@ -3271,14 +3330,9 @@ extern int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern int ext4_ext_calc_metadata_amount(struct inode *inode, - ext4_lblk_t lblocks); extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path); -extern int ext4_can_extents_be_merged(struct inode *inode, - struct ext4_extent *ex1, - struct ext4_extent *ex2); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path **, struct ext4_extent *, int); @@ -3294,8 +3348,6 @@ extern int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); extern int ext4_ext_precache(struct inode *inode); -extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); -extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, ext4_lblk_t count, @@ -3390,6 +3442,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) } extern const struct iomap_ops ext4_iomap_ops; +extern const struct iomap_ops ext4_iomap_overwrite_ops; extern const struct iomap_ops ext4_iomap_report_ops; static inline int ext4_buffer_uptodate(struct buffer_head *bh) diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 98bd0e9ee7df..1c216fcc202a 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -267,10 +267,5 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, 0xffff); } -#define ext4_ext_dirty(handle, inode, path) \ - __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) -int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, - struct inode *inode, struct ext4_ext_path *path); - #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d3b8cdea5df7..1f53d64e42a5 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -7,6 +7,28 @@ #include <trace/events/ext4.h> +int ext4_inode_journal_mode(struct inode *inode) +{ + if (EXT4_JOURNAL(inode) == NULL) + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + /* We do not support data journalling with delayed allocation */ + if (!S_ISREG(inode->i_mode) || + ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) || + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC))) { + /* We do not support data journalling for encrypted data */ + if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + } + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + BUG(); +} + /* Just increment the non-pointer handle value */ static handle_t *ext4_get_nojournal(void) { @@ -58,6 +80,7 @@ static int ext4_journal_check_start(struct super_block *sb) * take the FS itself readonly cleanly. */ if (journal && is_journal_aborted(journal)) { + ext4_set_errno(sb, -journal->j_errno); ext4_abort(sb, "Detected aborted journal"); return -EROFS; } @@ -249,6 +272,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); + ext4_set_errno(inode->i_sb, -err); __ext4_abort(inode->i_sb, where, line, "error %d when attempting revoke", err); } @@ -320,6 +344,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_block = cpu_to_le64(bh->b_blocknr); + ext4_set_errno(inode->i_sb, EIO); ext4_error_inode(inode, where, line, bh->b_blocknr, "IO error syncing itable block"); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a6b9b66dbfad..7ea4f6fa173b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -463,27 +463,7 @@ int ext4_force_commit(struct super_block *sb); #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ -static inline int ext4_inode_journal_mode(struct inode *inode) -{ - if (EXT4_JOURNAL(inode) == NULL) - return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ - /* We do not support data journalling with delayed allocation */ - if (!S_ISREG(inode->i_mode) || - ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) || - test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || - (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && - !test_opt(inode->i_sb, DELALLOC))) { - /* We do not support data journalling for encrypted data */ - if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) - return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ - return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ - } - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ - BUG(); -} +int ext4_inode_journal_mode(struct inode *inode); static inline int ext4_should_journal_data(struct inode *inode) { diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e8708b77da6..954013d6076b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -161,8 +161,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, - struct inode *inode, struct ext4_ext_path *path) +static int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -179,6 +180,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, return err; } +#define ext4_ext_dirty(handle, inode, path) \ + __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) + static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) @@ -309,53 +313,6 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode, (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); } -/* - * Calculate the number of metadata blocks needed - * to allocate @blocks - * Worse case is one block per extent - */ -int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - int idxs; - - idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent_idx)); - - /* - * If the new delayed allocation block is contiguous with the - * previous da block, it can share index blocks with the - * previous block, so we only need to allocate a new index - * block every idxs leaf blocks. At ldxs**2 blocks, we need - * an additional index block, and at ldxs**3 blocks, yet - * another index blocks. - */ - if (ei->i_da_metadata_calc_len && - ei->i_da_metadata_calc_last_lblock+1 == lblock) { - int num = 0; - - if ((ei->i_da_metadata_calc_len % idxs) == 0) - num++; - if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) - num++; - if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { - num++; - ei->i_da_metadata_calc_len = 0; - } else - ei->i_da_metadata_calc_len++; - ei->i_da_metadata_calc_last_lblock++; - return num; - } - - /* - * In the worst case we need a new set of index blocks at - * every level of the inode's extent tree. - */ - ei->i_da_metadata_calc_len = 1; - ei->i_da_metadata_calc_last_lblock = lblock; - return ext_depth(inode) + 1; -} - static int ext4_ext_max_entries(struct inode *inode, int depth) { @@ -492,6 +449,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, return 0; corrupted: + ext4_set_errno(inode->i_sb, -err); ext4_error_inode(inode, function, line, 0, "pblk %llu bad header/extent: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", @@ -510,6 +468,30 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); } +static void ext4_cache_extents(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); + ext4_lblk_t prev = 0; + int i; + + for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { + unsigned int status = EXTENT_STATUS_WRITTEN; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); + int len = ext4_ext_get_actual_len(ex); + + if (prev && (prev != lblk)) + ext4_es_cache_extent(inode, prev, lblk - prev, ~0, + EXTENT_STATUS_HOLE); + + if (ext4_ext_is_unwritten(ex)) + status = EXTENT_STATUS_UNWRITTEN; + ext4_es_cache_extent(inode, lblk, len, + ext4_ext_pblock(ex), status); + prev = lblk + len; + } +} + static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, struct inode *inode, ext4_fsblk_t pblk, int depth, @@ -544,26 +526,7 @@ __read_extent_tree_block(const char *function, unsigned int line, */ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { struct ext4_extent_header *eh = ext_block_hdr(bh); - struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); - ext4_lblk_t prev = 0; - int i; - - for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { - unsigned int status = EXTENT_STATUS_WRITTEN; - ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - int len = ext4_ext_get_actual_len(ex); - - if (prev && (prev != lblk)) - ext4_es_cache_extent(inode, prev, - lblk - prev, ~0, - EXTENT_STATUS_HOLE); - - if (ext4_ext_is_unwritten(ex)) - status = EXTENT_STATUS_UNWRITTEN; - ext4_es_cache_extent(inode, lblk, len, - ext4_ext_pblock(ex), status); - prev = lblk + len; - } + ext4_cache_extents(inode, eh); } return bh; errout: @@ -649,8 +612,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ext_debug("path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), - ext4_idx_pblock(path->p_idx)); + ext_debug(" %d->%llu", + le32_to_cpu(path->p_idx->ei_block), + ext4_idx_pblock(path->p_idx)); } else if (path->p_ext) { ext_debug(" %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), @@ -731,11 +695,12 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path) if (!path) return; depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) + for (i = 0; i <= depth; i++, path++) { if (path->p_bh) { brelse(path->p_bh); path->p_bh = NULL; } + } } /* @@ -777,8 +742,8 @@ ext4_ext_binsearch_idx(struct inode *inode, chix = ix = EXT_FIRST_INDEX(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { - if (k != 0 && - le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { + if (k != 0 && le32_to_cpu(ix->ei_block) <= + le32_to_cpu(ix[-1].ei_block)) { printk(KERN_DEBUG "k=%d, ix=0x%p, " "first=0x%p\n", k, ix, EXT_FIRST_INDEX(eh)); @@ -911,6 +876,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[0].p_bh = NULL; i = depth; + if (!(flags & EXT4_EX_NOCACHE) && depth == 0) + ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { ext_debug("depth %d: num %d, max %d\n", @@ -1632,17 +1599,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) return EXT_MAX_BLOCKS; while (depth >= 0) { + struct ext4_ext_path *p = &path[depth]; + if (depth == path->p_depth) { /* leaf */ - if (path[depth].p_ext && - path[depth].p_ext != - EXT_LAST_EXTENT(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_ext[1].ee_block); + if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr)) + return le32_to_cpu(p->p_ext[1].ee_block); } else { /* index */ - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_idx[1].ei_block); + if (p->p_idx != EXT_LAST_INDEX(p->p_hdr)) + return le32_to_cpu(p->p_idx[1].ei_block); } depth--; } @@ -1742,9 +1708,9 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, return err; } -int -ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - struct ext4_extent *ex2) +static int ext4_can_extents_be_merged(struct inode *inode, + struct ext4_extent *ex1, + struct ext4_extent *ex2) { unsigned short ext1_ee_len, ext2_ee_len; @@ -1758,11 +1724,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, le32_to_cpu(ex2->ee_block)) return 0; - /* - * To allow future support for preallocated extents to be added - * as an RO_COMPAT feature, refuse to merge to extents if - * this can result in the top bit of ee_len being set. - */ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; @@ -1870,13 +1831,14 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, } /* - * This function tries to merge the @ex extent to neighbours in the tree. - * return 1 if merge left else 0. + * This function tries to merge the @ex extent to neighbours in the tree, then + * tries to collapse the extent tree into the inode. */ static void ext4_ext_try_to_merge(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex) { + struct ext4_extent *ex) +{ struct ext4_extent_header *eh; unsigned int depth; int merge_done = 0; @@ -3718,9 +3680,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); - if (IS_ENCRYPTED(inode)) - max_zeroout = 0; - /* * five cases: * 1. split the extent into three extents. @@ -4706,6 +4665,10 @@ retry: return ret > 0 ? ret2 : ret; } +static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); + +static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); + static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) { @@ -4723,9 +4686,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, trace_ext4_zero_range(inode, offset, len, mode); - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - /* Call ext4_force_commit to flush all data in case of data=journal. */ if (ext4_should_journal_data(inode)) { ret = ext4_force_commit(inode->i_sb); @@ -4765,7 +4725,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, } if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len > i_size_read(inode) || + (offset + len > inode->i_size || offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); @@ -4849,7 +4809,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, * Mark that we allocate beyond EOF so the subsequent truncate * can proceed even if the new size is the same as i_size. */ - if ((offset + len) > i_size_read(inode)) + if (offset + len > inode->i_size) ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); @@ -4890,14 +4850,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) * range since we would need to re-encrypt blocks with a * different IV or XTS tweak (which are based on the logical * block number). - * - * XXX It's not clear why zero range isn't working, but we'll - * leave it disabled for encrypted inodes for now. This is a - * bug we should fix.... */ if (IS_ENCRYPTED(inode) && - (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE | - FALLOC_FL_ZERO_RANGE))) + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; /* Return error if mode is not supported */ @@ -4941,7 +4896,7 |