diff options
| -rw-r--r-- | fs/ext4/balloc.c | 15 | ||||
| -rw-r--r-- | fs/ext4/block_validity.c | 8 | ||||
| -rw-r--r-- | fs/ext4/crypto.c | 4 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 32 | ||||
| -rw-r--r-- | fs/ext4/ext4_jbd2.c | 5 | ||||
| -rw-r--r-- | fs/ext4/extents_status.c | 44 | ||||
| -rw-r--r-- | fs/ext4/file.c | 38 | ||||
| -rw-r--r-- | fs/ext4/fsync.c | 9 | ||||
| -rw-r--r-- | fs/ext4/hash.c | 2 | ||||
| -rw-r--r-- | fs/ext4/ialloc.c | 8 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 2 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 124 | ||||
| -rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 200 | ||||
| -rw-r--r-- | fs/ext4/mballoc.h | 14 | ||||
| -rw-r--r-- | fs/ext4/mmp.c | 2 | ||||
| -rw-r--r-- | fs/ext4/namei.c | 17 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
| -rw-r--r-- | fs/ext4/super.c | 308 | ||||
| -rw-r--r-- | fs/ext4/xattr.c | 2 | ||||
| -rw-r--r-- | fs/jbd2/checkpoint.c | 34 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 484 | ||||
| -rw-r--r-- | fs/jbd2/recovery.c | 12 | ||||
| -rw-r--r-- | fs/libfs.c | 14 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 8 | ||||
| -rw-r--r-- | include/linux/jbd2.h | 5 |
26 files changed, 717 insertions, 678 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1f72f977c6db..79b20d6ae39e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -913,11 +913,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) } /* - * This function returns the number of file system metadata clusters at + * This function returns the number of file system metadata blocks at * the beginning of a block group, including the reserved gdt blocks. */ -static unsigned ext4_num_base_meta_clusters(struct super_block *sb, - ext4_group_t block_group) +unsigned int ext4_num_base_meta_blocks(struct super_block *sb, + ext4_group_t block_group) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned num; @@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb, } else { /* For META_BG_BLOCK_GROUPS */ num += ext4_bg_num_gdb_meta(sb, block_group); } - return EXT4_NUM_B2C(sbi, num); + return num; } + +static unsigned int ext4_num_base_meta_clusters(struct super_block *sb, + ext4_group_t block_group) +{ + return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group)); +} + /** * ext4_inode_to_goal_block - return a hint for block allocation * @inode: inode for block allocation diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 5504f72bbbbe..6fe3c941b565 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb) struct ext4_system_blocks *system_blks; struct ext4_group_desc *gdp; ext4_group_t i; - int flex_size = ext4_flex_bg_size(sbi); int ret; system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); @@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb) return -ENOMEM; for (i=0; i < ngroups; i++) { + unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i); + cond_resched(); - if (ext4_bg_has_super(sb, i) && - ((i < 5) || ((i % flex_size) == 0))) { + if (meta_blks != 0) { ret = add_system_zone(system_blks, ext4_group_first_block_no(sb, i), - ext4_bg_num_gdb(sb, i) + 1, 0); + meta_blks, 0); if (ret) goto err; } diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index e20ac0654b3f..453d4da5de52 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, #if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); + if (err) + ext4_fname_free_filename(fname); #endif return err; } @@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, #if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); + if (err) + ext4_fname_free_filename(fname); #endif return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 481491e892df..9418359b1d9d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -176,9 +176,6 @@ enum criteria { EXT4_MB_NUM_CRS }; -/* criteria below which we use fast block scanning and avoid unnecessary IO */ -#define CR_FAST CR_GOAL_LEN_SLOW - /* * Flags used in mballoc's allocation_context flags field. * @@ -1241,6 +1238,7 @@ struct ext4_inode_info { #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group * scanning in mballoc */ +#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */ #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt @@ -1258,10 +1256,8 @@ struct ext4_inode_info { #define ext4_test_and_set_bit __test_and_set_bit_le #define ext4_set_bit __set_bit_le -#define ext4_set_bit_atomic ext2_set_bit_atomic #define ext4_test_and_clear_bit __test_and_clear_bit_le #define ext4_clear_bit __clear_bit_le -#define ext4_clear_bit_atomic ext2_clear_bit_atomic #define ext4_test_bit test_bit_le #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le @@ -1708,10 +1704,13 @@ struct ext4_sb_info { const char *s_last_error_func; time64_t s_last_error_time; /* - * If we are in a context where we cannot update error information in - * the on-disk superblock, we queue this work to do it. + * If we are in a context where we cannot update the on-disk + * superblock, we queue the work here. This is used to update + * the error information in the superblock, and for periodic + * updates of the superblock called from the commit callback + * function. */ - struct work_struct s_error_work; + struct work_struct s_sb_upd_work; /* Ext4 fast commit sub transaction ID */ atomic_t s_fc_subtid; @@ -1804,7 +1803,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ enum { EXT4_MF_MNTDIR_SAMPLED, - EXT4_MF_FS_ABORTED, /* Fatal error detected */ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ }; @@ -2228,9 +2226,9 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly); #define EXT4_FLAGS_SHUTDOWN 1 #define EXT4_FLAGS_BDEV_IS_DAX 2 -static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) +static inline int ext4_forced_shutdown(struct super_block *sb) { - return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); + return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); } /* @@ -2708,7 +2706,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, s64 nclusters, unsigned int flags); extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); -extern void ext4_check_blocks_bitmap(struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); @@ -2864,7 +2861,6 @@ extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_dirs(struct super_block *); -extern void ext4_check_inodes_bitmap(struct super_block *); extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier); @@ -2907,7 +2903,6 @@ extern int ext4_mb_init(struct super_block *); extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern int ext4_mb_reserve_blocks(struct super_block *, int); extern void ext4_discard_preallocations(struct inode *, unsigned int); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); @@ -2930,6 +2925,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, int len, int state); +static inline bool ext4_mb_cr_expensive(enum criteria cr) +{ + return cr >= CR_GOAL_LEN_SLOW; +} /* inode.c */ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, @@ -2983,7 +2982,6 @@ extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_file_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); -extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); @@ -3090,6 +3088,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno, extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, ext4_group_t block_group, unsigned int flags); +extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb, + ext4_group_t block_group); extern __printf(7, 8) void __ext4_error(struct super_block *, const char *, unsigned int, bool, @@ -3531,8 +3531,6 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); /* inline.c */ extern int ext4_get_max_inline_size(struct inode *inode); extern int ext4_find_inline_data_nolock(struct inode *inode); -extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, - unsigned int len); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); int ext4_readpage_inline(struct inode *inode, struct folio *folio); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b38d59581411..d1a2e6624401 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -67,11 +67,12 @@ static int ext4_journal_check_start(struct super_block *sb) might_sleep(); - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return -EIO; - if (sb_rdonly(sb)) + if (WARN_ON_ONCE(sb_rdonly(sb))) return -EROFS; + WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; /* diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 9b5b8951afb4..6f7de14c0fa8 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -878,23 +878,29 @@ retry: err1 = __es_remove_extent(inode, lblk, end, NULL, es1); if (err1 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } err2 = __es_insert_extent(inode, &newes, es2); if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) err2 = 0; if (err2 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && (status & EXTENT_STATUS_WRITTEN || status & EXTENT_STATUS_UNWRITTEN)) __revise_pending(inode, lblk, len); - - /* es is pre-allocated but not used, free it. */ - if (es1 && !es1->es_len) - __es_free_extent(es1); - if (es2 && !es2->es_len) - __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); if (err1 || err2) @@ -1491,8 +1497,12 @@ retry: */ write_lock(&EXT4_I(inode)->i_es_lock); err = __es_remove_extent(inode, lblk, end, &reserved, es); - if (es && !es->es_len) - __es_free_extent(es); + /* Free preallocated extent if it didn't get used. */ + if (es) { + if (!es->es_len) + __es_free_extent(es); + es = NULL; + } write_unlock(&EXT4_I(inode)->i_es_lock); if (err) goto retry; @@ -2047,19 +2057,25 @@ retry: err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); if (err1 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } err2 = __es_insert_extent(inode, &newes, es2); if (err2 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } if (allocated) __insert_pending(inode, lblk); - - /* es is pre-allocated but not used, free it. */ - if (es1 && !es1->es_len) - __es_free_extent(es1); - if (es2 && !es2->es_len) - __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); if (err1 || err2) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2dc3f8301225..6830ea3a6c59 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (!iov_iter_count(to)) @@ -153,7 +153,7 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos, { struct inode *inode = file_inode(in); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; return filemap_splice_read(in, ppos, pipe, len, flags); } @@ -476,6 +476,11 @@ restart: * required to change security info in file_modified(), for extending * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten * extents (as partial block zeroing may be required). + * + * Note that unaligned writes are allowed under shared lock so long as + * they are pure overwrites. Otherwise, concurrent unaligned writes risk + * data corruption due to partial block zeroing in the dio layer, and so + * the I/O must occur exclusively. */ if (*ilock_shared && ((!IS_NOSEC(inode) || *extend || !overwrite || @@ -492,21 +497,12 @@ restart: /* * Now that locking is settled, determine dio flags and exclusivity - * requirements. Unaligned writes are allowed under shared lock so long - * as they are pure overwrites. Set the iomap overwrite only flag as an - * added precaution in this case. Even though this is unnecessary, we - * can detect and warn on unexpected -EAGAIN if an unsafe unaligned - * write is ever submitted. - * - * Otherwise, concurrent unaligned writes risk data corruption due to - * partial block zeroing in the dio layer, and so the I/O must occur - * exclusively. The inode lock is already held exclusive if the write is - * non-overwrite or extending, so drain all outstanding dio and set the - * force wait dio flag. + * requirements. We don't use DIO_OVERWRITE_ONLY because we enforce + * behavior already. The inode lock is already held exclusive if the + * write is non-overwrite or extending, so drain all outstanding dio and + * set the force wait dio flag. */ - if (*ilock_shared && unaligned_io) { - *dio_flags = IOMAP_DIO_OVERWRITE_ONLY; - } else if (!*ilock_shared && (unaligned_io || *extend)) { + if (!*ilock_shared && (unaligned_io || *extend)) { if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; @@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, dio_flags, NULL, 0); - WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)); if (ret == -ENOTBLK) ret = 0; @@ -709,7 +704,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; #ifdef CONFIG_FS_DAX @@ -806,10 +801,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = { static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct dax_device *dax_dev = sbi->s_daxdev; + struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; /* @@ -885,7 +879,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp) { int ret; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0c56f3a011a1..b40d3b29f7e5 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -131,9 +131,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; bool needs_barrier = false; struct inode *inode = file->f_mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ASSERT(ext4_journal_current_handle() == NULL); @@ -141,14 +140,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); if (sb_rdonly(inode->i_sb)) { - /* Make sure that we read updated s_mount_flags value */ + /* Make sure that we read updated s_ext4_flags value */ smp_rmb(); - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(inode->i_sb)) ret = -EROFS; goto out; } - if (!sbi->s_journal) { + if (!EXT4_SB(inode->i_sb)->s_journal) { ret = ext4_fsync_nojournal(file, start, end, datasync, &needs_barrier); if (needs_barrier) diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 46c3423ddfa1..deabe29da7fb 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -300,7 +300,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; - if (len && IS_CASEFOLDED(dir) && um && + if (len && IS_CASEFOLDED(dir) && (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) { buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 48abef5f23e7..b65058d972f9 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -950,7 +950,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, sb = dir->i_sb; sbi = EXT4_SB(sb); - if (unlikely(ext4_forced_shutdown(sbi))) + if (unlikely(ext4_forced_shutdown(sb))) return ERR_PTR(-EIO); ngroups = ext4_get_groups_count(sb); @@ -1523,12 +1523,6 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int num, ret = 0, used_blks = 0; unsigned long used_inos = 0; - /* This should not happen, but just to be sure check this */ - if (sb_rdonly(sb)) { - ret = 1; - goto out; - } - gdp = ext4_get_group_desc(sb, group, &group_desc_bh); if (!gdp || !grp) goto out; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 003861037374..012d9259ff53 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -228,7 +228,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, struct ext4_inode *raw_inode; int cp_len = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return; BUG_ON(!EXT4_I(inode)->i_inline_off); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89737d5a1614..4ce35f1c8b0a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1114,7 +1114,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; trace_ext4_write_begin(inode, pos, len); @@ -2213,8 +2213,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, if (err < 0) { struct super_block *sb = inode->i_sb; - if (ext4_forced_shutdown(EXT4_SB(sb)) || - ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) + if (ext4_forced_shutdown(sb)) goto invalidate_dirty_pages; /* * Let the uper layers retry transient errors. @@ -2534,14 +2533,13 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that * will obscure the real source of the problem. We test - * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because + * fs shutdown state instead of sb->s_flag's SB_RDONLY because * the latter could be true if the filesystem is mounted * read-only, and in that case, ext4_writepages should * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || - ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) { + if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) { ret = -EROFS; goto out_writepages; } @@ -2759,7 +2757,7 @@ static int ext4_writepages(struct address_space *mapping, int ret; int alloc_ctx; - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sb))) return -EIO; alloc_ctx = ext4_writepages_down_read(sb); @@ -2798,16 +2796,16 @@ static int ext4_dax_writepages(struct address_space *mapping, int ret; long nr_to_write = wbc->nr_to_write; struct inode *inode = mapping->host; - struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); int alloc_ctx; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; alloc_ctx = ext4_writepages_down_read(inode->i_sb); trace_ext4_writepages(inode, wbc); - ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); + ret = dax_writeback_mapping_range(mapping, + EXT4_SB(inode->i_sb)->s_daxdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); ext4_writepages_up_read(inode->i_sb, alloc_ctx); @@ -2857,7 +2855,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; struct inode *inode = mapping->host; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; index = pos >> PAGE_SHIFT; @@ -2937,14 +2935,73 @@ static int ext4_da_should_update_i_disksize(struct folio *folio, return 1; } +static int ext4_da_do_write_end(struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page) +{ + struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; + bool disksize_changed = false; + loff_t new_i_size; + + /* + * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES + * flag, which all that's needed to trigger page writeback. + */ + copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL); + new_i_size = pos + copied; + + /* + * It's important to update i_size while still holding page lock, + * because page writeout could otherwise come in and zero beyond + * i_size. + * + * Since we are holding inode lock, we are sure i_disksize <= + * i_size. We also know that if i_disksize < i_size, there are + * delalloc writes pending in the range up to i_size. If the end of + * the current write is <= i_size, there's no need to touch + * i_disksize since writeback will push i_disksize up to i_size + * eventually. If the end of the current write is > i_size and + * inside an allocated block which ext4_da_should_update_i_disksize() + * checked, we need to update i_disksize here as certain + * ext4_writepages() paths not allocating blocks and update i_disksize. + */ + if (new_i_size > inode->i_size) { + unsigned long end; + + i_size_write(inode, new_i_size); + end = (new_i_size - 1) & (PAGE_SIZE - 1); + if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) { + ext4_update_i_disksize(inode, new_i_size); + disksize_changed = true; + } + } + + unlock_page(page); + put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + + if (disksize_changed) { + handle_t *handle; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } + + return copied; +} + static int ext4_da_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = mapping->host; - loff_t new_i_size; - unsigned long start, end; int write_mode = (int)(unsigned long)fsdata; struct folio *folio = page_folio(page); @@ -2963,30 +3020,7 @@ static int ext4_da_write_end(struct file *file, if (unlikely(copied < len) && !PageUptodate(page)) copied = 0; - start = pos & (PAGE_SIZE - 1); - end = start + copied - 1; - - /* - * Since we are holding inode lock, we are sure i_disksize <= - * i_size. We also know that if i_disksize < i_size, there are - * delalloc writes pending in the range upto i_size. If the end of - * the current write is <= i_size, there's no need to touch - * i_disksize since writeback will push i_disksize upto i_size - * eventually. If the end of the current write is > i_size and - * inside an allocated block (ext4_da_should_update_i_disksize() - * check), we need to update i_disksize here as certain - * ext4_writepages() paths not allocating blocks update i_disksize. - * - * Note that we defer inode dirtying to generic_write_end() / - * ext4_da_write_inline_data_end(). - */ - new_i_size = pos + copied; - if (copied && new_i_size > inode->i_size && - ext4_da_should_update_i_disksize(folio, end)) - ext4_update_i_disksize(inode, new_i_size); - - return generic_write_end(file, mapping, pos, len, copied, &folio->page, - fsdata); + return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page); } /* @@ -4940,9 +4974,12 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, "iget: bogus i_mode (%o)", inode->i_mode); goto bad_inode; } - if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) + if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) { ext4_error_inode(inode, function, line, 0, "casefold flag without casefold feature"); + ret = -EFSCORRUPTED; + goto bad_inode; + } if ((err_str = check_igot_inode(inode, flags)) != NULL) { ext4_error_inode(inode, function, line, 0, err_str); ret = -EFSCORRUPTED; @@ -5131,11 +5168,10 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) || - sb_rdonly(inode->i_sb)) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) return 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (EXT4_SB(inode->i_sb)->s_journal) { @@ -5255,7 +5291,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, const unsigned int ia_valid = attr->ia_valid; bool inc_ivers = true; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (unlikely(IS_IMMUTABLE(inode))) @@ -5674,7 +5710,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + if (unlikely(ext4_forced_shutdown(inode->i_sb))) { put_bh(iloc->bh); return -EIO; } @@ -5700,7 +5736,7 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, { int err; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; err = ext4_get_inode_loc(inode, iloc); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b0349f451863..0bfe2ce589e2 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -802,7 +802,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags) if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) return -EINVAL; - if (ext4_forced_shutdown(sbi)) + if (ext4_forced_shutdown(sb)) return 0; ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 21b903fe546e..c91db9f57524 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -874,7 +874,7 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_group_info *iter, *grp; + struct ext4_group_info *iter; int i; if (ac->ac_status == AC_STATUS_FOUND) @@ -883,7 +883,6 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED)) atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions); - grp = NULL; for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { if (list_empty(&sbi->s_mb_largest_free_orders[i])) continue; @@ -892,28 +891,22 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); continue; } - grp = NULL; list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], bb_largest_free_order_node) { if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]); if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) { - grp = iter; - break; + *group = iter->bb_group; + ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; + read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); + return; } } read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); - if (grp) - break; } - if (!grp) { - /* Increment cr and search again */ - *new_cr = CR_GOAL_LEN_FAST; - } else { - *group = grp->bb_group; - ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; - } + /* Increment cr and search again if no group is found */ + *new_cr = CR_G |
