summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/balloc.c15
-rw-r--r--fs/ext4/block_validity.c8
-rw-r--r--fs/ext4/crypto.c4
-rw-r--r--fs/ext4/ext4.h32
-rw-r--r--fs/ext4/ext4_jbd2.c5
-rw-r--r--fs/ext4/extents_status.c44
-rw-r--r--fs/ext4/file.c38
-rw-r--r--fs/ext4/fsync.c9
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/ialloc.c8
-rw-r--r--fs/ext4/inline.c2
-rw-r--r--fs/ext4/inode.c124
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c200
-rw-r--r--fs/ext4/mballoc.h14
-rw-r--r--fs/ext4/mmp.c2
-rw-r--r--fs/ext4/namei.c17
-rw-r--r--fs/ext4/page-io.c2
-rw-r--r--fs/ext4/super.c308
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/jbd2/checkpoint.c34
-rw-r--r--fs/jbd2/journal.c484
-rw-r--r--fs/jbd2/recovery.c12
-rw-r--r--fs/libfs.c14
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--include/linux/jbd2.h5
26 files changed, 717 insertions, 678 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1f72f977c6db..79b20d6ae39e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -913,11 +913,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
}
/*
- * This function returns the number of file system metadata clusters at
+ * This function returns the number of file system metadata blocks at
* the beginning of a block group, including the reserved gdt blocks.
*/
-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
- ext4_group_t block_group)
+unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+ ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned num;
@@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
} else { /* For META_BG_BLOCK_GROUPS */
num += ext4_bg_num_gdb_meta(sb, block_group);
}
- return EXT4_NUM_B2C(sbi, num);
+ return num;
}
+
+static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
+ ext4_group_t block_group)
+{
+ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
+}
+
/**
* ext4_inode_to_goal_block - return a hint for block allocation
* @inode: inode for block allocation
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 5504f72bbbbe..6fe3c941b565 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb)
struct ext4_system_blocks *system_blks;
struct ext4_group_desc *gdp;
ext4_group_t i;
- int flex_size = ext4_flex_bg_size(sbi);
int ret;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb)
return -ENOMEM;
for (i=0; i < ngroups; i++) {
+ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
+
cond_resched();
- if (ext4_bg_has_super(sb, i) &&
- ((i < 5) || ((i % flex_size) == 0))) {
+ if (meta_blks != 0) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1, 0);
+ meta_blks, 0);
if (ret)
goto err;
}
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index e20ac0654b3f..453d4da5de52 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
+ if (err)
+ ext4_fname_free_filename(fname);
#endif
return err;
}
@@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
+ if (err)
+ ext4_fname_free_filename(fname);
#endif
return err;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 481491e892df..9418359b1d9d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -176,9 +176,6 @@ enum criteria {
EXT4_MB_NUM_CRS
};
-/* criteria below which we use fast block scanning and avoid unnecessary IO */
-#define CR_FAST CR_GOAL_LEN_SLOW
-
/*
* Flags used in mballoc's allocation_context flags field.
*
@@ -1241,6 +1238,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
* scanning in mballoc
*/
+#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
@@ -1258,10 +1256,8 @@ struct ext4_inode_info {
#define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
-#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
-#define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
@@ -1708,10 +1704,13 @@ struct ext4_sb_info {
const char *s_last_error_func;
time64_t s_last_error_time;
/*
- * If we are in a context where we cannot update error information in
- * the on-disk superblock, we queue this work to do it.
+ * If we are in a context where we cannot update the on-disk
+ * superblock, we queue the work here. This is used to update
+ * the error information in the superblock, and for periodic
+ * updates of the superblock called from the commit callback
+ * function.
*/
- struct work_struct s_error_work;
+ struct work_struct s_sb_upd_work;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
@@ -1804,7 +1803,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
*/
enum {
EXT4_MF_MNTDIR_SAMPLED,
- EXT4_MF_FS_ABORTED, /* Fatal error detected */
EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};
@@ -2228,9 +2226,9 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly);
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2
-static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+static inline int ext4_forced_shutdown(struct super_block *sb)
{
- return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
}
/*
@@ -2708,7 +2706,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
-extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
@@ -2864,7 +2861,6 @@ extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
-extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
@@ -2907,7 +2903,6 @@ extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
-extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
@@ -2930,6 +2925,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state);
+static inline bool ext4_mb_cr_expensive(enum criteria cr)
+{
+ return cr >= CR_GOAL_LEN_SLOW;
+}
/* inode.c */
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
@@ -2983,7 +2982,6 @@ extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
extern int ext4_file_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
-extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
@@ -3090,6 +3088,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
+extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+ ext4_group_t block_group);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
@@ -3531,8 +3531,6 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
-extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
- unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
int ext4_readpage_inline(struct inode *inode, struct folio *folio);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b38d59581411..d1a2e6624401 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -67,11 +67,12 @@ static int ext4_journal_check_start(struct super_block *sb)
might_sleep();
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
- if (sb_rdonly(sb))
+ if (WARN_ON_ONCE(sb_rdonly(sb)))
return -EROFS;
+
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
/*
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 9b5b8951afb4..6f7de14c0fa8 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -878,23 +878,29 @@ retry:
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
if (err1 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
err2 = 0;
if (err2 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
(status & EXTENT_STATUS_WRITTEN ||
status & EXTENT_STATUS_UNWRITTEN))
__revise_pending(inode, lblk, len);
-
- /* es is pre-allocated but not used, free it. */
- if (es1 && !es1->es_len)
- __es_free_extent(es1);
- if (es2 && !es2->es_len)
- __es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
@@ -1491,8 +1497,12 @@ retry:
*/
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end, &reserved, es);
- if (es && !es->es_len)
- __es_free_extent(es);
+ /* Free preallocated extent if it didn't get used. */
+ if (es) {
+ if (!es->es_len)
+ __es_free_extent(es);
+ es = NULL;
+ }
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err)
goto retry;
@@ -2047,19 +2057,25 @@ retry:
err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
if (err1 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
if (allocated)
__insert_pending(inode, lblk);
-
- /* es is pre-allocated but not used, free it. */
- if (es1 && !es1->es_len)
- __es_free_extent(es1);
- if (es2 && !es2->es_len)
- __es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2dc3f8301225..6830ea3a6c59 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (!iov_iter_count(to))
@@ -153,7 +153,7 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
{
struct inode *inode = file_inode(in);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
return filemap_splice_read(in, ppos, pipe, len, flags);
}
@@ -476,6 +476,11 @@ restart:
* required to change security info in file_modified(), for extending
* I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
* extents (as partial block zeroing may be required).
+ *
+ * Note that unaligned writes are allowed under shared lock so long as
+ * they are pure overwrites. Otherwise, concurrent unaligned writes risk
+ * data corruption due to partial block zeroing in the dio layer, and so
+ * the I/O must occur exclusively.
*/
if (*ilock_shared &&
((!IS_NOSEC(inode) || *extend || !overwrite ||
@@ -492,21 +497,12 @@ restart:
/*
* Now that locking is settled, determine dio flags and exclusivity
- * requirements. Unaligned writes are allowed under shared lock so long
- * as they are pure overwrites. Set the iomap overwrite only flag as an
- * added precaution in this case. Even though this is unnecessary, we
- * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
- * write is ever submitted.
- *
- * Otherwise, concurrent unaligned writes risk data corruption due to
- * partial block zeroing in the dio layer, and so the I/O must occur
- * exclusively. The inode lock is already held exclusive if the write is
- * non-overwrite or extending, so drain all outstanding dio and set the
- * force wait dio flag.
+ * requirements. We don't use DIO_OVERWRITE_ONLY because we enforce
+ * behavior already. The inode lock is already held exclusive if the
+ * write is non-overwrite or extending, so drain all outstanding dio and
+ * set the force wait dio flag.
*/
- if (*ilock_shared && unaligned_io) {
- *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
- } else if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (!*ilock_shared && (unaligned_io || *extend)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
@@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
dio_flags, NULL, 0);
- WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;
@@ -709,7 +704,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
#ifdef CONFIG_FS_DAX
@@ -806,10 +801,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct dax_device *dax_dev = sbi->s_daxdev;
+ struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
/*
@@ -885,7 +879,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0c56f3a011a1..b40d3b29f7e5 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -131,9 +131,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
bool needs_barrier = false;
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ASSERT(ext4_journal_current_handle() == NULL);
@@ -141,14 +140,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_ext4_sync_file_enter(file, datasync);
if (sb_rdonly(inode->i_sb)) {
- /* Make sure that we read updated s_mount_flags value */
+ /* Make sure that we read updated s_ext4_flags value */
smp_rmb();
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
+ if (ext4_forced_shutdown(inode->i_sb))
ret = -EROFS;
goto out;
}
- if (!sbi->s_journal) {
+ if (!EXT4_SB(inode->i_sb)->s_journal) {
ret = ext4_fsync_nojournal(file, start, end, datasync,
&needs_barrier);
if (needs_barrier)
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 46c3423ddfa1..deabe29da7fb 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -300,7 +300,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
- if (len && IS_CASEFOLDED(dir) && um &&
+ if (len && IS_CASEFOLDED(dir) &&
(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) {
buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
if (!buff)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 48abef5f23e7..b65058d972f9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -950,7 +950,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
sb = dir->i_sb;
sbi = EXT4_SB(sb);
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(sb)))
return ERR_PTR(-EIO);
ngroups = ext4_get_groups_count(sb);
@@ -1523,12 +1523,6 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int num, ret = 0, used_blks = 0;
unsigned long used_inos = 0;
- /* This should not happen, but just to be sure check this */
- if (sb_rdonly(sb)) {
- ret = 1;
- goto out;
- }
-
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp || !grp)
goto out;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 003861037374..012d9259ff53 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -228,7 +228,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
struct ext4_inode *raw_inode;
int cp_len = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
BUG_ON(!EXT4_I(inode)->i_inline_off);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89737d5a1614..4ce35f1c8b0a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1114,7 +1114,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
trace_ext4_write_begin(inode, pos, len);
@@ -2213,8 +2213,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
if (err < 0) {
struct super_block *sb = inode->i_sb;
- if (ext4_forced_shutdown(EXT4_SB(sb)) ||
- ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+ if (ext4_forced_shutdown(sb))
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.
@@ -2534,14 +2533,13 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
- * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
+ * fs shutdown state instead of sb->s_flag's SB_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
- ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
+ if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
ret = -EROFS;
goto out_writepages;
}
@@ -2759,7 +2757,7 @@ static int ext4_writepages(struct address_space *mapping,
int ret;
int alloc_ctx;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(sb);
@@ -2798,16 +2796,16 @@ static int ext4_dax_writepages(struct address_space *mapping,
int ret;
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
int alloc_ctx;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
- ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
+ ret = dax_writeback_mapping_range(mapping,
+ EXT4_SB(inode->i_sb)->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
@@ -2857,7 +2855,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
struct inode *inode = mapping->host;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
index = pos >> PAGE_SHIFT;
@@ -2937,14 +2935,73 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
return 1;
}
+static int ext4_da_do_write_end(struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page)
+{
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool disksize_changed = false;
+ loff_t new_i_size;
+
+ /*
+ * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
+ * flag, which all that's needed to trigger page writeback.
+ */
+ copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
+ new_i_size = pos + copied;
+
+ /*
+ * It's important to update i_size while still holding page lock,
+ * because page writeout could otherwise come in and zero beyond
+ * i_size.
+ *
+ * Since we are holding inode lock, we are sure i_disksize <=
+ * i_size. We also know that if i_disksize < i_size, there are
+ * delalloc writes pending in the range up to i_size. If the end of
+ * the current write is <= i_size, there's no need to touch
+ * i_disksize since writeback will push i_disksize up to i_size
+ * eventually. If the end of the current write is > i_size and
+ * inside an allocated block which ext4_da_should_update_i_disksize()
+ * checked, we need to update i_disksize here as certain
+ * ext4_writepages() paths not allocating blocks and update i_disksize.
+ */
+ if (new_i_size > inode->i_size) {
+ unsigned long end;
+
+ i_size_write(inode, new_i_size);
+ end = (new_i_size - 1) & (PAGE_SIZE - 1);
+ if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
+ ext4_update_i_disksize(inode, new_i_size);
+ disksize_changed = true;
+ }
+ }
+
+ unlock_page(page);
+ put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+
+ if (disksize_changed) {
+ handle_t *handle;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ }
+
+ return copied;
+}
+
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
- loff_t new_i_size;
- unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
struct folio *folio = page_folio(page);
@@ -2963,30 +3020,7 @@ static int ext4_da_write_end(struct file *file,
if (unlikely(copied < len) && !PageUptodate(page))
copied = 0;
- start = pos & (PAGE_SIZE - 1);
- end = start + copied - 1;
-
- /*
- * Since we are holding inode lock, we are sure i_disksize <=
- * i_size. We also know that if i_disksize < i_size, there are
- * delalloc writes pending in the range upto i_size. If the end of
- * the current write is <= i_size, there's no need to touch
- * i_disksize since writeback will push i_disksize upto i_size
- * eventually. If the end of the current write is > i_size and
- * inside an allocated block (ext4_da_should_update_i_disksize()
- * check), we need to update i_disksize here as certain
- * ext4_writepages() paths not allocating blocks update i_disksize.
- *
- * Note that we defer inode dirtying to generic_write_end() /
- * ext4_da_write_inline_data_end().
- */
- new_i_size = pos + copied;
- if (copied && new_i_size > inode->i_size &&
- ext4_da_should_update_i_disksize(folio, end))
- ext4_update_i_disksize(inode, new_i_size);
-
- return generic_write_end(file, mapping, pos, len, copied, &folio->page,
- fsdata);
+ return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
}
/*
@@ -4940,9 +4974,12 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
- if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+ if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
if ((err_str = check_igot_inode(inode, flags)) != NULL) {
ext4_error_inode(inode, function, line, 0, err_str);
ret = -EFSCORRUPTED;
@@ -5131,11 +5168,10 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err;
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
- sb_rdonly(inode->i_sb))
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (EXT4_SB(inode->i_sb)->s_journal) {
@@ -5255,7 +5291,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
const unsigned int ia_valid = attr->ia_valid;
bool inc_ivers = true;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (unlikely(IS_IMMUTABLE(inode)))
@@ -5674,7 +5710,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+ if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
put_bh(iloc->bh);
return -EIO;
}
@@ -5700,7 +5736,7 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
err = ext4_get_inode_loc(inode, iloc);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b0349f451863..0bfe2ce589e2 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -802,7 +802,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags)
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
return -EINVAL;
- if (ext4_forced_shutdown(sbi))
+ if (ext4_forced_shutdown(sb))
return 0;
ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 21b903fe546e..c91db9f57524 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -874,7 +874,7 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_group_info *iter, *grp;
+ struct ext4_group_info *iter;
int i;
if (ac->ac_status == AC_STATUS_FOUND)
@@ -883,7 +883,6 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED))
atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions);
- grp = NULL;
for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
if (list_empty(&sbi->s_mb_largest_free_orders[i]))
continue;
@@ -892,28 +891,22 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
continue;
}
- grp = NULL;
list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
bb_largest_free_order_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) {
- grp = iter;
- break;
+ *group = iter->bb_group;
+ ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED;
+ read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
+ return;
}
}
read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
- if (grp)
- break;
}
- if (!grp) {
- /* Increment cr and search again */
- *new_cr = CR_GOAL_LEN_FAST;
- } else {
- *group = grp->bb_group;
- ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED;
- }
+ /* Increment cr and search again if no group is found */
+ *new_cr = CR_G