diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 08:57:41 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 08:57:41 -0700 |
| commit | 0cfcde1fafc23068f57afa50faa3e69487b7cd30 (patch) | |
| tree | 86f9b91f143e39becea74df7db1d3e9cd89dd3ea | |
| parent | c3558a6b2a75d9adacf15dd7fae79dbfffa7ebe4 (diff) | |
| parent | 519fe1bae7e20fc4e7f179d50b6102b49980e85d (diff) | |
| download | linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.tar.gz linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.tar.bz2 linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.zip | |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"There are a number of major cleanups in ext4 this cycle:
- The data=journal writepath has been significantly cleaned up and
simplified by Jan Kara, which removes a large number of
data=journal special cases.
- Ojaswin Mujoo has replaced the linked list used to track extents that
have been used for inode preallocation with a red-black tree in the
multi-block allocator. This improves performance for workloads
which do a large number of random allocating writes.
- Thanks to Kemeng Shi for a lot of cleanup and bug fixes in the
multi-block allocator.
- Matthew Wilcox has converted the code paths for reading and writing
ext4 pages to use folios.
- Jason Yan has continued to factor out ext4_fill_super() into
smaller functions to improve ease of maintenance and
comprehension.
- Josh Triplett has created an uapi header for ext4 userspace APIs"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (105 commits)
ext4: Add a uapi header for ext4 userspace APIs
ext4: remove useless conditional branch code
ext4: remove unneeded check of nr_to_submit
ext4: move dax and encrypt checking into ext4_check_feature_compatibility()
ext4: factor out ext4_block_group_meta_init()
ext4: move s_reserved_gdt_blocks and addressable checking into ext4_check_geometry()
ext4: rename two functions with 'check'
ext4: factor out ext4_flex_groups_free()
ext4: use ext4_group_desc_free() in ext4_put_super() to save some duplicated code
ext4: factor out ext4_percpu_param_init() and ext4_percpu_param_destroy()
ext4: factor out ext4_hash_info_init()
Revert "ext4: Fix warnings when freezing filesystem with journaled data"
ext4: Update comment in mpage_prepare_extent_to_map()
ext4: Simplify handling of journalled data in ext4_bmap()
ext4: Drop special handling of journalled data from ext4_quota_on()
ext4: Drop special handling of journalled data from ext4_evict_inode()
ext4: Fix special handling of journalled data from extent zeroing
ext4: Drop special handling of journalled data from extent shifting operations
ext4: Drop special handling of journalled data from ext4_sync_file()
ext4: Commit transaction before writing back pages in data=journal mode
...
| -rw-r--r-- | Documentation/admin-guide/ext4.rst | 3 | ||||
| -rw-r--r-- | MAINTAINERS | 1 | ||||
| -rw-r--r-- | block/bio.c | 1 | ||||
| -rw-r--r-- | fs/ext4/balloc.c | 124 | ||||
| -rw-r--r-- | fs/ext4/bitmap.c | 13 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 114 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 35 | ||||
| -rw-r--r-- | fs/ext4/fsync.c | 11 | ||||
| -rw-r--r-- | fs/ext4/ialloc.c | 14 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 171 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 810 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 691 | ||||
| -rw-r--r-- | fs/ext4/mballoc.h | 17 | ||||
| -rw-r--r-- | fs/ext4/move_extent.c | 33 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 116 | ||||
| -rw-r--r-- | fs/ext4/readpage.c | 72 | ||||
| -rw-r--r-- | fs/ext4/resize.c | 7 | ||||
| -rw-r--r-- | fs/ext4/super.c | 413 | ||||
| -rw-r--r-- | fs/ext4/sysfs.c | 2 | ||||
| -rw-r--r-- | fs/ext4/verity.c | 30 | ||||
| -rw-r--r-- | fs/iomap/buffered-io.c | 2 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 3 | ||||
| -rw-r--r-- | fs/netfs/buffered_read.c | 3 | ||||
| -rw-r--r-- | fs/nfs/file.c | 12 | ||||
| -rw-r--r-- | include/linux/fscrypt.h | 21 | ||||
| -rw-r--r-- | include/linux/page-flags.h | 5 | ||||
| -rw-r--r-- | include/linux/pagemap.h | 2 | ||||
| -rw-r--r-- | include/trace/events/ext4.h | 7 | ||||
| -rw-r--r-- | include/uapi/linux/ext4.h | 117 | ||||
| -rw-r--r-- | mm/folio-compat.c | 4 |
30 files changed, 1413 insertions, 1441 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 4c559e08d11e..5740d85439ff 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -489,9 +489,6 @@ Files in /sys/fs/ext4/<devname>: multiple of this tuning parameter if the stripe size is not set in the ext4 superblock - mb_max_inode_prealloc - The maximum length of per-inode ext4_prealloc_space list. - mb_max_to_scan The maximum number of extents the multiblock allocator will search to find the best extent. diff --git a/MAINTAINERS b/MAINTAINERS index cd69cdfc23a2..731dbf5e6ec9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7745,6 +7745,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git F: Documentation/filesystems/ext4/ F: fs/ext4/ F: include/trace/events/ext4.h +F: include/uapi/linux/ext4.h Extended Verification Module (EVM) M: Mimi Zohar <zohar@linux.ibm.com> diff --git a/block/bio.c b/block/bio.c index fd11614bba4d..043944fd46eb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, return false; return bio_add_page(bio, &folio->page, len, off) > 0; } +EXPORT_SYMBOL(bio_add_folio); void __bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8ff4b9192a9f..094269488183 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -80,32 +80,56 @@ static inline int ext4_block_in_group(struct super_block *sb, return (actual_group == block_group) ? 1 : 0; } -/* Return the number of clusters used for file system metadata; this +/* + * Return the number of clusters used for file system metadata; this * represents the overhead needed by the file system. 
*/ static unsigned ext4_num_overhead_clusters(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp) { - unsigned num_clusters; - int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c; + unsigned base_clusters, num_clusters; + int block_cluster = -1, inode_cluster; + int itbl_cluster_start = -1, itbl_cluster_end = -1; ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group); - ext4_fsblk_t itbl_blk; + ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1; + ext4_fsblk_t itbl_blk_start, itbl_blk_end; struct ext4_sb_info *sbi = EXT4_SB(sb); /* This is the number of clusters used by the superblock, * block group descriptors, and reserved block group * descriptor blocks */ - num_clusters = ext4_num_base_meta_clusters(sb, block_group); + base_clusters = ext4_num_base_meta_clusters(sb, block_group); + num_clusters = base_clusters; + + /* + * Account and record inode table clusters if any cluster + * is in the block group, or inode table cluster range is + * [-1, -1] and won't overlap with block/inode bitmap cluster + * accounted below. + */ + itbl_blk_start = ext4_inode_table(sb, gdp); + itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1; + if (itbl_blk_start <= end && itbl_blk_end >= start) { + itbl_blk_start = itbl_blk_start >= start ? + itbl_blk_start : start; + itbl_blk_end = itbl_blk_end <= end ? + itbl_blk_end : end; + + itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start); + itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start); + + num_clusters += itbl_cluster_end - itbl_cluster_start + 1; + /* check if border cluster is overlapped */ + if (itbl_cluster_start == base_clusters - 1) + num_clusters--; + } /* - * For the allocation bitmaps and inode table, we first need - * to check to see if the block is in the block group. 
If it - * is, then check to see if the cluster is already accounted - * for in the clusters used for the base metadata cluster, or - * if we can increment the base metadata cluster to include - * that block. Otherwise, we will have to track the cluster - * used for the allocation bitmap or inode table explicitly. + * For the allocation bitmaps, we first need to check to see + * if the block is in the block group. If it is, then check + * to see if the cluster is already accounted for in the clusters + * used for the base metadata cluster and inode tables cluster. * Normally all of these blocks are contiguous, so the special * case handling shouldn't be necessary except for *very* * unusual file system layouts. @@ -113,46 +137,26 @@ static unsigned ext4_num_overhead_clusters(struct super_block *sb, if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) { block_cluster = EXT4_B2C(sbi, ext4_block_bitmap(sb, gdp) - start); - if (block_cluster < num_clusters) - block_cluster = -1; - else if (block_cluster == num_clusters) { + if (block_cluster >= base_clusters && + (block_cluster < itbl_cluster_start || + block_cluster > itbl_cluster_end)) num_clusters++; - block_cluster = -1; - } } if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) { inode_cluster = EXT4_B2C(sbi, ext4_inode_bitmap(sb, gdp) - start); - if (inode_cluster < num_clusters) - inode_cluster = -1; - else if (inode_cluster == num_clusters) { - num_clusters++; - inode_cluster = -1; - } - } - - itbl_blk = ext4_inode_table(sb, gdp); - for (i = 0; i < sbi->s_itb_per_group; i++) { - if (ext4_block_in_group(sb, itbl_blk + i, block_group)) { - c = EXT4_B2C(sbi, itbl_blk + i - start); - if ((c < num_clusters) || (c == inode_cluster) || - (c == block_cluster) || (c == itbl_cluster)) - continue; - if (c == num_clusters) { - num_clusters++; - continue; - } + /* + * Additional check if inode bitmap is in just accounted + * block_cluster + */ + if (inode_cluster != block_cluster && + 
inode_cluster >= base_clusters && + (inode_cluster < itbl_cluster_start || + inode_cluster > itbl_cluster_end)) num_clusters++; - itbl_cluster = c; - } } - if (block_cluster != -1) - num_clusters++; - if (inode_cluster != -1) - num_clusters++; - return num_clusters; } @@ -187,8 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, ASSERT(buffer_locked(bh)); - /* If checksum is bad mark all blocks used to prevent allocation - * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { ext4_mark_group_bitmap_corrupted(sb, block_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT | @@ -350,13 +352,13 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || - EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit) + EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit) return blk; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - EXT4_B2C(sbi, offset + sbi->s_itb_per_group), + EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1, EXT4_B2C(sbi, offset)); if (next_zero_bit < - EXT4_B2C(sbi, offset + sbi->s_itb_per_group)) + EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1) /* bad bitmap for inode tables */ return blk; return 0; @@ -383,8 +385,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, ext4_lock_group(sb, block_group); if (buffer_verified(bh)) goto verified; - if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh) || + if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) || ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) { ext4_unlock_group(sb, block_group); ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); @@ -474,17 +475,19 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group, goto out; } err = ext4_init_block_bitmap(sb, bh, block_group, desc); - set_bitmap_uptodate(bh); - 
set_buffer_uptodate(bh); - set_buffer_verified(bh); - ext4_unlock_group(sb, block_group); - unlock_buffer(bh); if (err) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); ext4_error(sb, "Failed to init block bitmap for group " "%u: %d", block_group, err); goto out; } - goto verify; + set_bitmap_uptodate(bh); + set_buffer_uptodate(bh); + set_buffer_verified(bh); + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + return bh; } ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { @@ -842,10 +845,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, if (!ext4_bg_has_super(sb, group)) return 0; - if (ext4_has_feature_meta_bg(sb)) - return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); - else - return EXT4_SB(sb)->s_gdb_count; + return EXT4_SB(sb)->s_gdb_count; } /** @@ -887,11 +887,11 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb, block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * sbi->s_desc_per_block) { if (num) { - num += ext4_bg_num_gdb(sb, block_group); + num += ext4_bg_num_gdb_nometa(sb, block_group); num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); } } else { /* For META_BG_BLOCK_GROUPS */ - num += ext4_bg_num_gdb(sb, block_group); + num += ext4_bg_num_gdb_meta(sb, block_group); } return EXT4_NUM_B2C(sbi, num); } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index f63e028c638c..cd725bebe69e 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -16,7 +16,7 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars) return numchars * BITS_PER_BYTE - memweight(bitmap, numchars); } -int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, +int ext4_inode_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz) { @@ -38,7 +38,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, return provided == calculated; } -void ext4_inode_bitmap_csum_set(struct super_block *sb, 
ext4_group_t group, +void ext4_inode_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz) { @@ -54,7 +54,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16); } -int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, +int ext4_block_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh) { @@ -74,13 +74,10 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, } else calculated &= 0xFFFF; - if (provided == calculated) - return 1; - - return 0; + return provided == calculated; } -void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, +void ext4_block_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 08b29c289da4..18cb2680dc39 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -40,6 +40,7 @@ #ifdef __KERNEL__ #include <linux/compat.h> #endif +#include <uapi/linux/ext4.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> @@ -591,17 +592,6 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(RESERVED); } -/* Used to pass group descriptor data when online resize is done */ -struct ext4_new_group_input { - __u32 group; /* Group number for this data */ - __u64 block_bitmap; /* Absolute block number of block bitmap */ - __u64 inode_bitmap; /* Absolute block number of inode bitmap */ - __u64 inode_table; /* Absolute block number of inode table start */ - __u32 blocks_count; /* Total number of blocks in this group */ - __u16 reserved_blocks; /* Number of reserved blocks in this group */ - __u16 unused; -}; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) struct compat_ext4_new_group_input { u32 group; @@ -698,70 +688,6 @@ enum { #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 #define 
EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040 -/* - * ioctl commands - */ -#define EXT4_IOC_GETVERSION _IOR('f', 3, long) -#define EXT4_IOC_SETVERSION _IOW('f', 4, long) -#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION -#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) -#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) -#define EXT4_IOC_MIGRATE _IO('f', 9) - /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ - /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ -#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) -#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) -#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) -#define EXT4_IOC_SWAP_BOOT _IO('f', 17) -#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) -/* ioctl codes 19--39 are reserved for fscrypt */ -#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40) -#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32) -#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap) -#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32) -#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid) -#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid) - -#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) - -/* - * Flags for going down operation - */ -#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */ -#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ -#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ - -/* - * Flags returned by EXT4_IOC_GETSTATE - * - * We only expose to userspace a subset of the state flags in - * i_state_flags - */ -#define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001 -#define EXT4_STATE_FLAG_NEW 0x00000002 -#define EXT4_STATE_FLAG_NEWENTRY 0x00000004 -#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008 - -/* flags for ioctl EXT4_IOC_CHECKPOINT */ -#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD 0x1 -#define 
EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT 0x2 -#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN 0x4 -#define EXT4_IOC_CHECKPOINT_FLAG_VALID (EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \ - EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \ - EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN) - -/* - * Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID - */ -struct fsuuid { - __u32 fsu_len; - __u32 fsu_flags; - __u8 fsu_uuid[]; -}; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* * ioctl commands in 32 bit emulation @@ -776,12 +702,6 @@ struct fsuuid { #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION #endif -/* - * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag. - * It indicates that the entry in extent status cache is for a hole. - */ -#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000 - /* Max physical block we can address w/o extents */ #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF @@ -852,15 +772,6 @@ struct ext4_inode { __le32 i_projid; /* Project ID */ }; -struct move_extent { - __u32 reserved; /* should be zero */ - __u32 donor_fd; /* donor file descriptor */ - __u64 orig_start; /* logical start offset in block for orig */ - __u64 donor_start; /* logical start offset in block for donor */ - __u64 len; /* block length to be moved */ - __u64 moved_len; /* moved block length */ -}; - #define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) @@ -1120,8 +1031,8 @@ struct ext4_inode_info { /* mballoc */ atomic_t i_prealloc_active; - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; + struct rb_root i_prealloc_node; + rwlock_t i_prealloc_lock; /* extents status tree */ struct ext4_es_tree i_es_tree; @@ -1613,7 +1524,6 @@ struct ext4_sb_info { unsigned int s_mb_stats; unsigned int s_mb_order2_reqs; unsigned int s_mb_group_prealloc; - unsigned int s_mb_max_inode_prealloc; unsigned int s_max_dir_size_kb; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; @@ -1887,7 +1797,6 @@ static 
inline void ext4_simulate_fail_bh(struct super_block *sb, * Inode dynamic state flags */ enum { - EXT4_STATE_JDATA, /* journaled data exists */ EXT4_STATE_NEW, /* inode is newly created */ EXT4_STATE_XATTR, /* has in-inode xattrs */ EXT4_STATE_NO_EXPAND, /* No space for expansion */ @@ -2676,16 +2585,16 @@ struct mmpd_data { /* bitmap.c */ extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); -void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, +void ext4_inode_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz); -int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, +int ext4_inode_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh, int sz); -void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, +void ext4_block_bitmap_csum_set(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh); -int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, +int ext4_block_bitmap_csum_verify(struct super_block *sb, struct ext4_group_desc *gdp, struct buffer_head *bh); @@ -3550,7 +3459,7 @@ extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, unsigned int len); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); -extern int ext4_readpage_inline(struct inode *inode, struct page *page); +int ext4_readpage_inline(struct inode *inode, struct folio *folio); extern int ext4_try_to_write_inline_data(struct address_space *mapping, struct inode *inode, loff_t pos, unsigned len, @@ -3647,7 +3556,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct inode *inode, - struct readahead_control *rac, struct page *page); + struct readahead_control *rac, struct folio *folio); extern int __init ext4_init_post_read_processing(void); extern void 
ext4_exit_post_read_processing(void); @@ -3757,9 +3666,8 @@ extern void ext4_io_submit_init(struct ext4_io_submit *io, struct writeback_control *wbc); extern void ext4_end_io_rsv_work(struct work_struct *work); extern void ext4_io_submit(struct ext4_io_submit *io); -extern int ext4_bio_write_page(struct ext4_io_submit *io, - struct page *page, - int len); +int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *page, + size_t len); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3559ea6b0781..e79c767cc5e0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4526,13 +4526,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, trace_ext4_zero_range(inode, offset, len, mode); - /* Call ext4_force_commit to flush all data in case of data=journal. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - /* * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to @@ -4616,6 +4609,20 @@ static long ext4_zero_range(struct file *file, loff_t offset, filemap_invalidate_unlock(mapping); goto out_mutex; } + + /* + * For journalled data we need to write (and checkpoint) pages + * before discarding page cache to avoid inconsitent data on + * disk in case of crash before zeroing trans is committed. 
+ */ + if (ext4_should_journal_data(inode)) { + ret = filemap_write_and_wait_range(mapping, start, end); + if (ret) { + filemap_invalidate_unlock(mapping); + goto out_mutex; + } + } + /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); inode->i_mtime = inode->i_ctime = current_time(inode); @@ -5290,13 +5297,6 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); - /* Call ext4_force_commit to flush all data in case of data=journal. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - inode_lock(inode); /* * There is no need to overlap collapse range with EOF, in which case @@ -5443,13 +5443,6 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb); - /* Call ext4_force_commit to flush all data in case of data=journal */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - inode_lock(inode); /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 027a7d7037a0..f65fdb27ce14 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -153,23 +153,12 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; /* - * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. - * - * data=journal: - * filemap_fdatawrite won't do anything (the buffers are clean). - * ext4_force_commit will write the file data into the journal and - * will wait on that. 
- * filemap_fdatawait() will encounter a ton of newly-dirtied pages - * (they were dirtied by commit). But that's OK - the blocks are - * safe in-journal, which is all fsync() needs to ensure. */ if (!sbi->s_journal) ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier); - else if (ext4_should_journal_data(inode)) - ret = ext4_force_commit(inode->i_sb); else ret = ext4_fsync_journal(inode, datasync, &needs_barrier); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 157663031f8c..787ab89c2c26 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -98,7 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, if (buffer_verified(bh)) goto verified; blk = ext4_inode_bitmap(sb, desc); - if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, + if (!ext4_inode_bitmap_csum_verify(sb, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8) || ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) { ext4_unlock_group(sb, block_group); @@ -327,7 +327,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (percpu_counter_initialized(&sbi->s_dirs_counter)) percpu_counter_dec(&sbi->s_dirs_counter); } - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, + ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); @@ -813,8 +813,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); - ext4_block_bitmap_csum_set(sb, group, gdp, - block_bitmap_bh); + ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh); ext4_group_desc_csum_set(sb, group, gdp); } ext4_unlock_group(sb, group); @@ -852,7 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino) ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); if (ext4_has_group_desc_csum(sb)) { - ext4_inode_bitmap_csum_set(sb, group, gdp, 
inode_bitmap_bh, + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); ext4_group_desc_csum_set(sb, group, gdp); } @@ -1165,8 +1164,7 @@ got: gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); - ext4_block_bitmap_csum_set(sb, group, gdp, - block_bitmap_bh); + ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh); ext4_group_desc_csum_set(sb, group, gdp); } ext4_unlock_group(sb, group); @@ -1222,7 +1220,7 @@ got: } } if (ext4_has_group_desc_csum(sb)) { - ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); ext4_group_desc_csum_set(sb, group, gdp); } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1602d74b5eeb..b9fb1177fff6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -467,16 +467,16 @@ out: return error; } -static int ext4_read_inline_page(struct inode *inode, struct page *page) +static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) { void *kaddr; int ret = 0; size_t len; struct ext4_iloc iloc; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(!ext4_has_inline_data(inode)); - BUG_ON(page->index); + BUG_ON(folio->index); if (!EXT4_I(inode)->i_inline_off) { ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", @@ -489,19 +489,20 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page) |
