summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-04-26 08:57:41 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-04-26 08:57:41 -0700
commit: 0cfcde1fafc23068f57afa50faa3e69487b7cd30 (patch)
tree: 86f9b91f143e39becea74df7db1d3e9cd89dd3ea
parent: c3558a6b2a75d9adacf15dd7fae79dbfffa7ebe4 (diff)
parent: 519fe1bae7e20fc4e7f179d50b6102b49980e85d (diff)
download: linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.tar.gz
linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.tar.bz2
linux-0cfcde1fafc23068f57afa50faa3e69487b7cd30.zip
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "There are a number of major cleanups in ext4 this cycle: - The data=journal writepath has been significantly cleaned up and simplified, and reduces a large number of data=journal special cases by Jan Kara. - Ojaswin Mujoo has replaced the linked list used to track extents that have been used for inode preallocation with a red-black tree in the multi-block allocator. This improves performance for workloads which do a large number of random allocating writes. - Thanks to Kemeng Shi for a lot of cleanup and bug fixes in the multi-block allocator. - Matthew Wilcox has converted the code paths for reading and writing ext4 pages to use folios. - Jason Yan has continued to factor out ext4_fill_super() into smaller functions to improve ease of maintenance and comprehension. - Josh Triplett has created a uapi header for ext4 userspace APIs" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (105 commits) ext4: Add a uapi header for ext4 userspace APIs ext4: remove useless conditional branch code ext4: remove unneeded check of nr_to_submit ext4: move dax and encrypt checking into ext4_check_feature_compatibility() ext4: factor out ext4_block_group_meta_init() ext4: move s_reserved_gdt_blocks and addressable checking into ext4_check_geometry() ext4: rename two functions with 'check' ext4: factor out ext4_flex_groups_free() ext4: use ext4_group_desc_free() in ext4_put_super() to save some duplicated code ext4: factor out ext4_percpu_param_init() and ext4_percpu_param_destroy() ext4: factor out ext4_hash_info_init() Revert "ext4: Fix warnings when freezing filesystem with journaled data" ext4: Update comment in mpage_prepare_extent_to_map() ext4: Simplify handling of journalled data in ext4_bmap() ext4: Drop special handling of journalled data from ext4_quota_on() ext4: Drop special handling of journalled data from ext4_evict_inode() ext4: Fix special handling of journalled data from extent zeroing ext4: Drop 
special handling of journalled data from extent shifting operations ext4: Drop special handling of journalled data from ext4_sync_file() ext4: Commit transaction before writing back pages in data=journal mode ...
-rw-r--r--Documentation/admin-guide/ext4.rst3
-rw-r--r--MAINTAINERS1
-rw-r--r--block/bio.c1
-rw-r--r--fs/ext4/balloc.c124
-rw-r--r--fs/ext4/bitmap.c13
-rw-r--r--fs/ext4/ext4.h114
-rw-r--r--fs/ext4/extents.c35
-rw-r--r--fs/ext4/fsync.c11
-rw-r--r--fs/ext4/ialloc.c14
-rw-r--r--fs/ext4/inline.c171
-rw-r--r--fs/ext4/inode.c810
-rw-r--r--fs/ext4/mballoc.c691
-rw-r--r--fs/ext4/mballoc.h17
-rw-r--r--fs/ext4/move_extent.c33
-rw-r--r--fs/ext4/page-io.c116
-rw-r--r--fs/ext4/readpage.c72
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c413
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/verity.c30
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/jbd2/transaction.c3
-rw-r--r--fs/netfs/buffered_read.c3
-rw-r--r--fs/nfs/file.c12
-rw-r--r--include/linux/fscrypt.h21
-rw-r--r--include/linux/page-flags.h5
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/trace/events/ext4.h7
-rw-r--r--include/uapi/linux/ext4.h117
-rw-r--r--mm/folio-compat.c4
30 files changed, 1413 insertions, 1441 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 4c559e08d11e..5740d85439ff 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -489,9 +489,6 @@ Files in /sys/fs/ext4/<devname>:
multiple of this tuning parameter if the stripe size is not set in the
ext4 superblock
- mb_max_inode_prealloc
- The maximum length of per-inode ext4_prealloc_space list.
-
mb_max_to_scan
The maximum number of extents the multiblock allocator will search to
find the best extent.
diff --git a/MAINTAINERS b/MAINTAINERS
index cd69cdfc23a2..731dbf5e6ec9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7745,6 +7745,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
F: Documentation/filesystems/ext4/
F: fs/ext4/
F: include/trace/events/ext4.h
+F: include/uapi/linux/ext4.h
Extended Verification Module (EVM)
M: Mimi Zohar <zohar@linux.ibm.com>
diff --git a/block/bio.c b/block/bio.c
index fd11614bba4d..043944fd46eb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
return false;
return bio_add_page(bio, &folio->page, len, off) > 0;
}
+EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8ff4b9192a9f..094269488183 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -80,32 +80,56 @@ static inline int ext4_block_in_group(struct super_block *sb,
return (actual_group == block_group) ? 1 : 0;
}
-/* Return the number of clusters used for file system metadata; this
+/*
+ * Return the number of clusters used for file system metadata; this
* represents the overhead needed by the file system.
*/
static unsigned ext4_num_overhead_clusters(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
- unsigned num_clusters;
- int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+ unsigned base_clusters, num_clusters;
+ int block_cluster = -1, inode_cluster;
+ int itbl_cluster_start = -1, itbl_cluster_end = -1;
ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
- ext4_fsblk_t itbl_blk;
+ ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+ ext4_fsblk_t itbl_blk_start, itbl_blk_end;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* This is the number of clusters used by the superblock,
* block group descriptors, and reserved block group
* descriptor blocks */
- num_clusters = ext4_num_base_meta_clusters(sb, block_group);
+ base_clusters = ext4_num_base_meta_clusters(sb, block_group);
+ num_clusters = base_clusters;
+
+ /*
+ * Account and record inode table clusters if any cluster
+ * is in the block group, or inode table cluster range is
+ * [-1, -1] and won't overlap with block/inode bitmap cluster
+ * accounted below.
+ */
+ itbl_blk_start = ext4_inode_table(sb, gdp);
+ itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
+ if (itbl_blk_start <= end && itbl_blk_end >= start) {
+ itbl_blk_start = itbl_blk_start >= start ?
+ itbl_blk_start : start;
+ itbl_blk_end = itbl_blk_end <= end ?
+ itbl_blk_end : end;
+
+ itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
+ itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);
+
+ num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
+ /* check if border cluster is overlapped */
+ if (itbl_cluster_start == base_clusters - 1)
+ num_clusters--;
+ }
/*
- * For the allocation bitmaps and inode table, we first need
- * to check to see if the block is in the block group. If it
- * is, then check to see if the cluster is already accounted
- * for in the clusters used for the base metadata cluster, or
- * if we can increment the base metadata cluster to include
- * that block. Otherwise, we will have to track the cluster
- * used for the allocation bitmap or inode table explicitly.
+ * For the allocation bitmaps, we first need to check to see
+ * if the block is in the block group. If it is, then check
+ * to see if the cluster is already accounted for in the clusters
+ * used for the base metadata cluster and inode tables cluster.
* Normally all of these blocks are contiguous, so the special
* case handling shouldn't be necessary except for *very*
* unusual file system layouts.
@@ -113,46 +137,26 @@ static unsigned ext4_num_overhead_clusters(struct super_block *sb,
if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
block_cluster = EXT4_B2C(sbi,
ext4_block_bitmap(sb, gdp) - start);
- if (block_cluster < num_clusters)
- block_cluster = -1;
- else if (block_cluster == num_clusters) {
+ if (block_cluster >= base_clusters &&
+ (block_cluster < itbl_cluster_start ||
+ block_cluster > itbl_cluster_end))
num_clusters++;
- block_cluster = -1;
- }
}
if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
inode_cluster = EXT4_B2C(sbi,
ext4_inode_bitmap(sb, gdp) - start);
- if (inode_cluster < num_clusters)
- inode_cluster = -1;
- else if (inode_cluster == num_clusters) {
- num_clusters++;
- inode_cluster = -1;
- }
- }
-
- itbl_blk = ext4_inode_table(sb, gdp);
- for (i = 0; i < sbi->s_itb_per_group; i++) {
- if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
- c = EXT4_B2C(sbi, itbl_blk + i - start);
- if ((c < num_clusters) || (c == inode_cluster) ||
- (c == block_cluster) || (c == itbl_cluster))
- continue;
- if (c == num_clusters) {
- num_clusters++;
- continue;
- }
+ /*
+ * Additional check if inode bitmap is in just accounted
+ * block_cluster
+ */
+ if (inode_cluster != block_cluster &&
+ inode_cluster >= base_clusters &&
+ (inode_cluster < itbl_cluster_start ||
+ inode_cluster > itbl_cluster_end))
num_clusters++;
- itbl_cluster = c;
- }
}
- if (block_cluster != -1)
- num_clusters++;
- if (inode_cluster != -1)
- num_clusters++;
-
return num_clusters;
}
@@ -187,8 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
ASSERT(buffer_locked(bh));
- /* If checksum is bad mark all blocks used to prevent allocation
- * essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_mark_group_bitmap_corrupted(sb, block_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT |
@@ -350,13 +352,13 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
blk = ext4_inode_table(sb, desc);
offset = blk - group_first_block;
if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
- EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit)
+ EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
return blk;
next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
- EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
+ EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
EXT4_B2C(sbi, offset));
if (next_zero_bit <
- EXT4_B2C(sbi, offset + sbi->s_itb_per_group))
+ EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
/* bad bitmap for inode tables */
return blk;
return 0;
@@ -383,8 +385,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
ext4_lock_group(sb, block_group);
if (buffer_verified(bh))
goto verified;
- if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
- desc, bh) ||
+ if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
@@ -474,17 +475,19 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
goto out;
}
err = ext4_init_block_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
- set_buffer_uptodate(bh);
- set_buffer_verified(bh);
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
if (err) {
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
ext4_error(sb, "Failed to init block bitmap for group "
"%u: %d", block_group, err);
goto out;
}
- goto verify;
+ set_bitmap_uptodate(bh);
+ set_buffer_uptodate(bh);
+ set_buffer_verified(bh);
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
+ return bh;
}
ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) {
@@ -842,10 +845,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
if (!ext4_bg_has_super(sb, group))
return 0;
- if (ext4_has_feature_meta_bg(sb))
- return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
- else
- return EXT4_SB(sb)->s_gdb_count;
+ return EXT4_SB(sb)->s_gdb_count;
}
/**
@@ -887,11 +887,11 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
sbi->s_desc_per_block) {
if (num) {
- num += ext4_bg_num_gdb(sb, block_group);
+ num += ext4_bg_num_gdb_nometa(sb, block_group);
num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
}
} else { /* For META_BG_BLOCK_GROUPS */
- num += ext4_bg_num_gdb(sb, block_group);
+ num += ext4_bg_num_gdb_meta(sb, block_group);
}
return EXT4_NUM_B2C(sbi, num);
}
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index f63e028c638c..cd725bebe69e 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -16,7 +16,7 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
}
-int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
@@ -38,7 +38,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
return provided == calculated;
}
-void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
{
@@ -54,7 +54,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}
-int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_block_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh)
{
@@ -74,13 +74,10 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
} else
calculated &= 0xFFFF;
- if (provided == calculated)
- return 1;
-
- return 0;
+ return provided == calculated;
}
-void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_block_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh)
{
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 08b29c289da4..18cb2680dc39 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -40,6 +40,7 @@
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
+#include <uapi/linux/ext4.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
@@ -591,17 +592,6 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(RESERVED);
}
-/* Used to pass group descriptor data when online resize is done */
-struct ext4_new_group_input {
- __u32 group; /* Group number for this data */
- __u64 block_bitmap; /* Absolute block number of block bitmap */
- __u64 inode_bitmap; /* Absolute block number of inode bitmap */
- __u64 inode_table; /* Absolute block number of inode table start */
- __u32 blocks_count; /* Total number of blocks in this group */
- __u16 reserved_blocks; /* Number of reserved blocks in this group */
- __u16 unused;
-};
-
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
struct compat_ext4_new_group_input {
u32 group;
@@ -698,70 +688,6 @@ enum {
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040
-/*
- * ioctl commands
- */
-#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
-#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
-#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
-#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
-#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
-#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
-#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
-#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
-#define EXT4_IOC_MIGRATE _IO('f', 9)
- /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
- /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
-#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
-#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
-#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
-#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
-#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
-/* ioctl codes 19--39 are reserved for fscrypt */
-#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
-#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
-#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
-#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32)
-#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid)
-#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid)
-
-#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
-
-/*
- * Flags for going down operation
- */
-#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
-#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
-#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-
-/*
- * Flags returned by EXT4_IOC_GETSTATE
- *
- * We only expose to userspace a subset of the state flags in
- * i_state_flags
- */
-#define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001
-#define EXT4_STATE_FLAG_NEW 0x00000002
-#define EXT4_STATE_FLAG_NEWENTRY 0x00000004
-#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008
-
-/* flags for ioctl EXT4_IOC_CHECKPOINT */
-#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD 0x1
-#define EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT 0x2
-#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN 0x4
-#define EXT4_IOC_CHECKPOINT_FLAG_VALID (EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \
- EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \
- EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
-
-/*
- * Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID
- */
-struct fsuuid {
- __u32 fsu_len;
- __u32 fsu_flags;
- __u8 fsu_uuid[];
-};
-
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
@@ -776,12 +702,6 @@ struct fsuuid {
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif
-/*
- * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
- * It indicates that the entry in extent status cache is for a hole.
- */
-#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000
-
/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
@@ -852,15 +772,6 @@ struct ext4_inode {
__le32 i_projid; /* Project ID */
};
-struct move_extent {
- __u32 reserved; /* should be zero */
- __u32 donor_fd; /* donor file descriptor */
- __u64 orig_start; /* logical start offset in block for orig */
- __u64 donor_start; /* logical start offset in block for donor */
- __u64 len; /* block length to be moved */
- __u64 moved_len; /* moved block length */
-};
-
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
@@ -1120,8 +1031,8 @@ struct ext4_inode_info {
/* mballoc */
atomic_t i_prealloc_active;
- struct list_head i_prealloc_list;
- spinlock_t i_prealloc_lock;
+ struct rb_root i_prealloc_node;
+ rwlock_t i_prealloc_lock;
/* extents status tree */
struct ext4_es_tree i_es_tree;
@@ -1613,7 +1524,6 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
- unsigned int s_mb_max_inode_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
@@ -1887,7 +1797,6 @@ static inline void ext4_simulate_fail_bh(struct super_block *sb,
* Inode dynamic state flags
*/
enum {
- EXT4_STATE_JDATA, /* journaled data exists */
EXT4_STATE_NEW, /* inode is newly created */
EXT4_STATE_XATTR, /* has in-inode xattrs */
EXT4_STATE_NO_EXPAND, /* No space for expansion */
@@ -2676,16 +2585,16 @@ struct mmpd_data {
/* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
-void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
-int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
-void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_block_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh);
-int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_block_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh);
@@ -3550,7 +3459,7 @@ extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
-extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+int ext4_readpage_inline(struct inode *inode, struct folio *folio);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
@@ -3647,7 +3556,7 @@ static inline void ext4_set_de_type(struct super_block *sb,
/* readpages.c */
extern int ext4_mpage_readpages(struct inode *inode,
- struct readahead_control *rac, struct page *page);
+ struct readahead_control *rac, struct folio *folio);
extern int __init ext4_init_post_read_processing(void);
extern void ext4_exit_post_read_processing(void);
@@ -3757,9 +3666,8 @@ extern void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc);
extern void ext4_end_io_rsv_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
-extern int ext4_bio_write_page(struct ext4_io_submit *io,
- struct page *page,
- int len);
+int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *page,
+ size_t len);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3559ea6b0781..e79c767cc5e0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4526,13 +4526,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
trace_ext4_zero_range(inode, offset, len, mode);
- /* Call ext4_force_commit to flush all data in case of data=journal. */
- if (ext4_should_journal_data(inode)) {
- ret = ext4_force_commit(inode->i_sb);
- if (ret)
- return ret;
- }
-
/*
* Round up offset. This is not fallocate, we need to zero out
* blocks, so convert interior block aligned part of the range to
@@ -4616,6 +4609,20 @@ static long ext4_zero_range(struct file *file, loff_t offset,
filemap_invalidate_unlock(mapping);
goto out_mutex;
}
+
+ /*
+ * For journalled data we need to write (and checkpoint) pages
+ * before discarding page cache to avoid inconsistent data on
+ * disk in case of crash before zeroing trans is committed.
+ */
+ if (ext4_should_journal_data(inode)) {
+ ret = filemap_write_and_wait_range(mapping, start, end);
+ if (ret) {
+ filemap_invalidate_unlock(mapping);
+ goto out_mutex;
+ }
+ }
+
/* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -5290,13 +5297,6 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
- /* Call ext4_force_commit to flush all data in case of data=journal. */
- if (ext4_should_journal_data(inode)) {
- ret = ext4_force_commit(inode->i_sb);
- if (ret)
- return ret;
- }
-
inode_lock(inode);
/*
* There is no need to overlap collapse range with EOF, in which case
@@ -5443,13 +5443,6 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
- /* Call ext4_force_commit to flush all data in case of data=journal */
- if (ext4_should_journal_data(inode)) {
- ret = ext4_force_commit(inode->i_sb);
- if (ret)
- return ret;
- }
-
inode_lock(inode);
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 027a7d7037a0..f65fdb27ce14 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -153,23 +153,12 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
/*
- * data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
* Metadata is in the journal, we wait for proper transaction to
* commit here.
- *
- * data=journal:
- * filemap_fdatawrite won't do anything (the buffers are clean).
- * ext4_force_commit will write the file data into the journal and
- * will wait on that.
- * filemap_fdatawait() will encounter a ton of newly-dirtied pages
- * (they were dirtied by commit). But that's OK - the blocks are
- * safe in-journal, which is all fsync() needs to ensure.
*/
if (!sbi->s_journal)
ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
- else if (ext4_should_journal_data(inode))
- ret = ext4_force_commit(inode->i_sb);
else
ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 157663031f8c..787ab89c2c26 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -98,7 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (buffer_verified(bh))
goto verified;
blk = ext4_inode_bitmap(sb, desc);
- if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
+ if (!ext4_inode_bitmap_csum_verify(sb, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8) ||
ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
ext4_unlock_group(sb, block_group);
@@ -327,7 +327,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
if (percpu_counter_initialized(&sbi->s_dirs_counter))
percpu_counter_dec(&sbi->s_dirs_counter);
}
- ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+ ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
@@ -813,8 +813,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
- ext4_block_bitmap_csum_set(sb, group, gdp,
- block_bitmap_bh);
+ ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
@@ -852,7 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
if (ext4_has_group_desc_csum(sb)) {
- ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+ ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, group, gdp);
}
@@ -1165,8 +1164,7 @@ got:
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
- ext4_block_bitmap_csum_set(sb, group, gdp,
- block_bitmap_bh);
+ ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
@@ -1222,7 +1220,7 @@ got:
}
}
if (ext4_has_group_desc_csum(sb)) {
- ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+ ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_group_desc_csum_set(sb, group, gdp);
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1602d74b5eeb..b9fb1177fff6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -467,16 +467,16 @@ out:
return error;
}
-static int ext4_read_inline_page(struct inode *inode, struct page *page)
+static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{
void *kaddr;
int ret = 0;
size_t len;
struct ext4_iloc iloc;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
BUG_ON(!ext4_has_inline_data(inode));
- BUG_ON(page->index);
+ BUG_ON(folio->index);
if (!EXT4_I(inode)->i_inline_off) {
ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
@@ -489,19 +489,20 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page)