author	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-20 19:26:45 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-20 19:26:45 -0700
commit	056f8c437dc33e9e8e64b9344e816d7d46c06c16 (patch)
tree	068abc36c0ebd92b966e9cbd95b341775be2a867 /fs/ext4/inode.c
parent	171754c3808214d4fd8843eab584599a429deb52 (diff)
parent	ff2beee206d23f49d022650122f81285849033e4 (diff)
Merge tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "Lots of cleanups and bug fixes this cycle, primarily in the block
  allocation, extent management, fast commit, and journalling"

* tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (93 commits)
  ext4: convert EXT4_B2C(sbi->s_stripe) users to EXT4_NUM_B2C
  ext4: check stripe size compatibility on remount as well
  ext4: fix i_data_sem unlock order in ext4_ind_migrate()
  ext4: remove the special buffer dirty handling in do_journal_get_write_access
  ext4: fix a potential assertion failure due to improperly dirtied buffer
  ext4: hoist ext4_block_write_begin and replace the __block_write_begin
  ext4: persist the new uptodate buffers in ext4_journalled_zero_new_buffers
  ext4: dax: keep orphan list before truncate overflow allocated blocks
  ext4: fix error message when rejecting the default hash
  ext4: save unnecessary indentation in ext4_ext_create_new_leaf()
  ext4: make some fast commit functions reuse extents path
  ext4: refactor ext4_swap_extents() to reuse extents path
  ext4: get rid of ppath in convert_initialized_extent()
  ext4: get rid of ppath in ext4_ext_handle_unwritten_extents()
  ext4: get rid of ppath in ext4_ext_convert_to_initialized()
  ext4: get rid of ppath in ext4_convert_unwritten_extents_endio()
  ext4: get rid of ppath in ext4_split_convert_extents()
  ext4: get rid of ppath in ext4_split_extent()
  ext4: get rid of ppath in ext4_force_split_extent_at()
  ext4: get rid of ppath in ext4_split_extent_at()
  ...
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	292
1 file changed, 133 insertions(+), 159 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03374dc215d1..54bdd4884fe6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -49,6 +49,11 @@
#include <trace/events/ext4.h>
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct inode *inode,
+ struct folio *folio,
+ unsigned from, unsigned to);
+
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
@@ -478,7 +483,89 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
+ map->m_pblk, status, 0);
+ return retval;
+}
+
+static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, int flags)
+{
+ struct extent_status es;
+ unsigned int status;
+ int err, retval = 0;
+
+ /*
+ * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE
+ * indicates that the blocks and quotas has already been
+ * checked when the data was copied into the page cache.
+ */
+ if (map->m_flags & EXT4_MAP_DELAYED)
+ flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
+ /*
+ * Here we clear m_flags because after allocating an new extent,
+ * it will be set again.
+ */
+ map->m_flags &= ~EXT4_MAP_FLAGS;
+
+ /*
+ * We need to check for EXT4 here because migrate could have
+ * changed the inode type in between.
+ */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ retval = ext4_ext_map_blocks(handle, inode, map, flags);
+ } else {
+ retval = ext4_ind_map_blocks(handle, inode, map, flags);
+
+ /*
+ * We allocated new blocks which will result in i_data's
+ * format changing. Force the migrate to fail by clearing
+ * migrate flags.
+ */
+ if (retval > 0 && map->m_flags & EXT4_MAP_NEW)
+ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
+ }
+ if (retval <= 0)
+ return retval;
+
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode %lu: "
+ "retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
+ }
+
+ /*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed. We also have to
+ * unmap metadata before zeroing as otherwise writeback can
+ * overwrite zeros with stale data from block device.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) {
+ err = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk,
+ map->m_len);
+ if (err)
+ return err;
+ }
+
+ /*
+ * If the extent has been zeroed out, we don't need to update
+ * extent status tree.
+ */
+ if (flags & EXT4_GET_BLOCKS_PRE_IO &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_is_written(&es))
+ return retval;
+ }
+
+ status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status, flags);
+
return retval;
}
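
The comment in this new helper states the ordering rule the code enforces: freshly allocated blocks are zeroed before their mapping is inserted into the extent status tree, so a concurrent lookup can never return blocks that still carry stale device contents. A minimal user-space sketch of the same initialize-then-publish pattern (toy cache and names are hypothetical, not the ext4 code):

#include <pthread.h>
#include <stdlib.h>

struct entry {
	int key;
	char *data;
	struct entry *next;
};

static struct entry *cache;
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Zero the backing storage *before* the entry becomes visible to
 * lookups; publishing first would let another thread hand out data
 * that still holds whatever the allocator left behind. */
static int cache_insert(int key, size_t size)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->key = key;
	e->data = calloc(1, size);	/* step 1: initialize (zero) */
	if (!e->data) {
		free(e);
		return -1;
	}
	pthread_mutex_lock(&cache_lock);
	e->next = cache;		/* step 2: publish */
	cache = e;
	pthread_mutex_unlock(&cache_lock);
	return 0;
}
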
@@ -576,32 +663,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* file system block.
*/
down_read(&EXT4_I(inode)->i_data_sem);
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, 0);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, 0);
- }
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
+ retval = ext4_map_query_blocks(handle, inode, map);
up_read((&EXT4_I(inode)->i_data_sem));
found:
@@ -631,88 +693,13 @@ found:
return retval;
/*
- * Here we clear m_flags because after allocating an new extent,
- * it will be set again.
- */
- map->m_flags &= ~EXT4_MAP_FLAGS;
-
- /*
* New blocks allocate and/or writing to unwritten extent
* will possibly result in updating i_data, so we take
* the write lock of i_data_sem, and call get_block()
* with create == 1 flag.
*/
down_write(&EXT4_I(inode)->i_data_sem);
-
- /*
- * We need to check for EXT4 here because migrate
- * could have changed the inode type in between
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, flags);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, flags);
-
- if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
- /*
- * We allocated new blocks which will result in
- * i_data's format changing. Force the migrate
- * to fail by clearing migrate flags
- */
- ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- }
- }
-
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- /*
- * We have to zeroout blocks before inserting them into extent
- * status tree. Otherwise someone could look them up there and
- * use them before they are really zeroed. We also have to
- * unmap metadata before zeroing as otherwise writeback can
- * overwrite zeros with stale data from block device.
- */
- if (flags & EXT4_GET_BLOCKS_ZERO &&
- map->m_flags & EXT4_MAP_MAPPED &&
- map->m_flags & EXT4_MAP_NEW) {
- ret = ext4_issue_zeroout(inode, map->m_lblk,
- map->m_pblk, map->m_len);
- if (ret) {
- retval = ret;
- goto out_sem;
- }
- }
-
- /*
- * If the extent has been zeroed out, we don't need to update
- * extent status tree.
- */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
- ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- if (ext4_es_is_written(&es))
- goto out_sem;
- }
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
-
-out_sem:
+ retval = ext4_map_create_blocks(handle, inode, map, flags);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
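
With the lookup and allocation paths factored into ext4_map_query_blocks() and ext4_map_create_blocks(), ext4_map_blocks() reduces to the two-phase locking pattern visible in these hunks: query under the shared i_data_sem, and take it exclusively only when blocks must be created. A rough user-space analogue using a pthread rwlock (toy types and names are hypothetical, not the kernel API):

#include <pthread.h>

struct toy_map {
	unsigned long lblk, pblk, len;
};

static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;

static int toy_query(struct toy_map *map)  { (void)map; return 0; }	/* lookup only  */
static int toy_create(struct toy_map *map) { (void)map; return 1; }	/* may allocate */

/* Look up under the shared lock; fall back to the exclusive lock only
 * when blocks actually have to be created, mirroring the
 * ext4_map_query_blocks() / ext4_map_create_blocks() split. */
static int toy_map_blocks(struct toy_map *map, int create)
{
	int ret;

	pthread_rwlock_rdlock(&data_sem);
	ret = toy_query(map);
	pthread_rwlock_unlock(&data_sem);
	if (ret > 0 || !create)
		return ret;

	pthread_rwlock_wrlock(&data_sem);
	/* Another writer may have mapped the range while the lock was
	 * dropped; the allocation path has to tolerate that. */
	ret = toy_create(map);
	pthread_rwlock_unlock(&data_sem);
	return ret;
}
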
@@ -1018,32 +1005,16 @@ static int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh)
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- int dirty = buffer_dirty(bh);
- int ret;
-
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
- /*
- * __block_write_begin() could have dirtied some buffers. Clean
- * the dirty bit as jbd2_journal_get_write_access() could complain
- * otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_write_begin() isn't a real problem here as we clear
- * the bit before releasing a page lock and thus writeback cannot
- * ever write the buffer.
- */
- if (dirty)
- clear_buffer_dirty(bh);
BUFFER_TRACE(bh, "get write access");
- ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ return ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
- if (!ret && dirty)
- ret = ext4_dirty_journalled_data(handle, bh);
- return ret;
}
-#ifdef CONFIG_FS_ENCRYPTION
-static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
- get_block_t *get_block)
+int ext4_block_write_begin(handle_t *handle, struct folio *folio,
+ loff_t pos, unsigned len,
+ get_block_t *get_block)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
@@ -1056,6 +1027,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
struct buffer_head *bh, *head, *wait[2];
int nr_wait = 0;
int i;
+ bool should_journal_data = ext4_should_journal_data(inode);
BUG_ON(!folio_test_locked(folio));
BUG_ON(from > PAGE_SIZE);
@@ -1085,10 +1057,22 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
if (err)
break;
if (buffer_new(bh)) {
+ /*
+ * We may be zeroing partial buffers or all new
+ * buffers in case of failure. Prepare JBD2 for
+ * that.
+ */
+ if (should_journal_data)
+ do_journal_get_write_access(handle,
+ inode, bh);
if (folio_test_uptodate(folio)) {
- clear_buffer_new(bh);
+ /*
+ * Unlike __block_write_begin() we leave
+ * dirtying of new uptodate buffers to
+ * ->write_end() time or
+ * folio_zero_new_buffers().
+ */
set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
@@ -1118,7 +1102,11 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err)) {
- folio_zero_new_buffers(folio, from, to);
+ if (should_journal_data)
+ ext4_journalled_zero_new_buffers(handle, inode, folio,
+ from, to);
+ else
+ folio_zero_new_buffers(folio, from, to);
} else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
for (i = 0; i < nr_wait; i++) {
int err2;
@@ -1134,7 +1122,6 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
return err;
}
-#endif
/*
* To preserve ordering, it is essential that the hole instantiation and
@@ -1216,19 +1203,12 @@ retry_journal:
/* In case writeback began while the folio was unlocked */
folio_wait_stable(folio);
-#ifdef CONFIG_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
- ret = ext4_block_write_begin(folio, pos, len,
+ ret = ext4_block_write_begin(handle, folio, pos, len,
ext4_get_block_unwritten);
else
- ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
-#else
- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(folio, pos, len,
- ext4_get_block_unwritten);
- else
- ret = __block_write_begin(folio, pos, len, ext4_get_block);
-#endif
+ ret = ext4_block_write_begin(handle, folio, pos, len,
+ ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
folio_buffers(folio), from, to,
@@ -1241,7 +1221,7 @@ retry_journal:
folio_unlock(folio);
/*
- * __block_write_begin may have instantiated a few blocks
+ * ext4_block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
* i_size_read because we hold i_rwsem.
*
@@ -1388,9 +1368,9 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
size = min(to, block_end) - start;
folio_zero_range(folio, start, size);
- write_end_fn(handle, inode, bh);
}
clear_buffer_new(bh);
+ write_end_fn(handle, inode, bh);
}
}
block_start = block_end;
@@ -1661,7 +1641,7 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
int ret;
/* Has delalloc reservation? */
- if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
+ if (ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
return 1;
/* Already been allocated? */
@@ -1782,7 +1762,7 @@ found:
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
- if (ext4_es_is_delonly(&es)) {
+ if (ext4_es_is_delayed(&es)) {
map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}
@@ -2217,11 +2197,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
* writeback and there is nothing we can do about it so it might result
* in data loss. So use reserved blocks to allocate metadata if
* possible.
- *
- * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
- * the blocks in question are delalloc blocks. This indicates
- * that the blocks and quotas has already been checked when
- * the data was copied into the page cache.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL |
@@ -2229,8 +2204,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
dioread_nolock = ext4_should_dioread_nolock(inode);
if (dioread_nolock)
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
- if (map->m_flags & BIT(BH_Delay))
- get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0)
@@ -2959,11 +2932,8 @@ retry:
if (IS_ERR(folio))
return PTR_ERR(folio);
-#ifdef CONFIG_FS_ENCRYPTION
- ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
-#else
- ret = __block_write_begin(folio, pos, len, ext4_da_get_block_prep);
-#endif
+ ret = ext4_block_write_begin(NULL, folio, pos, len,
+ ext4_da_get_block_prep);
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
@@ -4067,7 +4037,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
stop_block);
ext4_es_insert_extent(inode, first_block, hole_len, ~0,
- EXTENT_STATUS_HOLE);
+ EXTENT_STATUS_HOLE, 0);
up_write(&EXT4_I(inode)->i_data_sem);
}
ext4_fc_track_range(handle, inode, first_block, stop_block);
@@ -5276,8 +5246,9 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- tid_t commit_tid = 0;
+ tid_t commit_tid;
int ret;
+ bool has_transaction;
offset = inode->i_size & (PAGE_SIZE - 1);
/*
@@ -5302,12 +5273,14 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
folio_put(folio);
if (ret != -EBUSY)
return;
- commit_tid = 0;
+ has_transaction = false;
read_lock(&journal->j_state_lock);
- if (journal->j_committing_transaction)
+ if (journal->j_committing_transaction) {
commit_tid = journal->j_committing_transaction->t_tid;
+ has_transaction = true;
+ }
read_unlock(&journal->j_state_lock);
- if (commit_tid)
+ if (has_transaction)
jbd2_log_wait_commit(journal, commit_tid);
}
}
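
This hunk drops the convention that commit_tid == 0 means "no committing transaction" in favour of an explicit has_transaction flag, presumably because tid_t is an unsigned sequence number and 0 becomes a legitimate value once the counter wraps. A small standalone illustration of why a separate validity flag is safer than a zero sentinel (hypothetical names, not jbd2 code):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int tid_t;

/* Track "is there a transaction to wait for?" separately instead of
 * overloading tid 0 as the sentinel. */
struct pending {
	tid_t tid;
	bool  has_tid;
};

int main(void)
{
	struct pending p = { .tid = 0, .has_tid = true };	/* wrapped-around, but valid, tid */

	if (p.has_tid)	/* the old test "if (p.tid)" would skip the wait here */
		printf("wait for commit of tid %u\n", p.tid);
	return 0;
}
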
@@ -6216,7 +6189,8 @@ retry_alloc:
if (folio_pos(folio) + len > size)
len = size - folio_pos(folio);
- err = __block_write_begin(folio, 0, len, ext4_get_block);
+ err = ext4_block_write_begin(handle, folio, 0, len,
+ ext4_get_block);
if (!err) {
ret = VM_FAULT_SIGBUS;
if (ext4_journal_folio_buffers(handle, folio, len))