summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2025-06-20 15:54:05 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-07-17 18:30:50 +0200
commit21ab2c7c9794494456ac571e0af1664ea44f4b30 (patch)
treec601f99a988d55bbb9e615f94cc73e8a4da3bd4d
parentd216d5a277de53f650ab9039215d02987c519b60 (diff)
downloadlinux-21ab2c7c9794494456ac571e0af1664ea44f4b30.tar.gz
linux-21ab2c7c9794494456ac571e0af1664ea44f4b30.tar.bz2
linux-21ab2c7c9794494456ac571e0af1664ea44f4b30.zip
btrfs: propagate last_unlink_trans earlier when doing a rmdir
[ Upstream commit c466e33e729a0ee017d10d919cba18f503853c60 ] In case the removed directory had a snapshot that was deleted, we are propagating its inode's last_unlink_trans to the parent directory after we removed the entry from the parent directory. This leaves a small race window where someone can log the parent directory after we removed the entry and before we updated last_unlink_trans, and as a result if we ever try to replay such a log tree, we will fail since we will attempt to remove a snapshot during log replay, which is currently not possible and results in the log replay (and mount) to fail. This is the type of failure described in commit 1ec9a1ae1e30 ("Btrfs: fix unreplayable log after snapshot delete + parent dir fsync"). So fix this by propagating the last_unlink_trans to the parent directory before we remove the entry from it. Fixes: 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink") Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--fs/btrfs/inode.c36
1 files changed, 18 insertions, 18 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e3e5bd4fb477..60079099e24b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4655,7 +4655,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
int err = 0;
struct btrfs_trans_handle *trans;
- u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -4666,6 +4665,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (IS_ERR(trans))
return PTR_ERR(trans);
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to its
+ * parent directory. This is to prevent an unrecoverable log tree in the
+ * case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ *
+ * This is because we can't unlink other roots when replaying the dir
+ * deletes for directory foo.
+ */
+ if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
+ BTRFS_I(dir)->last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
@@ -4675,28 +4691,12 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
- last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
/* now the directory is empty */
err = btrfs_unlink_inode(trans, BTRFS_I(dir),
BTRFS_I(d_inode(dentry)), dentry->d_name.name,
dentry->d_name.len);
- if (!err) {
+ if (!err)
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * Propagate the last_unlink_trans value of the deleted dir to
- * its parent directory. This is to prevent an unrecoverable
- * log tree in the case we do something like this:
- * 1) create dir foo
- * 2) create snapshot under dir foo
- * 3) delete the snapshot
- * 4) rmdir foo
- * 5) mkdir foo
- * 6) fsync foo or some file inside foo
- */
- if (last_unlink_trans >= trans->transid)
- BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
- }
out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);