// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_error.h"
#include "xfs_rtbitmap.h"
#include <linux/iversion.h>
/*
 * Global kmem cache handle for inode log items.
 * NOTE(review): presumably backs allocation of struct xfs_inode_log_item;
 * the cache creation and allocation sites are not visible here - confirm.
 */
struct kmem_cache *xfs_ili_cache; /* inode log item */
/*
 * Map a generic log item back to the inode log item that embeds it.
 *
 * @lip must point at the ili_item member of a struct xfs_inode_log_item;
 * the returned pointer is the address of that containing structure.
 */
static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	struct xfs_inode_log_item	*iip;

	iip = container_of(lip, struct xfs_inode_log_item, ili_item);
	return iip;
}
static uint64_t
xfs_inode_item_sort(
struct xfs_log_item *lip)
{
return INODE_ITEM(lip)->ili_inode->i_ino;
}
/*
* Prior to finally logging the inode, we have to ensure that all the
* per-modification inode state changes are applied. This includes VFS inode
* state updates, format conversions, verifier state synchronisation and
* ensuring the inode buffer remains in memory whilst the inode is dirty.
*
* We have to be careful when we grab the inode cluster buffer due to lock
* ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
* require AGI -> inode cluster buffer lock order. The inode cluster buffer is
* not locked until ->precommit, so it happens after everything else has been
* modified.
*
* Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
* have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
* cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
* it can be called on an inode (e.g. via bumplink/droplink) before we take the
* AGF lock modifying directory blocks.
*
* Rather than force a complete rework of all the transactions to call
* xfs_trans_log_inode() once and once only at the end of every transaction, we
* move the pinning of the inode cluster buffer to a ->precommit operation. This
* matches how the xfs_iunlink_item locks the inode cluster buffer, and it
* ensures that the inode cluster buffer locking is always done last in a
* transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode
* cluster buffer.
*
* If we return the inode number as the precommit sort key then we'll also
* guarantee that the order of inode cluster buffer locking is the same for
* all the inodes and unlink items in the transaction.
*/
static int
xfs_inode_item_precommit(
struct xfs_trans *tp,
struct xfs_log_item *lip)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
struct inode *inode = VFS_I(ip);
unsigned int flags = iip->ili_dirty_flags;
/*
* Don't bother with i_lock for the I_DIRTY_TIME check here, as races
* don't matter - we either will need an extra transaction in 24 hours
* to log the timestamps, or will clear already cleared fields in the
* worst case.
*/
if (inode->i_state & I_DIRTY_TIME) {
spin_lock(&inode->i_lock);
inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
}
/*
* If we're updating the inode core or the timestamps and it's possible
* to upgrade this inode to bigtime format, do so now.
*/
if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
xfs_has_bigtime(ip->i_mount) &&
!xfs_inode_has_bigtime(ip)) {
ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
flags |= XFS_ILOG_CORE;
}
/*
* Inode verifiers do not check that the extent size hint is an integer
* multiple of the rt extent size on a directory with both rtinherit
* and extszinherit flags set. If we're logging a directory that is
* misconfigured in this way, clear the hint.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
flags |= XFS_ILOG_CORE;
}
/*
* Record the specific change for fdatasync optimisation. This allows
* fdatasync to skip log forces for inodes that are only timestamp
* dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
* to XFS_ILOG_CORE so that the actual on-disk dirty tracking
* (ili_fields) correctly tracks that the version has changed.
*/
spin_lock(&iip->ili_lock);
iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
if (flags & XFS_ILOG_IVERSION)
flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
if (!iip->ili_item.li_buf) {
struct xfs_buf *bp;
int error;
/*
* We hold the ILOCK here, so this inode is not going to be
* flushed while we are here. Further, because there is no
* buffer attached to the item, we know that there is no IO in
* progress, so nothing will clear the ili_fields while we read
* in the buffer. Hence we can safely drop the spin lock and
* read the buffer knowing that the state will not change from
* here.
*/
spin_unlock(&iip->ili_lock);
error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
if (error)
return error;
/*
* We need an explicit buffer reference for the log item but
* don't want the buffer to remain attached to the transaction.
* Hold the buffer but release the transaction reference once
* we've attached the inode log item to the buffer log item
* list.
*/
xfs_buf_hold(bp);
spin_lock(&iip->ili_lock);
iip->ili_item.li_buf = bp;
bp->b_flags |= _XBF_INODES;
list_add_tail(&iip->ili_item.