diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 1488 |
1 files changed, 161 insertions, 1327 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4e3cd8971fc..7dc6f326936c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -42,55 +42,11 @@ #include "xfs_pnfs.h" #include "xfs_parent.h" #include "xfs_xattr.h" -#include "xfs_sb.h" +#include "xfs_inode_util.h" struct kmem_cache *xfs_inode_cache; /* - * helper function to extract extent size hint from inode - */ -xfs_extlen_t -xfs_get_extsz_hint( - struct xfs_inode *ip) -{ - /* - * No point in aligning allocations if we need to COW to actually - * write to them. - */ - if (xfs_is_always_cow_inode(ip)) - return 0; - if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) - return ip->i_extsize; - if (XFS_IS_REALTIME_INODE(ip) && - ip->i_mount->m_sb.sb_rextsize > 1) - return ip->i_mount->m_sb.sb_rextsize; - return 0; -} - -/* - * Helper function to extract CoW extent size hint from inode. - * Between the extent size hint and the CoW extent size hint, we - * return the greater of the two. If the value is zero (automatic), - * use the default size. - */ -xfs_extlen_t -xfs_get_cowextsz_hint( - struct xfs_inode *ip) -{ - xfs_extlen_t a, b; - - a = 0; - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) - a = ip->i_cowextsize; - b = xfs_get_extsz_hint(ip); - - a = max(a, b); - if (a == 0) - return XFS_DEFAULT_COWEXTSZ_HINT; - return a; -} - -/* * These two are wrapper routines around the xfs_ilock() routine used to * centralize some grungy code. They are used in places that wish to lock the * inode solely for reading the extents. The reason these places can't just @@ -567,55 +523,6 @@ xfs_lock_two_inodes( } } -uint -xfs_ip2xflags( - struct xfs_inode *ip) -{ - uint flags = 0; - - if (ip->i_diflags & XFS_DIFLAG_ANY) { - if (ip->i_diflags & XFS_DIFLAG_REALTIME) - flags |= FS_XFLAG_REALTIME; - if (ip->i_diflags & XFS_DIFLAG_PREALLOC) - flags |= FS_XFLAG_PREALLOC; - if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_XFLAG_IMMUTABLE; - if (ip->i_diflags & XFS_DIFLAG_APPEND) - flags |= FS_XFLAG_APPEND; - if (ip->i_diflags & XFS_DIFLAG_SYNC) - flags |= FS_XFLAG_SYNC; - if (ip->i_diflags & XFS_DIFLAG_NOATIME) - flags |= FS_XFLAG_NOATIME; - if (ip->i_diflags & XFS_DIFLAG_NODUMP) - flags |= FS_XFLAG_NODUMP; - if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) - flags |= FS_XFLAG_RTINHERIT; - if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) - flags |= FS_XFLAG_PROJINHERIT; - if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) - flags |= FS_XFLAG_NOSYMLINKS; - if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) - flags |= FS_XFLAG_EXTSIZE; - if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) - flags |= FS_XFLAG_EXTSZINHERIT; - if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) - flags |= FS_XFLAG_NODEFRAG; - if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) - flags |= FS_XFLAG_FILESTREAM; - } - - if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { - if (ip->i_diflags2 & XFS_DIFLAG2_DAX) - flags |= FS_XFLAG_DAX; - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) - flags |= FS_XFLAG_COWEXTSIZE; - } - - if (xfs_inode_has_attr_fork(ip)) - flags |= FS_XFLAG_HASATTR; - return flags; -} - /* * Lookups up an inode from "name". If ci_name is not NULL, then a CI match * is allowed, otherwise it has to be an exact match. If a CI match is found, @@ -657,97 +564,6 @@ out_unlock: return error; } -/* Propagate di_flags from a parent inode to a child inode. */ -static void -xfs_inode_inherit_flags( - struct xfs_inode *ip, - const struct xfs_inode *pip) -{ - unsigned int di_flags = 0; - xfs_failaddr_t failaddr; - umode_t mode = VFS_I(ip)->i_mode; - - if (S_ISDIR(mode)) { - if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - ip->i_extsize = pip->i_extsize; - } - if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - } else if (S_ISREG(mode)) { - if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && - xfs_has_realtime(ip->i_mount)) - di_flags |= XFS_DIFLAG_REALTIME; - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSIZE; - ip->i_extsize = pip->i_extsize; - } - } - if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && - xfs_inherit_noatime) - di_flags |= XFS_DIFLAG_NOATIME; - if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && - xfs_inherit_nodump) - di_flags |= XFS_DIFLAG_NODUMP; - if ((pip->i_diflags & XFS_DIFLAG_SYNC) && - xfs_inherit_sync) - di_flags |= XFS_DIFLAG_SYNC; - if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && - xfs_inherit_nosymlinks) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && - xfs_inherit_nodefrag) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - - ip->i_diflags |= di_flags; - - /* - * Inode verifiers on older kernels only check that the extent size - * hint is an integer multiple of the rt extent size on realtime files. - * They did not check the hint alignment on a directory with both - * rtinherit and extszinherit flags set. If the misaligned hint is - * propagated from a directory into a new realtime file, new file - * allocations will fail due to math errors in the rt allocator and/or - * trip the verifiers. Validate the hint settings in the new file so - * that we don't let broken hints propagate. - */ - failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, - VFS_I(ip)->i_mode, ip->i_diflags); - if (failaddr) { - ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | - XFS_DIFLAG_EXTSZINHERIT); - ip->i_extsize = 0; - } -} - -/* Propagate di_flags2 from a parent inode to a child inode. */ -static void -xfs_inode_inherit_flags2( - struct xfs_inode *ip, - const struct xfs_inode *pip) -{ - xfs_failaddr_t failaddr; - - if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; - ip->i_cowextsize = pip->i_cowextsize; - } - if (pip->i_diflags2 & XFS_DIFLAG2_DAX) - ip->i_diflags2 |= XFS_DIFLAG2_DAX; - - /* Don't let invalid cowextsize hints propagate. */ - failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, - VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); - if (failaddr) { - ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; - ip->i_cowextsize = 0; - } -} - /* * Initialise a newly allocated inode and return the in-core inode to the * caller locked exclusively. @@ -755,39 +571,15 @@ xfs_inode_inherit_flags2( * Caller is responsible for unlocking the inode manually upon return */ int -xfs_init_new_inode( - struct mnt_idmap *idmap, +xfs_icreate( struct xfs_trans *tp, - struct xfs_inode *pip, xfs_ino_t ino, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - bool init_xattrs, + const struct xfs_icreate_args *args, struct xfs_inode **ipp) { - struct inode *dir = pip ? VFS_I(pip) : NULL; struct xfs_mount *mp = tp->t_mountp; - struct xfs_inode *ip; - unsigned int flags; + struct xfs_inode *ip = NULL; int error; - struct timespec64 tv; - struct inode *inode; - - /* - * Protect against obviously corrupt allocation btree records. Later - * xfs_iget checks will catch re-allocation of other active in-memory - * and on-disk inodes. If we don't catch reallocating the parent inode - * here we will deadlock in xfs_iget() so we have to do these checks - * first. - */ - if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) { - xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); - xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), - XFS_SICK_AG_INOBT); - return -EFSCORRUPTED; - } /* * Get the in-core inode with the lock held exclusively to prevent @@ -798,96 +590,8 @@ xfs_init_new_inode( return error; ASSERT(ip != NULL); - inode = VFS_I(ip); - set_nlink(inode, nlink); - inode->i_rdev = rdev; - ip->i_projid = prid; - - if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { - inode_fsuid_set(inode, idmap); - inode->i_gid = dir->i_gid; - inode->i_mode = mode; - } else { - inode_init_owner(idmap, inode, dir, mode); - } - - /* - * If the group ID of the new file does not match the effective group - * ID or one of the supplementary group IDs, the S_ISGID bit is cleared - * (and only if the irix_sgid_inherit compatibility variable is set). - */ - if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && - !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) - inode->i_mode &= ~S_ISGID; - - ip->i_disk_size = 0; - ip->i_df.if_nextents = 0; - ASSERT(ip->i_nblocks == 0); - - tv = inode_set_ctime_current(inode); - inode_set_mtime_to_ts(inode, tv); - inode_set_atime_to_ts(inode, tv); - - ip->i_extsize = 0; - ip->i_diflags = 0; - - if (xfs_has_v3inodes(mp)) { - inode_set_iversion(inode, 1); - ip->i_cowextsize = 0; - ip->i_crtime = tv; - } - - flags = XFS_ILOG_CORE; - switch (mode & S_IFMT) { - case S_IFIFO: - case S_IFCHR: - case S_IFBLK: - case S_IFSOCK: - ip->i_df.if_format = XFS_DINODE_FMT_DEV; - flags |= XFS_ILOG_DEV; - break; - case S_IFREG: - case S_IFDIR: - if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) - xfs_inode_inherit_flags(ip, pip); - if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) - xfs_inode_inherit_flags2(ip, pip); - fallthrough; - case S_IFLNK: - ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; - ip->i_df.if_bytes = 0; - ip->i_df.if_data = NULL; - break; - default: - ASSERT(0); - } - - /* - * If we need to create attributes immediately after allocating the - * inode, initialise an empty attribute fork right now. We use the - * default fork offset for attributes here as we don't know exactly what - * size or how many attributes we might be adding. We can do this - * safely here because we know the data fork is completely empty and - * this saves us from needing to run a separate transaction to set the - * fork offset in the immediate future. - */ - if (init_xattrs) { - ip->i_forkoff = xfs_default_attroffset(ip) >> 3; - xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); - - if (!xfs_has_attr(mp)) { - spin_lock(&mp->m_sb_lock); - xfs_add_attr(mp); - spin_unlock(&mp->m_sb_lock); - xfs_log_sb(tp); - } - } - - /* - * Log the new values stuffed into the inode. - */ xfs_trans_ijoin(tp, ip, 0); - xfs_trans_log_inode(tp, ip, flags); + xfs_inode_init(tp, args, ip); /* now that we have an i_mode we can setup the inode structure */ xfs_setup_inode(ip); @@ -896,158 +600,60 @@ xfs_init_new_inode( return 0; } -/* - * Decrement the link count on an inode & log the change. If this causes the - * link count to go to zero, move the inode to AGI unlinked list so that it can - * be freed when the last active reference goes away via xfs_inactive(). - */ +/* Return dquots for the ids that will be assigned to a new file. */ int -xfs_droplink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - - if (inode->i_nlink == 0) { - xfs_info_ratelimited(tp->t_mountp, - "Inode 0x%llx link count dropped below zero. Pinning link count.", - ip->i_ino); - set_nlink(inode, XFS_NLINK_PINNED); - } - if (inode->i_nlink != XFS_NLINK_PINNED) - drop_nlink(inode); - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - if (inode->i_nlink) - return 0; - - return xfs_iunlink(tp, ip); -} - -/* - * Increment the link count on an inode & log the change. - */ -void -xfs_bumplink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - - if (inode->i_nlink == XFS_NLINK_PINNED - 1) - xfs_info_ratelimited(tp->t_mountp, - "Inode 0x%llx link count exceeded maximum. Pinning link count.", - ip->i_ino); - if (inode->i_nlink != XFS_NLINK_PINNED) - inc_nlink(inode); - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -} - -#ifdef CONFIG_XFS_LIVE_HOOKS -/* - * Use a static key here to reduce the overhead of directory live update hooks. - * If the compiler supports jump labels, the static branch will be replaced by - * a nop sled when there are no hook users. Online fsck is currently the only - * caller, so this is a reasonable tradeoff. - * - * Note: Patching the kernel code requires taking the cpu hotplug lock. Other - * parts of the kernel allocate memory with that lock held, which means that - * XFS callers cannot hold any locks that might be used by memory reclaim or - * writeback when calling the static_branch_{inc,dec} functions. - */ -DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); - -void -xfs_dir_hook_disable(void) -{ - xfs_hooks_switch_off(&xfs_dir_hooks_switch); -} - -void -xfs_dir_hook_enable(void) -{ - xfs_hooks_switch_on(&xfs_dir_hooks_switch); -} - -/* Call hooks for a directory update relating to a child dirent update. */ -inline void -xfs_dir_update_hook( - struct xfs_inode *dp, - struct xfs_inode *ip, - int delta, - const struct xfs_name *name) -{ - if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { - struct xfs_dir_update_params p = { - .dp = dp, - .ip = ip, - .delta = delta, - .name = name, - }; - struct xfs_mount *mp = ip->i_mount; - - xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); +xfs_icreate_dqalloc( + const struct xfs_icreate_args *args, + struct xfs_dquot **udqpp, + struct xfs_dquot **gdqpp, + struct xfs_dquot **pdqpp) +{ + struct inode *dir = VFS_I(args->pip); + kuid_t uid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID; + prid_t prid = 0; + unsigned int flags = XFS_QMOPT_QUOTALL; + + if (args->idmap) { + /* + * The uid/gid computation code must match what the VFS uses to + * assign i_[ug]id. INHERIT adjusts the gid computation for + * setgid/grpid systems. + */ + uid = mapped_fsuid(args->idmap, i_user_ns(dir)); + gid = mapped_fsgid(args->idmap, i_user_ns(dir)); + prid = xfs_get_initial_prid(args->pip); + flags |= XFS_QMOPT_INHERIT; } -} -/* Call the specified function during a directory update. */ -int -xfs_dir_hook_add( - struct xfs_mount *mp, - struct xfs_dir_hook *hook) -{ - return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); -} + *udqpp = *gdqpp = *pdqpp = NULL; -/* Stop calling the specified function during a directory update. */ -void -xfs_dir_hook_del( - struct xfs_mount *mp, - struct xfs_dir_hook *hook) -{ - xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); + return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp, + gdqpp, pdqpp); } -/* Configure directory update hook functions. */ -void -xfs_dir_hook_setup( - struct xfs_dir_hook *hook, - notifier_fn_t mod_fn) -{ - xfs_hook_setup(&hook->dirent_hook, mod_fn); -} -#endif /* CONFIG_XFS_LIVE_HOOKS */ - int xfs_create( - struct mnt_idmap *idmap, - struct xfs_inode *dp, + const struct xfs_icreate_args *args, struct xfs_name *name, - umode_t mode, - dev_t rdev, - bool init_xattrs, - xfs_inode_t **ipp) + struct xfs_inode **ipp) { - int is_dir = S_ISDIR(mode); + struct xfs_inode *dp = args->pip; + struct xfs_dir_update du = { + .dp = dp, + .name = name, + }; struct xfs_mount *mp = dp->i_mount; - struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; - int error; - bool unlock_dp_on_error = false; - prid_t prid; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; struct xfs_trans_res *tres; - uint resblks; xfs_ino_t ino; - struct xfs_parent_args *ppargs; + bool unlock_dp_on_error = false; + bool is_dir = S_ISDIR(args->mode); + uint resblks; + int error; trace_xfs_create(dp, name); @@ -1056,15 +662,8 @@ xfs_create( if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) return -EIO; - prid = xfs_get_initial_prid(dp); - - /* - * Make sure that we have allocated dquot(s) on disk. - */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), - mapped_fsgid(idmap, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + /* Make sure that we have allocated dquot(s) on disk. */ + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -1076,7 +675,7 @@ xfs_create( tres = &M_RES(mp)->tr_create; } - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto out_release_dquots; @@ -1105,10 +704,9 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); if (!error) - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, - is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); + error = xfs_icreate(tp, ino, args, &du.ip); if (error) goto out_trans_cancel; @@ -1121,38 +719,9 @@ xfs_create( */ xfs_trans_ijoin(tp, dp, 0); - error = xfs_dir_createname(tp, dp, name, ip->i_ino, - resblks - XFS_IALLOC_SPACE_RES(mp)); - if (error) { - ASSERT(error != -ENOSPC); + error = xfs_dir_create_child(tp, resblks, &du); + if (error) goto out_trans_cancel; - } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - - if (is_dir) { - error = xfs_dir_init(tp, ip, dp); - if (error) - goto out_trans_cancel; - - xfs_bumplink(tp, dp); - } - - /* - * If we have parent pointers, we need to add the attribute containing - * the parent information now. - */ - if (ppargs) { - error = xfs_parent_addname(tp, ppargs, dp, name, ip); - if (error) - goto out_trans_cancel; - } - - /* - * Create ip with a reference from dp, and add '.' and '..' references - * if it's a directory. - */ - xfs_dir_update_hook(dp, ip, 1, name); /* * If this is a synchronous mount, make sure that the @@ -1167,7 +736,7 @@ xfs_create( * These ids of the inode couldn't have changed since the new * inode has been locked ever since it was created. */ - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); + xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp); error = xfs_trans_commit(tp); if (error) @@ -1177,10 +746,10 @@ xfs_create( xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); - *ipp = ip; - xfs_iunlock(ip, XFS_ILOCK_EXCL); + *ipp = du.ip; + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return 0; out_trans_cancel: @@ -1191,13 +760,13 @@ xfs_create( * setup of the inode and release the inode. This prevents recursive * transactions and deadlocks from xfs_inactive. */ - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_finish_inode_setup(ip); - xfs_irele(ip); + if (du.ip) { + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); + xfs_finish_inode_setup(du.ip); + xfs_irele(du.ip); } out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -1210,36 +779,28 @@ xfs_create( int xfs_create_tmpfile( - struct mnt_idmap *idmap, - struct xfs_inode *dp, - umode_t mode, - bool init_xattrs, + const struct xfs_icreate_args *args, struct xfs_inode **ipp) { + struct xfs_inode *dp = args->pip; struct xfs_mount *mp = dp->i_mount; struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; - int error; - prid_t prid; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; struct xfs_trans_res *tres; - uint resblks; xfs_ino_t ino; + uint resblks; + int error; + + ASSERT(args->flags & XFS_ICREATE_TMPFILE); if (xfs_is_shutdown(mp)) return -EIO; - prid = xfs_get_initial_prid(dp); - - /* - * Make sure that we have allocated dquot(s) on disk. - */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), - mapped_fsgid(idmap, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + /* Make sure that we have allocated dquot(s) on disk. */ + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -1251,10 +812,9 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); if (!error) - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, - 0, 0, prid, init_xattrs, &ip); + error = xfs_icreate(tp, ino, args, &ip); if (error) goto out_trans_cancel; @@ -1311,11 +871,15 @@ xfs_link( struct xfs_inode *sip, struct xfs_name *target_name) { + struct xfs_dir_update du = { + .dp = tdp, + .name = target_name, + .ip = sip, + }; struct xfs_mount *mp = tdp->i_mount; struct xfs_trans *tp; int error, nospace_error = 0; int resblks; - struct xfs_parent_args *ppargs; trace_xfs_link(tdp, target_name); @@ -1334,7 +898,7 @@ xfs_link( if (error) goto std_return; - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto std_return; @@ -1349,7 +913,7 @@ xfs_link( * pointers are enabled because we can't back out if the xattrs must * grow. */ - if (ppargs && nospace_error) { + if (du.ppargs && nospace_error) { error = nospace_error; goto error_return; } @@ -1376,47 +940,9 @@ xfs_link( } } - if (!resblks) { - error = xfs_dir_canenter(tp, tdp, target_name); - if (error) - goto error_return; - } - - /* - * Handle initial link state of O_TMPFILE inode - */ - if (VFS_I(sip)->i_nlink == 0) { - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino)); - error = xfs_iunlink_remove(tp, pag, sip); - xfs_perag_put(pag); - if (error) - goto error_return; - } - - error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, - resblks); + error = xfs_dir_add_child(tp, resblks, &du); if (error) goto error_return; - xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); - - xfs_bumplink(tp, sip); - - /* - * If we have parent pointers, we now need to add the parent record to - * the attribute fork of the inode. If this is the initial parent - * attribute, we need to create it correctly, otherwise we can just add - * the parent to the inode. - */ - if (ppargs) { - error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip); - if (error) - goto error_return; - } - - xfs_dir_update_hook(tdp, sip, 1, target_name); /* * If this is a synchronous mount, make sure that the @@ -1429,7 +955,7 @@ xfs_link( error = xfs_trans_commit(tp); xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return error; error_return: @@ -1437,7 +963,7 @@ xfs_link( xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); std_return: if (error == -ENOSPC && nospace_error) error = nospace_error; @@ -2024,39 +1550,6 @@ out: } /* - * In-Core Unlinked List Lookups - * ============================= - * - * Every inode is supposed to be reachable from some other piece of metadata - * with the exception of the root directory. Inodes with a connection to a - * file descriptor but not linked from anywhere in the on-disk directory tree - * are collectively known as unlinked inodes, though the filesystem itself - * maintains links to these inodes so that on-disk metadata are consistent. - * - * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI - * header contains a number of buckets that point to an inode, and each inode - * record has a pointer to the next inode in the hash chain. This - * singly-linked list causes scaling problems in the iunlink remove function - * because we must walk that list to find the inode that points to the inode - * being removed from the unlinked hash bucket list. - * - * Hence we keep an in-memory double linked list to link each inode on an - * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer - * based lists would require having 64 list heads in the perag, one for each - * list. This is expensive in terms of memory (think millions of AGs) and cache - * misses on lookups. Instead, use the fact that inodes on the unlinked list - * must be referenced at the VFS level to keep them on the list and hence we - * have an existence guarantee for inodes on the unlinked list. - * - * Given we have an existence guarantee, we can use lockless inode cache lookups - * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode - * for the double linked unlinked list, and we don't need any extra locking to - * keep the list safe as all manipulations are done under the AGI buffer lock. - * Keeping the list up to date does not require memory allocation, just finding - * the XFS inode and updating the next/prev unlinked list aginos. - */ - -/* * Find an inode on the unlinked list. This does not take references to the * inode as we have existence guarantees by holding the AGI buffer lock and that * only unlinked, referenced inodes can be on the unlinked inode list. If we @@ -2091,75 +1584,11 @@ xfs_iunlink_lookup( } /* - * Update the prev pointer of the next agino. Returns -ENOLINK if the inode - * is not in cache. - */ -static int -xfs_iunlink_update_backref( - struct xfs_perag *pag, - xfs_agino_t prev_agino, - xfs_agino_t next_agino) -{ - struct xfs_inode *ip; - - /* No update necessary if we are at the end of the list. */ - if (next_agino == NULLAGINO) - return 0; - - ip = xfs_iunlink_lookup(pag, next_agino); - if (!ip) - return -ENOLINK; - - ip->i_prev_unlinked = prev_agino; - return 0; -} - -/* - * Point the AGI unlinked bucket at an inode and log the results. The caller - * is responsible for validating the old value. - */ -STATIC int -xfs_iunlink_update_bucket( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - unsigned int bucket_index, - xfs_agino_t new_agino) -{ - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t old_value; - int offset; - - ASSERT(xfs_verify_agino_or_null(pag, new_agino)); - - old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); - trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, - old_value, new_agino); - - /* - * We should never find the head of the list already set to the value - * passed in because either we're adding or removing ourselves from the - * head of the list. - */ - if (old_value == new_agino) { - xfs_buf_mark_corrupt(agibp); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); - offset = offsetof(struct xfs_agi, agi_unlinked) + - (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); - return 0; -} - -/* * Load the inode @next_agino into the cache and set its prev_unlinked pointer * to @prev_agino. Caller must hold the AGI to synchronize with other changes * to the unlinked list. */ -STATIC int +int xfs_iunlink_reload_next( struct xfs_trans *tp, struct xfs_buf *agibp, @@ -2215,187 +1644,6 @@ rele: return error; } -static int -xfs_iunlink_insert_inode( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t next_agino; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - int error; - - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the pointer isn't garbage and that this inode - * isn't already on the list. - */ - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (next_agino == agino || - !xfs_verify_agino_or_null(pag, next_agino)) { - xfs_buf_mark_corrupt(agibp); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - /* - * Update the prev pointer in the next inode to point back to this - * inode. - */ - error = xfs_iunlink_update_backref(pag, agino, next_agino); - if (error == -ENOLINK) - error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); - if (error) - return error; - - if (next_agino != NULLAGINO) { - /* - * There is already another inode in the bucket, so point this - * inode to the current head of the list. - */ - error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); - if (error) - return error; - ip->i_next_unlinked = next_agino; - } - - /* Point the head of the list to point to this inode. */ - ip->i_prev_unlinked = NULLAGINO; - return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); -} - -/* - * This is called when the inode's link count has gone to 0 or we are creating - * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. - * - * We place the on-disk inode on a list in the AGI. It will be pulled from this - * list when the inode is freed. - */ -int -xfs_iunlink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; - struct xfs_buf *agibp; - int error; - - ASSERT(VFS_I(ip)->i_nlink == 0); - ASSERT(VFS_I(ip)->i_mode != 0); - trace_xfs_iunlink(ip); - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, 0, &agibp); - if (error) - goto out; - - error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); -out: - xfs_perag_put(pag); - return error; -} - -static int -xfs_iunlink_remove_inode( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - xfs_agino_t head_agino; - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - int error; - - trace_xfs_iunlink_remove(ip); - - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the head pointer isn't garbage. - */ - head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (!xfs_verify_agino(pag, head_agino)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - agi, sizeof(*agi)); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - /* - * Set our inode's next_unlinked pointer to NULL and then return - * the old pointer value so that we can update whatever was previous - * to us in the list to point to whatever was next in the list. - */ - error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); - if (error) - return error; - - /* - * Update the prev pointer in the next inode to point back to previous - * inode in the chain. - */ - error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, - ip->i_next_unlinked); - if (error == -ENOLINK) - error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, - ip->i_next_unlinked); - if (error) - return error; - - if (head_agino != agino) { - struct xfs_inode *prev_ip; - - prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); - if (!prev_ip) { - xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); - return -EFSCORRUPTED; - } - - error = xfs_iunlink_log_inode(tp, prev_ip, pag, - ip->i_next_unlinked); - prev_ip->i_next_unlinked = ip->i_next_unlinked; - } else { - /* Point the head of the list to the next unlinked inode. */ - error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, - ip->i_next_unlinked); - } - - ip->i_next_unlinked = NULLAGINO; - ip->i_prev_unlinked = 0; - return error; -} - -/* - * Pull the on-disk inode from the AGI unlinked list. - */ -int -xfs_iunlink_remove( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_inode *ip) -{ - struct xfs_buf *agibp; - int error; - - trace_xfs_iunlink_remove(ip); - - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, 0, &agibp); - if (error) - return error; - - return xfs_iunlink_remove_inode(tp, pag, agibp, ip); -} - /* * Look up the inode number specified and if it is not already marked XFS_ISTALE * mark it stale. We should only find clean inodes in this lookup that aren't @@ -2614,36 +1862,10 @@ xfs_ifree( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - /* - * Free the inode first so that we guarantee that the AGI lock is going - * to be taken before we remove the inode from the unlinked list. This - * makes the AGI lock -> unlinked list modification order the same as - * used in O_TMPFILE creation. - */ - error = xfs_difree(tp, pag, ip->i_ino, &xic); - if (error) - goto out; |