diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-17 12:57:48 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-17 12:57:48 -0700 |
commit | bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0 (patch) | |
tree | 791228dc4eb6d90e2c27295930449b06f6952ad3 /fs/xfs/libxfs/xfs_alloc.c | |
parent | 0260b0a7445c62a08938fa66fad256e5d0779817 (diff) | |
parent | 2bf6e353542d233486195953dc9c346331f82dcb (diff) | |
download | linux-bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0.tar.gz linux-bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0.tar.bz2 linux-bf3aa9de7ba57c2c7b5ea70c1ad3a6670cd6fcb0.zip |
Merge tag 'xfs-6.11-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Chandan Babu:
"Major changes in this release are limited to enabling FITRIM on
realtime devices and byte-based grant head log reservation tracking.
The remaining changes are limited to fixes and cleanups included in
this pull request.
Core:
- Enable FITRIM on the realtime device
- Introduce byte-based grant head log reservation tracking instead of
physical log location tracking.
This allows the grant heads to track a full 64-bit byte space and hence
overcome the 4GB indexing limit that has been present until now.
Fixes:
- xfs_flush_unmap_range() and xfs_prepare_shift() should consider RT
extents in the flush unmap range
- Implement bounds check when traversing log operations during log
replay
- Prevent out of bounds access when traversing a directory data block
- Prevent incorrect ENOSPC when concurrently performing file creation
and file writes
- Fix rtalloc rotoring when delalloc is in use
Cleanups:
- Clean up I/O path inode locking helpers and the page fault handler
- xfs: hoist inode operations to libxfs in anticipation of the
metadata inode directory feature, which maintains a directory tree
of metadata inodes. This will be necessary for further enhancements
to the realtime feature and to subvolume support
- Clean up some warts in the extent freeing log intent code
- Clean up the refcount and rmap intent code before adding support
for realtime devices
- Provide the correct email address for sysfs ABI documentation"
* tag 'xfs-6.11-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (80 commits)
xfs: fix rtalloc rotoring when delalloc is in use
xfs: get rid of xfs_ag_resv_rmapbt_alloc
xfs: skip flushing log items during push
xfs: grant heads track byte counts, not LSNs
xfs: pass the full grant head to accounting functions
xfs: track log space pinned by the AIL
xfs: collapse xlog_state_set_callback in caller
xfs: l_last_sync_lsn is really AIL state
xfs: ensure log tail is always up to date
xfs: background AIL push should target physical space
xfs: AIL doesn't need manual pushing
xfs: move and rename xfs_trans_committed_bulk
xfs: fix the contact address for the sysfs ABI documentation
xfs: Avoid races with cnt_btree lastrec updates
xfs: move xfs_refcount_update_defer_add to xfs_refcount_item.c
xfs: simplify usage of the rcur local variable in xfs_refcount_finish_one
xfs: don't bother calling xfs_refcount_finish_one_cleanup in xfs_refcount_finish_one
xfs: reuse xfs_refcount_update_cancel_item
xfs: add a ci_entry helper
xfs: remove xfs_trans_set_refcount_flags
...
Diffstat (limited to 'fs/xfs/libxfs/xfs_alloc.c')
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 235 |
1 files changed, 139 insertions, 96 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6c55a6e88eba..59326f84f6a5 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,6 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" #include "xfs_health.h" +#include "xfs_extfree_item.h" struct kmem_cache *xfs_extfree_item_cache; @@ -466,6 +467,97 @@ xfs_alloc_fix_len( } /* + * Determine if the cursor points to the block that contains the right-most + * block of records in the by-count btree. This block contains the largest + * contiguous free extent in the AG, so if we modify a record in this block we + * need to call xfs_alloc_fixup_longest() once the modifications are done to + * ensure the agf->agf_longest field is kept up to date with the longest free + * extent tracked by the by-count btree. + */ +static bool +xfs_alloc_cursor_at_lastrec( + struct xfs_btree_cur *cnt_cur) +{ + struct xfs_btree_block *block; + union xfs_btree_ptr ptr; + struct xfs_buf *bp; + + block = xfs_btree_get_block(cnt_cur, 0, &bp); + + xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB); + return xfs_btree_ptr_is_null(cnt_cur, &ptr); +} + +/* + * Find the rightmost record of the cntbt, and return the longest free space + * recorded in it. Simply set both the block number and the length to their + * maximum values before searching. 
+ */ +static int +xfs_cntbt_longest( + struct xfs_btree_cur *cnt_cur, + xfs_extlen_t *longest) +{ + struct xfs_alloc_rec_incore irec; + union xfs_btree_rec *rec; + int stat = 0; + int error; + + memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec)); + error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat); + if (error) + return error; + if (!stat) { + /* totally empty tree */ + *longest = 0; + return 0; + } + + error = xfs_btree_get_rec(cnt_cur, &rec, &stat); + if (error) + return error; + if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) { + xfs_btree_mark_sick(cnt_cur); + return -EFSCORRUPTED; + } + + xfs_alloc_btrec_to_irec(rec, &irec); + *longest = irec.ar_blockcount; + return 0; +} + +/* + * Update the longest contiguous free extent in the AG from the by-count cursor + * that is passed to us. This should be done at the end of any allocation or + * freeing operation that touches the longest extent in the btree. + * + * Needing to update the longest extent can be determined by calling + * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record + * modification but before the modification begins. + */ +static int +xfs_alloc_fixup_longest( + struct xfs_btree_cur *cnt_cur) +{ + struct xfs_perag *pag = cnt_cur->bc_ag.pag; + struct xfs_buf *bp = cnt_cur->bc_ag.agbp; + struct xfs_agf *agf = bp->b_addr; + xfs_extlen_t longest = 0; + int error; + + /* Lookup last rec in order to update AGF. */ + error = xfs_cntbt_longest(cnt_cur, &longest); + if (error) + return error; + + pag->pagf_longest = longest; + agf->agf_longest = cpu_to_be32(pag->pagf_longest); + xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST); + + return 0; +} + +/* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the * actual (current) free extent fbno for flen blocks. 
@@ -489,6 +581,7 @@ xfs_alloc_fixup_trees( xfs_extlen_t nflen1=0; /* first new free length */ xfs_extlen_t nflen2=0; /* second new free length */ struct xfs_mount *mp; + bool fixup_longest = false; mp = cnt_cur->bc_mp; @@ -577,6 +670,10 @@ xfs_alloc_fixup_trees( nfbno2 = rbno + rlen; nflen2 = (fbno + flen) - nfbno2; } + + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) + fixup_longest = true; + /* * Delete the entry from the by-size btree. */ @@ -654,6 +751,10 @@ xfs_alloc_fixup_trees( return -EFSCORRUPTED; } } + + if (fixup_longest) + return xfs_alloc_fixup_longest(cnt_cur); + return 0; } @@ -1932,7 +2033,7 @@ out_nominleft: /* * Free the extent starting at agno/bno for length. */ -STATIC int +int xfs_free_ag_extent( struct xfs_trans *tp, struct xfs_buf *agbp, @@ -1956,6 +2057,7 @@ xfs_free_ag_extent( int i; int error; struct xfs_perag *pag = agbp->b_pag; + bool fixup_longest = false; bno_cur = cnt_cur = NULL; mp = tp->t_mountp; @@ -2219,8 +2321,13 @@ xfs_free_ag_extent( } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); bno_cur = NULL; + /* * In all cases we need to insert the new freespace in the by-size tree. + * + * If this new freespace is being inserted in the block that contains + * the largest free space in the btree, make sure we also fix up the + * agf->agf-longest tracker field. 
*/ if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; @@ -2229,6 +2336,8 @@ xfs_free_ag_extent( error = -EFSCORRUPTED; goto error0; } + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) + fixup_longest = true; if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { @@ -2236,6 +2345,12 @@ xfs_free_ag_extent( error = -EFSCORRUPTED; goto error0; } + if (fixup_longest) { + error = xfs_alloc_fixup_longest(cnt_cur); + if (error) + goto error0; + } + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; @@ -2422,32 +2537,6 @@ xfs_alloc_space_available( return true; } -int -xfs_free_agfl_block( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agblock_t agbno, - struct xfs_buf *agbp, - struct xfs_owner_info *oinfo) -{ - int error; - struct xfs_buf *bp; - - error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo, - XFS_AG_RESV_AGFL); - if (error) - return error; - - error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp, - XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno), - tp->t_mountp->m_bsize, 0, &bp); - if (error) - return error; - xfs_trans_binval(tp, bp); - - return 0; -} - /* * Check the agfl fields of the agf for inconsistency or corruption. * @@ -2536,48 +2625,6 @@ xfs_agfl_reset( } /* - * Defer an AGFL block free. This is effectively equivalent to - * xfs_free_extent_later() with some special handling particular to AGFL blocks. - * - * Deferring AGFL frees helps prevent log reservation overruns due to too many - * allocation operations in a transaction. AGFL frees are prone to this problem - * because for one they are always freed one at a time. Further, an immediate - * AGFL block free can cause a btree join and require another block free before - * the real allocation can proceed. Deferring the free disconnects freeing up - * the AGFL slot from freeing the block. 
- */ -static int -xfs_defer_agfl_block( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agblock_t agbno, - struct xfs_owner_info *oinfo) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent_free_item *xefi; - xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); - - ASSERT(xfs_extfree_item_cache != NULL); - ASSERT(oinfo != NULL); - - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) - return -EFSCORRUPTED; - - xefi = kmem_cache_zalloc(xfs_extfree_item_cache, - GFP_KERNEL | __GFP_NOFAIL); - xefi->xefi_startblock = fsbno; - xefi->xefi_blockcount = 1; - xefi->xefi_owner = oinfo->oi_owner; - xefi->xefi_agresv = XFS_AG_RESV_AGFL; - - trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); - - xfs_extent_free_get_group(mp, xefi); - xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type); - return 0; -} - -/* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ @@ -2588,28 +2635,15 @@ xfs_defer_extent_free( xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, - bool skip_discard, + unsigned int free_flags, struct xfs_defer_pending **dfpp) { struct xfs_extent_free_item *xefi; struct xfs_mount *mp = tp->t_mountp; -#ifdef DEBUG - xfs_agnumber_t agno; - xfs_agblock_t agbno; - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_extfree_item_cache != NULL); - ASSERT(type != XFS_AG_RESV_AGFL); + ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; @@ -2619,7 +2653,7 @@ xfs_defer_extent_free( xefi->xefi_startblock = bno; xefi->xefi_blockcount = (xfs_extlen_t)len; 
xefi->xefi_agresv = type; - if (skip_discard) + if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { ASSERT(oinfo->oi_offset == 0); @@ -2632,12 +2666,8 @@ xfs_defer_extent_free( } else { xefi->xefi_owner = XFS_RMAP_OWN_NULL; } - trace_xfs_bmap_free_defer(mp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_extent_free_get_group(mp, xefi); - *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type); + xfs_extent_free_defer_add(tp, xefi, dfpp); return 0; } @@ -2648,11 +2678,11 @@ xfs_free_extent_later( xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, - bool skip_discard) + unsigned int free_flags) { struct xfs_defer_pending *dontcare = NULL; - return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard, + return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags, &dontcare); } @@ -2677,13 +2707,13 @@ xfs_free_extent_later( int xfs_alloc_schedule_autoreap( const struct xfs_alloc_arg *args, - bool skip_discard, + unsigned int free_flags, struct xfs_alloc_autoreap *aarp) { int error; error = xfs_defer_extent_free(args->tp, args->fsbno, args->len, - &args->oinfo, args->resv, skip_discard, &aarp->dfp); + &args->oinfo, args->resv, free_flags, &aarp->dfp); if (error) return error; @@ -2895,8 +2925,21 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; - /* defer agfl frees */ - error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + /* + * Defer the AGFL block free. + * + * This helps to prevent log reservation overruns due to too + * many allocation operations in a transaction. AGFL frees are + * prone to this problem because for one they are always freed + * one at a time. Further, an immediate AGFL block free can + * cause a btree join and require another block free before the + * real allocation can proceed. + * Deferring the free disconnects freeing up the AGFL slot from + * freeing the block. 
+ */ + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, args->agno, bno), 1, + &targs.oinfo, XFS_AG_RESV_AGFL, 0); if (error) goto out_agbp_relse; } |