diff options
| author | Dave Chinner <david@fromorbit.com> | 2014-05-15 09:37:44 +1000 |
|---|---|---|
| committer | Dave Chinner <david@fromorbit.com> | 2014-05-15 09:37:44 +1000 |
| commit | b76769294ba400415fc44038c21cc2df86f9a28b (patch) | |
| tree | a1ca6152391c86ae024e09d69e7b0ad75a5c61d5 /fs | |
| parent | 232c2f5c65dd80055e7163a5c82e3816119330e6 (diff) | |
| parent | 53801fd97ae000793f51187b122b9475102199a8 (diff) | |
| download | linux-b76769294ba400415fc44038c21cc2df86f9a28b.tar.gz linux-b76769294ba400415fc44038c21cc2df86f9a28b.tar.bz2 linux-b76769294ba400415fc44038c21cc2df86f9a28b.zip | |
Merge branch 'xfs-free-inode-btree' into for-next
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/xfs/xfs_ag.h | 36 | ||||
| -rw-r--r-- | fs/xfs/xfs_btree.c | 6 | ||||
| -rw-r--r-- | fs/xfs/xfs_btree.h | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_format.h | 14 | ||||
| -rw-r--r-- | fs/xfs/xfs_fs.h | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_fsops.c | 36 | ||||
| -rw-r--r-- | fs/xfs/xfs_ialloc.c | 695 | ||||
| -rw-r--r-- | fs/xfs/xfs_ialloc_btree.c | 68 | ||||
| -rw-r--r-- | fs/xfs/xfs_ialloc_btree.h | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 28 | ||||
| -rw-r--r-- | fs/xfs/xfs_itable.c | 6 | ||||
| -rw-r--r-- | fs/xfs/xfs_log_recover.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_sb.h | 10 | ||||
| -rw-r--r-- | fs/xfs/xfs_stats.c | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_stats.h | 18 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans_resv.c | 53 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans_space.h | 7 | ||||
| -rw-r--r-- | fs/xfs/xfs_types.h | 2 |
18 files changed, 852 insertions, 137 deletions
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 0fdd4109c624..6e247a99f5db 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -160,30 +160,38 @@ typedef struct xfs_agi { * still being referenced. */ __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; - + /* + * This marks the end of logging region 1 and start of logging region 2. + */ uuid_t agi_uuid; /* uuid of filesystem */ __be32 agi_crc; /* crc of agi sector */ __be32 agi_pad32; __be64 agi_lsn; /* last write sequence */ + __be32 agi_free_root; /* root of the free inode btree */ + __be32 agi_free_level;/* levels in free inode btree */ + /* structure must be padded to 64 bit alignment */ } xfs_agi_t; #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) -#define XFS_AGI_MAGICNUM 0x00000001 -#define XFS_AGI_VERSIONNUM 0x00000002 -#define XFS_AGI_SEQNO 0x00000004 -#define XFS_AGI_LENGTH 0x00000008 -#define XFS_AGI_COUNT 0x00000010 -#define XFS_AGI_ROOT 0x00000020 -#define XFS_AGI_LEVEL 0x00000040 -#define XFS_AGI_FREECOUNT 0x00000080 -#define XFS_AGI_NEWINO 0x00000100 -#define XFS_AGI_DIRINO 0x00000200 -#define XFS_AGI_UNLINKED 0x00000400 -#define XFS_AGI_NUM_BITS 11 -#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) +#define XFS_AGI_MAGICNUM (1 << 0) +#define XFS_AGI_VERSIONNUM (1 << 1) +#define XFS_AGI_SEQNO (1 << 2) +#define XFS_AGI_LENGTH (1 << 3) +#define XFS_AGI_COUNT (1 << 4) +#define XFS_AGI_ROOT (1 << 5) +#define XFS_AGI_LEVEL (1 << 6) +#define XFS_AGI_FREECOUNT (1 << 7) +#define XFS_AGI_NEWINO (1 << 8) +#define XFS_AGI_DIRINO (1 << 9) +#define XFS_AGI_UNLINKED (1 << 10) +#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ +#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) +#define XFS_AGI_FREE_ROOT (1 << 11) +#define XFS_AGI_FREE_LEVEL (1 << 12) +#define XFS_AGI_NUM_BITS_R2 13 /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index c13d650fdb99..182bac2bb276 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone; * Btree magic numbers. */ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + XFS_FIBT_MAGIC }, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } }; #define xfs_btree_magic(cur) \ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -1115,6 +1116,7 @@ xfs_btree_set_refs( xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: + case XFS_BTNUM_FINO: xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 875f6876ff48..a04b69422f67 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -62,6 +62,7 @@ union xfs_btree_rec { #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) +#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) /* * For logging record fields. @@ -92,6 +93,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -105,6 +107,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index 9898f31d05d8..34d85aca3058 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h @@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t; */ #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ +#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ +#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ typedef __uint64_t xfs_inofree_t; #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) @@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t; * block numbers in the AG. */ #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + +/* + * The first data block of an AG depends on whether the filesystem was formatted + * with the finobt feature. If so, account for the finobt reserved root btree + * block. + */ +#define XFS_PREALLOC_BLOCKS(mp) \ + (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ + XFS_FIBT_BLOCK(mp) + 1 : \ + XFS_IBT_BLOCK(mp) + 1) diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c5fc116dfaa3..d34703dbcb42 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ +#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 02fb943cbf22..3445ead7c1fc 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -104,7 +104,9 @@ xfs_fs_geometry( (xfs_sb_version_hascrc(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_V5SB : 0) | (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0); + XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | + (xfs_sb_version_hasfinobt(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_FINOBT : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; @@ -316,6 +318,10 @@ xfs_growfs_data_private( agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_sb_version_hascrc(&mp->m_sb)) uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); + agi->agi_free_level = cpu_to_be32(1); + } for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); @@ -407,6 +413,34 @@ xfs_growfs_data_private( xfs_buf_relse(bp); if (error) goto error0; + + /* + * FINO btree root block + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + bp = xfs_growfs_get_hdr_buf(mp, + XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), 0, + &xfs_inobt_buf_ops); + if (!bp) { + error = ENOMEM; + goto error0; + } + + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, + 0, 0, agno, + XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, + 0, agno, 0); + + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + goto error0; + } + } xfs_trans_agblocks_delta(tp, nfree); /* diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f711db61a0c..6ac0c2986c32 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -112,6 +112,66 @@ xfs_inobt_get_rec( } /* + * Insert a single inobt record. Cursor must already point to desired location. + */ +STATIC int +xfs_inobt_insert_rec( + struct xfs_btree_cur *cur, + __int32_t freecount, + xfs_inofree_t free, + int *stat) +{ + cur->bc_rec.i.ir_freecount = freecount; + cur->bc_rec.i.ir_free = free; + return xfs_btree_insert(cur, stat); +} + +/* + * Insert records describing a newly allocated inode chunk into the inobt. + */ +STATIC int +xfs_inobt_insert( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t newino, + xfs_agino_t newlen, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agino_t thisino; + int i; + int error; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + + for (thisino = newino; + thisino < newino + newlen; + thisino += XFS_INODES_PER_CHUNK) { + error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 0); + + error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, + XFS_INOBT_ALL_FREE, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 1); + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + return 0; +} + +/* * Verify that the number of free inodes in the AGI is correct. */ #ifdef DEBUG @@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc( { xfs_agi_t *agi; /* allocation group header */ xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_btree_cur_t *cur; /* inode btree cursor */ xfs_agnumber_t agno; int error; - int i; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ - xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ struct xfs_perag *pag; @@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc( agi->agi_newino = cpu_to_be32(newino); /* - * Insert records describing the new inode chunk into the btree. + * Insert records describing the new inode chunk into the btrees. */ - cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); - for (thisino = newino; - thisino < newino + newlen; - thisino += XFS_INODES_PER_CHUNK) { - cur->bc_rec.i.ir_startino = thisino; - cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; - cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; - error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - ASSERT(i == 0); - error = xfs_btree_insert(cur, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_INO); + if (error) + return error; + + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_FINO); + if (error) return error; - } - ASSERT(i == 1); } - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); /* * Log allocation group header fields */ @@ -675,13 +722,10 @@ xfs_ialloc_get_rec( } /* - * Allocate an inode. - * - * The caller selected an AG for us, and made sure that free inodes are - * available. + * Allocate an inode using the inobt-only algorithm. */ STATIC int -xfs_dialloc_ag( +xfs_dialloc_ag_inobt( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_ino_t parent, @@ -707,7 +751,7 @@ xfs_dialloc_ag( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -940,6 +984,294 @@ error0: } /* + * Use the free inode btree to allocate an inode based on distance from the + * parent. Note that the provided cursor may be deleted and replaced. + */ +STATIC int +xfs_dialloc_ag_finobt_near( + xfs_agino_t pagino, + struct xfs_btree_cur **ocur, + struct xfs_inobt_rec_incore *rec) +{ + struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ + struct xfs_btree_cur *rcur; /* right search cursor */ + struct xfs_inobt_rec_incore rrec; + int error; + int i, j; + + error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); + if (error) + return error; + + if (i == 1) { + error = xfs_inobt_get_rec(lcur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + /* + * See if we've landed in the parent inode record. The finobt + * only tracks chunks with at least one free inode, so record + * existence is enough. + */ + if (pagino >= rec->ir_startino && + pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) + return 0; + } + + error = xfs_btree_dup_cursor(lcur, &rcur); + if (error) + return error; + + error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); + if (error) + goto error_rcur; + if (j == 1) { + error = xfs_inobt_get_rec(rcur, &rrec, &j); + if (error) + goto error_rcur; + XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); + } + + XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); + if (i == 1 && j == 1) { + /* + * Both the left and right records are valid. Choose the closer + * inode chunk to the target. + */ + if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > + (rrec.ir_startino - pagino)) { + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else { + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + } else if (j == 1) { + /* only the right record is valid */ + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else if (i == 1) { + /* only the left record is valid */ + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + + return 0; + +error_rcur: + xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Use the free inode btree to find a free inode based on a newino hint. If + * the hint is NULL, find the first free inode in the AG. + */ +STATIC int +xfs_dialloc_ag_finobt_newino( + struct xfs_agi *agi, + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *rec) +{ + int error; + int i; + + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, + &i); + if (error) + return error; + if (i == 1) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; + } + } + + /* + * Find the first inode available in the AG. + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; +} + +/* + * Update the inobt based on a modification made to the finobt. Also ensure that + * the records from both trees are equivalent post-modification. + */ +STATIC int +xfs_dialloc_ag_update_inobt( + struct xfs_btree_cur *cur, /* inobt cursor */ + struct xfs_inobt_rec_incore *frec, /* finobt record */ + int offset) /* inode offset */ +{ + struct xfs_inobt_rec_incore rec; + int error; + int i; + + error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + + XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && + (rec.ir_freecount == frec->ir_freecount)); + + error = xfs_inobt_update(cur, &rec); + if (error) + return error; + + return 0; +} + +/* + * Allocate an inode using the free inode btree, if available. Otherwise, fall + * back to the inobt search algorithm. + * + * The caller selected an AG for us, and made sure that free inodes are + * available. + */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; /* finobt cursor */ + struct xfs_btree_cur *icur; /* inobt cursor */ + struct xfs_inobt_rec_incore rec; + xfs_ino_t ino; + int error; + int offset; + int i; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + + pag = xfs_perag_get(mp, agno); + + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = be32_to_cpu(agi->agi_newino); + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_cur; + + /* + * The search algorithm depends on whether we're in the same AG as the + * parent. If so, find the closest available inode to the parent. If + * not, consider the agi hint or find the first free inode in the AG. + */ + if (agno == pagno) + error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); + else + error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); + if (error) + goto error_cur; + + offset = xfs_lowbit64(rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + + /* + * Modify or remove the finobt record. + */ + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + if (rec.ir_freecount) + error = xfs_inobt_update(cur, &rec); + else + error = xfs_btree_delete(cur, &i); + if (error) + goto error_cur; + + /* + * The finobt has now been updated appropriately. We haven't updated the + * agi and superblock yet, so we can create an inobt cursor and validate + * the original freecount. If all is well, make the equivalent update to + * the inobt using the finobt record and offset information. + */ + icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + + error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); + if (error) + goto error_icur; + + /* + * Both trees have now been updated. We must update the perag and + * superblock before we can check the freecount for each btree. + */ + be32_add_cpu(&agi->agi_freecount, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag->pagi_freecount--; + + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_icur; + + xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_perag_put(pag); + *inop = ino; + return 0; + +error_icur: + xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); +error_cur: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); + return error; +} + +/* * Allocate an inode on disk. * * Mode is used to tell whether the new inode will need space, and whether it @@ -1098,78 +1430,34 @@ out_error: return XFS_ERROR(error); } -/* - * Free disk inode. Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int -xfs_difree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - xfs_bmap_free_t *flist, /* extents to free */ - int *delete, /* set if inode cluster was deleted */ - xfs_ino_t *first_ino) /* first inode in deleted cluster */ +STATIC int +xfs_difree_inobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_bmap_free *flist, + int *delete, + xfs_ino_t *first_ino, + struct xfs_inobt_rec_incore *orec) { - /* REFERENCED */ - xfs_agblock_t agbno; /* block number containing inode */ - xfs_buf_t *agbp; /* buffer containing allocation group header */ - xfs_agino_t agino; /* inode number relative to allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agi_t *agi; /* allocation group header */ - xfs_btree_cur_t *cur; /* inode btree cursor */ - int error; /* error return value */ - int i; /* result code */ - int ilen; /* inodes in an inode cluster */ - xfs_mount_t *mp; /* mount structure for filesystem */ - int off; /* offset of inode in inode chunk */ - xfs_inobt_rec_incore_t rec; /* btree record */ - struct xfs_perag *pag; - - mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int ilen; + int error; + int i; + int off; - /* - * Break up inode number into its components. - */ - agno = XFS_INO_TO_AGNO(mp, inode); - if (agno >= mp->m_sb.sb_agcount) { - xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", - __func__, agno, mp->m_sb.sb_agcount); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", - __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - /* - * Get the allocation group header. - */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", - __func__, error); - return error; - } - agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(agbno < be32_to_cpu(agi->agi_length)); + ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); + /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1261,6 +1549,7 @@ xfs_difree( if (error) goto error0; + *orec = rec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; @@ -1269,6 +1558,182 @@ error0: return error; } +/* + * Free an inode in the free inode btree. + */ +STATIC int +xfs_difree_finobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int offset = agino - ibtrec->ir_startino; + int error; + int i; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + if (i == 0) { + /* + * If the record does not exist in the finobt, we must have just + * freed an inode in a previously fully allocated chunk. If not, + * something is out of sync. + */ + XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + + error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, + ibtrec->ir_free, &i); + if (error) + goto error; + ASSERT(i == 1); + + goto out; + } + + /* + * Read and update the existing record. We could just copy the ibtrec + * across here, but that would defeat the purpose of having redundant + * metadata. By making the modifications independently, we can catch + * corruptions that we wouldn't see if we just copied from one record + * to another. + */ + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(i == 1, error); + + rec.ir_free |= XFS_INOBT_MASK(offset); + rec.ir_freecount++; + + XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && + (rec.ir_freecount == ibtrec->ir_freecount), + error); + + /* + * The content of inobt records should always match between the inobt + * and finobt. The lifecycle of records in the finobt is different from + * the inobt in that the finobt only tracks records with at least one + * free inode. Hence, if all of the inodes are free and we aren't + * keeping inode chunks permanently on disk, remove the record. + * Otherwise, update the record with the new information. + */ + if (rec.ir_freecount == mp->m_ialloc_inos && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + error = xfs_btree_delete(cur, &i); + if (error) + goto error; + ASSERT(i == 1); + } else { + error = xfs_inobt_update(cur, &rec); + if (error) + goto error; + } + +out: + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; + +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t inode, /* inode to be freed */ + struct xfs_bmap_free *flist, /* extents to free */ + int *delete,/* set if inode cluster was deleted */ + xfs_ino_t *first_ino)/* first inode in deleted cluster */ +{ + /* REFERENCED */ + xfs_agblock_t agbno; /* block number containing inode */ + struct xfs_buf *agbp; /* buffer for allocation group header */ + xfs_agino_t agino; /* allocation group inode number */ + xfs_agnumber_t agno; /* allocation group number */ + int error; /* error return value */ + struct xfs_mount *mp; /* mount structure for filesystem */ + struct xfs_inobt_rec_incore rec;/* btree record */ + + mp = tp->t_mountp; + + /* + * Break up inode number into its components. + */ + agno = XFS_INO_TO_AGNO(mp, inode); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", + __func__, agno, mp->m_sb.sb_agcount); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agino = XFS_INO_TO_AGINO(mp, inode); + if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + __func__, (unsigned long long)inode, + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", + __func__, agbno, mp->m_sb.sb_agblocks); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + /* + * Get the allocation group header. + */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", + __func__, error); + return error; + } + + /* + * Fix up the inode allocation btree. + */ + error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino, + &rec); + if (error) + goto error0; + + /* + * Fix up the free inode btree. + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); + if (error) + goto error0; + } + + return 0; + +error0: + return error; +} + STATIC int xfs_imap_lookup( struct xfs_mount *mp, @@ -1300,7 +1765,7 @@ xfs_imap_lookup( * we have a record, we need to |
