diff options
Diffstat (limited to 'fs/xfs/libxfs')
| -rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 2 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 109 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c | 9 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.h | 1 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_da_format.h | 1 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_dir2.c | 8 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_format.h | 104 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_fs.h | 25 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 2 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.c | 83 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.c | 39 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.h | 76 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_log_format.h | 33 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 5 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.c | 11 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_types.h | 11 |
17 files changed, 384 insertions, 138 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 1ff3fa67d4c9..d3f2886fdc08 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2511,7 +2511,7 @@ __xfs_free_extent_later( ASSERT(bno != NULLFSBLOCK); ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); + ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); agno = XFS_FSB_TO_AGNO(mp, bno); agbno = XFS_FSB_TO_AGBNO(mp, bno); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 23523b802539..2815cfbbae70 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -776,6 +776,9 @@ xfs_attr_set( if (args->value || xfs_inode_hasattr(dp)) { error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK, XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(args->trans, dp, + XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); if (error) goto out_trans_cancel; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ad938e6e23aa..24462bdfd8e7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -52,19 +52,17 @@ xfs_bmap_compute_maxlevels( xfs_mount_t *mp, /* file system mount structure */ int whichfork) /* data or attr fork */ { + uint64_t maxblocks; /* max blocks at this level */ + xfs_extnum_t maxleafents; /* max leaf entries possible */ int level; /* btree level */ - uint maxblocks; /* max blocks at this level */ - uint maxleafents; /* max leaf entries possible */ int maxrootrecs; /* max records in root block */ int minleafrecs; /* min records in leaf block */ int minnoderecs; /* min records in node block */ int sz; /* root block size */ /* - * The maximum number of extents in a file, hence the maximum number of - * leaf entries, is controlled by the size of the on-disk extent count, - * either a signed 32-bit number for the data fork, or a signed 16-bit - * number for the attr fork. + * The maximum number of extents in a fork, hence the maximum number of + * leaf entries, is controlled by the size of the on-disk extent count. * * Note that we can no longer assume that if we are in ATTR1 that the * fork offset of all the inodes will be @@ -74,22 +72,22 @@ xfs_bmap_compute_maxlevels( * ATTR2 we have to assume the worst case scenario of a minimum size * available. */ - if (whichfork == XFS_DATA_FORK) { - maxleafents = MAXEXTNUM; + maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp), + whichfork); + if (whichfork == XFS_DATA_FORK) sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); - } else { - maxleafents = MAXAEXTNUM; + else sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); - } + maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; minnoderecs = mp->m_bmap_dmnr[1]; - maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + maxblocks = howmany_64(maxleafents, minleafrecs); for (level = 1; maxblocks > 1; level++) { if (maxblocks <= maxrootrecs) maxblocks = 1; else - maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + maxblocks = howmany_64(maxblocks, minnoderecs); } mp->m_bm_maxlevels[whichfork] = level; ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk()); @@ -468,7 +466,7 @@ error0: if (bp_release) xfs_trans_brelse(NULL, bp); error_norelse: - xfs_warn(mp, "%s: BAD after btree leaves for %d extents", + xfs_warn(mp, "%s: BAD after btree leaves for %llu extents", __func__, i); xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); @@ -1452,7 +1450,7 @@ xfs_bmap_add_extent_delay_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) state |= BMAP_LEFT_CONTIG; /* @@ -1470,13 +1468,13 @@ xfs_bmap_add_extent_delay_real( new_endoff == RIGHT.br_startoff && new->br_startblock + new->br_blockcount == RIGHT.br_startblock && new->br_state == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN && ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) != (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) + <= XFS_MAX_BMBT_EXTLEN)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -2000,7 +1998,7 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) state |= BMAP_LEFT_CONTIG; /* @@ -2018,13 +2016,13 @@ xfs_bmap_add_extent_unwritten_real( new_endoff == RIGHT.br_startoff && new->br_startblock + new->br_blockcount == RIGHT.br_startblock && new->br_state == RIGHT.br_state && - new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN && ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) != (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= MAXEXTLEN)) + <= XFS_MAX_BMBT_EXTLEN)) state |= BMAP_RIGHT_CONTIG; /* @@ -2510,15 +2508,15 @@ xfs_bmap_add_extent_hole_delay( */ if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN))) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) state |= BMAP_RIGHT_CONTIG; /* @@ -2661,17 +2659,17 @@ xfs_bmap_add_extent_hole_real( left.br_startoff + left.br_blockcount == new->br_startoff && left.br_startblock + left.br_blockcount == new->br_startblock && left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= MAXEXTLEN) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && new->br_startoff + new->br_blockcount == right.br_startoff && new->br_startblock + new->br_blockcount == right.br_startblock && new->br_state == right.br_state && - new->br_blockcount + right.br_blockcount <= MAXEXTLEN && + new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || left.br_blockcount + new->br_blockcount + - right.br_blockcount <= MAXEXTLEN)) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -2906,15 +2904,15 @@ xfs_bmap_extsize_align( /* * For large extent hint sizes, the aligned extent might be larger than - * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls - * the length back under MAXEXTLEN. The outer allocation loops handle - * short allocation just fine, so it is safe to do this. We only want to - * do it when we are forced to, though, because it means more allocation - * operations are required. + * XFS_BMBT_MAX_EXTLEN. In that case, reduce the size by an extsz so + * that it pulls the length back under XFS_BMBT_MAX_EXTLEN. The outer + * allocation loops handle short allocation just fine, so it is safe to + * do this. We only want to do it when we are forced to, though, because + * it means more allocation operations are required. */ - while (align_alen > MAXEXTLEN) + while (align_alen > XFS_MAX_BMBT_EXTLEN) align_alen -= extsz; - ASSERT(align_alen <= MAXEXTLEN); + ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN); /* * If the previous block overlaps with this proposed allocation @@ -3004,9 +3002,9 @@ xfs_bmap_extsize_align( return -EINVAL; } else { ASSERT(orig_off >= align_off); - /* see MAXEXTLEN handling above */ + /* see XFS_BMBT_MAX_EXTLEN handling above */ ASSERT(orig_end <= align_off + align_alen || - align_alen + extsz > MAXEXTLEN); + align_alen + extsz > XFS_MAX_BMBT_EXTLEN); } #ifdef DEBUG @@ -3971,7 +3969,7 @@ xfs_bmapi_reserve_delalloc( * Cap the alloc length. Keep track of prealloc so we know whether to * tag the inode before we return. */ - alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); + alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); if (prealloc && alen >= len) @@ -4104,7 +4102,7 @@ xfs_bmapi_allocate( if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev)) bma->prev.br_startoff = NULLFILEOFF; } else { - bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); + bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN); if (!bma->eof) bma->length = XFS_FILBLKS_MIN(bma->length, bma->got.br_startoff - bma->offset); @@ -4424,8 +4422,8 @@ xfs_bmapi_write( * xfs_extlen_t and therefore 32 bits. Hence we have to * check for 32-bit overflows and handle them here. */ - if (len > (xfs_filblks_t)MAXEXTLEN) - bma.length = MAXEXTLEN; + if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN) + bma.length = XFS_MAX_BMBT_EXTLEN; else bma.length = len; @@ -4526,14 +4524,16 @@ xfs_bmapi_convert_delalloc( return error; xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); error = xfs_iext_count_may_overflow(ip, whichfork, XFS_IEXT_ADD_NOSPLIT_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, + XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto out_trans_cancel; - xfs_trans_ijoin(tp, ip, 0); - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) || bma.got.br_startoff > offset_fsb) { /* @@ -4560,7 +4560,8 @@ xfs_bmapi_convert_delalloc( bma.ip = ip; bma.wasdel = true; bma.offset = bma.got.br_startoff; - bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN); + bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, + XFS_MAX_BMBT_EXTLEN); bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); /* @@ -4641,7 +4642,7 @@ xfs_bmapi_remap( ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(len > 0); - ASSERT(len <= (xfs_filblks_t)MAXEXTLEN); + ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC | XFS_BMAPI_NORMAP))); @@ -5148,26 +5149,6 @@ xfs_bmap_del_extent_real( * Deleting the middle of the extent. */ - /* - * For directories, -ENOSPC is returned since a directory entry - * remove operation must not fail due to low extent count - * availability. -ENOSPC will be handled by higher layers of XFS - * by letting the corresponding empty Data/Free blocks to linger - * until a future remove operation. Dabtree blocks would be - * swapped with the last block in the leaf space and then the - * new last block will be unmapped. - * - * The above logic also applies to the source directory entry of - * a rename operation. - */ - error = xfs_iext_count_may_overflow(ip, whichfork, 1); - if (error) { - ASSERT(S_ISDIR(VFS_I(ip)->i_mode) && - whichfork == XFS_DATA_FORK); - error = -ENOSPC; - goto done; - } - old = got; got.br_blockcount = del->br_startoff - got.br_startoff; @@ -5641,7 +5622,7 @@ xfs_bmse_can_merge( if ((left->br_startoff + left->br_blockcount != startoff) || (left->br_startblock + left->br_blockcount != got->br_startblock) || (left->br_state != got->br_state) || - (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) + (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN)) return false; return true; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 453309fc85f2..2b77d45c215f 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -597,7 +597,11 @@ xfs_bmbt_maxrecs( return xfs_bmbt_block_maxrecs(blocklen, leaf); } -/* Compute the max possible height for block mapping btrees. */ +/* + * Calculate the maximum possible height of the btree that the on-disk format + * supports. This is used for sizing structures large enough to support every + * possible configuration of a filesystem that might get mounted. + */ unsigned int xfs_bmbt_maxlevels_ondisk(void) { @@ -611,7 +615,8 @@ xfs_bmbt_maxlevels_ondisk(void) minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; /* One extra level for the inode root. */ - return xfs_btree_compute_maxlevels(minrecs, MAXEXTNUM) + 1; + return xfs_btree_compute_maxlevels(minrecs, + XFS_MAX_EXTCNT_DATA_FORK_LARGE) + 1; } /* diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 7b0f986e5cb5..deb368d041e3 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -30,6 +30,7 @@ struct xfs_da_geometry { unsigned int free_hdr_size; /* dir2 free header size */ unsigned int free_max_bests; /* # of bests entries in dir2 free */ xfs_dablk_t freeblk; /* blockno of free data v2 */ + xfs_extnum_t max_extents; /* Max. extents in corresponding fork */ xfs_dir2_data_aoff_t data_first_offset; size_t data_entry_offset; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 4c6561baf9e9..25e2841084e1 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -277,6 +277,7 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) * Directory address space divided into sections, * spaces separated by 32GB. */ +#define XFS_DIR2_MAX_SPACES 3 #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) #define XFS_DIR2_DATA_SPACE 0 #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 5f1e4799e8fa..3cd51fa3837b 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -150,6 +150,8 @@ xfs_da_mount( dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / (uint)sizeof(xfs_da_node_entry_t); + dageo->max_extents = (XFS_DIR2_MAX_SPACES * XFS_DIR2_SPACE_SIZE) >> + mp->m_sb.sb_blocklog; dageo->magicpct = (dageo->blksize * 37) / 100; /* set up attribute geometry - single fsb only */ @@ -161,6 +163,12 @@ xfs_da_mount( dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size; dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / (uint)sizeof(xfs_da_node_entry_t); + + if (xfs_has_large_extent_counts(mp)) + dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_LARGE; + else + dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_SMALL; + dageo->magicpct = (dageo->blksize * 37) / 100; return 0; } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f524736d811e..96fd49fbc9fa 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -372,12 +372,14 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ #define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ #define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ +#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ XFS_SB_FEAT_INCOMPAT_META_UUID| \ XFS_SB_FEAT_INCOMPAT_BIGTIME| \ - XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR| \ + XFS_SB_FEAT_INCOMPAT_NREXT64) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -791,16 +793,41 @@ struct xfs_dinode { __be32 di_nlink; /* number of links to file */ __be16 di_projid_lo; /* lower part of owner's project id */ __be16 di_projid_hi; /* higher part owner's project id */ - __u8 di_pad[6]; /* unused, zeroed space */ - __be16 di_flushiter; /* incremented on flush */ + union { + /* Number of data fork extents if NREXT64 is set */ + __be64 di_big_nextents; + + /* Padding for V3 inodes without NREXT64 set. */ + __be64 di_v3_pad; + + /* Padding and inode flush counter for V2 inodes. */ + struct { + __u8 di_v2_pad[6]; + __be16 di_flushiter; + }; + }; xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ xfs_timestamp_t di_ctime; /* time created/inode modified */ __be64 di_size; /* number of bytes in file */ __be64 di_nblocks; /* # of direct & btree blocks used */ __be32 di_extsize; /* basic/minimum extent size for file */ - __be32 di_nextents; /* number of extents in data fork */ - __be16 di_anextents; /* number of extents in attribute fork*/ + union { + /* + * For V2 inodes and V3 inodes without NREXT64 set, this + * is the number of data and attr fork extents. + */ + struct { + __be32 di_nextents; + __be16 di_anextents; + } __packed; + + /* Number of attr fork extents if NREXT64 is set. */ + struct { + __be32 di_big_anextents; + __be16 di_nrext64_pad; + } __packed; + } __packed; __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ __s8 di_aformat; /* format of attr fork's data */ __be32 di_dmevmask; /* DMIG event mask */ @@ -870,6 +897,56 @@ enum xfs_dinode_fmt { { XFS_DINODE_FMT_UUID, "uuid" } /* + * Max values for extnum and aextnum. + * + * The original on-disk extent counts were held in signed fields, resulting in + * maximum extent counts of 2^31 and 2^15 for the data and attr forks + * respectively. Similarly the maximum extent length is limited to 2^21 blocks + * by the 21-bit wide blockcount field of a BMBT extent record. + * + * The newly introduced data fork extent counter can hold a 64-bit value, + * however the maximum number of extents in a file is also limited to 2^54 + * extents by the 54-bit wide startoff field of a BMBT extent record. + * + * It is further limited by the maximum supported file size of 2^63 + * *bytes*. This leads to a maximum extent count for maximally sized filesystem + * blocks (64kB) of: + * + * 2^63 bytes / 2^16 bytes per block = 2^47 blocks + * + * Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence + * 2^48 was chosen as the maximum data fork extent count. + * + * The maximum file size that can be represented by the data fork extent counter + * in the worst case occurs when all extents are 1 block in length and each + * block is 1KB in size. + * + * With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and + * with 1KB sized blocks, a file can reach upto, + * 1KB * (2^31) = 2TB + * + * This is much larger than the theoretical maximum size of a directory + * i.e. XFS_DIR2_SPACE_SIZE * XFS_DIR2_MAX_SPACES = ~96GB. + * + * Hence, a directory inode can never overflow its data fork extent counter. + */ +#define XFS_MAX_EXTCNT_DATA_FORK_LARGE ((xfs_extnum_t)((1ULL << 48) - 1)) +#define XFS_MAX_EXTCNT_ATTR_FORK_LARGE ((xfs_extnum_t)((1ULL << 32) - 1)) +#define XFS_MAX_EXTCNT_DATA_FORK_SMALL ((xfs_extnum_t)((1ULL << 31) - 1)) +#define XFS_MAX_EXTCNT_ATTR_FORK_SMALL ((xfs_extnum_t)((1ULL << 15) - 1)) + +/* + * When we upgrade an inode to the large extent counts, the maximum value by + * which the extent count can increase is bound by the change in size of the + * on-disk field. No upgrade operation should ever be adding more than a few + * tens of extents, so if we get a really large value it is a sign of a code bug + * or corruption. + */ +#define XFS_MAX_EXTCNT_UPGRADE_NR \ + min(XFS_MAX_EXTCNT_ATTR_FORK_LARGE - XFS_MAX_EXTCNT_ATTR_FORK_SMALL, \ + XFS_MAX_EXTCNT_DATA_FORK_LARGE - XFS_MAX_EXTCNT_DATA_FORK_SMALL) + +/* * Inode minimum and maximum sizes. */ #define XFS_DINODE_MIN_LOG 8 @@ -918,10 +995,6 @@ enum xfs_dinode_fmt { ((w) == XFS_DATA_FORK ? \ (dip)->di_format : \ (dip)->di_aformat) -#define XFS_DFORK_NEXTENTS(dip,w) \ - ((w) == XFS_DATA_FORK ? \ - be32_to_cpu((dip)->di_nextents) : \ - be16_to_cpu((dip)->di_anextents)) /* * For block and character special files the 32bit dev_t is stored at the @@ -988,15 +1061,17 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ #define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ +#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) #define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { @@ -1004,6 +1079,13 @@ static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME)); } +static inline bool xfs_dinode_has_large_extent_counts( + const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64)); +} + /* * Inode number format: * low inopblog bits - offset in block @@ -1596,6 +1678,8 @@ typedef struct xfs_bmdr_block { #define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1) #define BMBT_BLOCKCOUNT_MASK ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1) +#define XFS_MAX_BMBT_EXTLEN ((xfs_extlen_t)(BMBT_BLOCKCOUNT_MASK)) + /* * bmbt records have a file offset (block) field that is 54 bits wide, so this * is the largest xfs_fileoff_t that we ever expect to see. diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 52c9d8676fa3..1cfd5bc6520a 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_REFLINK (1 << 20) /* files can share blocks */ #define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */ #define XFS_FSOP_GEOM_FLAGS_INOBTCNT (1 << 22) /* inobt btree counter */ +#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */ /* * Minimum and maximum sizes need for growth checks. @@ -377,7 +378,7 @@ struct xfs_bulkstat { uint32_t bs_extsize_blks; /* extent size hint, blocks */ uint32_t bs_nlink; /* number of links */ - uint32_t bs_extents; /* number of extents */ + uint32_t bs_extents; /* 32-bit data fork extent counter */ uint32_t bs_aextents; /* attribute number of extents */ uint16_t bs_version; /* structure version */ uint16_t bs_forkoff; /* inode fork offset in bytes */ @@ -386,8 +387,9 @@ struct xfs_bulkstat { uint16_t bs_checked; /* checked inode metadata */ uint16_t bs_mode; /* type and mode */ uint16_t bs_pad2; /* zeroed */ + uint64_t bs_extents64; /* 64-bit data fork extent counter */ - uint64_t bs_pad[7]; /* zeroed */ + uint64_t bs_pad[6]; /* zeroed */ }; #define XFS_BULKSTAT_VERSION_V1 (1) @@ -459,17 +461,28 @@ struct xfs_bulk_ireq { * Only return results from the specified @agno. If @ino is zero, start * with the first inode of @agno. */ -#define XFS_BULK_IREQ_AGNO (1 << 0) +#define XFS_BULK_IREQ_AGNO (1U << 0) /* * Return bulkstat information for a single inode, where @ino value is a * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_* * values below. Not compatible with XFS_BULK_IREQ_AGNO. */ -#define XFS_BULK_IREQ_SPECIAL (1 << 1) +#define XFS_BULK_IREQ_SPECIAL (1U << 1) -#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ - XFS_BULK_IREQ_SPECIAL) +/* + * Return data fork extent count via xfs_bulkstat->bs_extents64 field and assign + * 0 to xfs_bulkstat->bs_extents when the flag is set. Otherwise, use + * xfs_bulkstat->bs_extents for returning data fork extent count and set + * xfs_bulkstat->bs_extents64 to 0. In the second case, return -EOVERFLOW and + * assign 0 to xfs_bulkstat->bs_extents if data fork extent count is larger than + * XFS_MAX_EXTCNT_DATA_FORK_OLD. + */ +#define XFS_BULK_IREQ_NREXT64 (1U << 2) + +#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ + XFS_BULK_IREQ_SPECIAL | \ + XFS_BULK_IREQ_NREXT64) /* Operate on the root directory inode. */ #define XFS_BULK_IREQ_SPECIAL_ROOT (1) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 54c2be6a2972..bf2f4bc89193 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2772,6 +2772,8 @@ xfs_ialloc_setup_geometry( igeo->new_diflags2 = 0; if (xfs_has_bigtime(mp)) igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME; + if (xfs_has_large_extent_counts(mp)) + igeo->new_diflags2 |= XFS_DIFLAG2_NREXT64; /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index cae9708c8587..74b82ec80f8e 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -279,6 +279,25 @@ xfs_inode_to_disk_ts( return ts; } +static inline void +xfs_inode_to_disk_iext_counters( + struct xfs_inode *ip, + struct xfs_dinode *to) +{ + if (xfs_inode_has_large_extent_counts(ip)) { + to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); + to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp)); + /* + * We might be upgrading the inode to use larger extent counters + * than was previously used. Hence zero the unused field. + */ + to->di_nrext64_pad = cpu_to_be16(0); + } else { + to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); + to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); + } +} + void xfs_inode_to_disk( struct xfs_inode *ip, @@ -296,7 +315,6 @@ xfs_inode_to_disk( to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); - memset(to->di_pad, 0, sizeof(to->di_pad)); to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime); to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime); to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime); @@ -307,8 +325,6 @@ xfs_inode_to_disk( to->di_size = cpu_to_be64(ip->i_disk_size); to->di_nblocks = cpu_to_be64(ip->i_nblocks); to->di_extsize = cpu_to_be32(ip->i_extsize); - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); - to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); to->di_forkoff = ip->i_forkoff; to->di_aformat = xfs_ifork_format(ip->i_afp); to->di_flags = cpu_to_be16(ip->i_diflags); @@ -323,11 +339,14 @@ xfs_inode_to_disk( to->di_lsn = cpu_to_be64(lsn); memset(to->di_pad2, 0, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); - to->di_flushiter = 0; + to->di_v3_pad = 0; } else { to->di_version = 2; to->di_flushiter = cpu_to_be16(ip->i_flushiter); + memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); } + + xfs_inode_to_disk_iext_counters(ip, to); } static xfs_failaddr_t @@ -336,7 +355,10 @@ xfs_dinode_verify_fork( struct xfs_mount *mp, int whichfork) { - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); + xfs_extnum_t di_nextents; + xfs_extnum_t max_extents; + + di_nextents = xfs_dfork_nextents(dip, whichfork); switch (XFS_DFORK_FORMAT(dip, whichfork)) { case XFS_DINODE_FMT_LOCAL: @@ -358,12 +380,11 @@ xfs_dinode_verify_fork( return __this_address; break; case XFS_DINODE_FMT_BTREE: - if (whichfork == XFS_ATTR_FORK) { - if (di_nextents > MAXAEXTNUM) - return __this_address; - } else if (di_nextents > MAXEXTNUM) { + max_extents = xfs_iext_max_nextents( + xfs_dinode_has_large_extent_counts(dip), + whichfork); + if (di_nextents > max_extents) return __this_address; - } break; default: return __this_address; @@ -396,6 +417,24 @@ xfs_dinode_verify_forkoff( return NULL; } +static xfs_failaddr_t +xfs_dinode_verify_nrext64( + struct xfs_mount *mp, + struct xfs_dinode *dip) +{ + if (xfs_dinode_has_large_extent_counts(dip)) { + if (!xfs_has_large_extent_counts(mp)) + return __this_address; + if (dip->di_nrext64_pad != 0) + return __this_address; + } else if (dip->di_version >= 3) { + if (dip->di_v3_pad != 0) + return __this_address; + } + + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -407,6 +446,9 @@ xfs_dinode_verify( uint16_t flags; uint64_t flags2; uint64_t di_size; + xfs_extnum_t nextents; + xfs_extnum_t naextents; + xfs_filblks_t nblocks; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return __this_address; @@ -437,10 +479,19 @@ xfs_dinode_verify( if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) return __this_address; + fa = xfs_dinode_verify_nrext64(mp, dip); + if (fa) + return fa; + + nextents = xfs_dfork_data_extents(dip); + naextents = xfs_dfork_attr_extents(dip); + nblocks = be64_to_cpu(dip->di_nblocks); + /* Fork checks carried over from xfs_iformat_fork */ - if (mode && - be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks)) + if (mode && nextents + naextents > nblocks) + return __this_address; + + if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) return __this_address; if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) @@ -497,7 +548,7 @@ xfs_dinode_verify( default: return __this_address; } - if (dip->di_anextents) + if (naextents) return __this_address; } @@ -639,7 +690,7 @@ xfs_inode_validate_extsize( if (extsize_bytes % blocksize_bytes) return __this_address; - if (extsize > MAXEXTLEN) + if (extsize > XFS_MAX_BMBT_EXTLEN) return __this_address; if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) @@ -696,7 +747,7 @@ xfs_inode_validate_cowextsize( if (cowextsize_bytes % mp->m_sb.sb_blocksize) return __this_address; - if (cowextsize > MAXEXTLEN) + if (cowextsize > XFS_MAX_BMBT_EXTLEN) return __this_address; if (cowextsize > mp->m_sb.sb_agblocks / 2) |
