From f88ae46b09e93ef07ac9efaf85df62adb5ba58e6 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:16:37 -0800 Subject: xfs: glean crc status from mp not flags in xfs_btree_init_block_int xfs_btree_init_block_int() can determine whether crcs are in effect without the passed-in XFS_BTREE_CRC_BLOCKS flag; the mp argument allows us to determine this from the superblock. Remove the flag from callers, and use xfs_sb_version_hascrc(&mp->m_sb) internally instead. This removes one difference between the if & else cases in the callers. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 4 ++-- fs/xfs/libxfs/xfs_bmap_btree.c | 2 +- fs/xfs/libxfs/xfs_btree.c | 6 ++++-- fs/xfs/xfs_fsops.c | 14 ++++++-------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index bfc00de5c6f1..1d4b8d5edaaf 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -743,7 +743,7 @@ xfs_bmap_extents_to_btree( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + XFS_BTREE_LONG_PTRS); else xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, XFS_BMAP_MAGIC, 1, 1, ip->i_ino, @@ -820,7 +820,7 @@ try_another_ag: if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block_int(mp, ablock, abp->b_bn, XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + XFS_BTREE_LONG_PTRS); else xfs_btree_init_block_int(mp, ablock, abp->b_bn, XFS_BMAP_MAGIC, 0, 0, ip->i_ino, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d9be241fc86f..a80bf8080b1c 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -74,7 +74,7 @@ xfs_bmdr_to_bmbt( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block_int(mp, rblock, 
XFS_BUF_DADDR_NULL, XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); + XFS_BTREE_LONG_PTRS); else xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, XFS_BMAP_MAGIC, 0, 0, ip->i_ino, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 21e6a6ab6b9a..c91823c202b6 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1090,6 +1090,8 @@ xfs_btree_init_block_int( __u64 owner, unsigned int flags) { + int crc = xfs_sb_version_hascrc(&mp->m_sb); + buf->bb_magic = cpu_to_be32(magic); buf->bb_level = cpu_to_be16(level); buf->bb_numrecs = cpu_to_be16(numrecs); @@ -1097,7 +1099,7 @@ xfs_btree_init_block_int( if (flags & XFS_BTREE_LONG_PTRS) { buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); @@ -1110,7 +1112,7 @@ xfs_btree_init_block_int( buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); buf->bb_u.s.bb_owner = cpu_to_be32(__owner); uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 242e8091296d..21e3cdbaebbc 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -354,7 +354,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1, - agno, XFS_BTREE_CRC_BLOCKS); + agno, 0); else xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, agno, 0); @@ -383,7 +383,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1, - agno, XFS_BTREE_CRC_BLOCKS); + agno, 0); else xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, agno, 
0); @@ -414,7 +414,7 @@ xfs_growfs_data_private( } xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0, - agno, XFS_BTREE_CRC_BLOCKS); + agno, 0); block = XFS_BUF_TO_BLOCK(bp); @@ -490,7 +490,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0, - agno, XFS_BTREE_CRC_BLOCKS); + agno, 0); else xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, agno, 0); @@ -515,8 +515,7 @@ xfs_growfs_data_private( if (xfs_sb_version_hascrc(&mp->m_sb)) xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, - 0, 0, agno, - XFS_BTREE_CRC_BLOCKS); + 0, 0, agno, 0); else xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, 0, agno, 0); @@ -541,8 +540,7 @@ xfs_growfs_data_private( } xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC, - 0, 0, agno, - XFS_BTREE_CRC_BLOCKS); + 0, 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); -- cgit v1.2.3 From af7d20fd83d9e2b3111a847e4220bf943e2d531c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:16:38 -0800 Subject: xfs: make xfs_btree_magic more generic Right now the xfs_btree_magic() define takes only a cursor; change this to take crc and btnum args to make it more generically useful, and move to a function. This will allow xfs_btree_init_block_int callers which don't have a cursor to make use of the xfs_magics array, which will happen in the next patch. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_btree.c | 34 ++++++++++++++++++++++++++-------- fs/xfs/libxfs/xfs_btree.h | 2 ++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index c91823c202b6..18afab315445 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -50,8 +50,18 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, XFS_REFC_CRC_MAGIC } }; -#define xfs_btree_magic(cur) \ - xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] + +__uint32_t +xfs_btree_magic( + int crc, + xfs_btnum_t btnum) +{ + __uint32_t magic = xfs_magics[crc][btnum]; + + /* Ensure we asked for crc for crc-only magics. */ + ASSERT(magic != 0); + return magic; +} STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( @@ -62,10 +72,13 @@ xfs_btree_check_lblock( { int lblock_ok = 1; /* block passes checks */ struct xfs_mount *mp; /* file system mount point */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { lblock_ok = lblock_ok && uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -74,7 +87,7 @@ xfs_btree_check_lblock( } lblock_ok = lblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -110,13 +123,16 @@ xfs_btree_check_sblock( struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. 
freespace length */ int sblock_ok = 1; /* block passes checks */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); agbp = cur->bc_private.a.agbp; agf = XFS_BUF_TO_AGF(agbp); agflen = be32_to_cpu(agf->agf_length); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { sblock_ok = sblock_ok && uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -125,7 +141,7 @@ xfs_btree_check_sblock( } sblock_ok = sblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -1142,7 +1158,9 @@ xfs_btree_init_block_cur( int level, int numrecs) { - __u64 owner; + __u64 owner; + int crc = xfs_sb_version_hascrc(&cur->bc_mp->m_sb); + xfs_btnum_t btnum = cur->bc_btnum; /* * we can pull the owner from the cursor right now as the different @@ -1156,7 +1174,7 @@ xfs_btree_init_block_cur( owner = cur->bc_private.a.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - xfs_btree_magic(cur), level, numrecs, + xfs_btree_magic(crc, btnum), level, numrecs, owner, cur->bc_flags); } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b69b947c4c1b..95ea6ed0c14b 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -76,6 +76,8 @@ union xfs_btree_rec { #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) +__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); + /* * For logging record fields. 
*/ -- cgit v1.2.3 From b6f41e448277ff080fea734b93121e6cd7513f0c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:16:39 -0800 Subject: xfs: remove boilerplate around xfs_btree_init_block Now that xfs_btree_init_block_int is able to determine crc status from the passed-in mp, we can determine the proper magic as well if we are given a btree number, rather than an explicit magic value. Change xfs_btree_init_block[_int] callers to pass in the btree number, and let xfs_btree_init_block_int use the xfs_magics array via the xfs_btree_magic() function to determine which magic value is needed. This makes all of the if (crc) / else stanzas identical, and the if/else can be removed, leading to a single, common init_block call. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 19 ++++--------------- fs/xfs/libxfs/xfs_bmap_btree.c | 10 ++-------- fs/xfs/libxfs/xfs_btree.c | 11 +++++------ fs/xfs/libxfs/xfs_btree.h | 4 ++-- fs/xfs/xfs_fsops.c | 31 ++++++------------------------- 5 files changed, 19 insertions(+), 56 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1d4b8d5edaaf..d3da53e6a927 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -740,15 +740,9 @@ xfs_bmap_extents_to_btree( * Fill in the root. */ block = ifp->if_broot; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, + xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 1, 1, ip->i_ino, XFS_BTREE_LONG_PTRS); - else - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS); - /* * Need a cursor. Can't allocate until bb_level is filled in. 
*/ @@ -817,13 +811,8 @@ try_another_ag: */ abp->b_ops = &xfs_bmbt_buf_ops; ablock = XFS_BUF_TO_BLOCK(abp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); - else - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, ablock, abp->b_bn, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index a80bf8080b1c..f93072b58a58 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt( xfs_bmbt_key_t *tkp; __be64 *tpp; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); - else - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); - rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 18afab315445..421efa0ef778 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1100,13 +1100,14 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int flags) { int crc = xfs_sb_version_hascrc(&mp->m_sb); + __u32 magic = xfs_btree_magic(crc, btnum); buf->bb_magic = cpu_to_be32(magic); buf->bb_level = cpu_to_be16(level); @@ -1141,14 +1142,14 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int 
flags) { xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - magic, level, numrecs, owner, flags); + btnum, level, numrecs, owner, flags); } STATIC void @@ -1159,8 +1160,6 @@ xfs_btree_init_block_cur( int numrecs) { __u64 owner; - int crc = xfs_sb_version_hascrc(&cur->bc_mp->m_sb); - xfs_btnum_t btnum = cur->bc_btnum; /* * we can pull the owner from the cursor right now as the different @@ -1174,7 +1173,7 @@ xfs_btree_init_block_cur( owner = cur->bc_private.a.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - xfs_btree_magic(crc, btnum), level, numrecs, + cur->bc_btnum, level, numrecs, owner, cur->bc_flags); } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 95ea6ed0c14b..cdd4f05a5976 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -380,7 +380,7 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, @@ -391,7 +391,7 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 21e3cdbaebbc..6ccaae9eb0ee 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -352,12 +352,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1, - agno, 0); - else - xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, - agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); @@ -381,12 +376,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1, - agno, 0); - else - xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, - agno, 0); + 
xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); @@ -413,7 +403,7 @@ xfs_growfs_data_private( goto error0; } - xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0, + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0, agno, 0); block = XFS_BUF_TO_BLOCK(bp); @@ -488,12 +478,7 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0, - agno, 0); - else - xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, - agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -513,12 +498,8 @@ xfs_growfs_data_private( goto error0; } - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, + xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, 0, 0, agno, 0); - else - xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, - 0, agno, 0); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -539,7 +520,7 @@ xfs_growfs_data_private( goto error0; } - xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC, + xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, 0, 0, agno, 0); error = xfs_bwrite(bp); -- cgit v1.2.3 From 8ff6daa17b6a64e59bbabaa116b9bd854fa4da1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 23:20:26 -0800 Subject: iomap: constify struct iomap_ops Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/dax.c | 6 +++--- fs/ext2/ext2.h | 2 +- fs/ext2/inode.c | 4 ++-- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- fs/internal.h | 2 +- fs/iomap.c | 18 +++++++++--------- fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_iomap.h | 4 ++-- include/linux/dax.h | 8 ++++---- include/linux/iomap.h | 14 +++++++------- 11 files changed, 33 insertions(+), 33 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 3af2da5e64ce..78b9651576c6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1074,7 +1074,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, */ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, - struct iomap_ops *ops) + const struct iomap_ops *ops) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; @@ -1118,7 +1118,7 @@ static int dax_fault_return(int error) * necessary locking for the page fault to proceed successfully. */ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops) + const struct iomap_ops *ops) { struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; @@ -1317,7 +1317,7 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, } int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops) + pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops) { struct address_space *mapping = vma->vm_file->f_mapping; unsigned long pmd_addr = address & PMD_MASK; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 37e2be784ac7..5e64de9c5093 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -814,7 +814,7 @@ extern const struct file_operations ext2_file_operations; /* inode.c */ extern const struct address_space_operations ext2_aops; extern const struct address_space_operations ext2_nobh_aops; -extern struct iomap_ops ext2_iomap_ops; +extern const struct iomap_ops ext2_iomap_ops; /* namei.c */ extern const struct inode_operations 
ext2_dir_inode_operations; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index f073bfca694b..128cce540645 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -842,13 +842,13 @@ ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, return 0; } -struct iomap_ops ext2_iomap_ops = { +const struct iomap_ops ext2_iomap_ops = { .iomap_begin = ext2_iomap_begin, .iomap_end = ext2_iomap_end, }; #else /* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */ -struct iomap_ops ext2_iomap_ops; +const struct iomap_ops ext2_iomap_ops; #endif /* CONFIG_FS_DAX */ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2163c1e69f2a..ce70403c4707 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3253,7 +3253,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) } } -extern struct iomap_ops ext4_iomap_ops; +extern const struct iomap_ops ext4_iomap_ops; #endif /* __KERNEL__ */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 88d57af1b516..96c2e12cc5d6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3420,7 +3420,7 @@ orphan_del: return ret; } -struct iomap_ops ext4_iomap_ops = { +const struct iomap_ops ext4_iomap_ops = { .iomap_begin = ext4_iomap_begin, .iomap_end = ext4_iomap_end, }; diff --git a/fs/internal.h b/fs/internal.h index b63cf3af2dc2..11c6d89dce9c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -182,7 +182,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len, void *data, struct iomap *iomap); loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, - unsigned flags, struct iomap_ops *ops, void *data, + unsigned flags, const struct iomap_ops *ops, void *data, iomap_actor_t actor); /* direct-io.c: */ diff --git a/fs/iomap.c b/fs/iomap.c index 354a123f170e..7f08ca03d95d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -41,7 +41,7 @@ */ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, 
- struct iomap_ops *ops, void *data, iomap_actor_t actor) + const struct iomap_ops *ops, void *data, iomap_actor_t actor) { struct iomap iomap = { 0 }; loff_t written = 0, ret; @@ -232,7 +232,7 @@ again: ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, - struct iomap_ops *ops) + const struct iomap_ops *ops) { struct inode *inode = iocb->ki_filp->f_mapping->host; loff_t pos = iocb->ki_pos, ret = 0, written = 0; @@ -315,7 +315,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, - struct iomap_ops *ops) + const struct iomap_ops *ops) { loff_t ret; @@ -395,7 +395,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, - struct iomap_ops *ops) + const struct iomap_ops *ops) { loff_t ret; @@ -415,7 +415,7 @@ EXPORT_SYMBOL_GPL(iomap_zero_range); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - struct iomap_ops *ops) + const struct iomap_ops *ops) { unsigned blocksize = (1 << inode->i_blkbits); unsigned off = pos & (blocksize - 1); @@ -443,7 +443,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, } int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops) + const struct iomap_ops *ops) { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); @@ -542,7 +542,7 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, } int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, - loff_t start, loff_t len, struct iomap_ops *ops) + loff_t start, loff_t len, const struct iomap_ops *ops) { struct fiemap_ctx ctx; loff_t ret; @@ -836,8 +836,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, } ssize_t -iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops, - 
iomap_dio_end_io_t end_io) +iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, iomap_dio_end_io_t end_io) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1aa3abd67b36..25ed98324b27 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1144,7 +1144,7 @@ xfs_file_iomap_end( return 0; } -struct iomap_ops xfs_iomap_ops = { +const struct iomap_ops xfs_iomap_ops = { .iomap_begin = xfs_file_iomap_begin, .iomap_end = xfs_file_iomap_end, }; @@ -1190,6 +1190,6 @@ out_unlock: return error; } -struct iomap_ops xfs_xattr_iomap_ops = { +const struct iomap_ops xfs_xattr_iomap_ops = { .iomap_begin = xfs_xattr_iomap_begin, }; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 6d45cf01fcff..705224b66b6a 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -33,7 +33,7 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); -extern struct iomap_ops xfs_iomap_ops; -extern struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_iomap_ops; +extern const struct iomap_ops xfs_xattr_iomap_ops; #endif /* __XFS_IOMAP_H__*/ diff --git a/include/linux/dax.h b/include/linux/dax.h index 24ad71173995..2983e52efd07 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -37,9 +37,9 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) } ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, - struct iomap_ops *ops); + const struct iomap_ops *ops); int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); + const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int 
dax_invalidate_mapping_entry_sync(struct address_space *mapping, @@ -72,7 +72,7 @@ static inline unsigned int dax_radix_order(void *entry) return 0; } int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); + pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { @@ -80,7 +80,7 @@ static inline unsigned int dax_radix_order(void *entry) } static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags, - struct iomap_ops *ops) + const struct iomap_ops *ops) { return VM_FAULT_FALLBACK; } diff --git a/include/linux/iomap.h b/include/linux/iomap.h index a4c94b86401e..891459caa278 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -72,17 +72,17 @@ struct iomap_ops { }; ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, struct iomap_ops *ops); + bool *did_zero, const struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - loff_t start, loff_t len, struct iomap_ops *ops); + loff_t start, loff_t len, const struct iomap_ops *ops); /* * Flags for direct I/O ->end_io: @@ -92,6 +92,6 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, unsigned flags); ssize_t iomap_dio_rw(struct kiocb 
*iocb, struct iov_iter *iter, - struct iomap_ops *ops, iomap_dio_end_io_t end_io); + const struct iomap_ops *ops, iomap_dio_end_io_t end_io); #endif /* LINUX_IOMAP_H */ -- cgit v1.2.3 From 64f61ab6040c9f04ba181cca7580212f23b89f74 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:21:08 -0800 Subject: xfs: remove unused struct declarations After scratching my head looking for "xfs_busy_extent" I realized it's not used; it's xfs_extent_busy, and the declaration for the other name is bogus. Remove that and a few others as well. (struct xfs_log_callback is used, but the 2nd declaration is unnecessary). Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_log_recover.h | 1 - fs/xfs/xfs_log.h | 1 - fs/xfs/xfs_trace.h | 1 - fs/xfs/xfs_trans.h | 1 - 4 files changed, 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index d9f65e2d5cc8..29a01ec89dd0 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -42,7 +42,6 @@ typedef struct xlog_recover_item { xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; -struct xlog_tid; typedef struct xlog_recover { struct hlist_node r_list; xlog_tid_t r_log_tid; /* log's transaction id */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b5e71072fde5..cc5a9f1574e7 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -124,7 +124,6 @@ struct xlog_ticket; struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; -struct xfs_log_callback; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 69c5bcd9a51b..643222784c3b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2245,7 +2245,6 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); /* deferred ops */ struct xfs_defer_pending; -struct xfs_defer_intake; struct xfs_defer_ops; 
DECLARE_EVENT_CLASS(xfs_defer_class, diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 61b7fbdd3ebd..1646f659b60f 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -32,7 +32,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_trans_res; struct xfs_dquot_acct; -struct xfs_busy_extent; struct xfs_rud_log_item; struct xfs_rui_log_item; struct xfs_btree_cur; -- cgit v1.2.3 From a36b926180cda375ac2ec89e1748b47137cfc51c Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Jan 2017 23:22:55 -0800 Subject: xfs: pull up iolock from xfs_free_eofblocks() xfs_free_eofblocks() requires the IOLOCK_EXCL lock, but is called from different contexts where the lock may or may not be held. The need_iolock parameter exists for this reason, to indicate whether xfs_free_eofblocks() must acquire the iolock itself before it can proceed. This is ugly and confusing. Simplify the semantics of xfs_free_eofblocks() to require the caller to acquire the iolock appropriately and kill the need_iolock parameter. While here, the mp param can be removed as well as the xfs_mount is accessible from the xfs_inode structure. This patch does not change behavior. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_bmap_util.c | 41 ++++++++++++++++------------------------ fs/xfs/xfs_bmap_util.h | 3 +-- fs/xfs/xfs_icache.c | 24 +++++++++++++++--------- fs/xfs/xfs_inode.c | 51 +++++++++++++++++++++++++++----------------------- 4 files changed, 60 insertions(+), 59 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c1417919ab0a..9319ee9759d4 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) */ int xfs_free_eofblocks( - xfs_mount_t *mp, - xfs_inode_t *ip, - bool need_iolock) + struct xfs_inode *ip) { - xfs_trans_t *tp; - int error; - xfs_fileoff_t end_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t map_len; - int nimaps; - xfs_bmbt_irec_t imap; + struct xfs_trans *tp; + int error; + xfs_fileoff_t end_fsb; + xfs_fileoff_t last_fsb; + xfs_filblks_t map_len; + int nimaps; + struct xfs_bmbt_irec imap; + struct xfs_mount *mp = ip->i_mount; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); /* * Figure out if there are any blocks beyond the end @@ -944,6 +945,10 @@ xfs_free_eofblocks( error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* + * If there are blocks after the end of file, truncate the file to its + * current size to free them up. + */ if (!error && (nimaps != 0) && (imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks)) { @@ -954,22 +959,10 @@ xfs_free_eofblocks( if (error) return error; - /* - * There are blocks after the end of file. - * Free them up now by truncating the file to - * its current size. 
- */ - if (need_iolock) { - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) - return -EAGAIN; - } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -997,8 +990,6 @@ xfs_free_eofblocks( } xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); } return error; } @@ -1415,7 +1406,7 @@ xfs_shift_file_space( * into the accessible region of the file. */ if (xfs_can_free_eofblocks(ip, true)) { - error = xfs_free_eofblocks(mp, ip, false); + error = xfs_free_eofblocks(ip); if (error) return error; } diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 68a621a8e0c0..f1005393785c 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); -int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, - bool need_iolock); +int xfs_free_eofblocks(struct xfs_inode *ip); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, struct xfs_swapext *sx); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 70ca4f608321..c6b698f0fed9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1322,7 +1322,7 @@ xfs_inode_free_eofblocks( int flags, void *args) { - int ret; + int ret = 0; struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; @@ -1358,19 +1358,25 @@ xfs_inode_free_eofblocks( return 0; /* - * A scan owner implies we already hold the iolock. Skip it in - * xfs_free_eofblocks() to avoid deadlock. This also eliminates - * the possibility of EAGAIN being returned. + * A scan owner implies we already hold the iolock. Skip it here + * to avoid deadlock. 
*/ if (eofb->eof_scan_owner == ip->i_ino) need_iolock = false; } - ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); - - /* don't revisit the inode if we're not waiting */ - if (ret == -EAGAIN && !(flags & SYNC_WAIT)) - ret = 0; + /* + * If the caller is waiting, return -EAGAIN to keep the background + * scanner moving and revisit the inode in a subsequent pass. + */ + if (need_iolock && !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (flags & SYNC_WAIT) + ret = -EAGAIN; + return ret; + } + ret = xfs_free_eofblocks(ip); + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index de32f0fe47c8..edfa6a55b064 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1691,33 +1691,35 @@ xfs_release( if (xfs_can_free_eofblocks(ip, false)) { + /* + * Check if the inode is being opened, written and closed + * frequently and we have delayed allocation blocks outstanding + * (e.g. streaming writes from the NFS server), truncating the + * blocks past EOF will cause fragmentation to occur. + * + * In this case don't do the truncation, but we have to be + * careful how we detect this case. Blocks beyond EOF show up as + * i_delayed_blks even when the inode is clean, so we need to + * truncate them away first before checking for a dirty release. + * Hence on the first dirty close we will still remove the + * speculative allocation, but after that we will leave it in + * place. + */ + if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) + return 0; /* * If we can't get the iolock just skip truncating the blocks * past EOF because we could deadlock with the mmap_sem - * otherwise. We'll get another chance to drop them once the + * otherwise. We'll get another chance to drop them once the * last reference to the inode is dropped, so we'll never leak * blocks permanently. - * - * Further, check if the inode is being opened, written and - * closed frequently and we have delayed allocation blocks - * outstanding (e.g. 
streaming writes from the NFS server), - * truncating the blocks past EOF will cause fragmentation to - * occur. - * - * In this case don't do the truncation, either, but we have to - * be careful how we detect this case. Blocks beyond EOF show - * up as i_delayed_blks even when the inode is clean, so we - * need to truncate them away first before checking for a dirty - * release. Hence on the first dirty close we will still remove - * the speculative allocation, but after that we will leave it - * in place. */ - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) - return 0; - - error = xfs_free_eofblocks(mp, ip, true); - if (error && error != -EAGAIN) - return error; + if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + error = xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + if (error) + return error; + } /* delalloc blocks after truncation means it really is dirty */ if (ip->i_delayed_blks) @@ -1904,8 +1906,11 @@ xfs_inactive( * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. */ - if (xfs_can_free_eofblocks(ip, true)) - xfs_free_eofblocks(mp, ip, false); + if (xfs_can_free_eofblocks(ip, true)) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } return; } -- cgit v1.2.3 From c3155097ad89a956579bc305856a1f2878494e52 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Jan 2017 23:22:56 -0800 Subject: xfs: sync eofblocks scans under iolock are livelock prone The xfs_eofblocks.eof_scan_owner field is an internal field to facilitate invoking eofb scans from the kernel while under the iolock. This is necessary because the eofb scan acquires the iolock of each inode. Synchronous scans are invoked on certain buffered write failures while under iolock. In such cases, the scan owner indicates that the context for the scan already owns the particular iolock and prevents a double lock deadlock. 
eofblocks scans while under iolock are still livelock-prone in the event of multiple parallel scans, however. If multiple buffered writes to different inodes fail and invoke eofblocks scans at the same time, each scan avoids a deadlock with its own inode by virtue of the eof_scan_owner field, but will never be able to acquire the iolock of the inode from the parallel scan. Because the low-free-space scans are invoked with SYNC_WAIT, the scan will not return until it has processed every tagged inode and thus both scans will spin indefinitely on the iolock being held across the opposite scan. This problem can be reproduced reliably by generic/224 on systems with higher CPU counts (x16). To avoid this problem, simplify the semantics of eofblocks scans to never invoke a scan while under iolock. This means that the buffered write context must drop the iolock before the scan. It must reacquire the lock before the write retry and also repeat the initial write checks, as the original state might no longer be valid once the iolock was dropped. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_file.c | 13 +++++++++---- fs/xfs/xfs_icache.c | 45 +++++++-------------------------------------- fs/xfs/xfs_icache.h | 2 -- 3 files changed, 16 insertions(+), 44 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bbb9eb6811b2..0a29739f785e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -614,8 +614,10 @@ xfs_file_buffered_aio_write( struct xfs_inode *ip = XFS_I(inode); ssize_t ret; int enospc = 0; - int iolock = XFS_IOLOCK_EXCL; + int iolock; +write_retry: + iolock = XFS_IOLOCK_EXCL; xfs_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); @@ -625,7 +627,6 @@ xfs_file_buffered_aio_write( /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); -write_retry: trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); if (likely(ret >= 0)) @@ -641,18 +642,21 @@ write_retry: * running at the same time. */ if (ret == -EDQUOT && !enospc) { + xfs_iunlock(ip, iolock); enospc = xfs_inode_free_quota_eofblocks(ip); if (enospc) goto write_retry; enospc = xfs_inode_free_quota_cowblocks(ip); if (enospc) goto write_retry; + iolock = 0; } else if (ret == -ENOSPC && !enospc) { struct xfs_eofblocks eofb = {0}; enospc = 1; xfs_flush_inodes(ip->i_mount); - eofb.eof_scan_owner = ip->i_ino; /* for locking */ + + xfs_iunlock(ip, iolock); eofb.eof_flags = XFS_EOF_FLAGS_SYNC; xfs_icache_free_eofblocks(ip->i_mount, &eofb); goto write_retry; @@ -660,7 +664,8 @@ write_retry: current->backing_dev_info = NULL; out: - xfs_iunlock(ip, iolock); + if (iolock) + xfs_iunlock(ip, iolock); return ret; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c6b698f0fed9..7234b9748c36 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1324,11 +1324,8 @@ xfs_inode_free_eofblocks( { int ret = 0; struct xfs_eofblocks *eofb = args; - bool need_iolock = true; int match; - ASSERT(!eofb || (eofb && 
eofb->eof_scan_owner != 0)); - if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); @@ -1356,27 +1353,19 @@ xfs_inode_free_eofblocks( if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; - - /* - * A scan owner implies we already hold the iolock. Skip it here - * to avoid deadlock. - */ - if (eofb->eof_scan_owner == ip->i_ino) - need_iolock = false; } /* * If the caller is waiting, return -EAGAIN to keep the background * scanner moving and revisit the inode in a subsequent pass. */ - if (need_iolock && !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { if (flags & SYNC_WAIT) ret = -EAGAIN; return ret; } ret = xfs_free_eofblocks(ip); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } @@ -1423,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks( struct xfs_eofblocks eofb = {0}; struct xfs_dquot *dq; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - /* - * Set the scan owner to avoid a potential livelock. Otherwise, the scan - * can repeatedly trylock on the inode we're currently processing. We - * run a sync scan to increase effectiveness and use the union filter to + * Run a sync scan to increase effectiveness and use the union filter to * cover all applicable quotas in a single scan. */ - eofb.eof_scan_owner = ip->i_ino; eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { @@ -1583,12 +1567,9 @@ xfs_inode_free_cowblocks( { int ret; struct xfs_eofblocks *eofb = args; - bool need_iolock = true; int match; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. 
@@ -1621,28 +1602,16 @@ xfs_inode_free_cowblocks( if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; - - /* - * A scan owner implies we already hold the iolock. Skip it in - * xfs_free_eofblocks() to avoid deadlock. This also eliminates - * the possibility of EAGAIN being returned. - */ - if (eofb->eof_scan_owner == ip->i_ino) - need_iolock = false; } /* Free the CoW blocks */ - if (need_iolock) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - } + xfs_ilock(ip, XFS_IOLOCK_EXCL); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); - if (need_iolock) { - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index a1e02f4708ab..8a7c849b4dea 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -27,7 +27,6 @@ struct xfs_eofblocks { kgid_t eof_gid; prid_t eof_prid; __u64 eof_min_file_size; - xfs_ino_t eof_scan_owner; }; #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ @@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user( dst->eof_flags = src->eof_flags; dst->eof_prid = src->eof_prid; dst->eof_min_file_size = src->eof_min_file_size; - dst->eof_scan_owner = NULLFSINO; dst->eof_uid = INVALID_UID; if (src->eof_flags & XFS_EOF_FLAGS_UID) { -- cgit v1.2.3 From e4229d6b0bc9280f29624faf170cf76a9f1ca60e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 27 Jan 2017 23:22:57 -0800 Subject: xfs: fix eofblocks race with file extending async dio writes It's possible for post-eof blocks to end up being used for direct I/O writes. dio write performs an upfront unwritten extent allocation, sends the dio and then updates the inode size (if necessary) on write completion. 
If a file release occurs while a file-extending dio write is in flight, it is possible to mistake the post-eof blocks for speculative preallocation and incorrectly truncate them from the inode. This means that the resulting dio write completion can discover a hole and allocate new blocks rather than perform unwritten extent conversion. This requires a strange mix of I/O and is thus not likely to reproduce in real-world workloads. It is intermittently reproduced by generic/299. The error manifests as an assert failure due to transaction overrun because the aforementioned write completion transaction has only reserved enough blocks for btree operations: XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, \ file: fs/xfs//xfs_trans.c, line: 309 The root cause is that xfs_free_eofblocks() uses i_size to truncate post-eof blocks from the inode, but async, file-extending direct writes do not update i_size until write completion, long after inode locks are dropped. Therefore, xfs_free_eofblocks() effectively truncates the inode to an incorrect size. Update xfs_free_eofblocks() to serialize against dio, similar to how extending writes are serialized against i_size updates before post-eof block zeroing. Specifically, wait on dio while under the iolock. This ensures that dio write completions have updated i_size before post-eof blocks are processed. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_bmap_util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 9319ee9759d4..eb890ed1ed5c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -959,6 +959,9 @@ xfs_free_eofblocks( if (error) return error; + /* wait on dio to ensure i_size has settled */ + inode_dio_wait(VFS_I(ip)); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { -- cgit v1.2.3 From 1dbba0863468f509f3fccb498b34f1b1e24356d9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jan 2017 23:24:28 -0800 Subject: xfs: remove unused full argument from bmap The "full" argument was used only by the fiemap formatter, which is now gone with the iomap updates. Remove the unused arg. Signed-off-by: Eric Sandeen Reviewed-by: Alex Elder Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 6 ++---- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_ioctl.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index eb890ed1ed5c..7c3bfafffba8 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -787,11 +787,9 @@ xfs_getbmap( xfs_iunlock(ip, XFS_IOLOCK_SHARED); for (i = 0; i < cur_ext; i++) { - int full = 0; /* user array is full */ - /* format results & advance arg */ - error = formatter(&arg, &out[i], &full); - if (error || full) + error = formatter(&arg, &out[i]); + if (error) break; } diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index f1005393785c..135d8267e284 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, xfs_fileoff_t start_fsb, xfs_fileoff_t length); /* bmap to userspace formatter - copy to user & advance pointer */ -typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); +typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); int xfs_getbmap(struct 
xfs_inode *ip, struct getbmapx *bmv, xfs_bmap_format_t formatter, void *arg); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c67cfb451fd3..cf1363dbf32b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1524,7 +1524,7 @@ out_drop_write: } STATIC int -xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmap_format(void **ap, struct getbmapx *bmv) { struct getbmap __user *base = (struct getbmap __user *)*ap; @@ -1567,7 +1567,7 @@ xfs_ioc_getbmap( } STATIC int -xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +xfs_getbmapx_format(void **ap, struct getbmapx *bmv) { struct getbmapx __user *base = (struct getbmapx __user *)*ap; -- cgit v1.2.3 From 4b5bd5bf3fb182dc504b1b64e0331300f156e756 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:13:57 -0800 Subject: xfs: fix toctou race when locking an inode to access the data map We use di_format and if_flags to decide whether we're grabbing the ilock in btree mode (btree extents not loaded) or shared mode (anything else), but the state of those fields can be changed by other threads that are also trying to load the btree extents -- IFEXTENTS gets set before the _bmap_read_extents call and cleared if it fails. We don't actually need to have IFEXTENTS set until after the bmbt records are successfully loaded and validated, which will fix the race between multiple threads trying to read the same directory. The next patch strengthens directory bmbt validation by refusing to open the directory if reading the bmbt to start directory readahead fails. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_fork.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 222e103356c6..421341f93bea 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -497,15 +497,14 @@ xfs_iread_extents( * We know that the size is valid (it's checked in iformat_btree) */ ifp->if_bytes = ifp->if_real_bytes = 0; - ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { xfs_iext_destroy(ifp); - ifp->if_flags &= ~XFS_IFEXTENTS; return error; } xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); + ifp->if_flags |= XFS_IFEXTENTS; return 0; } /* -- cgit v1.2.3 From 7a652bbe366464267190c2792a32ce4fff5595ef Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:13:58 -0800 Subject: xfs: fail _dir_open when readahead fails When we open a directory, we try to readahead block 0 of the directory on the assumption that we're going to need it soon. If the bmbt is corrupt, the directory will never be usable and the readahead fails immediately, so we might as well prevent the directory from being opened at all. This prevents a subsequent read or modify operation from hitting it and taking the fs offline. NOTE: We're only checking for early failures in the block mapping, not the readahead directory block itself. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_da_btree.c | 6 ++---- fs/xfs/libxfs/xfs_da_btree.h | 2 +- fs/xfs/xfs_file.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index f2dc1a950c85..1bdf2888295b 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2633,7 +2633,7 @@ out_free: /* * Readahead the dir/attr block. 
*/ -xfs_daddr_t +int xfs_da_reada_buf( struct xfs_inode *dp, xfs_dablk_t bno, @@ -2664,7 +2664,5 @@ out_free: if (mapp != &map) kmem_free(mapp); - if (error) - return -1; - return mappedbno; + return error; } diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 98c75cbe6ac2..4e29cb6a3627 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops); -xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, +int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, int whichfork, const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0a29739f785e..032c8a74824a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -913,9 +913,9 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(ip, 0, -1); + error = xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); - return 0; + return error; } STATIC int -- cgit v1.2.3 From d5a91baeb6033c3392121e4d5c011cdc08dfa9f7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:13:58 -0800 Subject: xfs: filter out obviously bad btree pointers Don't let anybody load an obviously bad btree pointer. Since the values come from disk, we must return an error, not just ASSERT. Signed-off-by: Darrick J. 
Wong Reviewed-by: Eric Sandeen --- fs/xfs/libxfs/xfs_bmap.c | 5 +---- fs/xfs/libxfs/xfs_btree.c | 3 ++- fs/xfs/libxfs/xfs_btree.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d3da53e6a927..2e91eb66d32f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1267,7 +1267,6 @@ xfs_bmap_read_extents( /* REFERENCED */ xfs_extnum_t room; /* number of entries there's room for */ - bno = NULLFSBLOCK; mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : @@ -1280,9 +1279,7 @@ xfs_bmap_read_extents( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLFSBLOCK); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* * Go down the tree until leaf level is reached, following the first * pointer (leftmost) at each level. 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 421efa0ef778..c3decedc9455 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -826,7 +826,8 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - ASSERT(fsbno != NULLFSBLOCK); + if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, mp->m_bsize, lock, &bp, ops); diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index cdd4f05a5976..4bb62580a7fd 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -458,7 +458,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) #define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ + (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) /* -- cgit v1.2.3 From b3bf607d58520ea8c0666aeb4be60dbb724cd3a2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:13:59 -0800 Subject: xfs: check for obviously bad level values in the bmbt root We can't handle a bmbt that's taller than BTREE_MAXLEVELS, and there's no such thing as a zero-level bmbt (for that we have extents format), so if we see this, send back an error code. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_fork.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 421341f93bea..25c1e078aef6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,6 +26,7 @@ #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" +#include "xfs_btree.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -429,11 +430,13 @@ xfs_iformat_btree( /* REFERENCED */ int nrecs; int size; + int level; ifp = XFS_IFORK_PTR(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = XFS_BMAP_BROOT_SPACE(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); + level = be16_to_cpu(dfp->bb_level); /* * blow out if -- fork has less extents than can fit in @@ -446,7 +449,8 @@ xfs_iformat_btree( XFS_IFORK_MAXEXT(ip, whichfork) || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || + level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, -- cgit v1.2.3 From de14c5f541e78c59006bee56f6c5c2ef1ca07272 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:14:00 -0800 Subject: xfs: verify free block header fields Perform basic sanity checking of the directory free block header fields so that we avoid hanging the system on invalid data. (Granted that just means that now we shut down on directory write, but that seems better than hanging...) Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_dir2_node.c | 51 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 75a557432d0f..bbd1238852b3 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .verify_write = xfs_dir3_free_write_verify, }; +/* Everything ok in the free block header? */ +static bool +xfs_dir3_free_header_check( + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + unsigned int firstdb; + int maxbests; + + maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); + firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - + xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * + maxbests; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; + + if (be32_to_cpu(hdr3->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr3->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) + return false; + } else { + struct xfs_dir2_free_hdr *hdr = bp->b_addr; + + if (be32_to_cpu(hdr->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) + return false; + } + return true; +} static int __xfs_dir3_free_read( @@ -168,11 +204,22 @@ __xfs_dir3_free_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_free_buf_ops); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. 
*/ + if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { + xfs_buf_ioerror(*bpp, -EFSCORRUPTED); + xfs_verifier_error(*bpp); + xfs_trans_brelse(tp, *bpp); + return -EFSCORRUPTED; + } /* try read returns without an error or *bpp if it lands in a hole */ - if (!err && tp && *bpp) + if (tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); - return err; + + return 0; } int -- cgit v1.2.3 From 05a630d76bd3f39baf0eecfa305bed2820796dee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 15:14:01 -0800 Subject: xfs: allow unwritten extents in the CoW fork In the data fork, we only allow extents to perform the following state transitions: delay -> real <-> unwritten There's no way to move directly from a delalloc reservation to an /unwritten/ allocated extent. However, for the CoW fork we want to be able to do the following to each extent: delalloc -> unwritten -> written -> remapped to data fork This will help us to avoid a race in the speculative CoW preallocation code between a first thread that is allocating a CoW extent and a second thread that is remapping part of a file after a write. In order to do this, however, we need two things: first, we have to be able to transition from da to unwritten, and second the function that converts between real and unwritten has to be made aware of the cow fork. Do both of those things. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 80 ++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2e91eb66d32f..dcffbb09444e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1850,6 +1850,7 @@ xfs_bmap_add_extent_delay_real( */ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_state(ep, new->br_state); trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); (*nextents)++; @@ -2188,6 +2189,7 @@ STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ + int whichfork, xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ @@ -2207,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - struct xfs_mount *mp = tp->t_m