summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_btree.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2014-06-25 14:57:53 +1000
committerDave Chinner <david@fromorbit.com>2014-06-25 14:57:53 +1000
commit30f712c9dd69348aa51351d5cb6d366bf4fae31d (patch)
tree9b246ca3986b784d8f9f6841466dfd4c90909e47 /fs/xfs/libxfs/xfs_btree.c
parent84be0ffc9043f7c56044294eb775a2200452c76d (diff)
downloadlinux-30f712c9dd69348aa51351d5cb6d366bf4fae31d.tar.gz
linux-30f712c9dd69348aa51351d5cb6d366bf4fae31d.tar.bz2
linux-30f712c9dd69348aa51351d5cb6d366bf4fae31d.zip
libxfs: move source files
Move all the source files that are shared with userspace into libxfs/. This is done as one big chunk simpy to get it done quickly Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_btree.c')
-rw-r--r--fs/xfs/libxfs/xfs_btree.c3989
1 files changed, 3989 insertions, 0 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
new file mode 100644
index 000000000000..036b4fd34bf7
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -0,0 +1,3989 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_btree.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+
+/*
+ * Cursor allocation zone.
+ */
+kmem_zone_t *xfs_btree_cur_zone;
+
+/*
+ * Btree magic numbers.
+ */
+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+ { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+ XFS_FIBT_MAGIC },
+ { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
+};
+#define xfs_btree_magic(cur) \
+ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
+
+
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree long form block pointer */
+ int level, /* level of the btree block */
+ struct xfs_buf *bp) /* buffer for block, if any */
+{
+ int lblock_ok = 1; /* block passes checks */
+ struct xfs_mount *mp; /* file system mount point */
+
+ mp = cur->bc_mp;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ lblock_ok = lblock_ok &&
+ uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.l.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ lblock_ok = lblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be16_to_cpu(block->bb_level) == level &&
+ be16_to_cpu(block->bb_numrecs) <=
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ block->bb_u.l.bb_leftsib &&
+ (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+ block->bb_u.l.bb_rightsib &&
+ (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
+ if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+ XFS_ERRTAG_BTREE_CHECK_LBLOCK,
+ XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+ if (bp)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ return EFSCORRUPTED;
+ }
+ return 0;
+}
+
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree short form block pointer */
+ int level, /* level of the btree block */
+ struct xfs_buf *bp) /* buffer containing block */
+{
+ struct xfs_mount *mp; /* file system mount point */
+ struct xfs_buf *agbp; /* buffer for ag. freespace struct */
+ struct xfs_agf *agf; /* ag. freespace structure */
+ xfs_agblock_t agflen; /* native ag. freespace length */
+ int sblock_ok = 1; /* block passes checks */
+
+ mp = cur->bc_mp;
+ agbp = cur->bc_private.a.agbp;
+ agf = XFS_BUF_TO_AGF(agbp);
+ agflen = be32_to_cpu(agf->agf_length);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ sblock_ok = sblock_ok &&
+ uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.s.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ sblock_ok = sblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be16_to_cpu(block->bb_level) == level &&
+ be16_to_cpu(block->bb_numrecs) <=
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
+ be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
+ block->bb_u.s.bb_leftsib &&
+ (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
+ be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
+ block->bb_u.s.bb_rightsib;
+
+ if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
+ XFS_ERRTAG_BTREE_CHECK_SBLOCK,
+ XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+ if (bp)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ return EFSCORRUPTED;
+ }
+ return 0;
+}
+
+/*
+ * Debug routine: check that block header is ok.
+ */
+int
+xfs_btree_check_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* generic btree block pointer */
+ int level, /* level of the btree block */
+ struct xfs_buf *bp) /* buffer containing block, if any */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_check_lblock(cur, block, level, bp);
+ else
+ return xfs_btree_check_sblock(cur, block, level, bp);
+}
+
+/*
+ * Check that (long) pointer is ok.
+ */
+int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_dfsbno_t bno, /* btree block disk address */
+ int level) /* btree block level */
+{
+ XFS_WANT_CORRUPTED_RETURN(
+ level > 0 &&
+ bno != NULLDFSBNO &&
+ XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
+ return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that (short) pointer is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* btree block disk address */
+ int level) /* btree block level */
+{
+ xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
+
+ XFS_WANT_CORRUPTED_RETURN(
+ level > 0 &&
+ bno != NULLAGBLOCK &&
+ bno != 0 &&
+ bno < agblocks);
+ return 0;
+}
+
+/*
+ * Check that block ptr is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_ptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ union xfs_btree_ptr *ptr, /* btree block disk address */
+ int index, /* offset from ptr to check */
+ int level) /* btree block level */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ return xfs_btree_check_lptr(cur,
+ be64_to_cpu((&ptr->l)[index]), level);
+ } else {
+ return xfs_btree_check_sptr(cur,
+ be32_to_cpu((&ptr->s)[index]), level);
+ }
+}
+#endif
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * long-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_lblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_lblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
+ return true;
+}
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * short-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_sblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_sblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
+ return true;
+}
+
+/*
+ * Delete the btree cursor.
+ */
+void
+xfs_btree_del_cursor(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int error) /* del because of error */
+{
+ int i; /* btree level */
+
+ /*
+ * Clear the buffer pointers, and release the buffers.
+ * If we're doing this in the face of an error, we
+ * need to make sure to inspect all of the entries
+ * in the bc_bufs array for buffers to be unlocked.
+ * This is because some of the btree code works from
+ * level n down to 0, and if we get an error along
+ * the way we won't have initialized all the entries
+ * down to 0.
+ */
+ for (i = 0; i < cur->bc_nlevels; i++) {
+ if (cur->bc_bufs[i])
+ xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+ else if (!error)
+ break;
+ }
+ /*
+ * Can't free a bmap cursor without having dealt with the
+ * allocated indirect blocks' accounting.
+ */
+ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+ cur->bc_private.b.allocated == 0);
+ /*
+ * Free the cursor.
+ */
+ kmem_zone_free(xfs_btree_cur_zone, cur);
+}
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ */
+int /* error */
+xfs_btree_dup_cursor(
+ xfs_btree_cur_t *cur, /* input cursor */
+ xfs_btree_cur_t **ncur) /* output cursor */
+{
+ xfs_buf_t *bp; /* btree block's buffer pointer */
+ int error; /* error return value */
+ int i; /* level number of btree block */
+ xfs_mount_t *mp; /* mount structure for filesystem */
+ xfs_btree_cur_t *new; /* new cursor value */
+ xfs_trans_t *tp; /* transaction pointer, can be NULL */
+
+ tp = cur->bc_tp;
+ mp = cur->bc_mp;
+
+ /*
+ * Allocate a new cursor like the old one.
+ */
+ new = cur->bc_ops->dup_cursor(cur);
+
+ /*
+ * Copy the record currently in the cursor.
+ */
+ new->bc_rec = cur->bc_rec;
+
+ /*
+ * For each level current, re-get the buffer and copy the ptr value.
+ */
+ for (i = 0; i < new->bc_nlevels; i++) {
+ new->bc_ptrs[i] = cur->bc_ptrs[i];
+ new->bc_ra[i] = cur->bc_ra[i];
+ bp = cur->bc_bufs[i];
+ if (bp) {
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_BUF_ADDR(bp), mp->m_bsize,
+ 0, &bp,
+ cur->bc_ops->buf_ops);
+ if (error) {
+ xfs_btree_del_cursor(new, error);
+ *ncur = NULL;
+ return error;
+ }
+ }
+ new->bc_bufs[i] = bp;
+ }
+ *ncur = new;
+ return 0;
+}
+
+/*
+ * XFS btree block layout and addressing:
+ *
+ * There are two types of blocks in the btree: leaf and non-leaf blocks.
+ *
+ * The leaf record start with a header then followed by records containing
+ * the values. A non-leaf block also starts with the same header, and
+ * then first contains lookup keys followed by an equal number of pointers
+ * to the btree blocks at the previous level.
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * The header is called struct xfs_btree_block for reasons better left unknown
+ * and comes in different versions for short (32bit) and long (64bit) block
+ * pointers. The record and key structures are defined by the btree instances
+ * and opaque to the btree core. The block pointers are simple disk endian
+ * integers, available in a short (32bit) and long (64bit) variant.
+ *
+ * The helpers below calculate the offset of a given record, key or pointer
+ * into a btree block (xfs_btree_*_offset) or return a pointer to the given
+ * record, key or pointer (xfs_btree_*_addr). Note that all addressing
+ * inside the btree block is done using indices starting at one, not zero!
+ */
+
+/*
+ * Return size of the btree block header for this btree instance.
+ */
+static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_LBLOCK_CRC_LEN;
+ return XFS_BTREE_LBLOCK_LEN;
+ }
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_SBLOCK_CRC_LEN;
+ return XFS_BTREE_SBLOCK_LEN;
+}
+
+/*
+ * Return size of btree block pointers for this btree instance.
+ */
+static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
+{
+ return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ sizeof(__be64) : sizeof(__be32);
+}
+
+/*
+ * Calculate offset of the n-th record in a btree block.
+ */
+STATIC size_t
+xfs_btree_rec_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->rec_len;
+}
+
+/*
+ * Calculate offset of the n-th key in a btree block.
+ */
+STATIC size_t
+xfs_btree_key_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->key_len;
+}
+
+/*
+ * Calculate offset of the n-th block pointer in a btree block.
+ */
+STATIC size_t
+xfs_btree_ptr_offset(
+ struct xfs_btree_cur *cur,
+ int n,
+ int level)
+{
+ return xfs_btree_block_len(cur) +
+ cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
+ (n - 1) * xfs_btree_ptr_len(cur);
+}
+
+/*
+ * Return a pointer to the n-th record in the btree block.
+ */
+STATIC union xfs_btree_rec *
+xfs_btree_rec_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_rec *)
+ ((char *)block + xfs_btree_rec_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th key in the btree block.
+ */
+STATIC union xfs_btree_key *
+xfs_btree_key_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_key *)
+ ((char *)block + xfs_btree_key_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th block pointer in the btree block.
+ */
+STATIC union xfs_btree_ptr *
+xfs_btree_ptr_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ int level = xfs_btree_get_level(block);
+
+ ASSERT(block->bb_level != 0);
+
+ return (union xfs_btree_ptr *)
+ ((char *)block + xfs_btree_ptr_offset(cur, n, level));
+}
+
+/*
+ * Get the root block which is stored in the inode.
+ *
+ * For now this btree implementation assumes the btree root is always
+ * stored in the if_broot field of an inode fork.
+ */
+STATIC struct xfs_btree_block *
+xfs_btree_get_iroot(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+ return (struct xfs_btree_block *)ifp->if_broot;
+}
+
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be an inode btree root or from a buffer.
+ */
+STATIC struct xfs_btree_block * /* generic btree block pointer */
+xfs_btree_get_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level in btree */
+ struct xfs_buf **bpp) /* buffer containing the block */
+{
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1)) {
+ *bpp = NULL;
+ return xfs_btree_get_iroot(cur);
+ }
+
+ *bpp = cur->bc_bufs[level];
+ return XFS_BUF_TO_BLOCK(*bpp);
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Long-form addressing.
+ */
+xfs_buf_t * /* buffer for fsbno */
+xfs_btree_get_bufl(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_fsblock_t fsbno, /* file system block number */
+ uint lock) /* lock flags for get_buf */
+{
+ xfs_daddr_t d; /* real disk block address */
+
+ ASSERT(fsbno != NULLFSBLOCK);
+ d = XFS_FSB_TO_DADDR(mp, fsbno);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Short-form addressing.
+ */
+xfs_buf_t * /* buffer for agno/agbno */
+xfs_btree_get_bufs(
+ xfs_mount_t *mp, /* file system mount point */
+ xfs_trans_t *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_agblock_t agbno, /* allocation group block number */
+ uint lock) /* lock flags for get_buf */
+{
+ xfs_daddr_t d; /* real disk block address */
+
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agbno != NULLAGBLOCK);
+ d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+}
+
+/*
+ * Check for the cursor referring to the last block at the given level.
+ */
+int /* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int level) /* level to check */
+{
+ struct xfs_btree_block *block; /* generic btree block pointer */
+ xfs_buf_t *bp; /* buffer containing block */
+
+ block = xfs_btree_get_block(cur, level, &bp);
+ xfs_btree_check_block(cur, block, level, bp);
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
+ else
+ return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Change the cursor to point to the first record at the given level.
+ * Other levels are unaffected.
+ */
+STATIC int /* success=1, failure=0 */
+xfs_btree_firstrec(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int level) /* level to change */
+{
+ struct xfs_btree_block *block; /* generic btree block pointer */
+ xfs_buf_t *bp; /* buffer containing block */
+
+ /*
+ * Get the block pointer for this level.
+ */
+ block = xfs_btree_get_block(cur, level, &bp);
+ xfs_btree_check_block(cur, block, level, bp);
+ /*
+ * It's empty, there is no such record.
+ */
+ if (!block->bb_numrecs)
+ return 0;
+ /*
+ * Set the ptr value to 1, that's the first record/key.
+ */
+ cur->bc_ptrs[level] = 1;
+ return 1;
+}
+
+/*
+ * Change the cursor to point to the last record in the current block
+ * at the given level. Other levels are unaffected.
+ */
+STATIC int /* success=1, failure=0 */
+xfs_btree_lastrec(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int level) /* level to change */
+{
+ struct xfs_btree_block *block; /* generic btree block pointer */
+ xfs_buf_t *bp; /* buffer containing block */
+
+ /*
+ * Get the block pointer for this level.
+ */
+ block = xfs_btree_get_block(cur, level, &bp);
+ xfs_btree_check_block(cur, block, level, bp);
+ /*
+ * It's empty, there is no such record.
+ */
+ if (!block->bb_numrecs)
+ return 0;
+ /*
+ * Set the ptr value to numrecs, that's the last record/key.
+ */
+ cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
+ return 1;
+}
+
+/*
+ * Compute first and last byte offsets for the fields given.
+ * Interprets the offsets table, which contains struct field offsets.
+ */
+void
+xfs_btree_offsets(
+ __int64_t fields, /* bitmask of fields */
+ const short *offsets, /* table of field offsets */
+ int nbits, /* number of bits to inspect */
+ int *first, /* output: first byte offset */
+ int *last) /* output: last byte offset */
+{
+ int i; /* current bit number */
+ __int64_t imask; /* mask for current bit number */
+
+ ASSERT(fields != 0);
+ /*
+ * Find the lowest bit, so the first byte offset.
+ */
+ for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
+ if (imask & fields) {
+ *first = offsets[i];
+ break;
+ }
+ }
+ /*
+ * Find the highest bit, so the last byte offset.
+ */
+ for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
+ if (imask & fields) {
+ *last = offsets[i + 1] - 1;
+ break;
+ }
+ }
+}
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Long-form addressing.
+ */
+int
+xfs_btree_read_bufl(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_fsblock_t fsbno, /* file system block number */
+ uint lock, /* lock flags for read_buf */
+ struct xfs_buf **bpp, /* buffer for fsbno */
+ int refval, /* ref count value for buffer */
+ const struct xfs_buf_ops *ops)
+{
+ struct xfs_buf *bp; /* return value */
+ xfs_daddr_t d; /* real disk block address */
+ int error;
+
+ ASSERT(fsbno != NULLFSBLOCK);
+ d = XFS_FSB_TO_DADDR(mp, fsbno);
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+ mp->m_bsize, lock, &bp, ops);
+ if (error)
+ return error;
+ if (bp)
+ xfs_buf_set_ref(bp, refval);
+ *bpp = bp;
+ return 0;
+}
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Long-form addressing.
+ */
+/* ARGSUSED */
+void
+xfs_btree_reada_bufl(
+ struct xfs_mount *mp, /* file system mount point */
+ xfs_fsblock_t fsbno, /* file system block number */
+ xfs_extlen_t count, /* count of filesystem blocks */
+ const struct xfs_buf_ops *ops)
+{
+ xfs_daddr_t d;
+
+ ASSERT(fsbno != NULLFSBLOCK);
+ d = XFS_FSB_TO_DADDR(mp, fsbno);
+ xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+}
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Short-form addressing.
+ */
+/* ARGSUSED */
+void
+xfs_btree_reada_bufs(
+ struct xfs_mount *mp, /* file system mount point */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_agblock_t agbno, /* allocation group block number */
+ xfs_extlen_t count, /* count of filesystem blocks */
+ const struct xfs_buf_ops *ops)
+{
+ xfs_daddr_t d;
+
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agbno != NULLAGBLOCK);
+ d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+}
+
+STATIC int
+xfs_btree_readahead_lblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, left, 1,
+ cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, right, 1,
+ cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ return rval;
+}
+
+STATIC int
+xfs_btree_readahead_sblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
+ xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
+
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ left, 1, cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ right, 1, cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ return rval;
+}
+
+/*
+ * Read-ahead btree blocks, at the given level.
+ * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ */
+STATIC int
+xfs_btree_readahead(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int lev, /* level in btree */
+ int lr) /* left/right bits */
+{
+ struct xfs_btree_block *block;
+
+ /*
+ * No readahead needed if we are at the root level and the
+ * btree root is stored in the inode.
+ */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (lev == cur->bc_nlevels - 1))
+ return 0;
+
+ if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+ return 0;
+
+ cur->bc_ra[lev] |= lr;
+ block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_readahead_lblock(cur, lr, block);
+ return xfs_btree_readahead_sblock(cur, lr, block);
+}
+
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+ return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ } else {
+ ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+ ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+ return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+ be32_to_cpu(ptr->s));
+ }
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * method of converting the ptr directly to a daddr available to us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ xfs_extlen_t count)
+{
+ xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+ xfs_btree_ptr_to_daddr(cur, ptr),
+ cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
+/*
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
+ */
+STATIC void
+xfs_btree_setbuf(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int lev, /* level in btree */
+ xfs_buf_t *bp) /* new buffer to set */
+{
+ struct xfs_btree_block *b; /* btree block */
+
+ if (cur->bc_bufs[lev])
+ xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
+ cur->bc_bufs[lev] = bp;
+ cur->bc_ra[lev] = 0;
+
+ b = XFS_BUF_TO_BLOCK(bp);
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
+ cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+ if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
+ cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+ } else {
+ if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
+ cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+ if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+ cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+ }
+}
+
+STATIC int
+xfs_btree_ptr_is_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return ptr->l == cpu_to_be64(NULLDFSBNO);
+ else
+ return ptr->s == cpu_to_be32(NULLAGBLOCK);
+}
+
+STATIC void
+xfs_btree_set_ptr_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(NULLDFSBNO);
+ else
+ ptr->s = cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Get/set/init sibling pointers
+ */
+STATIC void
+xfs_btree_get_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->l = block->bb_u.l.bb_rightsib;
+ else
+ ptr->l = block->bb_u.l.bb_leftsib;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->s = block->bb_u.s.bb_rightsib;
+ else
+ ptr->s = block->bb_u.s.bb_leftsib;
+ }
+}
+
+STATIC void
+xfs_btree_set_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.l.bb_rightsib = ptr->l;
+ else
+ block->bb_u.l.bb_leftsib = ptr->l;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.s.bb_rightsib = ptr->s;
+ else
+ block->bb_u.s.bb_leftsib = ptr->s;
+ }
+}
+
+void
+xfs_btree_init_block_int(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *buf,
+ xfs_daddr_t blkno,
+ __u32 magic,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner,
+ unsigned int flags)
+{
+ buf->bb_magic = cpu_to_be32(magic);
+ buf->bb_level = cpu_to_be16(level);
+ buf->bb_numrecs = cpu_to_be16(numrecs);
+
+ if (flags & XFS_BTREE_LONG_PTRS) {
+ buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.l.bb_owner = cpu_to_be64(owner);
+ uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+ buf->bb_u.l.bb_pad = 0;
+ buf->bb_u.l.bb_lsn = 0;
+ }
+ } else {
+ /* owner is a 32 bit value on short blocks */
+ __u32 __owner = (__u32)owner;
+
+ buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+ uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+ buf->bb_u.s.bb_lsn = 0;
+ }
+ }
+}
+
+void
+xfs_btree_init_block(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ __u32 magic,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner,
+ unsigned int flags)
+{
+ xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ magic, level, numrecs, owner, flags);
+}
+
+STATIC void
+xfs_btree_init_block_cur(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int level,
+ int numrecs)
+{
+ __u64 owner;
+
+ /*
+ * we can pull the owner from the cursor right now as the different
+ * owners align directly with the pointer size of the btree. This may
+ * change in future, but is safe for current users of the generic btree
+ * code.
+ */
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ owner = cur->bc_private.b.ip->i_ino;
+ else
+ owner = cur->bc_private.a.agno;
+
+ xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ xfs_btree_magic(cur), level, numrecs,
+ owner, cur->bc_flags);
+}
+
+/*
+ * Return true if ptr is the last record in the btree and
+ * we need to track updates to this record. The decision
+ * will be further refined in the update_lastrec method.
+ */
+STATIC int
+xfs_btree_is_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level)
+{
+ union xfs_btree_ptr ptr;
+
+ if (level > 0)
+ return 0;
+ if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+ return 0;
+
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &ptr))
+ return 0;
+ return 1;
+}
+
+STATIC void
+xfs_btree_buf_to_ptr(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ else {
+ ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ }
+}
+
+STATIC void
+xfs_btree_set_refs(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ switch (cur->bc_btnum) {
+ case XFS_BTNUM_BNO:
+ case XFS_BTNUM_CNT:
+ xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
+ break;
+ case XFS_BTNUM_INO:
+ case XFS_BTNUM_FINO:
+ xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
+ break;
+ case XFS_BTNUM_BMAP:
+ xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
+ break;
+ default:
+ ASSERT(0);