diff options
-rw-r--r-- | arch/powerpc/include/asm/book3s/64/pgtable.h | 4 | ||||
-rw-r--r-- | arch/powerpc/include/asm/nohash/64/pgtable.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 17 | ||||
-rw-r--r-- | drivers/staging/erofs/utils.c | 18 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 2 | ||||
-rw-r--r-- | fs/dax.c | 112 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 2 | ||||
-rw-r--r-- | include/linux/radix-tree.h | 36 | ||||
-rw-r--r-- | include/linux/swapops.h | 19 | ||||
-rw-r--r-- | include/linux/xarray.h | 102 | ||||
-rw-r--r-- | lib/idr.c | 60 | ||||
-rw-r--r-- | lib/radix-tree.c | 21 | ||||
-rw-r--r-- | mm/filemap.c | 10 | ||||
-rw-r--r-- | mm/khugepaged.c | 2 | ||||
-rw-r--r-- | mm/madvise.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/mincore.c | 2 | ||||
-rw-r--r-- | mm/readahead.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 10 | ||||
-rw-r--r-- | mm/swap.c | 2 | ||||
-rw-r--r-- | mm/truncate.c | 12 | ||||
-rw-r--r-- | mm/workingset.c | 13 | ||||
-rw-r--r-- | tools/testing/radix-tree/idr-test.c | 6 | ||||
-rw-r--r-- | tools/testing/radix-tree/linux/radix-tree.h | 1 | ||||
-rw-r--r-- | tools/testing/radix-tree/multiorder.c | 47 | ||||
-rw-r--r-- | tools/testing/radix-tree/test.c | 2 |
26 files changed, 278 insertions, 232 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..62039e557ac0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -723,9 +723,7 @@ static inline bool pte_user(pte_t pte) BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ } while (0) -/* - * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; - */ + #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 7cd6809f4d33..05765c2d2c1f 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -313,9 +313,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define MAX_SWAPFILES_CHECK() do { \ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ } while (0) -/* - * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; - */ + #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fcc73a6ab503..316730b45f84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5996,7 +5996,8 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, count = __sg_page_count(sg); while (idx + count <= n) { - unsigned long exception, i; + void *entry; + unsigned long i; int ret; /* If we cannot allocate and insert this entry, or the @@ -6011,12 +6012,9 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (ret && ret != -EEXIST) goto scan; - exception = - RADIX_TREE_EXCEPTIONAL_ENTRY | - idx << RADIX_TREE_EXCEPTIONAL_SHIFT; + entry = xa_mk_value(idx); for (i = 1; i < count; i++) { - ret = radix_tree_insert(&iter->radix, idx + i, - (void *)exception); + ret = radix_tree_insert(&iter->radix, idx + i, entry); if (ret && ret != -EEXIST) goto scan; } @@ -6054,15 +6052,14 @@ lookup: GEM_BUG_ON(!sg); /* If this index is in the middle of multi-page sg entry, - * the radixtree will contain an exceptional entry that points + * the radix tree will contain a value entry that points * to the start of that range. We will return the pointer to * the base page and the offset of this page within the * sg entry's range. */ *offset = 0; - if (unlikely(radix_tree_exception(sg))) { - unsigned long base = - (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + if (unlikely(xa_is_value(sg))) { + unsigned long base = xa_to_value(sg); sg = radix_tree_lookup(&iter->radix, base); GEM_BUG_ON(!sg); diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c index 595cf90af9bb..bdee9bd09f11 100644 --- a/drivers/staging/erofs/utils.c +++ b/drivers/staging/erofs/utils.c @@ -35,7 +35,6 @@ static atomic_long_t erofs_global_shrink_cnt; #ifdef CONFIG_EROFS_FS_ZIP -/* radix_tree and the future XArray both don't use tagptr_t yet */ struct erofs_workgroup *erofs_find_workgroup( struct super_block *sb, pgoff_t index, bool *tag) { @@ -47,9 +46,8 @@ repeat: rcu_read_lock(); grp = radix_tree_lookup(&sbi->workstn_tree, index); if (grp != NULL) { - *tag = radix_tree_exceptional_entry(grp); - grp = (void *)((unsigned long)grp & - ~RADIX_TREE_EXCEPTIONAL_ENTRY); + *tag = xa_pointer_tag(grp); + grp = xa_untag_pointer(grp); if (erofs_workgroup_get(grp, &oldcount)) { /* prefer to relax rcu read side */ @@ -83,9 +81,7 @@ int erofs_register_workgroup(struct super_block *sb, sbi = EROFS_SB(sb); erofs_workstn_lock(sbi); - if (tag) - grp = (void *)((unsigned long)grp | - 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT); + grp = xa_tag_pointer(grp, tag); err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); @@ -131,9 +127,7 @@ repeat: for (i = 0; i < found; ++i) { int cnt; - struct erofs_workgroup *grp = (void *) - ((unsigned long)batch[i] & - ~RADIX_TREE_EXCEPTIONAL_ENTRY); + struct erofs_workgroup *grp = xa_untag_pointer(batch[i]); first_index = grp->index + 1; @@ -150,8 +144,8 @@ repeat: #endif continue; - if (radix_tree_delete(&sbi->workstn_tree, - grp->index) != grp) { + if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, + grp->index)) != grp) { #ifdef EROFS_FS_HAS_MANAGED_CACHE skip: erofs_workgroup_unfreeze(grp, 1); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9bfa66592aa7..fd25e125303c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -440,7 +440,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, rcu_read_lock(); page = radix_tree_lookup(&mapping->i_pages, pg_index); rcu_read_unlock(); - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { misses++; if (misses > 4) break; @@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void) fs_initcall(init_dax_wait_table); /* - * We use lowest available bit in exceptional entry for locking, one bit for - * the entry size (PMD) and two more to tell us if the entry is a zero page or - * an empty entry that is just used for locking. In total four special bits. + * DAX pagecache entries use XArray value entries so they can't be mistaken + * for pages. We use one bit for locking, one bit for the entry size (PMD) + * and two more to tell us if the entry is a zero page or an empty entry that + * is just used for locking. In total four special bits. * * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem * block allocation. */ -#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) -#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) -#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) +#define DAX_SHIFT (4) +#define DAX_LOCKED (1UL << 0) +#define DAX_PMD (1UL << 1) +#define DAX_ZERO_PAGE (1UL << 2) +#define DAX_EMPTY (1UL << 3) static unsigned long dax_radix_pfn(void *entry) { - return (unsigned long)entry >> RADIX_DAX_SHIFT; + return xa_to_value(entry) >> DAX_SHIFT; } static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) { - return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | - (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); + return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) | + DAX_LOCKED); } static unsigned int dax_radix_order(void *entry) { - if ((unsigned long)entry & RADIX_DAX_PMD) + if (xa_to_value(entry) & DAX_PMD) return PMD_SHIFT - PAGE_SHIFT; return 0; } static int dax_is_pmd_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_PMD; + return xa_to_value(entry) & DAX_PMD; } static int dax_is_pte_entry(void *entry) { - return !((unsigned long)entry & RADIX_DAX_PMD); + return !(xa_to_value(entry) & DAX_PMD); } static int dax_is_zero_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_ZERO_PAGE; + return xa_to_value(entry) & DAX_ZERO_PAGE; } static int dax_is_empty_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_EMPTY; + return xa_to_value(entry) & DAX_EMPTY; } /* @@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping, */ static inline int slot_locked(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - return entry & RADIX_DAX_ENTRY_LOCK; + unsigned long entry = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + return entry & DAX_LOCKED; } /* @@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot) */ static inline void *lock_slot(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - - entry |= RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); - return (void *)entry; + unsigned long v = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + void *entry = xa_mk_value(v | DAX_LOCKED); + radix_tree_replace_slot(&mapping->i_pages, slot, entry); + return entry; } /* @@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot) */ static inline void *unlock_slot(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - - entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); - return (void *)entry; + unsigned long v = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + void *entry = xa_mk_value(v & ~DAX_LOCKED); + radix_tree_replace_slot(&mapping->i_pages, slot, entry); + return entry; } /* * Lookup entry in radix tree, wait for it to become unlocked if it is - * exceptional entry and return it. The caller must call + * a DAX entry and return it. The caller must call * put_unlocked_mapping_entry() when he decided not to lock the entry or * put_locked_mapping_entry() when he locked the entry and now wants to * unlock it. @@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping, entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (!entry || - WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) || + WARN_ON_ONCE(!xa_is_value(entry)) || !slot_locked(mapping, slot)) { if (slotp) *slotp = slot; @@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index) xa_lock_irq(&mapping->i_pages); entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) || + if (WARN_ON_ONCE(!entry || !xa_is_value(entry) || !slot_locked(mapping, slot))) { xa_unlock_irq(&mapping->i_pages); return; @@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page) } /* - * Find radix tree entry at given index. If it points to an exceptional entry, - * return it with the radix tree entry locked. If the radix tree doesn't - * contain given index, create an empty exceptional entry for the index and - * return with it locked. + * Find radix tree entry at given index. If it is a DAX entry, return it + * with the radix tree entry locked. If the radix tree doesn't contain the + * given index, create an empty entry for the index and return with it locked. * - * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will + * When requesting an entry with size DAX_PMD, grab_mapping_entry() will * either return that locked entry or will return an error. This error will * happen if there are any 4k entries within the 2MiB range that we are * requesting. @@ -507,13 +505,13 @@ restart: xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, &slot); - if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) { + if (WARN_ON_ONCE(entry && !xa_is_value(entry))) { entry = ERR_PTR(-EIO); goto out_unlock; } if (entry) { - if (size_flag & RADIX_DAX_PMD) { + if (size_flag & DAX_PMD) { if (dax_is_pte_entry(entry)) { put_unlocked_mapping_entry(mapping, index, entry); @@ -584,7 +582,7 @@ restart: true); } - entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); + entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY); err = __radix_tree_insert(&mapping->i_pages, index, dax_radix_order(entry), entry); @@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) if (index >= end) break; - if (WARN_ON_ONCE( - !radix_tree_exceptional_entry(pvec_ent))) + if (WARN_ON_ONCE(!xa_is_value(pvec_ent))) continue; xa_lock_irq(&mapping->i_pages); @@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping, xa_lock_irq(pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry))) + if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) goto out; if (!trunc && (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) || @@ -729,8 +726,8 @@ out: return ret; } /* - * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree - * entry to get unlocked before deleting it. + * Delete DAX entry at @index from @mapping. Wait for it + * to be unlocked before deleting it. */ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { @@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * This gets called from truncate / punch_hole path. As such, the caller * must hold locks protecting against concurrent modifications of the * radix tree (usually fs-private i_mmap_sem for writing). Since the - * caller has seen exceptional entry for this index, we better find it + * caller has seen a DAX entry for this index, we better find it * at that index as well... */ WARN_ON_ONCE(!ret); @@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) } /* - * Invalidate exceptional DAX entry if it is clean. + * Invalidate DAX entry if it is clean. */ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index) @@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { + if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR, @@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev, * A page got tagged dirty in DAX mapping? Something is seriously * wrong. */ - if (WARN_ON(!radix_tree_exceptional_entry(entry))) + if (WARN_ON(!xa_is_value(entry))) return -EIO; xa_lock_irq(pages); entry2 = get_unlocked_mapping_entry(mapping, index, &slot); /* Entry got punched out / reallocated? */ - if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2))) + if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2))) goto put_unlocked; /* * Entry got reallocated elsewhere? No need to writeback. We have to @@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); vm_fault_t ret; - dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, - false); + dax_insert_mapping_entry(mapping, vmf, entry, pfn, + DAX_ZERO_PAGE, false); + ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); return ret; @@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, pfn = page_to_pfn_t(zero_page); ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, - RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); + DAX_PMD | DAX_ZERO_PAGE, false); ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { @@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * is already in the tree, for instance), it will return -EEXIST and * we just fall back to 4k entries. */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + entry = grab_mapping_entry(mapping, pgoff, DAX_PMD); if (IS_ERR(entry)) goto fallback; @@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, - RADIX_DAX_PMD, write && !sync); + DAX_PMD, write && !sync); /* * If we are doing synchronous page fault and inode needs fsync, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5ea1d64cb0b4..669abb617321 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -521,7 +521,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) mss->swap += PAGE_SIZE; else put_page(page); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 34149e8b5f73..e9e76ab4fbbf 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -28,34 +28,26 @@ #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/xarray.h> /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: * * 00 - data pointer - * 01 - internal entry - * 10 - exceptional entry - * 11 - this bit combination is currently unused/reserved + * 10 - internal entry + * x1 - value entry * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved * to another location in the tree and the lookup should be restarted. While * NULL fits the 'data pointer' pattern, it means that there is no entry in * the tree for this index (no matter what level of the tree it is found at). - * This means that you cannot store NULL in the tree as a value for the index. + * This means that storing a NULL entry in the tree is the same as deleting + * the entry from the tree. */ #define RADIX_TREE_ENTRY_MASK 3UL -#define RADIX_TREE_INTERNAL_NODE 1UL - -/* - * Most users of the radix tree store pointers but shmem/tmpfs stores swap - * entries in the same tree. They are marked as exceptional entries to - * distinguish them from pointers to struct page. - * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. - */ -#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 -#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 +#define RADIX_TREE_INTERNAL_NODE 2UL static inline bool radix_tree_is_internal_node(void *ptr) { @@ -83,11 +75,10 @@ static inline bool radix_tree_is_internal_node(void *ptr) /* * @count is the count of every non-NULL element in the ->slots array - * whether that is an exceptional entry, a retry entry, a user pointer, + * whether that is a value entry, a retry entry, a user pointer, * a sibling entry or a pointer to the next level of the tree. * @exceptional is the count of every element in ->slots which is - * either radix_tree_exceptional_entry() or is a sibling entry for an - * exceptional entry. + * either a value entry or a sibling of a value entry. */ struct radix_tree_node { unsigned char shift; /* Bits remaining in each slot */ @@ -269,17 +260,6 @@ static inline int radix_tree_deref_retry(void *arg) } /** - * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? - * @arg: value returned by radix_tree_deref_slot - * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. - */ -static inline int radix_tree_exceptional_entry(void *arg) -{ - /* Not unlikely because radix_tree_exception often tested first */ - return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; -} - -/** * radix_tree_exception - radix_tree_deref_slot returned either exception? * @arg: value returned by radix_tree_deref_slot * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 22af9d8a84ae..4d961668e5fc 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -18,9 +18,8 @@ * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ - (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) -#define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) +#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) +#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) /* * Store a type+offset into a swp_entry_t in an arch-independent format @@ -29,8 +28,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) { swp_entry_t ret; - ret.val = (type << SWP_TYPE_SHIFT(ret)) | - (offset & SWP_OFFSET_MASK(ret)); + ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK); return ret; } @@ -40,7 +38,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) */ static inline unsigned swp_type(swp_entry_t entry) { - return (entry.val >> SWP_TYPE_SHIFT(entry)); + return (entry.val >> SWP_TYPE_SHIFT); } /* @@ -49,7 +47,7 @@ static inline unsigned swp_type(swp_entry_t entry) */ static inline pgoff_t swp_offset(swp_entry_t entry) { - return entry.val & SWP_OFFSET_MASK(entry); + return entry.val & SWP_OFFSET_MASK; } #ifdef CONFIG_MMU @@ -90,16 +88,13 @@ static inline swp_entry_t radix_to_swp_entry(void *arg) { swp_entry_t entry; - entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + entry.val = xa_to_value(arg); return entry; } static inline void *swp_to_radix_entry(swp_entry_t entry) { - unsigned long value; - - value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; - return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); + return xa_mk_value(entry.val); } #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 9e4c86853fa4..5c8acfc4ff55 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -5,9 +5,111 @@ * eXtensible Arrays * Copyright (c) 2017 Microsoft Corporation * Author: Matthew Wilcox <willy@infradead.org> + * + * See Documentation/core-api/xarray.rst for how to use the XArray. */ +#include <linux/bug.h> #include <linux/spinlock.h> +#include <linux/types.h> + +/* + * The bottom two bits of the entry determine how the XArray interprets + * the contents: + * + * 00: Pointer entry + * 10: Internal entry + * x1: Value entry or tagged pointer + * + * Attempting to store internal entries in the XArray is a bug. + */ + +#define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) + +/** + * xa_mk_value() - Create an XArray entry from an integer. + * @v: Value to store in XArray. + * + * Context: Any context. + * Return: An entry suitable for storing in the XArray. + */ +static inline void *xa_mk_value(unsigned long v) +{ + WARN_ON((long)v < 0); + return (void *)((v << 1) | 1); +} + +/** + * xa_to_value() - Get value stored in an XArray entry. + * @entry: XArray entry. + * + * Context: Any context. + * Return: The value stored in the XArray entry. + */ +static inline unsigned long xa_to_value(const void *entry) +{ + return (unsigned long)entry >> 1; +} + +/** + * xa_is_value() - Determine if an entry is a value. + * @entry: XArray entry. + * + * Context: Any context. + * Return: True if the entry is a value, false if it is a pointer. + */ +static inline bool xa_is_value(const void *entry) +{ + return (unsigned long)entry & 1; +} + +/** + * xa_tag_pointer() - Create an XArray entry for a tagged pointer. + * @p: Plain pointer. + * @tag: Tag value (0, 1 or 3). + * + * If the user of the XArray prefers, they can tag their pointers instead + * of storing value entries. Three tags are available (0, 1 and 3). + * These are distinct from the xa_mark_t as they are not replicated up + * through the array and cannot be searched for. + * + * Context: Any context. + * Return: An XArray entry. + */ +static inline void *xa_tag_pointer(void *p, unsigned long tag) +{ + return (void *)((unsigned long)p | tag); +} + +/** + * xa_untag_pointer() - Turn an XArray entry into a plain pointer. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the untagged version of the pointer. + * + * Context: Any context. + * Return: A pointer. + */ +static inline void *xa_untag_pointer(void *entry) +{ + return (void *)((unsigned long)entry & ~3UL); +} + +/** + * xa_pointer_tag() - Get the tag stored in an XArray entry. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the tag of that pointer. + * + * Context: Any context. + * Return: A tag. + */ +static inline unsigned int xa_pointer_tag(void *entry) +{ + return (unsigned long)entry & 3UL; +} #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) diff --git a/lib/idr.c b/lib/idr.c index 729e381e23b4..88419fbc5737 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -338,11 +338,8 @@ EXPORT_SYMBOL(idr_replace); * by the number of bits in the leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given - * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional - * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits - * directly in the entry. By being really tricksy, we could store - * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising - * for 0-3 allocated IDs. + * leaf, instead of allocating a 128-byte bitmap, we store the bits + * directly in the entry. * * We allow the radix tree 'exceptional' count to get out of date. Nothing * in the IDA nor the radix tree code checks it. If it becomes important @@ -366,12 +363,11 @@ static int ida_get_new_above(struct ida *ida, int start) struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; - unsigned bit, ebit; + unsigned bit; int new; index = start / IDA_BITMAP_BITS; bit = start % IDA_BITMAP_BITS; - ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; slot = radix_tree_iter_init(&iter, index); for (;;) { @@ -386,25 +382,23 @@ static int ida_get_new_above(struct ida *ida, int start) return PTR_ERR(slot); } } - if (iter.index > index) { + if (iter.index > index) bit = 0; - ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; - } new = iter.index * IDA_BITMAP_BITS; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { - unsigned long tmp = (unsigned long)bitmap; - ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); - if (ebit < BITS_PER_LONG) { - tmp |= 1UL << ebit; - rcu_assign_pointer(*slot, (void *)tmp); - return new + ebit - - RADIX_TREE_EXCEPTIONAL_SHIFT; + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, + bit); + if (vbit < BITS_PER_XA_VALUE) { + tmp |= 1UL << vbit; + rcu_assign_pointer(*slot, xa_mk_value(tmp)); + return new + vbit; } bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; + bitmap->bitmap[0] = tmp; rcu_assign_pointer(*slot, bitmap); } @@ -425,17 +419,14 @@ static int ida_get_new_above(struct ida *ida, int start) new += bit; if (new < 0) return -ENOSPC; - if (ebit < BITS_PER_LONG) { - bitmap = (void *)((1UL << ebit) | - RADIX_TREE_EXCEPTIONAL_ENTRY); - radix_tree_iter_replace(root, &iter, slot, - bitmap); - return new; + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); + } else { + bitmap = this_cpu_xchg(ida_bitmap, NULL); + if (!bitmap) + return -EAGAIN; + __set_bit(bit, bitmap->bitmap); } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } @@ -457,9 +448,9 @@ static void ida_remove(struct ida *ida, int id) goto err; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { + if (xa_is_value(bitmap)) { btmp = (unsigned long *)slot; - offset += RADIX_TREE_EXCEPTIONAL_SHIFT; + offset += 1; /* Intimate knowledge of the value encoding */ if (offset >= BITS_PER_LONG) goto err; } else { @@ -470,9 +461,8 @@ static void ida_remove(struct ida *ida, int id) __clear_bit(offset, btmp); radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (radix_tree_exception(bitmap)) { - if (rcu_dereference_raw(*slot) == - (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) + if (xa_is_value(bitmap)) { + if (xa_to_value(rcu_dereference_raw(*slot)) == 0) radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) |