diff options
| author | Andrew Morton <akpm@linux-foundation.org> | 2023-04-16 12:31:58 -0700 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2023-04-16 12:31:58 -0700 |
| commit | e492cd61b986590a45c674ede7dd1c4dbf94cf24 (patch) | |
| tree | 37dc59ea66842b5b7e57f32aba6cbce5143c1282 | |
| parent | d46031f40e0f7f7bf63914bb3f2e404ad3886ecd (diff) | |
| parent | 2ff559f31a5d50c31a3f9d849f8af90dc36c7105 (diff) | |
| download | linux-e492cd61b986590a45c674ede7dd1c4dbf94cf24.tar.gz linux-e492cd61b986590a45c674ede7dd1c4dbf94cf24.tar.bz2 linux-e492cd61b986590a45c674ede7dd1c4dbf94cf24.zip | |
sync mm-stable with mm-hotfixes-stable to pick up depended-upon upstream changes
| -rw-r--r-- | .mailmap | 2 | ||||
| -rw-r--r-- | Documentation/mm/zsmalloc.rst | 135 | ||||
| -rw-r--r-- | fs/dax.c | 37 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 17 | ||||
| -rw-r--r-- | fs/nilfs2/btree.c | 1 | ||||
| -rw-r--r-- | fs/nilfs2/direct.c | 1 | ||||
| -rw-r--r-- | fs/nilfs2/segment.c | 3 | ||||
| -rw-r--r-- | fs/nilfs2/super.c | 2 | ||||
| -rw-r--r-- | fs/nilfs2/the_nilfs.c | 12 | ||||
| -rw-r--r-- | fs/userfaultfd.c | 6 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 3 | ||||
| -rw-r--r-- | kernel/fork.c | 3 | ||||
| -rw-r--r-- | lib/maple_tree.c | 306 | ||||
| -rw-r--r-- | mm/backing-dev.c | 12 | ||||
| -rw-r--r-- | mm/huge_memory.c | 19 | ||||
| -rw-r--r-- | mm/hugetlb.c | 14 | ||||
| -rw-r--r-- | mm/khugepaged.c | 4 | ||||
| -rw-r--r-- | mm/memory.c | 16 | ||||
| -rw-r--r-- | mm/mempolicy.c | 104 | ||||
| -rw-r--r-- | mm/mmap.c | 3 | ||||
| -rw-r--r-- | mm/mprotect.c | 2 | ||||
| -rw-r--r-- | mm/swap.c | 2 | ||||
| -rw-r--r-- | mm/swapfile.c | 3 | ||||
| -rw-r--r-- | mm/vmalloc.c | 8 | ||||
| -rw-r--r-- | tools/mm/page_owner_sort.c | 2 | ||||
| -rw-r--r-- | tools/testing/radix-tree/maple.c | 16 |
26 files changed, 476 insertions, 257 deletions
@@ -232,6 +232,8 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com> John Crispin <john@phrozen.org> <blogic@openwrt.org> John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> John Stultz <johnstul@us.ibm.com> +<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com> +<jon.toppins+linux@gmail.com> <jtoppins@redhat.com> Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org> <josh@joshtriplett.org> <josh@freedesktop.org> <josh@joshtriplett.org> <josh@kernel.org> diff --git a/Documentation/mm/zsmalloc.rst b/Documentation/mm/zsmalloc.rst index 64d127bfc221..a3c26d587752 100644 --- a/Documentation/mm/zsmalloc.rst +++ b/Documentation/mm/zsmalloc.rst @@ -39,13 +39,12 @@ With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via # cat /sys/kernel/debug/zsmalloc/zram0/classes - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage + class size 10% 20% 30% 40% 50% 60% 70% 80% 90% 99% 100% obj_allocated obj_used pages_used pages_per_zspage freeable ... ... - 9 176 0 1 186 129 8 4 - 10 192 1 0 2880 2872 135 3 - 11 208 0 1 819 795 42 2 - 12 224 0 1 219 159 12 4 + 30 512 0 12 4 1 0 1 0 0 1 0 414 3464 3346 433 1 14 + 31 528 2 7 2 2 1 0 1 0 0 2 117 4154 3793 536 4 44 + 32 544 6 3 4 1 2 1 0 0 0 1 260 4170 3965 556 2 26 ... ... @@ -54,10 +53,28 @@ class index size object size zspage stores -almost_empty - the number of ZS_ALMOST_EMPTY zspages(see below) -almost_full - the number of ZS_ALMOST_FULL zspages(see below) +10% + the number of zspages with usage ratio less than 10% (see below) +20% + the number of zspages with usage ratio between 10% and 20% +30% + the number of zspages with usage ratio between 20% and 30% +40% + the number of zspages with usage ratio between 30% and 40% +50% + the number of zspages with usage ratio between 40% and 50% +60% + the number of zspages with usage ratio between 50% and 60% +70% + the number of zspages with usage ratio between 60% and 70% +80% + the number of zspages with usage ratio between 70% and 80% +90% + the number of zspages with usage ratio between 80% and 90% +99% + the number of zspages with usage ratio between 90% and 99% +100% + the number of zspages with usage ratio 100% obj_allocated the number of objects allocated obj_used @@ -66,19 +83,14 @@ pages_used the number of pages allocated for the class pages_per_zspage the number of 0-order pages to make a zspage +freeable + the approximate number of pages class compaction can free -We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where - -* n = number of allocated objects -* N = total number of objects zspage can store -* f = fullness_threshold_frac(ie, 4 at the moment) - -Similarly, we assign zspage to: - -* ZS_ALMOST_FULL when n > N / f -* ZS_EMPTY when n == 0 -* ZS_FULL when n == N - +Each zspage maintains inuse counter which keeps track of the number of +objects stored in the zspage. The inuse counter determines the zspage's +"fullness group" which is calculated as the ratio of the "inuse" objects to +the total number of objects the zspage can hold (objs_per_zspage). The +closer the inuse counter is to objs_per_zspage, the better. Internals ========= @@ -94,10 +106,10 @@ of objects that each zspage can store. For instance, consider the following size classes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable ... - 94 1536 0 0 0 0 0 3 0 - 100 1632 0 0 0 0 0 2 0 + 94 1536 0 .... 0 0 0 0 3 0 + 100 1632 0 .... 0 0 0 0 2 0 ... @@ -134,10 +146,11 @@ reduces memory wastage. Let's take a closer look at the bottom of `/sys/kernel/debug/zsmalloc/zramX/classes`::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 254 4096 0 .. 0 0 0 0 1 0 ... Size class #202 stores objects of size 3264 bytes and has a maximum of 4 pages @@ -151,40 +164,42 @@ efficient storage of large objects. For zspage chain size of 8, huge class watermark becomes 3632 bytes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 211 3408 0 0 0 0 0 5 0 - 217 3504 0 0 0 0 0 6 0 - 222 3584 0 0 0 0 0 7 0 - 225 3632 0 0 0 0 0 8 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 211 3408 0 .. 0 0 0 0 5 0 + 217 3504 0 .. 0 0 0 0 6 0 + 222 3584 0 .. 0 0 0 0 7 0 + 225 3632 0 .. 0 0 0 0 8 0 + 254 4096 0 .. 0 0 0 0 1 0 ... For zspage chain size of 16, huge class watermark becomes 3840 bytes::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - 202 3264 0 0 0 0 0 4 0 - 206 3328 0 0 0 0 0 13 0 - 207 3344 0 0 0 0 0 9 0 - 208 3360 0 0 0 0 0 14 0 - 211 3408 0 0 0 0 0 5 0 - 212 3424 0 0 0 0 0 16 0 - 214 3456 0 0 0 0 0 11 0 - 217 3504 0 0 0 0 0 6 0 - 219 3536 0 0 0 0 0 13 0 - 222 3584 0 0 0 0 0 7 0 - 223 3600 0 0 0 0 0 15 0 - 225 3632 0 0 0 0 0 8 0 - 228 3680 0 0 0 0 0 9 0 - 230 3712 0 0 0 0 0 10 0 - 232 3744 0 0 0 0 0 11 0 - 234 3776 0 0 0 0 0 12 0 - 235 3792 0 0 0 0 0 13 0 - 236 3808 0 0 0 0 0 14 0 - 238 3840 0 0 0 0 0 15 0 - 254 4096 0 0 0 0 0 1 0 + 202 3264 0 .. 0 0 0 0 4 0 + 206 3328 0 .. 0 0 0 0 13 0 + 207 3344 0 .. 0 0 0 0 9 0 + 208 3360 0 .. 0 0 0 0 14 0 + 211 3408 0 .. 0 0 0 0 5 0 + 212 3424 0 .. 0 0 0 0 16 0 + 214 3456 0 .. 0 0 0 0 11 0 + 217 3504 0 .. 0 0 0 0 6 0 + 219 3536 0 .. 0 0 0 0 13 0 + 222 3584 0 .. 0 0 0 0 7 0 + 223 3600 0 .. 0 0 0 0 15 0 + 225 3632 0 .. 0 0 0 0 8 0 + 228 3680 0 .. 0 0 0 0 9 0 + 230 3712 0 .. 0 0 0 0 10 0 + 232 3744 0 .. 0 0 0 0 11 0 + 234 3776 0 .. 0 0 0 0 12 0 + 235 3792 0 .. 0 0 0 0 13 0 + 236 3808 0 .. 0 0 0 0 14 0 + 238 3840 0 .. 0 0 0 0 15 0 + 254 4096 0 .. 0 0 0 0 1 0 ... Overall the combined zspage chain size effect on zsmalloc pool configuration::: @@ -214,9 +229,10 @@ zram as a build artifacts storage (Linux kernel compilation). zsmalloc classes stats::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - Total 13 51 413836 412973 159955 3 + Total 13 .. 51 413836 412973 159955 3 zram mm_stat::: @@ -227,9 +243,10 @@ zram as a build artifacts storage (Linux kernel compilation). zsmalloc classes stats::: - class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable + class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable + ... - Total 18 87 414852 412978 156666 0 + Total 18 .. 87 414852 412978 156666 0 zram mm_stat::: @@ -781,6 +781,33 @@ out: return ret; } +static int __dax_clear_dirty_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) +{ + XA_STATE(xas, &mapping->i_pages, start); + unsigned int scanned = 0; + void *entry; + + xas_lock_irq(&xas); + xas_for_each(&xas, entry, end) { + entry = get_unlocked_entry(&xas, 0); + xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + put_unlocked_entry(&xas, entry, WAKE_NEXT); + + if (++scanned % XA_CHECK_SCHED) + continue; + + xas_pause(&xas); + xas_unlock_irq(&xas); + cond_resched(); + xas_lock_irq(&xas); + } + xas_unlock_irq(&xas); + + return 0; +} + /* * Delete DAX entry at @index from @mapping. Wait for it * to be unlocked before deleting it. @@ -1440,6 +1467,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, * written by write(2) is visible in mmap. */ if (iomap->flags & IOMAP_F_NEW || cow) { + /* + * Filesystem allows CoW on non-shared extents. The src extents + * may have been mmapped with dirty mark before. To be able to + * invalidate its dax entries, we need to clear the dirty mark + * in advance. + */ + if (cow) + __dax_clear_dirty_range(iomi->inode->i_mapping, + pos >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); invalidate_inode_pages2_range(iomi->inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 195dc23e0d83..1db3e3c24b43 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -978,6 +978,16 @@ restart: continue; } + /* + * If wb_tryget fails, the wb has been shutdown, skip it. + * + * Pin @wb so that it stays on @bdi->wb_list. This allows + * continuing iteration from @wb after dropping and + * regrabbing rcu read lock. + */ + if (!wb_tryget(wb)) + continue; + /* alloc failed, execute synchronously using on-stack fallback */ work = &fallback_work; *work = *base_work; @@ -986,13 +996,6 @@ restart: work->done = &fallback_work_done; wb_queue_work(wb, work); - - /* - * Pin @wb so that it stays on @bdi->wb_list. This allows - * continuing iteration from @wb after dropping and - * regrabbing rcu read lock. - */ - wb_get(wb); last_wb = wb; rcu_read_unlock(); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 2681a449edc1..13592e82eaf6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2219,6 +2219,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index a35f2795b242..4c85914f2abc 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -314,6 +314,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct, binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; + memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 19446a8243d7..6ad41390fa74 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2609,11 +2609,10 @@ static int nilfs_segctor_thread(void *arg) goto loop; end_thread: - spin_unlock(&sci->sc_state_lock); - /* end sync. */ sci->sc_task = NULL; wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + spin_unlock(&sci->sc_state_lock); return 0; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1422b8ba24ed..77f1e5778d1c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -482,6 +482,7 @@ static void nilfs_put_super(struct super_block *sb) up_write(&nilfs->ns_sem); } + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -1105,6 +1106,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); failed_unload: + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 3a4c9c150cbf..2894152a6b25 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -87,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs) { might_sleep(); if (nilfs_init(nilfs)) { - nilfs_sysfs_delete_device_group(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -305,6 +304,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; } + err = nilfs_sysfs_create_device_group(sb); + if (unlikely(err)) + goto sysfs_error; + if (valid_fs) goto skip_recovery; @@ -366,6 +369,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; failed_unload: + nilfs_sysfs_delete_device_group(nilfs); + + sysfs_error: iput(nilfs->ns_cpfile); iput(nilfs->ns_sufile); iput(nilfs->ns_dat); @@ -697,10 +703,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - err = nilfs_sysfs_create_device_group(sb); - if (err) - goto failed_sbh; - set_nilfs_init(nilfs); err = 0; out: diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 8395605790f6..3b2a41c330e6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1977,8 +1977,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; - /* Ignore unsupported features (userspace built against newer kernel) */ - features = uffdio_api.features & UFFD_API_FEATURES; + features = uffdio_api.features; + ret = -EINVAL; + if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) goto err_out; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 092f842a854f..3fc9e680f174 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -800,7 +800,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index 2bde99037652..9051bc07b600 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -683,6 +683,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto out; + mt_clear_in_rcu(vmi.mas.tree); for_each_vma(old_vmi, mpnt) { struct file *file; @@ -766,6 +767,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, retval = arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); + if (!retval) + mt_set_in_rcu(vmi.mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ae37a167e25d..5577e6da6fe3 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head) */ static void ma_free_rcu(struct maple_node *node) { - node->parent = ma_parent_ptr(node); + WARN_ON(node->parent != ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } @@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode) */ static inline bool ma_dead_node(const struct maple_node *node) { - struct maple_node *parent = (void *)((unsigned long) - node->parent & ~MAPLE_NODE_MASK); + struct maple_node *parent; + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); + parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent == node); } + /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node @@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode) struct maple_node *parent, *node; node = mte_to_node(enode); + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); parent = mte_parent(enode); return (parent == node); } @@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas) * @node - the maple node * @type - the node type * + * In the event of a dead node, this array may be %NULL + * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, @@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt, return rcu_dereference_check(slots[offset], mt_locked(mt)); } +static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, + unsigned char offset) +{ + return rcu_dereference_protected(slots[offset], mt_locked(mt)); +} /* * mas_slot_locked() - Get the slot value when holding the maple tree lock. * @mas: The maple state @@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt, static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, unsigned char offset) { - return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); + return mt_slot_locked(mas->tree, slots, offset); } /* @@ -900,6 +912,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, } /* + * mt_clear_meta() - clear the metadata information of a node, if it exists + * @mt: The maple tree + * @mn: The maple node + * @type: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, + enum maple_type type) +{ + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + + switch (type) { + case maple_range_64: + pivots = mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots = mn->mr64.slot; + next = mt_slot_locked(mt, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && + mte_node_type(next)))) + return; /* no metadata, could be node */ + } + fallthrough; + case maple_arange_64: + meta = ma_meta(mn, type); + break; + default: + return; + } + + meta->gap = 0; + meta->end = 0; +} + +/* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node * @mt: The maple node type @@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas) a_type = mas_parent_enum(mas, p_enode); a_node = mte_parent(p_enode); a_slot = mte_parent_slot(p_enode); - pivots = ma_pivots(a_node, a_type); a_enode = mt_mk_node(a_node, a_type); + pivots = ma_pivots(a_node, a_type); + + if (unlikely(ma_dead_node(a_node))) + return 1; if (!set_min && a_slot) { set_min = true; @@ -1249,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node = mas->alloc; node->request_count = 0; while (requested) { - max_req = MAPLE_ALLOC_SLOTS; - if (node->node_count) { - unsigned int offset = node->node_count; - - slots = (void **)&node->slot[offset]; - max_req -= offset; - } else { - slots = (void **)&node->slot; - } - + max_req = MAPLE_ALLOC_SLOTS - node->node_count; + slots = (void **)&node->slot[node->node_count]; max_req = min(requested, max_req); count = mt_alloc_bulk(gfp, max_req, slots); if (!count) goto nomem_bulk; + if (node->node_count == 0) { + node->slot[0]->node_count = 0; + node->slot[0]->request_count = 0; + } + node->node_count += count; allocated += count; node = node->slot[0]; - node->node_count = 0; - node->request_count = 0; requested -= count; } mas->alloc->total = allocated; @@ -1354,12 +1403,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas) mas->max = ULONG_MAX; mas->depth = 0; +retry: root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; mas->node = mte_safe_root(root); mas->offset = 0; + if (mte_dead_node(mas->node)) + goto retry; + return NULL; } @@ -1401,6 +1454,9 @@ static inline unsigned char ma_data_end(struct maple_node *node, { unsigned char offset; + if (!pivots) + return 0; + if (type == maple_arange_64) return ma_meta_end(node, type); @@ -1436,6 +1492,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas) return ma_meta_end(node, type); pivots = ma_pivots(node, type); + if (unlikely(ma_dead_node(node))) + return 0; + offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); @@ -1724,8 +1783,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced) rcu_assign_pointer(slots[offset], mas->node); } - if (!advanced) + if (!advanced) { + mte_set_node_dead(old_enode); mas_free(mas, old_enode); + } } /* @@ -3659,10 +3720,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) slot++; mas->depth = 1; mas_set_height(mas); - + ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - ma_set_meta(node, maple_leaf_64, 0, slot); return slot; } @@ -3875,18 +3935,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas) end = ma_data_end(node, type, pivots, max); if (unlikely(ma_dead_node(node))) goto dead_node; - - if (pivots[offset] >= mas->index) - goto next; - do { - offset++; - } while ((offset < end) && (pivots[offset] < mas->index)); - - if (likely(offset > end)) - max = pivots[offset]; + if (pivots[offset] >= mas->index) { + max = pivots[offset]; + break; + } + } while (++offset < end); -next: slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) @@ -4164,6 +4219,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas) done: mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); if (in_rcu) { + mte_set_node_dead(mas->node); mas->node = mt_mk_node(newnode, wr_mas->type); mas_replace(mas, false); } else { @@ -4505,6 +4561,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) node = mas_mn(mas); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + mas->max = pivots[offset]; if (offset) mas->min = pivots[offset - 1] + 1; @@ -4526,6 +4585,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); offset = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + if (offset) mas->min = pivots[offset - 1] + 1; @@ -4574,6 +4636,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, struct maple_enode *enode; int level = 0; unsigned char offset; + unsigned char node_end; enum maple_type mt; void __rcu **slots; @@ -4597,7 +4660,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); - } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); + node_end = ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + + } while (unlikely(offset == node_end)); slots = ma_slots(node, mt); pivot = mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4613,6 +4680,9 @@ static inline int mas_next_node(str |
