summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-03 10:01:57 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-03 10:01:57 -0800
commit0c272a1d33965627653f4fafd6eab55d0d50f21f (patch)
tree326686bcebc172580079739d7def13b6739bf60f
parent66a87fff1a87c260452f5a57123891ca5258c449 (diff)
parentac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 (diff)
downloadlinux-0c272a1d33965627653f4fafd6eab55d0d50f21f.tar.gz
linux-0c272a1d33965627653f4fafd6eab55d0d50f21f.tar.bz2
linux-0c272a1d33965627653f4fafd6eab55d0d50f21f.zip
Merge tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton: "25 hotfixes, mainly for MM. 13 are cc:stable" * tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits) mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() Kconfig.debug: fix the help description in SCHED_DEBUG mm/swapfile: add cond_resched() in get_swap_pages() mm: use stack_depot_early_init for kmemleak Squashfs: fix handling and sanity checking of xattr_ids count sh: define RUNTIME_DISCARD_EXIT highmem: round down the address passed to kunmap_flush_on_unmap() migrate: hugetlb: check for hugetlb shared PMD in node migration mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups Revert "mm: kmemleak: alloc gray object for reserved region with direct map" freevxfs: Kconfig: fix spelling maple_tree: should get pivots boundary by type .mailmap: update e-mail address for Eugen Hristev mm, mremap: fix mremap() expanding for vma's with vm_ops->close() squashfs: harden sanity check in squashfs_read_xattr_id_table ia64: fix build error due to switch case label appearing next to declaration mm: multi-gen LRU: fix crash during cgroup migration Revert "mm: add nodes= arg to memory.reclaim" zsmalloc: fix a race with deferred_handles storing ...
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst15
-rw-r--r--arch/ia64/kernel/sys_ia64.c7
-rw-r--r--arch/sh/kernel/vmlinux.lds.S1
-rw-r--r--drivers/of/fdt.c6
-rw-r--r--fs/freevxfs/Kconfig2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/squashfs/squashfs_fs.h2
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c4
-rw-r--r--include/linux/highmem-internal.h4
-rw-r--r--include/linux/hugetlb.h13
-rw-r--r--include/linux/memcontrol.h5
-rw-r--r--include/linux/swap.h3
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--lib/maple_tree.c22
-rw-r--r--lib/test_maple_tree.c89
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/khugepaged.c22
-rw-r--r--mm/kmemleak.c5
-rw-r--r--mm/memcontrol.c67
-rw-r--r--mm/memory.c14
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/mprotect.c8
-rw-r--r--mm/mremap.c25
-rw-r--r--mm/swapfile.c1
-rw-r--r--mm/vmscan.c9
-rw-r--r--mm/zsmalloc.c237
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/filesystems/fat/run_fat_tests.sh0
-rw-r--r--tools/testing/selftests/vm/hugetlb-madvise.c1
31 files changed, 430 insertions, 152 deletions
diff --git a/.mailmap b/.mailmap
index ac3decd2c756..318e63f338b1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -130,6 +130,7 @@ Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
Ed L. Cashin <ecashin@coraid.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
+Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Felipe W Damasio <felipewd@terra.com.br>
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index c8ae7c897f14..74cec76be9f2 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1245,13 +1245,17 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.
- This file accepts a string which contains the number of bytes to
- reclaim.
+ This file accepts a single key, the number of bytes to reclaim.
+ No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
+ The interface can be later extended with nested keys to
+ configure the reclaim behavior. For example, specify the
+ type of memory to reclaim from (anon, file, ..).
+
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
@@ -1263,13 +1267,6 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
- This file also allows the user to specify the nodes to reclaim from,
- via the 'nodes=' key, for example::
-
- echo "1G nodes=0,1" > memory.reclaim
-
- The above instructs the kernel to reclaim memory from nodes 0,1.
-
memory.peak
A read-only single value file which exists on non-root
cgroups.
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index f6a502e8f02c..6e948d015332 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -170,6 +170,9 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u
asmlinkage long
ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp)
{
+ struct timespec64 rtn_tp;
+ s64 tick_ns;
+
/*
* ia64's clock_gettime() syscall is implemented as a vdso call
* fsys_clock_gettime(). Currently it handles only
@@ -185,8 +188,8 @@ ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *
switch (which_clock) {
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
- s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
- struct timespec64 rtn_tp = ns_to_timespec64(tick_ns);
+ tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
+ rtn_tp = ns_to_timespec64(tick_ns);
return put_timespec64(&rtn_tp, tp);
}
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 3161b9ccd2a5..b6276a3521d7 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -4,6 +4,7 @@
* Written by Niibe Yutaka and Paul Mundt
*/
OUTPUT_ARCH(sh)
+#define RUNTIME_DISCARD_EXIT
#include <asm/thread_info.h>
#include <asm/cache.h>
#include <asm/vmlinux.lds.h>
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index f08b25195ae7..d1a68b6d03b3 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -26,7 +26,6 @@
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
-#include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
@@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
- early_init_dt_reserve_memory(base, size, nomap) == 0) {
+ early_init_dt_reserve_memory(base, size, nomap) == 0)
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
- if (!nomap)
- kmemleak_alloc_phys(base, size, 0);
- }
else
pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
diff --git a/fs/freevxfs/Kconfig b/fs/freevxfs/Kconfig
index c05c71d57291..0e2fc08f7de4 100644
--- a/fs/freevxfs/Kconfig
+++ b/fs/freevxfs/Kconfig
@@ -8,7 +8,7 @@ config VXFS_FS
of SCO UnixWare (and possibly others) and optionally available
for Sunsoft Solaris, HP-UX and many other operating systems. However
these particular OS implementations of vxfs may differ in on-disk
- data endianess and/or superblock offset. The vxfs module has been
+ data endianness and/or superblock offset. The vxfs module has been
tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.)
Currently only readonly access is supported and VxFX versions
2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e35a0398db63..af1c49ae11b1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
page = pfn_swap_entry_to_page(swpent);
}
if (page) {
- int mapcount = page_mapcount(page);
-
- if (mapcount >= 2)
+ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b3fdc8212c5f..95f8e8901768 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
+#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 659082e9e51d..72f6f4b37863 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -63,7 +63,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
- int xattr_ids;
+ unsigned int xattr_ids;
unsigned int ids;
bool panic_on_errors;
const struct squashfs_decompressor_thread_ops *thread_ops;
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index d8a270d3ac4c..f1a463d8bfa0 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -10,12 +10,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
- u64 *, int *);
+ u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
- u64 start, u64 *xattr_table_start, int *xattr_ids)
+ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 087cab8c78f4..b88d19e9581e 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
- u64 *xattr_table_start, int *xattr_ids)
+ u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
@@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
/* Sanity check values */
/* there is always at least one xattr id */
- if (*xattr_ids == 0)
+ if (*xattr_ids <= 0)
return ERR_PTR(-EINVAL);
len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 034b1106d022..e098f38422af 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
static inline void __kunmap_local(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
}
@@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
static inline void __kunmap_atomic(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
pagefault_enable();
if (IS_ENABLED(CONFIG_PREEMPT_RT))
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 551834cd5299..db194e2ba69f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
+#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
@@ -1187,6 +1188,18 @@ static inline __init void hugetlb_cma_reserve(int order)
}
#endif
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return page_count(virt_to_page(pte)) > 1;
+}
+#else
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return false;
+}
+#endif
+
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d3c8203cab6c..85dc9b88ea37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
+ struct mem_cgroup *memcg;
+
if (mem_cgroup_disabled())
return;
- if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ memcg = folio_memcg(folio);
+ if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2787b84eaf12..0ceed49516ad 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options,
- nodemask_t *nodemask);
+ unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 61a9425a311f..02ee440f7be3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -754,6 +754,7 @@ config DEBUG_KMEMLEAK
select KALLSYMS
select CRC32
select STACKDEPOT
+ select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF
help
Say Y here if you want to enable the memory leak
detector. The memory allocation/freeing is traced in a way
@@ -1207,7 +1208,7 @@ config SCHED_DEBUG
depends on DEBUG_KERNEL && PROC_FS
default y
help
- If you say Y here, the /proc/sched_debug file will be provided
+ If you say Y here, the /sys/kernel/debug/sched file will be provided
that can help debug the scheduler. The runtime overhead of this
option is minimal.
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 26e2045d3cda..5a976393c9ae 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -670,12 +670,13 @@ static inline unsigned long mte_pivot(const struct maple_enode *mn,
unsigned char piv)
{
struct maple_node *node = mte_to_node(mn);
+ enum maple_type type = mte_node_type(mn);
- if (piv >= mt_pivots[piv]) {
+ if (piv >= mt_pivots[type]) {
WARN_ON(1);
return 0;
}
- switch (mte_node_type(mn)) {
+ switch (type) {
case maple_arange_64:
return node->ma64.pivot[piv];
case maple_range_64:
@@ -4887,7 +4888,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
unsigned long *pivots, *gaps;
void __rcu **slots;
unsigned long gap = 0;
- unsigned long max, min, index;
+ unsigned long max, min;
unsigned char offset;
if (unlikely(mas_is_err(mas)))
@@ -4909,8 +4910,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, --offset);
max = mas_safe_pivot(mas, pivots, offset, type);
- index = mas->index;
- while (index <= max) {
+ while (mas->index <= max) {
gap = 0;
if (gaps)
gap = gaps[offset];
@@ -4941,10 +4941,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, offset);
}
- if (unlikely(index > max)) {
- mas_set_err(mas, -EBUSY);
- return false;
- }
+ if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
+ goto no_space;
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
@@ -4961,9 +4959,11 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
return false;
ascend:
- if (mte_is_root(mas->node))
- mas_set_err(mas, -EBUSY);
+ if (!mte_is_root(mas->node))
+ return false;
+no_space:
+ mas_set_err(mas, -EBUSY);
return false;
}
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 497fc93ccf9e..ec847bf4dcb4 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2517,6 +2517,91 @@ static noinline void check_bnode_min_spanning(struct maple_tree *mt)
mt_set_non_kernel(0);
}
+static noinline void check_empty_area_window(struct maple_tree *mt)
+{
+ unsigned long i, nr_entries = 20;
+ MA_STATE(mas, mt, 0, 0);
+
+ for (i = 1; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 9,
+ xa_mk_value(i), GFP_KERNEL);
+
+ /* Create another hole besides the one at 0 */
+ mtree_store_range(mt, 160, 169, NULL, GFP_KERNEL);
+
+ /* Check lower bounds that don't fit */
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 10) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 6, 90, 5) != -EBUSY);
+
+ /* Check lower bound that does fit */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 5) != 0);
+ MT_BUG_ON(mt, mas.index != 5);
+ MT_BUG_ON(mt, mas.last != 9);
+ rcu_read_unlock();
+
+ /* Check one gap that doesn't fit and one that does */
+ rcu_read_lock();
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 217, 9) != 0);
+ MT_BUG_ON(mt, mas.index != 161);
+ MT_BUG_ON(mt, mas.last != 169);
+
+ /* Check one gap that does fit above the min */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 3) != 0);
+ MT_BUG_ON(mt, mas.index != 216);
+ MT_BUG_ON(mt, mas.last != 218);
+
+ /* Check size that doesn't fit any gap */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 16) != -EBUSY);
+
+ /*
+ * Check size that doesn't fit the lower end of the window but
+ * does fit the gap
+ */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 167, 200, 4) != -EBUSY);
+
+ /*
+ * Check size that doesn't fit the upper end of the window but
+ * does fit the gap
+ */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 162, 4) != -EBUSY);
+
+ /* Check mas_empty_area forward */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 9) != 0);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 8);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 4) != 0);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 3);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 11) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 5, 100, 6) != -EBUSY);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 0, 8, 10) != -EBUSY);
+
+ mas_reset(&mas);
+ mas_empty_area(&mas, 100, 165, 3);
+
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area(&mas, 100, 163, 6) != -EBUSY);
+ rcu_read_unlock();
+}
+
static DEFINE_MTREE(tree);
static int maple_tree_seed(void)
{
@@ -2765,6 +2850,10 @@ static int maple_tree_seed(void)
check_bnode_min_spanning(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_empty_area_window(&tree);
+ mtree_destroy(&tree);
+
#if defined(BENCH)
skip:
#endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7fcdb98c9e68..bdbfeb6fb393 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5051,6 +5051,9 @@ again:
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
+ /* No swap on hugetlb */
+ WARN_ON_ONCE(
+ is_swapin_error_entry(pte_to_swp_entry(entry)));
/*
* We copy the pte marker only if the dst vma has
* uffd-wp enabled.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..90acfea40c13 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_SUCCEED;
}
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read.
+ */
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
@@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
+ if (!pmd_present(pmde))
+ return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
+ if (pmd_devmap(pmde))
+ return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
@@ -1642,7 +1650,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1671,6 +1679,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
/*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
+ /*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
* markers installed. Skip it only, so the rest mm/vma
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 92f670edbf51..55dc8b8b0616 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -2070,8 +2070,10 @@ static int __init kmemleak_boot_config(char *str)
return -EINVAL;
if (strcmp(str, "off") == 0)
kmemleak_disable();
- else if (strcmp(str, "on") == 0)
+ else if (strcmp(str, "on") == 0) {
kmemleak_skip_disable = 1;
+ stack_depot_want_early_init();
+ }
else
return -EINVAL;
return 0;
@@ -2093,7 +2095,6 @@ void __init kmemleak_init(void)
if (kmemleak_error)
return;
- stack_depot_init();
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab457f0394ab..73afff8062f9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,7 +63,6 @@
#include <linux/resume_user_mode.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
-#include <linux/parser.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -2393,8 +2392,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask,
- MEMCG_RECLAIM_MAY_SWAP,
- NULL);
+ MEMCG_RECLAIM_MAY_SWAP);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2685,8 +2683,7 @@ retry:
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, reclaim_options,
- NULL);
+ gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3506,8 +3503,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
- NULL)) {
+ memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
ret = -EBUSY;
break;
}
@@ -3618,8 +3614,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- MEMCG_RECLAIM_MAY_SWAP,
- NULL))
+ MEMCG_RECLAIM_MAY_SWAP))
nr_retries--;
}
@@ -6429,8 +6424,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
- NULL);
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
if (!reclaimed && !nr_retries--)
break;
@@ -6479,8 +6473,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
- NULL))
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
nr_reclaims--;
continue;
}
@@ -6603,54 +6596,21 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
-enum {
- MEMORY_RECLAIM_NODES = 0,
- MEMORY_RECLAIM_NULL,
-};
-
-static const match_table_t if_tokens = {
- { MEMORY_RECLAIM_NODES, "nodes=%s" },
- { MEMORY_RECLAIM_NULL, NULL },
-};
-
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
- unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
- MEMCG_RECLAIM_PROACTIVE;
- char *old_buf, *start;
- substring_t args[MAX_OPT_ARGS];
- int token;
- char value[256];
- nodemask_t nodemask = NODE_MASK_ALL;
-
- buf = strstrip(buf);
-
- old_buf = buf;
- nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
- if (buf == old_buf)
- return -EINVAL;
+ unsigned int reclaim_options;
+ int err;
buf = strstrip(buf);
+ err = page_counter_memparse(buf, "", &nr_to_reclaim);
+ if (err)
+ return err;
- while ((start = strsep(&buf, " ")) != NULL) {
- if (!strlen(start))
- continue;
- token = match_token(start, if_tokens, args);
- match_strlcpy(value, args, sizeof(value));
- switch (token) {
- case MEMORY_RECLAIM_NODES:
- if (nodelist_parse(value, nodemask) < 0)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
- }
-
+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;
@@ -6667,8 +6627,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
- GFP_KERNEL, reclaim_options,
- &nodemask);
+ GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
diff --git a/mm/memory.c b/mm/memory.c
index aad226daf41b..3e836fecd035 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -828,12 +828,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return -EBUSY;
return -ENOENT;
} else if (is_pte_marker_entry(entry)) {
- /*
- * We're copying the pgtabl