summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst7
-rw-r--r--Documentation/dev-tools/kasan.rst63
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/arc/include/asm/pgtable.h1
-rw-r--r--arch/arc/mm/fault.c10
-rw-r--r--arch/arc/mm/highmem.c4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-4k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h3
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/mm/kasan_init_64.c61
-rw-r--r--drivers/base/memory.c40
-rw-r--r--drivers/hv/hv_balloon.c4
-rw-r--r--drivers/xen/balloon.c1
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/direct-io.c21
-rw-r--r--fs/hugetlbfs/inode.c63
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/userfaultfd.c21
-rw-r--r--include/asm-generic/4level-fixup.h1
-rw-r--r--include/asm-generic/5level-fixup.h1
-rw-r--r--include/asm-generic/pgtable-nop4d.h2
-rw-r--r--include/asm-generic/pgtable-nopmd.h2
-rw-r--r--include/asm-generic/pgtable-nopud.h2
-rw-r--r--include/asm-generic/pgtable.h51
-rw-r--r--include/asm-generic/tlb.h4
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/hugetlb.h140
-rw-r--r--include/linux/kasan.h31
-rw-r--r--include/linux/memblock.h3
-rw-r--r--include/linux/memcontrol.h49
-rw-r--r--include/linux/memory_hotplug.h11
-rw-r--r--include/linux/mm.h34
-rw-r--r--include/linux/mmzone.h34
-rw-r--r--include/linux/moduleloader.h2
-rw-r--r--include/linux/page-isolation.h4
-rw-r--r--include/linux/slab.h20
-rw-r--r--include/linux/string.h2
-rw-r--r--include/linux/swap.h2
-rw-r--r--include/linux/vmalloc.h12
-rw-r--r--include/trace/events/kmem.h47
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/sysctl.c2
-rw-r--r--lib/Kconfig.kasan16
-rw-r--r--lib/test_kasan.c26
-rw-r--r--lib/vsprintf.c40
-rw-r--r--mm/Kconfig40
-rw-r--r--mm/cma.c6
-rw-r--r--mm/cma_debug.c10
-rw-r--r--mm/filemap.c54
-rw-r--r--mm/gup.c40
-rw-r--r--mm/huge_memory.c2
-rw-r--r--mm/hugetlb.c288
-rw-r--r--mm/hwpoison-inject.c4
-rw-r--r--mm/internal.h27
-rw-r--r--mm/kasan/common.c233
-rw-r--r--mm/kasan/generic_report.c3
-rw-r--r--mm/kasan/kasan.h1
-rw-r--r--mm/khugepaged.c18
-rw-r--r--mm/madvise.c14
-rw-r--r--mm/memblock.c111
-rw-r--r--mm/memcontrol.c167
-rw-r--r--mm/memory-failure.c61
-rw-r--r--mm/memory.c52
-rw-r--r--mm/memory_hotplug.c86
-rw-r--r--mm/mempolicy.c47
-rw-r--r--mm/migrate.c16
-rw-r--r--mm/mmap.c63
-rw-r--r--mm/mprotect.c8
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nommu.c10
-rw-r--r--mm/page_alloc.c137
-rw-r--r--mm/page_io.c15
-rw-r--r--mm/page_isolation.c12
-rw-r--r--mm/pgtable-generic.c9
-rw-r--r--mm/rmap.c65
-rw-r--r--mm/shmem.c29
-rw-r--r--mm/slab.c7
-rw-r--r--mm/slab.h6
-rw-r--r--mm/slab_common.c99
-rw-r--r--mm/slub.c36
-rw-r--r--mm/sparse.c18
-rw-r--r--mm/swap.c29
-rw-r--r--mm/swapfile.c7
-rw-r--r--mm/userfaultfd.c73
-rw-r--r--mm/util.c22
-rw-r--r--mm/vmalloc.c192
-rw-r--r--mm/vmscan.c662
-rw-r--r--mm/workingset.c69
-rw-r--r--mm/z3fold.c375
-rw-r--r--scripts/spelling.txt28
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c36
-rw-r--r--tools/testing/selftests/vm/config1
95 files changed, 2661 insertions, 1506 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 007ba86aef78..6d13f2de6d69 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1288,7 +1288,12 @@ PAGE_SIZE multiple when read back.
inactive_anon, active_anon, inactive_file, active_file, unevictable
Amount of memory, swap-backed and filesystem-backed,
on the internal memory management lists used by the
- page reclaim algorithm
+ page reclaim algorithm.
+
+ As these represent internal list state (eg. shmem pages are on anon
+ memory management lists), inactive_foo + active_foo may not be equal to
+ the value for the foo counter, since the foo counter is type-based, not
+ list-based.
slab_reclaimable
Part of "slab" that might be reclaimed, such as
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index 525296121d89..e4d66e7c50de 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -218,3 +218,66 @@ brk handler is used to print bug reports.
A potential expansion of this mode is a hardware tag-based mode, which would
use hardware memory tagging support instead of compiler instrumentation and
manual shadow memory manipulation.
+
+What memory accesses are sanitised by KASAN?
+--------------------------------------------
+
+The kernel maps memory in a number of different parts of the address
+space. This poses something of a problem for KASAN, which requires
+that all addresses accessed by instrumented code have a valid shadow
+region.
+
+The range of kernel virtual addresses is large: there is not enough
+real memory to support a real shadow region for every address that
+could be accessed by the kernel.
+
+By default
+~~~~~~~~~~
+
+By default, architectures only map real memory over the shadow region
+for the linear mapping (and potentially other small areas). For all
+other areas - such as vmalloc and vmemmap space - a single read-only
+page is mapped over the shadow area. This read-only shadow page
+declares all memory accesses as permitted.
+
+This presents a problem for modules: they do not live in the linear
+mapping, but in a dedicated module space. By hooking in to the module
+allocator, KASAN can temporarily map real shadow memory to cover
+them. This allows detection of invalid accesses to module globals, for
+example.
+
+This also creates an incompatibility with ``VMAP_STACK``: if the stack
+lives in vmalloc space, it will be shadowed by the read-only page, and
+the kernel will fault when trying to set up the shadow data for stack
+variables.
+
+CONFIG_KASAN_VMALLOC
+~~~~~~~~~~~~~~~~~~~~
+
+With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
+cost of greater memory usage. Currently this is only supported on x86.
+
+This works by hooking into vmalloc and vmap, and dynamically
+allocating real shadow memory to back the mappings.
+
+Most mappings in vmalloc space are small, requiring less than a full
+page of shadow space. Allocating a full shadow page per mapping would
+therefore be wasteful. Furthermore, to ensure that different mappings
+use different shadow pages, mappings would have to be aligned to
+``KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE``.
+
+Instead, we share backing space across multiple mappings. We allocate
+a backing page when a mapping in vmalloc space uses a particular page
+of the shadow region. This page can be shared by other vmalloc
+mappings later on.
+
+We hook in to the vmap infrastructure to lazily clean up unused shadow
+memory.
+
+To avoid the difficulties around swapping mappings around, we expect
+that the part of the shadow region that covers the vmalloc space will
+not be covered by the early shadow page, but will be left
+unmapped. This will require changes in arch-specific code.
+
+This allows ``VMAP_STACK`` support on x86, and can simplify support of
+architectures that do not have a fixed module region.
diff --git a/arch/Kconfig b/arch/Kconfig
index da75bab22eee..7b861fe3f900 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -836,16 +836,17 @@ config HAVE_ARCH_VMAP_STACK
config VMAP_STACK
default y
bool "Use a virtually-mapped stack"
- depends on HAVE_ARCH_VMAP_STACK && !KASAN
+ depends on HAVE_ARCH_VMAP_STACK
+ depends on !KASAN || KASAN_VMALLOC
---help---
Enable this if you want the use virtually-mapped kernel stacks
with guard pages. This causes kernel stack overflows to be
caught immediately rather than causing difficult-to-diagnose
corruption.
- This is presently incompatible with KASAN because KASAN expects
- the stack to map directly to the KASAN shadow map using a formula
- that is incorrect if the stack is in vmalloc space.
+ To use this with KASAN, the architecture must support backing
+ virtual mappings with real shadow memory, and KASAN_VMALLOC must
+ be enabled.
config ARCH_OPTIONAL_KERNEL_RWX
def_bool n
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 7addd0301c51..b917b596f7fb 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -33,7 +33,6 @@
#define _ASM_ARC_PGTABLE_H
#include <linux/bits.h>
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#include <asm/page.h>
#include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 3861543b66a0..fb86bc3e9b35 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -30,6 +30,7 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
* with the 'reference' page table.
*/
pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
@@ -39,8 +40,13 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
if (!pgd_present(*pgd_k))
goto bad_area;
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d_k))
+ goto bad_area;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
goto bad_area;
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index a4856bfaedf3..fc8849e4f72e 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -111,12 +111,14 @@ EXPORT_SYMBOL(__kunmap_atomic);
static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
{
pgd_t *pgd_k;
+ p4d_t *p4d_k;
pud_t *pud_k;
pmd_t *pmd_k;
pte_t *pte_k;
pgd_k = pgd_offset_k(kvaddr);
- pud_k = pud_offset(pgd_k, kvaddr);
+ p4d_k = p4d_offset(pgd_k, kvaddr);
+ pud_k = pud_offset(p4d_k, kvaddr);
pmd_k = pmd_offset(pud_k, kvaddr);
pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index a069dfcac9a9..4e697bc2f4cd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -70,9 +70,6 @@ static inline int get_hugepd_cache_index(int index)
/* should not reach */
}
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
#endif /* CONFIG_HUGETLB_PAGE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index e3d4dd4ae2fa..34d1018896b3 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -59,9 +59,6 @@ static inline int get_hugepd_cache_index(int index)
BUG();
}
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
#endif /* CONFIG_HUGETLB_PAGE */
static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 6ee17d09649c..974109bb85db 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -13,6 +13,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0cb1756223be..5e8949953660 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -134,6 +134,7 @@ config X86
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64
+ select HAVE_ARCH_KASAN_VMALLOC if X86_64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 296da58f3013..cf5bc37c90ac 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -245,6 +245,49 @@ static void __init kasan_map_early_shadow(pgd_t *pgd)
} while (pgd++, addr = next, addr != end);
}
+static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ p4d_t *p4d;
+ unsigned long next;
+ void *p;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (p4d_none(*p4d)) {
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
+ p4d_populate(&init_mm, p4d, p);
+ }
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void __init kasan_shallow_populate_pgds(void *start, void *end)
+{
+ unsigned long addr, next;
+ pgd_t *pgd;
+ void *p;
+
+ addr = (unsigned long)start;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, (unsigned long)end);
+
+ if (pgd_none(*pgd)) {
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
+ pgd_populate(&init_mm, pgd, p);
+ }
+
+ /*
+ * we need to populate p4ds to be synced when running in
+ * four level mode - see sync_global_pgds_l4()
+ */
+ kasan_shallow_populate_p4ds(pgd, addr, next);
+ } while (pgd++, addr = next, addr != (unsigned long)end);
+}
+
#ifdef CONFIG_KASAN_INLINE
static int kasan_die_handler(struct notifier_block *self,
unsigned long val,
@@ -354,6 +397,24 @@ void __init kasan_init(void)
kasan_populate_early_shadow(
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+ kasan_mem_to_shadow((void *)VMALLOC_START));
+
+ /*
+ * If we're in full vmalloc mode, don't back vmalloc space with early
+ * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
+ * the global table and we can populate the lower levels on demand.
+ */
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_shallow_populate_pgds(
+ kasan_mem_to_shadow((void *)VMALLOC_START),
+ kasan_mem_to_shadow((void *)VMALLOC_END));
+ else
+ kasan_populate_early_shadow(
+ kasan_mem_to_shadow((void *)VMALLOC_START),
+ kasan_mem_to_shadow((void *)VMALLOC_END));
+
+ kasan_populate_early_shadow(
+ kasan_mem_to_shadow((void *)VMALLOC_END + 1),
shadow_cpu_entry_begin);
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 84c4e1f72cbd..799b43191dea 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -19,15 +19,12 @@
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
-#include <linux/mutex.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/uaccess.h>
-static DEFINE_MUTEX(mem_sysfs_mutex);
-
#define MEMORY_CLASS_NAME "memory"
#define to_memory_block(dev) container_of(dev, struct memory_block, dev)
@@ -538,12 +535,7 @@ static ssize_t soft_offline_page_store(struct device *dev,
if (kstrtoull(buf, 0, &pfn) < 0)
return -EINVAL;
pfn >>= PAGE_SHIFT;
- if (!pfn_valid(pfn))
- return -ENXIO;
- /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
- if (!pfn_to_online_page(pfn))
- return -EIO;
- ret = soft_offline_page(pfn_to_page(pfn), 0);
+ ret = soft_offline_page(pfn, 0);
return ret == 0 ? count : ret;
}
@@ -705,6 +697,8 @@ static void unregister_memory(struct memory_block *memory)
* Create memory block devices for the given memory area. Start and size
* have to be aligned to memory block granularity. Memory block devices
* will be initial