summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/vdso/Makefile3
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S2
-rw-r--r--arch/x86/entry/vdso/vgetrandom-chacha.S178
-rw-r--r--arch/x86/entry/vdso/vgetrandom.c17
-rw-r--r--arch/x86/include/asm/vdso/getrandom.h55
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h2
-rw-r--r--arch/x86/include/asm/vvar.h16
-rw-r--r--drivers/char/random.c18
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--include/linux/mm.h7
-rw-r--r--include/linux/userfaultfd_k.h3
-rw-r--r--include/trace/events/mmflags.h7
-rw-r--r--include/uapi/linux/mman.h1
-rw-r--r--include/uapi/linux/random.h17
-rw-r--r--include/vdso/datapage.h11
-rw-r--r--include/vdso/getrandom.h46
-rw-r--r--lib/vdso/Kconfig5
-rw-r--r--lib/vdso/getrandom.c251
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/madvise.c5
-rw-r--r--mm/memory.c13
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c30
-rw-r--r--mm/rmap.c21
-rw-r--r--mm/vmscan.c9
-rw-r--r--tools/include/asm/rwonce.h0
-rw-r--r--tools/include/uapi/linux/mman.h1
-rw-r--r--tools/testing/selftests/mm/.gitignore1
-rw-r--r--tools/testing/selftests/mm/Makefile1
-rw-r--r--tools/testing/selftests/mm/droppable.c53
-rw-r--r--tools/testing/selftests/vDSO/.gitignore2
-rw-r--r--tools/testing/selftests/vDSO/Makefile18
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c43
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c288
37 files changed, 1121 insertions, 18 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 07f753f039d2..644fc7b545e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19057,7 +19057,13 @@ S: Maintained
T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
F: Documentation/devicetree/bindings/rng/microsoft,vmgenid.yaml
F: drivers/char/random.c
+F: include/linux/random.h
+F: include/uapi/linux/random.h
F: drivers/virt/vmgenid.c
+F: include/vdso/getrandom.h
+F: lib/vdso/getrandom.c
+F: arch/x86/entry/vdso/vgetrandom*
+F: arch/x86/include/asm/vdso/getrandom*
RAPIDIO SUBSYSTEM
M: Matt Porter <mporter@kernel.crashing.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbe5fac4b9dd..007bab9f2a0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -287,6 +287,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select HAVE_GENERIC_VDSO
+ select VDSO_GETRANDOM if X86_64
select HOTPLUG_PARALLEL if SMP && X86_64
select HOTPLUG_SMT if SMP
select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 215a1b202a91..c9216ac4fb1e 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -7,7 +7,7 @@
include $(srctree)/lib/vdso/Makefile
# Files to link into the vDSO:
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o
vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
@@ -73,6 +73,7 @@ CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg
CFLAGS_REMOVE_vsgx.o = -pg
+CFLAGS_REMOVE_vgetrandom.o = -pg
#
# X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index e8c60ae7a7c8..0bab5f4af6d1 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -30,6 +30,8 @@ VERSION {
#ifdef CONFIG_X86_SGX
__vdso_sgx_enter_enclave;
#endif
+ getrandom;
+ __vdso_getrandom;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..bcba5639b8ee
--- /dev/null
+++ b/arch/x86/entry/vdso/vgetrandom-chacha.S
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+.section .rodata, "a"
+.align 16
+CONSTANTS: .octa 0x6b20657479622d323320646e61707865
+.text
+
+/*
+ * Very basic SSE2 implementation of ChaCha20. Produces a given positive number
+ * of blocks of output with a nonce of 0, taking an input key and 8-byte
+ * counter. Importantly does not spill to the stack. Its arguments are:
+ *
+ * rdi: output bytes
+ * rsi: 32-byte key input
+ * rdx: 8-byte counter input/output
+ * rcx: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+.set output, %rdi
+.set key, %rsi
+.set counter, %rdx
+.set nblocks, %rcx
+.set i, %al
+/* xmm registers are *not* callee-save. */
+.set temp, %xmm0
+.set state0, %xmm1
+.set state1, %xmm2
+.set state2, %xmm3
+.set state3, %xmm4
+.set copy0, %xmm5
+.set copy1, %xmm6
+.set copy2, %xmm7
+.set copy3, %xmm8
+.set one, %xmm9
+
+ /* copy0 = "expand 32-byte k" */
+ movaps CONSTANTS(%rip),copy0
+ /* copy1,copy2 = key */
+ movups 0x00(key),copy1
+ movups 0x10(key),copy2
+ /* copy3 = counter || zero nonce */
+ movq 0x00(counter),copy3
+ /* one = 1 || 0 */
+ movq $1,%rax
+ movq %rax,one
+
+.Lblock:
+ /* state0,state1,state2,state3 = copy0,copy1,copy2,copy3 */
+ movdqa copy0,state0
+ movdqa copy1,state1
+ movdqa copy2,state2
+ movdqa copy3,state3
+
+ movb $10,i
+.Lpermute:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $16,temp
+ psrld $16,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $12,temp
+ psrld $20,state1
+ por temp,state1
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $8,temp
+ psrld $24,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $7,temp
+ psrld $25,state1
+ por temp,state1
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ pshufd $0x39,state1,state1
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ pshufd $0x4e,state2,state2
+ /* state3[0,1,2,3] = state3[3,0,1,2] */
+ pshufd $0x93,state3,state3
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $16,temp
+ psrld $16,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $12,temp
+ psrld $20,state1
+ por temp,state1
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ paddd state1,state0
+ pxor state0,state3
+ movdqa state3,temp
+ pslld $8,temp
+ psrld $24,state3
+ por temp,state3
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ paddd state3,state2
+ pxor state2,state1
+ movdqa state1,temp
+ pslld $7,temp
+ psrld $25,state1
+ por temp,state1
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ pshufd $0x93,state1,state1
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ pshufd $0x4e,state2,state2
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ pshufd $0x39,state3,state3
+
+ decb i
+ jnz .Lpermute
+
+ /* output0 = state0 + copy0 */
+ paddd copy0,state0
+ movups state0,0x00(output)
+ /* output1 = state1 + copy1 */
+ paddd copy1,state1
+ movups state1,0x10(output)
+ /* output2 = state2 + copy2 */
+ paddd copy2,state2
+ movups state2,0x20(output)
+ /* output3 = state3 + copy3 */
+ paddd copy3,state3
+ movups state3,0x30(output)
+
+ /* ++copy3.counter */
+ paddq one,copy3
+
+ /* output += 64, --nblocks */
+ addq $64,output
+ decq nblocks
+ jnz .Lblock
+
+ /* counter = copy3.counter */
+ movq copy3,0x00(counter)
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+ pxor state0,state0
+ pxor state1,state1
+ pxor state2,state2
+ pxor state3,state3
+ pxor copy1,copy1
+ pxor copy2,copy2
+ pxor temp,temp
+
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vgetrandom.c
new file mode 100644
index 000000000000..52d3c7faae2e
--- /dev/null
+++ b/arch/x86/entry/vdso/vgetrandom.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
+
+ssize_t getrandom(void *, size_t, unsigned int, void *, size_t)
+ __attribute__((weak, alias("__vdso_getrandom")));
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..b96e674cafde
--- /dev/null
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <asm/vvar.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret) :
+ "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
+ "rcx", "r11", "memory");
+
+ return ret;
+}
+
+#define __vdso_rng_data (VVAR(_vdso_rng_data))
+
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
+{
+ if (IS_ENABLED(CONFIG_TIME_NS) && __vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
+ return (void *)&__vdso_rng_data + ((void *)&__timens_vdso_data - (void *)&__vdso_data);
+ return &__vdso_rng_data;
+}
+
+/**
+ * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
+ * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output.
+ * @key: 32-byte input key.
+ * @counter: 8-byte counter, read on input and updated on return.
+ * @nblocks: Number of blocks to generate.
+ *
+ * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
+ * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
+ * leaking into forked child processes.
+ */
+extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index 93226281b450..972415a8be31 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -10,6 +10,8 @@
#include <asm/vvar.h>
DEFINE_VVAR(struct vdso_data, _vdso_data);
+DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data);
+
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 183e98e49ab9..9d9af37f7cab 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -26,6 +26,8 @@
*/
#define DECLARE_VVAR(offset, type, name) \
EMIT_VVAR(name, offset)
+#define DECLARE_VVAR_SINGLE(offset, type, name) \
+ EMIT_VVAR(name, offset)
#else
@@ -37,6 +39,10 @@ extern char __vvar_page;
extern type timens_ ## name[CS_BASES] \
__attribute__((visibility("hidden"))); \
+#define DECLARE_VVAR_SINGLE(offset, type, name) \
+ extern type vvar_ ## name \
+ __attribute__((visibility("hidden"))); \
+
#define VVAR(name) (vvar_ ## name)
#define TIMENS(name) (timens_ ## name)
@@ -44,12 +50,22 @@ extern char __vvar_page;
type name[CS_BASES] \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
+#define DEFINE_VVAR_SINGLE(type, name) \
+ type name \
+ __attribute__((section(".vvar_" #name), aligned(16))) __visible
+
#endif
/* DECLARE_VVAR(offset, type, name) */
DECLARE_VVAR(128, struct vdso_data, _vdso_data)
+#if !defined(_SINGLE_DATA)
+#define _SINGLE_DATA
+DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data)
+#endif
+
#undef DECLARE_VVAR
+#undef DECLARE_VVAR_SINGLE
#endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 2597cb43f438..b02a12436750 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
* Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
*
@@ -56,6 +56,10 @@
#include <linux/sched/isolation.h>
#include <crypto/chacha.h>
#include <crypto/blake2s.h>
+#ifdef CONFIG_VDSO_GETRANDOM
+#include <vdso/getrandom.h>
+#include <vdso/datapage.h>
+#endif
#include <asm/archrandom.h>
#include <asm/processor.h>
#include <asm/irq.h>
@@ -271,6 +275,15 @@ static void crng_reseed(struct work_struct *work)
if (next_gen == ULONG_MAX)
++next_gen;
WRITE_ONCE(base_crng.generation, next_gen);
+#ifdef CONFIG_VDSO_GETRANDOM
+ /* base_crng.generation's invalid value is ULONG_MAX, while
+ * _vdso_rng_data.generation's invalid value is 0, so add one to the
+ * former to arrive at the latter. Use smp_store_release so that this
+ * is ordered with the write above to base_crng.generation. Pairs with
+ * the smp_rmb() before the syscall in the vDSO code.
+ */
+ smp_store_release(&_vdso_rng_data.generation, next_gen + 1);
+#endif
if (!static_branch_likely(&crng_is_ready))
crng_init = CRNG_READY;
spin_unlock_irqrestore(&base_crng.lock, flags);
@@ -721,6 +734,9 @@ static void __cold _credit_init_bits(size_t bits)
if (static_key_initialized && system_unbound_wq)
queue_work(system_unbound_wq, &set_ready);
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
+#ifdef CONFIG_VDSO_GETRANDOM
+ WRITE_ONCE(_vdso_rng_data.is_ready, true);
+#endif
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
pr_notice("crng init done\n");
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 775a2e8d600c..5f171ad7b436 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -988,6 +988,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
#ifdef CONFIG_64BIT
+ [ilog2(VM_DROPPABLE)] = "dp",
[ilog2(VM_SEALED)] = "sl",
#endif
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d044e737dba..aa4fccb2a693 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -407,6 +407,13 @@ extern unsigned int kobjsize(const void *objp);
#endif
#ifdef CONFIG_64BIT
+#define VM_DROPPABLE_BIT 40
+#define VM_DROPPABLE BIT(VM_DROPPABLE_BIT)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+#ifdef CONFIG_64BIT
/* VM is sealed, in vm_flags */
#define VM_SEALED _BITUL(63)
#endif
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 05d59f74fc88..a12bcf042551 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -218,6 +218,9 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
{
vm_flags &= __VM_UFFD_FLAGS;
+ if (vm_flags & VM_DROPPABLE)
+ return false;
+
if ((vm_flags & VM_UFFD_MINOR) &&
(!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
return false;
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index e46d6e82765e..b63d211bd141 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -165,6 +165,12 @@ IF_HAVE_PG_ARCH_X(arch_3)
# define IF_HAVE_UFFD_MINOR(flag, name)
#endif
+#ifdef CONFIG_64BIT
+# define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
+#else
+# define IF_HAVE_VM_DROPPABLE(flag, name)
+#endif
+
#define __def_vmaflag_names \
{VM_READ, "read" }, \
{VM_WRITE, "write" }, \
@@ -197,6 +203,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
{VM_MIXEDMAP, "mixedmap" }, \
{VM_HUGEPAGE, "hugepage" }, \
{VM_NOHUGEPAGE, "nohugepage" }, \
+IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \
{VM_MERGEABLE, "mergeable" } \
#define show_vma_flags(flags) \
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index a246e11988d5..e89d00528f2f 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -17,6 +17,7 @@
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index e744c23582eb..1dd047ec98a1 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -20,7 +20,7 @@
/* Add to (or subtract from) the entropy count. (Superuser only.) */
#define RNDADDTOENTCNT _IOW( 'R', 0x01, int )
-/* Get the contents of the entropy pool. (Superuser only.) */
+/* Get the contents of the entropy pool. (Superuser only.) (Removed in 2.6.9-rc2.) */
#define RNDGETPOOL _IOR( 'R', 0x02, int [2] )
/*
@@ -55,4 +55,19 @@ struct rand_pool_info {
#define GRND_RANDOM 0x0002
#define GRND_INSECURE 0x0004
+/**
+ * struct vgetrandom_opaque_params - arguments for allocating memory for vgetrandom
+ *
+ * @size_per_opaque_state: Size of each state that is to be passed to vgetrandom().
+ * @mmap_prot: Value of the prot argument in mmap(2).
+ * @mmap_flags: Value of the flags argument in mmap(2).
+ * @reserved: Reserved for future use.
+ */
+struct vgetrandom_opaque_params {
+ __u32 size_of_opaque_state;
+ __u32 mmap_prot;
+ __u32 mmap_flags;
+ __u32 reserved[13];
+};
+
#endif /* _UAPI_LINUX_RANDOM_H */
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 7647e0946f50..b85f24cac3f5 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -117,6 +117,16 @@ struct vdso_data {
struct arch_vdso_data arch_data;
};
+/**
+ * struct vdso_rng_data - vdso RNG state information
+ * @generation: counter representing the number of RNG reseeds
+ * @is_ready: boolean signaling whether the RNG is initialized
+ */
+struct vdso_rng_data {
+ u64 generation;
+ u8 is_ready;
+};
+
/*
* We use the hidden visibility to prevent the compiler from generating a GOT
* relocation. Not only is going through a GOT useless (the entry couldn't and
@@ -128,6 +138,7 @@ struct vdso_data {
*/
extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
+extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden")));
/**
* union vdso_data_store - Generic vDSO data page
diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h
new file mode 100644
index 000000000000..a8b7c14b0ae0
--- /dev/null
+++ b/include/vdso/getrandom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _VDSO_GETRANDOM_H
+#define _VDSO_GETRANDOM_H
+
+#include <linux/types.h>
+
+#define CHACHA_KEY_SIZE 32
+#define CHACHA_BLOCK_SIZE 64
+
+/**
+ * struct vgetrandom_state - State used by vDSO getrandom().
+ *
+ * @batch: One and a half ChaCha20 blocks of buffered RNG output.
+ *
+ * @key: Key to be used for generating next batch.
+ *
+ * @batch_key: Union of the prior two members, which is exactly two full
+ * ChaCha20 blocks in size, so that @batch and @key can be filled
+ * together.
+ *
+ * @generation: Snapshot of @rng_info->generation in the vDSO data page at
+ * the time @key was generated.
+ *
+ * @pos: Offset into @batch of the next available random byte.
+ *
+ * @in_use: Reentrancy guard for reusing a state within the same thread
+ * due to signal handlers.
+ */
+struct vgetrandom_state {
+ union {
+ struct {
+ u8 batch[CHACHA_BLOCK_SIZE * 3 / 2];
+ u32 key[CHACHA_KEY_SIZE / sizeof(u32)];
+ };
+ u8 batch_key[CHACHA_BLOCK_SIZE * 2];
+ };
+ u64 generation;
+ u8 pos;
+ bool in_use;
+};
+
+#endif /* _VDSO_GETRANDOM_H */
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index c46c2300517c..82fe827af542 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -38,3 +38,8 @@ config GENERIC_VDSO_OVERFLOW_PROTECT
in the hotpath.
endif
+
+config VDSO_GETRANDOM
+ bool
+ help
+ Selected by architectures that support vDSO getrandom().
diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c
new file mode 100644
index 000000000000..b230f0b10832
--- /dev/null
+++ b/lib/vdso/getrandom.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <vdso/datapage.h>
+#include <vdso/getrandom.h>
+#include <asm/vdso/getrandom.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/unaligned.h>
+#include <uapi/linux/mman.h>
+
+#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do { \
+ while (len >= sizeof(type)) { \
+ __put_unaligned_t(type, __get_unaligned_t(type, src), dst); \
+ __put_unaligned_t(type, 0, src); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ } \
+} while (0)
+
+static void memcpy_and_zero_src(void *dst, void *src, size_t len)
+{
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ if (IS_ENABLED(CONFIG_64BIT))
+ MEMCPY_AND_ZERO_SRC(u64, dst, src, len);
+ MEMCPY_AND_ZERO_SRC(u32, dst, src, len);
+ MEMCPY_AND_ZERO_SRC(u16, dst, src, len);
+ }
+ MEMCPY_AND_ZERO_SRC(u8, dst, src, len);
+}
+
+/**
+ * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall.
+ * @rng_info: Describes state of kernel RNG, memory shared with kernel.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * @opaque_state: Pointer to an opaque state area.
+ * @opaque_len: Length of opaque state area.
+ *
+ * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's
+ * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same
+ * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always
+ * calls into the syscall.
+ *
+ * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated
+ * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0,
+ * this function should not be used.
+ *
+ * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields
+ * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external
+ * locking is used, one state must be allocated per thread, as it is not safe to call this function
+ * concurrently with the same @opaque_state. However, it is safe to call this using the same
+ * @opaque_state that is shared between main code and signal handling code, within the same thread.
+ *
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t
+__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len,
+ unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len);
+ struct vgetrandom_state *state = opaque_state;
+ size_t batch_len, nblocks, orig_len = len;
+ bool in_use, have_retried = false;
+ unsigned long current_generation;
+ void *orig_buffer = buffer;
+ u32 counter[2] = { 0 };
+
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) {
+ *(struct vgetrandom_opaque_params *)opaque_state = (struct vgetrandom_opaque_params) {
+ .size_of_opaque_state = sizeof(*state),
+ .mmap_prot = PROT_READ | PROT_WRITE,
+ .mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS
+ };
+ return 0;
+ }
+
+ /* The state must not straddle a page, since pages can be zeroed at any time. */
+ if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE))
+ return -EFAULT;
+
+ /* If the caller passes the wrong size, which might happen due to CRIU, fallback. */
+ if (unlikely(opaque_len != sizeof(*state)))
+ goto fallback_syscall;
+
+ /*
+ * If t