summaryrefslogtreecommitdiff
path: root/mm/vma_exec.c
diff options
context:
space:
mode:
authorLorenzo Stoakes <lorenzo.stoakes@oracle.com>2025-04-28 16:28:15 +0100
committerAndrew Morton <akpm@linux-foundation.org>2025-05-12 23:50:48 -0700
commitdd7a6246f4fd6e8a6dcb08f1f51c899f3e0d3b83 (patch)
tree0fc55b35d143fde8744251c100653f548e3fd1a8 /mm/vma_exec.c
parent6c36ac1e124f1be97cf0485a220865fce5a2020d (diff)
downloadlinux-dd7a6246f4fd6e8a6dcb08f1f51c899f3e0d3b83.tar.gz
linux-dd7a6246f4fd6e8a6dcb08f1f51c899f3e0d3b83.tar.bz2
linux-dd7a6246f4fd6e8a6dcb08f1f51c899f3e0d3b83.zip
mm: abstract initial stack setup to mm subsystem
There are peculiarities within the kernel where what is very clearly mm code is performed elsewhere arbitrarily. This violates separation of concerns and makes it harder to refactor code to make changes to how fundamental initialisation and operation of mm logic is performed. One such case is the creation of the VMA containing the initial stack upon execve()'ing a new process. This is currently performed in __bprm_mm_init() in fs/exec.c. Abstract this operation to create_init_stack_vma(). This allows us to limit use of vma allocation and free code to fork and mm only. We previously did the same for the step at which we relocate the initial stack VMA downwards via relocate_vma_down(), now we move the initial VMA establishment too. Take the opportunity to also move insert_vm_struct() to mm/vma.c as it's no longer needed anywhere outside of mm. Link: https://lkml.kernel.org/r/118c950ef7a8dd19ab20a23a68c3603751acd30e.1745853549.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Reviewed-by: Pedro Falcato <pfalcato@suse.de> Reviewed-by: Kees Cook <kees@kernel.org> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christian Brauner <brauner@kernel.org> Cc: Jan Kara <jack@suse.cz> Cc: Jann Horn <jannh@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/vma_exec.c')
-rw-r--r--mm/vma_exec.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index 6736ae37f748..2dffb02ed6a2 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -90,3 +90,72 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
/* Shrink the vma to just the new range */
return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}
+
+/*
+ * Establish the stack VMA in an execve'd process, located temporarily at the
+ * maximum stack address provided by the architecture.
+ *
+ * We later relocate this downwards in relocate_vma_down().
+ *
+ * This function is almost certainly NOT what you want for anything other than
+ * early executable initialisation.
+ *
+ * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
+ * maximum addressable location in the stack (that is capable of storing a
+ * system word of data).
+ */
+int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
+ unsigned long *top_mem_p)
+{
+ int err;
+ struct vm_area_struct *vma = vm_area_alloc(mm);
+
+ if (!vma)
+ return -ENOMEM;
+
+ vma_set_anonymous(vma);
+
+ if (mmap_write_lock_killable(mm)) {
+ err = -EINTR;
+ goto err_free;
+ }
+
+ /*
+ * Need to be called with mmap write lock
+ * held, to avoid race with ksmd.
+ */
+ err = ksm_execve(mm);
+ if (err)
+ goto err_ksm;
+
+ /*
+ * Place the stack at the largest stack address the architecture
+ * supports. Later, we'll move this to an appropriate place. We don't
+ * use STACK_TOP because that can depend on attributes which aren't
+ * configured yet.
+ */
+ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+ vma->vm_end = STACK_TOP_MAX;
+ vma->vm_start = vma->vm_end - PAGE_SIZE;
+ vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+
+ err = insert_vm_struct(mm, vma);
+ if (err)
+ goto err;
+
+ mm->stack_vm = mm->total_vm = 1;
+ mmap_write_unlock(mm);
+ *vmap = vma;
+ *top_mem_p = vma->vm_end - sizeof(void *);
+ return 0;
+
+err:
+ ksm_exit(mm);
+err_ksm:
+ mmap_write_unlock(mm);
+err_free:
+ *vmap = NULL;
+ vm_area_free(vma);
+ return err;
+}