From af80602799681c78f14fbe20b6185a56020dedee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:18 +0200 Subject: mm: Move mm_cachep initialization to mm_init() In order to allow using mm_alloc() much earlier, move initializing mm_cachep into mm_init(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.751153381@infradead.org --- include/linux/sched/task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d6c48163c6de..8431558641a4 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -65,6 +65,7 @@ extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); void __noreturn make_task_dead(int signr); +extern void mm_cache_init(void); extern void proc_caches_init(void); extern void fork_init(void); -- cgit v1.2.3 From 3f4c8211d982099be693be9aa7d6fc4607dff290 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:21 +0200 Subject: x86/mm: Use mm_alloc() in poking_init() Instead of duplicating init_mm, allocate a fresh mm. The advantage is that mm_alloc() has much simpler dependencies. Additionally it makes more conceptual sense, init_mm has no (and must not have) user state to duplicate. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.816175235@infradead.org --- include/linux/sched/task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 8431558641a4..357e0068497c 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -91,7 +91,6 @@ extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); -struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); -- cgit v1.2.3 From d48567c9a0d1e605639f8a8705a61bbb55fb4e84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 26 Oct 2022 12:13:03 +0200 Subject: mm: Introduce set_memory_rox() Because endlessly repeating: set_memory_ro() set_memory_x() is getting tedious. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y1jek64pXOsougmz@hirez.programming.kicks-ass.net --- include/linux/filter.h | 3 +-- include/linux/set_memory.h | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed..f0b17aff4e66 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); - set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 369769ce7399..023ebc67a36c 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -14,6 +14,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +static inline int set_memory_rox(unsigned long addr, int numpages) +{ + int ret = set_memory_ro(addr, numpages); + if (ret) + return ret; + return set_memory_x(addr, numpages); +} + #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP static inline int set_direct_map_invalid_noflush(struct page *page) { -- cgit v1.2.3 From 60463628c9e0a8060ac6bef0457b0505c7532c7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Oct 2022 13:19:31 +0200 Subject: x86/mm: Implement native set_memory_rox() Provide a native implementation of set_memory_rox(), avoiding the double set_memory_ro();set_memory_x(); calls. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) --- include/linux/set_memory.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 023ebc67a36c..95ac8398ee72 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -14,6 +14,7 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef set_memory_rox static inline int set_memory_rox(unsigned long addr, int numpages) { int ret = set_memory_ro(addr, numpages); @@ -21,6 +22,7 @@ static inline int set_memory_rox(unsigned long addr, int numpages) return ret; return set_memory_x(addr, numpages); } +#endif #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP static inline int set_direct_map_invalid_noflush(struct page *page) -- cgit v1.2.3 From 93b3037a1482758349f3b0431406bcc457ca1cbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 14:04:46 +0100 Subject: mm: Update ptep_get_lockless()'s comment Improve the comment. Suggested-by: Matthew Wilcox Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.515572025%40infradead.org --- include/linux/pgtable.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..c0b29000c3c0 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -300,15 +300,12 @@ static inline pte_t ptep_get(pte_t *ptep) #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking any - * locks. For this we would like to load the pointers atomically, but sometimes - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What - * we do have is the guarantee that a PTE will only either go from not present - * to present, or present to not present or both -- it will not switch to a - * completely different present page without a TLB flush in between; something - * that we are blocking by holding interrupts off. + * For walking the pagetables without holding any locks. Some architectures + * (eg x86-32 PAE) cannot load the entries atomically without using expensive + * instructions. We are guaranteed that a PTE will only either go from not + * present to present, or present to not present -- it will not switch to a + * completely different present page without a TLB flush inbetween; which we + * are blocking by holding interrupts off. * * Setting ptes from not present to present goes: * -- cgit v1.2.3 From 024d232ae4fcd7a7ce8ea239607d6c1246d7adc8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:16:22 +0100 Subject: mm: Fix pmd_read_atomic() AFAICT there's no reason to do anything different than what we do for PTEs. Make it so (also affects SH). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.711181252%40infradead.org --- include/linux/pgtable.h | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c0b29000c3c0..765fd4bf420f 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -298,6 +298,13 @@ static inline pte_t ptep_get(pte_t *ptep) } #endif +#ifndef __HAVE_ARCH_PMDP_GET +static inline pmd_t pmdp_get(pmd_t *pmdp) +{ + return READ_ONCE(*pmdp); +} +#endif + #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures @@ -340,15 +347,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep) return pte; } -#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#define ptep_get_lockless ptep_get_lockless + +#if CONFIG_PGTABLE_LEVELS > 2 +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + pmd_t pmd; + + do { + pmd.pmd_low = pmdp->pmd_low; + smp_rmb(); + pmd.pmd_high = pmdp->pmd_high; + smp_rmb(); + } while (unlikely(pmd.pmd_low != pmdp->pmd_low)); + + return pmd; +} +#define pmdp_get_lockless pmdp_get_lockless +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ + /* * We require that the PTE can be read atomically. */ +#ifndef ptep_get_lockless static inline pte_t ptep_get_lockless(pte_t *ptep) { return ptep_get(ptep); } -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#endif + +#ifndef pmdp_get_lockless +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + return pmdp_get(pmdp); +} +#endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -1318,17 +1352,10 @@ static inline int pud_trans_unstable(pud_t *pud) #endif } -#ifndef pmd_read_atomic static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { - /* - * Depend on compiler for an atomic pmd read. NOTE: this is - * only going to work, if the pmdval_t isn't larger than - * an unsigned long. - */ - return *pmdp; + return pmdp_get_lockless(pmdp); } -#endif #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) -- cgit v1.2.3 From 6ca297d4784625de7b041e8451780643cf5751a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Oct 2022 14:51:44 +0200 Subject: mm: Rename GUP_GET_PTE_LOW_HIGH Since it no longer applies to only PTEs, rename it to PXX. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.776404066%40infradead.org --- include/linux/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 765fd4bf420f..7dd3df742543 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -305,7 +305,7 @@ static inline pmd_t pmdp_get(pmd_t *pmdp) } #endif -#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures * (eg x86-32 PAE) cannot load the entries atomically without using expensive @@ -365,7 +365,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) } #define pmdp_get_lockless pmdp_get_lockless #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ /* * We require that the PTE can be read atomically. -- cgit v1.2.3 From dab6e717429e5ec795d558a0e9a5337a1ed33a3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:20:28 +0100 Subject: mm: Rename pmd_read_atomic() There's no point in having the identical routines for PTE/PMD have different names. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.841277397%40infradead.org --- include/linux/pgtable.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 7dd3df742543..23348528ff36 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1352,11 +1352,6 @@ static inline int pud_trans_unstable(pud_t *pud) #endif } -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - return pmdp_get_lockless(pmdp); -} - #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) #endif @@ -1383,13 +1378,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - pmd_t pmdval = pmd_read_atomic(pmd); + pmd_t pmdval = pmdp_get_lockless(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. * * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, - * pmd_read_atomic is allowed to return a not atomic pmdval + * pmdp_get_lockless is allowed to return a not atomic pmdval * (for example pointing to an hugepage that has never been * mapped in the pmd). The below checks will only care about * the low part of the pmd with 32bit PAE x86 anyway, with the -- cgit v1.2.3 From 2dff2c359e829245bc3d80e42e296876d1f0cf8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 1 Nov 2022 12:53:18 +0100 Subject: mm: Convert __HAVE_ARCH_P..P_GET to the new style Since __HAVE_ARCH_* style guards have been depricated in favour of defining the function name onto itself, convert pxxp_get(). Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y2EUEBlQXNgaJgoI@hirez.programming.kicks-ass.net --- include/linux/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 23348528ff36..70e2a7e06a76 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -291,14 +291,14 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, ptep_get_and_clear(mm, addr, ptep); } -#ifndef __HAVE_ARCH_PTEP_GET +#ifndef ptep_get static inline pte_t ptep_get(pte_t *ptep) { return READ_ONCE(*ptep); } #endif -#ifndef __HAVE_ARCH_PMDP_GET +#ifndef pmdp_get static inline pmd_t pmdp_get(pmd_t *pmdp) { return READ_ONCE(*pmdp); -- cgit v1.2.3