diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/damon/vaddr.c | 3 | ||||
| -rw-r--r-- | mm/filemap.c | 4 | ||||
| -rw-r--r-- | mm/gup.c | 6 | ||||
| -rw-r--r-- | mm/hmm.c | 19 | ||||
| -rw-r--r-- | mm/hugetlb.c | 10 | ||||
| -rw-r--r-- | mm/ioremap.c | 26 | ||||
| -rw-r--r-- | mm/kasan/common.c | 3 | ||||
| -rw-r--r-- | mm/kfence/core.c | 18 | ||||
| -rw-r--r-- | mm/memory.c | 36 | ||||
| -rw-r--r-- | mm/memremap.c | 6 | ||||
| -rw-r--r-- | mm/page-writeback.c | 89 | ||||
| -rw-r--r-- | mm/page_alloc.c | 31 | ||||
| -rw-r--r-- | mm/rmap.c | 27 | ||||
| -rw-r--r-- | mm/secretmem.c | 42 | ||||
| -rw-r--r-- | mm/shmem.c | 7 | ||||
| -rw-r--r-- | mm/slab.c | 20 | ||||
| -rw-r--r-- | mm/slab.h | 39 | ||||
| -rw-r--r-- | mm/slab_common.c | 36 | ||||
| -rw-r--r-- | mm/slob.c | 33 | ||||
| -rw-r--r-- | mm/slub.c | 98 | ||||
| -rw-r--r-- | mm/sparse-vmemmap.c | 8 | ||||
| -rw-r--r-- | mm/swap_slots.c | 2 | ||||
| -rw-r--r-- | mm/usercopy.c | 2 | ||||
| -rw-r--r-- | mm/userfaultfd.c | 5 |
24 files changed, 305 insertions, 265 deletions
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 59e1653799f8..3c7b9d6dca95 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -336,8 +336,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/filemap.c b/mm/filemap.c index ffdfbc8b0e3c..254931a6e3ed 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1988,6 +1988,10 @@ no_page: gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp &= ~__GFP_FS; + if (fgp_flags & FGP_NOWAIT) { + gfp &= ~GFP_KERNEL; + gfp |= GFP_NOWAIT | __GFP_NOWARN; + } folio = filemap_alloc_folio(gfp, 0); if (!folio) @@ -87,7 +87,8 @@ retry: * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); goto retry; } @@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); } /** @@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, unsigned long end, unsigned long hmm_pfns[], pmd_t pmd); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool hmm_is_device_private_entry(struct hmm_range *range, - swp_entry_t entry) -{ - return is_device_private_entry(entry) && - pfn_swap_entry_to_page(entry)->pgmap->owner == - range->dev_private_owner; -} - static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) { @@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * Never fault in device private pages, but just report - * the PFN even if not present. + * Don't fault in device private pages owned by the caller, + * just report the PFN. */ - if (hmm_is_device_private_entry(range, entry)) { + if (is_device_private_entry(entry) && + pfn_swap_entry_to_page(entry)->pgmap->owner == + range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; if (is_writable_device_private_entry(entry)) cpu_flags |= HMM_PFN_WRITE; @@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!non_swap_entry(entry)) goto fault; + if (is_device_private_entry(entry)) + goto fault; + if (is_device_exclusive_entry(entry)) goto fault; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401..a18c071c294e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4788,8 +4788,13 @@ again: * sharing with another vma. */ ; - } else if (unlikely(is_hugetlb_entry_migration(entry) || - is_hugetlb_entry_hwpoisoned(entry))) { + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { + bool uffd_wp = huge_pte_uffd_wp(entry); + + if (!userfaultfd_wp(dst_vma) && uffd_wp) + entry = huge_pte_clear_uffd_wp(entry); + set_huge_pte_at(dst, addr, dst_pte, entry); + } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = huge_pte_uffd_wp(entry); @@ -5947,6 +5952,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { + put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fe598ecd9b7..8652426282cc 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -11,29 +11,35 @@ #include <linux/io.h> #include <linux/export.h> -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { unsigned long offset, vaddr; phys_addr_t last_addr; struct vm_struct *area; /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) return NULL; /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; + offset = phys_addr & (~PAGE_MASK); + phys_addr -= offset; size = PAGE_ALIGN(size + offset); + if (!ioremap_allowed(phys_addr, size, prot)) + return NULL; + area = get_vm_area_caller(size, VM_IOREMAP, __builtin_return_address(0)); if (!area) return NULL; vaddr = (unsigned long)area->addr; + area->phys_addr = phys_addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(prot))) { free_vm_area(area); return NULL; } @@ -44,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot); void iounmap(volatile void __iomem *addr) { - vunmap((void *)((unsigned long)addr & PAGE_MASK)); + void *vaddr = (void *)((unsigned long)addr & PAGE_MASK); + + if (!iounmap_allowed(vaddr)) + return; + + if (is_vmalloc_addr(vaddr)) + vunmap(vaddr); } EXPORT_SYMBOL(iounmap); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index c40c0e7b3b5f..78be2beb7453 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init) return; tag = kasan_random_tag(); + kasan_unpoison(set_tag(page_address(page), tag), + PAGE_SIZE << order, init); for (i = 0; i < (1 << order); i++) page_kasan_tag_set(page + i, tag); - kasan_unpoison(page_address(page), PAGE_SIZE << order, init); } void __kasan_poison_pages(struct page *page, unsigned int order, bool init) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 4b5e5a3d3a63..6aff49f6b79e 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -603,14 +603,6 @@ static unsigned long kfence_init_pool(void) addr += 2 * PAGE_SIZE; } - /* - * The pool is live and will never be deallocated from this point on. - * Remove the pool object from the kmemleak object tree, as it would - * otherwise overlap with allocations returned by kfence_alloc(), which - * are registered with kmemleak through the slab post-alloc hook. - */ - kmemleak_free(__kfence_pool); - return 0; } @@ -623,8 +615,16 @@ static bool __init kfence_init_pool_early(void) addr = kfence_init_pool(); - if (!addr) + if (!addr) { + /* + * The pool is live and will never be deallocated from this point on. + * Ignore the pool object from the kmemleak phys object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. + */ + kmemleak_ignore_phys(__pa(__kfence_pool)); return true; + } /* * Only release unprotected pages, and do not try to go back and change diff --git a/mm/memory.c b/mm/memory.c index 7a089145cad4..1c6027adc542 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3043,7 +3043,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) pte_t entry; VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page)); + VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page)); /* * Clear the pages cpupid information as the existing @@ -4369,9 +4369,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return VM_FAULT_OOM; } - /* See comment in handle_pte_fault() */ + /* + * See comment in handle_pte_fault() for how this scenario happens, we + * need to return NOPAGE so that we drop this page. + */ if (pmd_devmap_trans_unstable(vmf->pmd)) - return 0; + return VM_FAULT_NOPAGE; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); @@ -4802,6 +4805,19 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) goto split; if (vmf->vma->vm_ops->huge_fault) { vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); @@ -4812,19 +4828,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - return VM_FAULT_FALLBACK; -} - -static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* No support for anonymous transparent PUD pages yet */ - if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } diff --git a/mm/memremap.c b/mm/memremap.c index b870a659eee6..745eea0f99c3 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page) } #ifdef CONFIG_FS_DAX -bool __put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs) { if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) return false; @@ -509,9 +509,9 @@ bool __put_devmap_managed_page(struct page *page) * refcount is 1, then the page is free and the refcount is * stable because nobody holds a reference on the page. */ - if (page_ref_dec_return(page) == 1) + if (page_ref_sub_return(page, refs) == 1) wake_up_var(&page->_refcount); return true; } -EXPORT_SYMBOL(__put_devmap_managed_page); +EXPORT_SYMBOL(__put_devmap_managed_page_refs); #endif /* CONFIG_FS_DAX */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 55c2776ae699..d0d466a5c804 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1554,8 +1554,8 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) * If we're over `background_thresh' then the writeback threads are woken to * perform some writeout. */ -static void balance_dirty_pages(struct bdi_writeback *wb, - unsigned long pages_dirtied) +static int balance_dirty_pages(struct bdi_writeback *wb, + unsigned long pages_dirtied, unsigned int flags) { struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) }; @@ -1575,6 +1575,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb, struct backing_dev_info *bdi = wb->bdi; bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; unsigned long start_time = jiffies; + int ret = 0; for (;;) { unsigned long now = jiffies; @@ -1628,6 +1629,19 @@ static void balance_dirty_pages(struct bdi_writeback *wb, } /* + * In laptop mode, we wait until hitting the higher threshold + * before starting background writeout, and then write out all + * the way down to the lower threshold. So slow writers cause + * minimal disk activity. + * + * In normal mode, we start background writeout at the lower + * background_thresh, to keep the amount of dirty memory low. + */ + if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh && + !writeback_in_progress(wb)) + wb_start_background_writeback(wb); + + /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the wb limits are ramping up in case of !strictlimit. @@ -1657,6 +1671,7 @@ free_running: break; } + /* Start writeback even when in laptop mode */ if (unlikely(!writeback_in_progress(wb))) wb_start_background_writeback(wb); @@ -1715,8 +1730,8 @@ free_running: sdtc = mdtc; } - if (dirty_exceeded && !wb->dirty_exceeded) - wb->dirty_exceeded = 1; + if (dirty_exceeded != wb->dirty_exceeded) + wb->dirty_exceeded = dirty_exceeded; if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + BANDWIDTH_INTERVAL)) @@ -1789,6 +1804,10 @@ pause: period, pause, start_time); + if (flags & BDP_ASYNC) { + ret = -EAGAIN; + break; + } __set_current_state(TASK_KILLABLE); wb->dirty_sleep = now; io_schedule_timeout(pause); @@ -1820,26 +1839,7 @@ pause: if (fatal_signal_pending(current)) break; } - - if (!dirty_exceeded && wb->dirty_exceeded) - wb->dirty_exceeded = 0; - - if (writeback_in_progress(wb)) - return; - - /* - * In laptop mode, we wait until hitting the higher threshold before - * starting background writeout, and then write out all the way down - * to the lower threshold. So slow writers cause minimal disk activity. - * - * In normal mode, we start background writeout at the lower - * background_thresh, to keep the amount of dirty memory low. - */ - if (laptop_mode) - return; - - if (nr_reclaimable > gdtc->bg_thresh) - wb_start_background_writeback(wb); + return ret; } static DEFINE_PER_CPU(int, bdp_ratelimits); @@ -1861,27 +1861,34 @@ static DEFINE_PER_CPU(int, bdp_ratelimits); DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0; /** - * balance_dirty_pages_ratelimited - balance dirty memory state - * @mapping: address_space which was dirtied + * balance_dirty_pages_ratelimited_flags - Balance dirty memory state. + * @mapping: address_space which was dirtied. + * @flags: BDP flags. * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * - * Once we're over the dirty memory limit we decrease the ratelimiting - * by a lot, to prevent individual processes from overshooting the limit - * by (ratelimit_pages) each. + * See balance_dirty_pages_ratelimited() for details. + * + * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to + * indicate that memory is out of balance and the caller must wait + * for I/O to complete. Otherwise, it will return 0 to indicate + * that either memory was already in balance, or it was able to sleep + * until the amount of dirty memory returned to balance. */ -void balance_dirty_pages_ratelimited(struct address_space *mapping) +int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, + unsigned int flags) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); struct bdi_writeback *wb = NULL; int ratelimit; + int ret = 0; int *p; if (!(bdi->capabilities & BDI_CAP_WRITEBACK)) - return; + return ret; if (inode_cgwb_enabled(inode)) wb = wb_get_create_current(bdi, GFP_KERNEL); @@ -1921,9 +1928,27 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) preempt_enable(); if (unlikely(current->nr_dirtied >= ratelimit)) - balance_dirty_pages(wb, current->nr_dirtied); + ret = balance_dirty_pages(wb, current->nr_dirtied, flags); wb_put(wb); + return ret; +} + +/** + * balance_dirty_pages_ratelimited - balance dirty memory state. + * @mapping: address_space which was dirtied. + * + * Processes which are dirtying memory should call in here once for each page + * which was newly dirtied. The function will periodically check the system's + * dirty state and will initiate writeback if needed. + * + * Once we're over the dirty memory limit we decrease the ratelimiting + * by a lot, to prevent individual processes from overshooting the limit + * by (ratelimit_pages) each. + */ +void balance_dirty_pages_ratelimited(struct address_space *mapping) +{ + balance_dirty_pages_ratelimited_flags(mapping, 0); } EXPORT_SYMBOL(balance_dirty_pages_ratelimited); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e008a3df0485..b0bcab50f0a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order) } #endif /* CONFIG_DEBUG_VM */ -static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) +static inline bool should_skip_kasan_unpoison(gfp_t flags) { /* Don't skip if a software KASAN mode is enabled. */ if (IS_ENABLED(CONFIG_KASAN_GENERIC) || @@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) return true; /* - * With hardware tag-based KASAN enabled, skip if either: - * - * 1. Memory tags have already been cleared via tag_clear_highpage(). - * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON. + * With hardware tag-based KASAN enabled, skip if this has been + * requested via __GFP_SKIP_KASAN_UNPOISON. */ - return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); + return flags & __GFP_SKIP_KASAN_UNPOISON; } static inline bool should_skip_init(gfp_t flags) @@ -2397,6 +2395,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + int i; set_page_private(page, 0); set_page_refcounted(page); @@ -2422,8 +2421,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * should be initialized as well). */ if (init_tags) { - int i; - /* Initialize both memory and tags. */ for (i = 0; i != 1 << order; ++i) tag_clear_highpage(page + i); @@ -2431,13 +2428,17 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Note that memory is already initialized by the loop above. */ init = false; } - if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) { + if (!should_skip_kasan_unpoison(gfp_flags)) { /* Unpoison shadow memory or set memory tags. */ kasan_unpoison_pages(page, order, init); /* Note that memory is already initialized by KASAN. */ if (kasan_has_integrated_init()) init = false; + } else { + /* Ensure page_address() dereferencing does not fault. */ + for (i = 0; i != 1 << order; ++i) + page_kasan_tag_reset(page + i); } /* If memory is still not initialized, do it now. */ if (init) @@ -3968,11 +3969,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * need to be calculated. */ if (!order) { - long fast_free; + long usable_free; + long reserved; + + usable_free = free_pages; + reserved = __zone_watermark_unusable_free(z, 0, alloc_flags); - fast_free = free_pages; - fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags); - if (fast_free > mark + z->lowmem_reserve[highest_zoneidx]) + /* reserved may over estimate high-atomic reserves. */ + usable_free -= min(usable_free, reserved); + if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) return true; } diff --git a/mm/rmap.c b/mm/rmap.c index 5bcb334cd6f2..746c05acad27 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1899,8 +1899,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); - subpage = folio_page(folio, - pte_pfn(*pvmw.pte) - folio_pfn(folio)); + if (folio_is_zone_device(folio)) { + /* + * Our PTE is a non-present device exclusive entry and + * calculating the subpage as for the common case would + * result in an invalid pointer. + * + * Since only PAGE_SIZE pages can currently be + * migrated, just set it to page. This will need to be + * changed when hugepage migrations to device private + * memory are supported. + */ + VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio); + subpage = &folio->page; + } else { + subpage = folio_page(folio, + pte_pfn(*pvmw.pte) - folio_pfn(folio)); + } address = pvmw.address; anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(subpage); @@ -1993,15 +2008,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* * No need to invalidate here it will synchronize on * against the special swap migration pte. - * - * The assignment to subpage above was computed from a - * swap PTE which results in an invalid pointer. - * Since only PAGE_SIZE pages can currently be - * migrated, just set it to page. This will need to be - * changed when hugepage migrations to device private - * memory are supported. */ - subpage = &folio->page; } else if (PageHWPoison(subpage)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { diff --git a/mm/secretmem.c b/mm/secretmem.c index 206ed6b40c1d..71fb78f59d72 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) gfp_t gfp = vmf->gfp_mask; unsigned long addr; struct page *page; + vm_fault_t ret; int err; if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return vmf_error(-EINVAL); + filemap_invalidate_lock_shared(mapping); + retry: page = find_lock_page(mapping, offset); if (!page) { page = alloc_page(gfp | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } err = set_direct_map_invalid_noflush(page); if (err) { put_page(page); - return vmf_error(err); + ret = vmf_error(err); + goto out; } __SetPageUptodate(page); @@ -86,7 +92,8 @@ retry: if (err == -EEXIST) goto retry; - return vmf_error(err); + ret = vmf_error(err); + goto out; } addr = (unsigned long)page_address(page); @@ -94,7 +101,11 @@ retry: } vmf->page = page; - return VM_FAULT_LOCKED; + ret = VM_FAULT_LOCKED; + +out: + filemap_invalidate_unlock_shared(mapping); + return ret; } static const struct vm_operations_struct secretmem_vm_ops = { @@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); + struct address_space *mapping = inode->i_mapping; unsigned int ia_valid = iattr->ia_valid; + int ret; + + filemap_invalidate_lock(mapping); if ((ia_valid & ATTR_SIZE) && inode->i_size) - return -EINVAL; + ret = -EINVAL; + else + ret = simple_setattr(mnt_userns, dentry, iattr); - return simple_setattr(mnt_userns, dentry, iattr); + filemap_invalidate_unlock(mapping); + + return ret; } static const struct inode_operations secretmem_iops = { @@ -180,11 +199,20 @@ static struct file *secretmem_file_create(unsigned long flags) { struct file *file = ERR_PTR(-ENOMEM); struct inode *inode; + const char *anon_name = "[secretmem]"; + const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); + int err; inode = alloc_anon_inode(secretmem_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); + err = security_inode_init_security_anon(inode, &qname, NULL); + if (err) { + file = ERR_PTR(err); + goto err_free_inode; + } + file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) diff --git a/mm/shmem.c b/mm/shmem.c index a6f565308133..b7f2d4a56867 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3392,7 +3392,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); - if (*rest) + if (*rest || ctx->blocks > S64_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; @@ -3514,10 +3514,7 @@ static int shmem_reconfigure(struct fs_context *fc) raw_spin_lock(&sbinfo->stat_lock); inodes = sbinfo->max_inodes - sbinfo->free_inodes; - if (ctx->blocks > S64_MAX) { - err = "Number of blocks too large"; - goto out; - } + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; diff --git a/mm/slab.c b/mm/slab.c index f8cd00f4ba13..5e73e2d80222 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3230,7 +3230,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_ } /* ___cache_alloc_node can fall back to other nodes */ ptr = ____cache_alloc_node(cachep, flags, nodeid); - out: +out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); init = slab_want_init_on_alloc(flags, cachep); @@ -3259,7 +3259,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) if (!objp) objp = ____cache_alloc_node(cache, flags, numa_mem_id()); - out: +out: return objp; } #else @@ -3406,9 +3406,10 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, { bool init; + memcg_slab_free_hook(cachep, virt_to_slab(objp), &objp, 1); + if (is_kfence_address(objp)) { kmemleak_free_recursive(objp, cachep->flags); - memcg_slab_free_hook(cachep, &objp, 1); __kfence_free(objp); return; } @@ -3441,7 +3442,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, check_irq_off(); kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); - memcg_slab_free_hook(cachep, &objp, 1); /* * Skip calling cache_free_alien() when the platform is not numa. @@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, { void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, + trace_kmem_cache_alloc(_RET_IP_, ret, cachep, cachep->object_size, cachep->size, flags); return ret; @@ -3553,7 +3553,7 @@ error: local_irq_enable(); cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); slab_post_alloc_hook(s, objcg, flags, i, p, false); - __kmem_cache_free_bulk(s, i, p); + kmem_cache_free_bulk(s, i, p); return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); @@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_); ret = kasan_kmalloc(cachep, ret, size, flags); - trace_kmalloc(_RET_IP_, ret, + trace_kmalloc(_RET_IP_, ret, cachep, size, cachep->size, flags); return ret; } @@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc_node(_RET_IP_, ret, + trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep, cachep->object_size, cachep->size, flags, nodeid); @@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_); ret = kasan_kmalloc(cachep, ret, size, flags); - trace_kmalloc_node(_RET_IP_, ret, + trace_kmalloc_node(_RET_IP_, ret, cachep, size, cachep->si |
