diff options
Diffstat (limited to 'mm')
47 files changed, 724 insertions, 784 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index ff60bd7d74e0..95550b8fa7fe 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -231,20 +231,13 @@ static __init int bdi_class_init(void) } postcore_initcall(bdi_class_init); -static int bdi_init(struct backing_dev_info *bdi); - static int __init default_bdi_init(void) { - int err; - bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; - - err = bdi_init(&noop_backing_dev_info); - - return err; + return 0; } subsys_initcall(default_bdi_init); @@ -781,7 +774,7 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ -static int bdi_init(struct backing_dev_info *bdi) +int bdi_init(struct backing_dev_info *bdi) { int ret; diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 4b8eab4b3f45..22c96fed70b5 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -228,10 +228,8 @@ static void balloon_page_putback(struct page *page) spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); } - /* move_to_new_page() counterpart for a ballooned page */ -static int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, struct page *page, +static int balloon_page_migrate(struct page *newpage, struct page *page, enum migrate_mode mode) { struct balloon_dev_info *balloon = balloon_page_device(page); @@ -250,11 +248,11 @@ static int balloon_page_migrate(struct address_space *mapping, return balloon->migratepage(balloon, newpage, page, mode); } -const struct address_space_operations balloon_aops = { - .migratepage = balloon_page_migrate, +const struct movable_operations balloon_mops = { + .migrate_page = balloon_page_migrate, .isolate_page = balloon_page_isolate, .putback_page = balloon_page_putback, }; -EXPORT_SYMBOL_GPL(balloon_aops); +EXPORT_SYMBOL_GPL(balloon_mops); #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/compaction.c b/mm/compaction.c index 1f89b969c12b..a2c53fcf933e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -110,28 +110,27 @@ static void split_map_pages(struct list_head *list) } #ifdef CONFIG_COMPACTION - -int PageMovable(struct page *page) +bool PageMovable(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops; VM_BUG_ON_PAGE(!PageLocked(page), page); if (!__PageMovable(page)) - return 0; + return false; - mapping = page_mapping(page); - if (mapping && mapping->a_ops && mapping->a_ops->isolate_page) - return 1; + mops = page_movable_ops(page); + if (mops) + return true; - return 0; + return false; } EXPORT_SYMBOL(PageMovable); -void __SetPageMovable(struct page *page, struct address_space *mapping) +void __SetPageMovable(struct page *page, const struct movable_operations *mops) { VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page); - page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE); + VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page); + page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE); } EXPORT_SYMBOL(__SetPageMovable); @@ -139,12 +138,10 @@ void __ClearPageMovable(struct page *page) { VM_BUG_ON_PAGE(!PageMovable(page), page); /* - * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE - * flag so that VM can catch up released page by driver after isolation. - * With it, VM migration doesn't try to put it back. + * This page still has the type of a movable page, but it's + * actually not movable any more. */ - page->mapping = (void *)((unsigned long)page->mapping & - PAGE_MAPPING_MOVABLE); + page->mapping = (void *)PAGE_MAPPING_MOVABLE; } EXPORT_SYMBOL(__ClearPageMovable); @@ -1034,7 +1031,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Only pages without mappings or that have a - * ->migratepage callback are possible to migrate + * ->migrate_folio callback are possible to migrate * without blocking. However, we can be racing with * truncation so it's necessary to lock the page * to stabilise the mapping as truncation holds @@ -1045,7 +1042,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail_put; mapping = page_mapping(page); - migrate_dirty = !mapping || mapping->a_ops->migratepage; + migrate_dirty = !mapping || + mapping->a_ops->migrate_folio; unlock_page(page); if (!migrate_dirty) goto isolate_fail_put; diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 8efbfb24f3a1..4b07c29effe9 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -374,6 +374,8 @@ static void damon_reclaim_timer_fn(struct work_struct *work) } static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); +static bool damon_reclaim_initialized; + static int enabled_store(const char *val, const struct kernel_param *kp) { @@ -382,6 +384,10 @@ static int enabled_store(const char *val, if (rc < 0) return rc; + /* system_wq might not initialized yet */ + if (!damon_reclaim_initialized) + return rc; + if (enabled) schedule_delayed_work(&damon_reclaim_timer, 0); @@ -449,6 +455,8 @@ static int __init damon_reclaim_init(void) damon_add_target(ctx, target); schedule_delayed_work(&damon_reclaim_timer, 0); + + damon_reclaim_initialized = true; return 0; } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 59e1653799f8..3c7b9d6dca95 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -336,8 +336,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/filemap.c b/mm/filemap.c index 9daeaab36081..0dec96ea6688 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -929,26 +929,6 @@ error: } ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); -/** - * add_to_page_cache_locked - add a locked page to the pagecache - * @page: page to add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * - * This function is used to add a page to the pagecache. It must be locked. - * This function does not add the page to the LRU. The caller must do that. - * - * Return: %0 on success, negative error code otherwise. - */ -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) -{ - return __filemap_add_folio(mapping, page_folio(page), offset, - gfp_mask, NULL); -} -EXPORT_SYMBOL(add_to_page_cache_locked); - int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp) { @@ -1988,6 +1968,10 @@ no_page: gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp &= ~__GFP_FS; + if (fgp_flags & FGP_NOWAIT) { + gfp &= ~GFP_KERNEL; + gfp |= GFP_NOWAIT | __GFP_NOWARN; + } folio = filemap_alloc_folio(gfp, 0); if (!folio) @@ -2147,65 +2131,46 @@ put: return folio_batch_count(fbatch); } -static inline -bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) -{ - if (!folio_test_large(folio) || folio_test_hugetlb(folio)) - return false; - if (index >= max) - return false; - return index < folio->index + folio_nr_pages(folio) - 1; -} - /** - * find_get_pages_range - gang pagecache lookup + * filemap_get_folios - Get a batch of folios * @mapping: The address_space to search * @start: The starting page index * @end: The final page index (inclusive) - * @nr_pages: The maximum number of pages - * @pages: Where the resulting pages are placed + * @fbatch: The batch to fill. * - * find_get_pages_range() will search for and return a group of up to @nr_pages - * pages in the mapping starting at index @start and up to index @end - * (inclusive). The pages are placed at @pages. find_get_pages_range() takes - * a reference against the returned pages. + * Search for and return a batch of folios in the mapping starting at + * index @start and up to index @end (inclusive). The folios are returned + * in @fbatch with an elevated reference count. * - * The search returns a group of mapping-contiguous pages with ascending - * indexes. There may be holes in the indices due to not-present pages. - * We also update @start to index the next page for the traversal. + * The first folio may start before @start; if it does, it will contain + * @start. The final folio may extend beyond @end; if it does, it will + * contain @end. The folios have ascending indices. There may be gaps + * between the folios if there are indices which have no folio in the + * page cache. If folios are added to or removed from the page cache + * while this is running, they may or may not be found by this call. * - * Return: the number of pages which were found. If this number is - * smaller than @nr_pages, the end of specified range has been - * reached. + * Return: The number of folios which were found. + * We also update @start to index the next folio for the traversal. */ -unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - pgoff_t end, unsigned int nr_pages, - struct page **pages) +unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; - unsigned ret = 0; - - if (unlikely(!nr_pages)) - return 0; rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { + while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { /* Skip over shadow, swap and DAX entries */ if (xa_is_value(folio)) continue; + if (!folio_batch_add(fbatch, folio)) { + unsigned long nr = folio_nr_pages(folio); -again: - pages[ret] = folio_file_page(folio, xas.xa_index); - if (++ret == nr_pages) { - *start = xas.xa_index + 1; + if (folio_test_hugetlb(folio)) + nr = 1; + *start = folio->index + nr; goto out; } - if (folio_more_pages(folio, xas.xa_index, end)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } } /* @@ -2221,7 +2186,18 @@ again: out: rcu_read_unlock(); - return ret; + return folio_batch_count(fbatch); +} +EXPORT_SYMBOL(filemap_get_folios); + +static inline +bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) +{ + if (!folio_test_large(folio) || folio_test_hugetlb(folio)) + return false; + if (index >= max) + return false; + return index < folio->index + folio_nr_pages(folio) - 1; } /** @@ -2385,6 +2361,8 @@ static void filemap_get_read_batch(struct address_space *mapping, continue; if (xas.xa_index > max || xa_is_value(folio)) break; + if (xa_is_sibling(folio)) + break; if (!folio_try_get_rcu(folio)) goto retry; @@ -2407,7 +2385,7 @@ retry: rcu_read_unlock(); } -static int filemap_read_folio(struct file *file, struct address_space *mapping, +static int filemap_read_folio(struct file *file, filler_t filler, struct folio *folio) { int error; @@ -2419,7 +2397,7 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping, */ folio_clear_error(folio); /* Start the actual read. The read will unlock the page. */ - error = mapping->a_ops->read_folio(file, folio); + error = filler(file, folio); if (error) return error; @@ -2428,7 +2406,8 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping, return error; if (folio_test_uptodate(folio)) return 0; - shrink_readahead_size_eio(&file->f_ra); + if (file) + shrink_readahead_size_eio(&file->f_ra); return -EIO; } @@ -2501,7 +2480,8 @@ static int filemap_update_page(struct kiocb *iocb, if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_folio(iocb->ki_filp, mapping, folio); + error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio, + folio); goto unlock_mapping; unlock: folio_unlock(folio); @@ -2544,7 +2524,7 @@ static int filemap_create_folio(struct file *file, if (error) goto error; - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (error) goto error; @@ -2629,6 +2609,13 @@ err: return err; } +static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) +{ + unsigned int shift = folio_shift(folio); + + return (pos1 >> shift == pos2 >> shift); +} + /** * filemap_read - Read data from the page cache. * @iocb: The iocb to read. @@ -2700,11 +2687,11 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, writably_mapped = mapping_writably_mapped(mapping); /* - * When a sequential read accesses a page several times, only + * When a read accesses the same folio several times, only * mark it as accessed the first time. */ - if (iocb->ki_pos >> PAGE_SHIFT != - ra->prev_pos >> PAGE_SHIFT) + if (!pos_same_folio(iocb->ki_pos, ra->prev_pos - 1, + fbatch.folios[0])) folio_mark_accessed(fbatch.folios[0]); for (i = 0; i < folio_batch_count(&fbatch); i++) { @@ -2991,11 +2978,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; + unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if (vmf->vma->vm_flags & VM_HUGEPAGE) { + if (vm_flags & VM_HUGEPAGE) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3003,7 +2991,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) * Fetch two PMD folios, so we get the chance to actually * readahead, unless we've been told not to. */ - if (!(vmf->vma->vm_flags & VM_RAND_READ)) + if (!(vm_flags & VM_RAND_READ)) ra->size *= 2; ra->async_size = HPAGE_PMD_NR; page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER); @@ -3012,12 +3000,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) #endif /* If we don't want any read-ahead, don't bother */ - if (vmf->vma->vm_flags & VM_RAND_READ) + if (vm_flags & VM_RAND_READ) return fpin; if (!ra->ra_pages) return fpin; - if (vmf->vma->vm_flags & VM_SEQ_READ) { + if (vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_sync_ra(&ractl, ra->ra_pages); return fpin; @@ -3220,7 +3208,7 @@ page_not_uptodate: * and we need to check for errors. */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (fpin) goto out_retry; folio_put(folio); @@ -3510,20 +3498,7 @@ repeat: return ERR_PTR(err); } -filler: - err = filler(file, folio); - if (err < 0) { - folio_put(folio); - return ERR_PTR(err); - } - - folio_wait_locked(folio); - if (!folio_test_uptodate(folio)) { - folio_put(folio); - return ERR_PTR(-EIO); - } - - goto out; + goto filler; } if (folio_test_uptodate(folio)) goto out; @@ -3546,14 +3521,14 @@ filler: goto out; } - /* - * A previous I/O error may have been due to temporary - * failures. - * Clear page error before actual read, PG_error will be - * set again if read page fails. - */ - folio_clear_error(folio); - goto filler; +filler: + err = filemap_read_folio(file, filler, folio); + if (err) { + folio_put(folio); + if (err == AOP_TRUNCATED_PAGE) + goto repeat; + return ERR_PTR(err); + } out: folio_mark_accessed(folio); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 20bc15b57d93..458618c7302c 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -51,28 +51,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -#ifdef CONFIG_MIGRATION -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) -{ - return folio_migrate_mapping(mapping, page_folio(newpage), - page_folio(page), extra_count); -} -EXPORT_SYMBOL(migrate_page_move_mapping); - -void migrate_page_states(struct page *newpage, struct page *page) -{ - folio_migrate_flags(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_states); - -void migrate_page_copy(struct page *newpage, struct page *page) -{ - folio_migrate_copy(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_copy); -#endif - bool set_page_writeback(struct page *page) { return folio_start_writeback(page_folio(page)); @@ -87,7 +87,8 @@ retry: * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); goto retry; } @@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); } /** @@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, unsigned long end, unsigned long hmm_pfns[], pmd_t pmd); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool hmm_is_device_private_entry(struct hmm_range *range, - swp_entry_t entry) -{ - return is_device_private_entry(entry) && - pfn_swap_entry_to_page(entry)->pgmap->owner == - range->dev_private_owner; -} - static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) { @@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * Never fault in device private pages, but just report - * the PFN even if not present. + * Don't fault in device private pages owned by the caller, + * just report the PFN. */ - if (hmm_is_device_private_entry(range, entry)) { + if (is_device_private_entry(entry) && + pfn_swap_entry_to_page(entry)->pgmap->owner == + range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; if (is_writable_device_private_entry(entry)) cpu_flags |= HMM_PFN_WRITE; @@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!non_swap_entry(entry)) goto fault; + if (is_device_private_entry(entry)) + goto fault; + if (is_device_exclusive_entry(entry)) goto fault; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a77c78a2b6b5..15965084816d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -18,6 +18,7 @@ #include <linux/shrinker.h> #include <linux/mm_inline.h> #include <linux/swapops.h> +#include <linux/backing-dev.h> #include <linux/dax.h> #include <linux/khugepaged.h> #include <linux/freezer.h> @@ -2377,6 +2378,7 @@ static void __split_huge_page_tail(struct page *head, int tail, page_tail); page_tail->mapping = head->mapping; page_tail->index = head->index + tail; + page_tail->private = 0; /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); @@ -2439,11 +2441,15 @@ static void __split_huge_page(struct page *page, struct list_head *list, __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ if (head[i].index >= end) { - ClearPageDirty(head + i); - __delete_from_page_cache(head + i, NULL); + struct folio *tail = page_folio(head + i); + if (shmem_mapping(head->mapping)) shmem_uncharge(head->mapping->host, 1); - put_page(head + i); + else if (folio_test_clear_dirty(tail)) + folio_account_cleaned(tail, + inode_to_wb(folio->mapping->host)); + __filemap_remove_folio(tail, NULL); + folio_put(tail); } else if (!PageAnon(page)) { __xa_store(&head->mapping->i_pages, head[i].index, head + i, 0); @@ -2672,8 +2678,7 @@ out_unlock: if (mapping) i_mmap_unlock_read(mapping); out: - /* Free any memory we didn't use */ - xas_nomem(&xas, 0); + xas_destroy(&xas); count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); return ret; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401..aa39534898e0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4788,8 +4788,13 @@ again: * sharing with another vma. */ ; - } else if (unlikely(is_hugetlb_entry_migration(entry) || - is_hugetlb_entry_hwpoisoned(entry))) { + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { + bool uffd_wp = huge_pte_uffd_wp(entry); + + if (!userfaultfd_wp(dst_vma) && uffd_wp) + entry = huge_pte_clear_uffd_wp(entry); + set_huge_pte_at(dst, addr, dst_pte, entry); + } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = huge_pte_uffd_wp(entry); @@ -5414,19 +5419,25 @@ static bool hugetlbfs_pagecache_present(struct hstate *h, int huge_add_to_page_cache(struct page *page, struct address_space * |
