summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 17:53:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 17:53:04 -0700
commit27bc50fc90647bbf7b734c3fc306a5e61350da53 (patch)
tree75fc525fbfec8c07a97a7875a89592317bcad4ca /fs
parent70442fc54e6889a2a77f0e9554e8188a1557f00e (diff)
parentbbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (diff)
downloadlinux-27bc50fc90647bbf7b734c3fc306a5e61350da53.tar.gz
linux-27bc50fc90647bbf7b734c3fc306a5e61350da53.tar.bz2
linux-27bc50fc90647bbf7b734c3fc306a5e61350da53.zip
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in linux-next for a couple of months without, to my knowledge, any negative reports (or any positive ones, come to that). - Also the Maple Tree from Liam Howlett. An overlapping range-based tree for vmas. It is apparently slightly more efficient in its own right, but is mainly targeted at enabling work to reduce mmap_lock contention. Liam has identified a number of other tree users in the kernel which could be beneficially converted to mapletrees. Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat at [1]. This has yet to be addressed due to Liam's unfortunately timed vacation. He is now back and we'll get this fixed up. - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses clang-generated instrumentation to detect used-uninitialized bugs down to the single bit level. KMSAN keeps finding bugs. New ones, as well as the legacy ones. - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of memory into THPs. - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to support file/shmem-backed pages. - userfaultfd updates from Axel Rasmussen - zsmalloc cleanups from Alexey Romanov - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and memory-failure - Huang Ying adds enhancements to NUMA balancing memory tiering mode's page promotion, with a new way of detecting hot pages. - memcg updates from Shakeel Butt: charging optimizations and reduced memory consumption. - memcg cleanups from Kairui Song. - memcg fixes and cleanups from Johannes Weiner. - Vishal Moola provides more folio conversions - Zhang Yi removed ll_rw_block() :( - migration enhancements from Peter Xu - migration error-path bugfixes from Huang Ying - Aneesh Kumar added ability for a device driver to alter the memory tiering promotion paths. For optimizations by PMEM drivers, DRM drivers, etc. - vma merging improvements from Jakub Matěn. 
- NUMA hinting cleanups from David Hildenbrand. - xu xin added additional userspace visibility into KSM merging activity. - THP & KSM code consolidation from Qi Zheng. - more folio work from Matthew Wilcox. - KASAN updates from Andrey Konovalov. - DAMON cleanups from Kaixu Xia. - DAMON work from SeongJae Park: fixes, cleanups. - hugetlb sysfs cleanups from Muchun Song. - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core. Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1] * tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits) hugetlb: allocate vma lock for all sharable vmas hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer hugetlb: fix vma lock handling during split vma and range unmapping mglru: mm/vmscan.c: fix imprecise comments mm/mglru: don't sync disk for each aging cycle mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol mm: memcontrol: use do_memsw_account() in a few more places mm: memcontrol: deprecate swapaccounting=0 mode mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled mm/secretmem: remove reduntant return value mm/hugetlb: add available_huge_pages() func mm: remove unused inline functions from include/linux/mm_inline.h selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd selftests/vm: add thp collapse shmem testing selftests/vm: add thp collapse file and tmpfs testing selftests/vm: modularize thp collapse memory operations selftests/vm: dedup THP helpers mm/khugepaged: add tracepoint to hpage_collapse_scan_file() mm/madvise: add file and shmem support to MADV_COLLAPSE ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/btrfs/compression.c31
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/subpage.c2
-rw-r--r--fs/btrfs/tests/extent-io-tests.c32
-rw-r--r--fs/buffer.c158
-rw-r--r--fs/coredump.c34
-rw-r--r--fs/exec.c14
-rw-r--r--fs/ext2/balloc.c7
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/gfs2/meta_io.c7
-rw-r--r--fs/gfs2/quota.c8
-rw-r--r--fs/hugetlbfs/inode.c300
-rw-r--r--fs/isofs/compress.c2
-rw-r--r--fs/jbd2/journal.c15
-rw-r--r--fs/jbd2/recovery.c16
-rw-r--r--fs/namei.c2
-rw-r--r--fs/nilfs2/page.c45
-rw-r--r--fs/ntfs3/inode.c7
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/posix_acl.c3
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/task_mmu.c96
-rw-r--r--fs/proc/task_nommu.c45
-rw-r--r--fs/ramfs/file-nommu.c50
-rw-r--r--fs/reiserfs/journal.c11
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/directory.c2
-rw-r--r--fs/udf/inode.c8
-rw-r--r--fs/ufs/balloc.c12
-rw-r--r--fs/userfaultfd.c133
35 files changed, 643 insertions, 472 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a547307c1ae8..2685a4d0d353 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -235,6 +235,7 @@ config ARCH_SUPPORTS_HUGETLBFS
config HUGETLBFS
bool "HugeTLB file system support"
depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
+ depends on (SYSFS || SYSCTL)
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c0a08064b0a7..f1f051ad3147 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -8,6 +8,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
@@ -219,8 +220,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
- struct page *pages[16];
- unsigned long nr_pages = end_index - index + 1;
+ struct folio_batch fbatch;
const int errno = blk_status_to_errno(cb->status);
int i;
int ret;
@@ -228,24 +228,23 @@ static noinline void end_compressed_writeback(struct inode *inode,
if (errno)
mapping_set_error(inode->i_mapping, errno);
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- nr_pages -= 1;
- index += 1;
- continue;
- }
+ folio_batch_init(&fbatch);
+ while (index <= end_index) {
+ ret = filemap_get_folios(inode->i_mapping, &index, end_index,
+ &fbatch);
+
+ if (ret == 0)
+ return;
+
for (i = 0; i < ret; i++) {
+ struct folio *folio = fbatch.folios[i];
+
if (errno)
- SetPageError(pages[i]);
- btrfs_page_clamp_clear_writeback(fs_info, pages[i],
+ folio_set_error(folio);
+ btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
- put_page(pages[i]);
}
- nr_pages -= ret;
- index += ret;
+ folio_batch_release(&fbatch);
}
/* the inode may be gone now */
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1eae68fbae21..4dcf22e051ff 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -270,9 +270,8 @@ static int __process_pages_contig(struct address_space *mapping,
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
- unsigned long nr_pages = end_index - start_index + 1;
unsigned long pages_processed = 0;
- struct page *pages[16];
+ struct folio_batch fbatch;
int err = 0;
int i;
@@ -281,16 +280,17 @@ static int __process_pages_contig(struct address_space *mapping,
ASSERT(processed_end && *processed_end == start);
}
- if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
+ if ((page_ops & PAGE_SET_ERROR) && start_index <= end_index)
mapping_set_error(mapping, -EIO);
- while (nr_pages > 0) {
- int found_pages;
+ folio_batch_init(&fbatch);
+ while (index <= end_index) {
+ int found_folios;
+
+ found_folios = filemap_get_folios_contig(mapping, &index,
+ end_index, &fbatch);
- found_pages = find_get_pages_contig(mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- if (found_pages == 0) {
+ if (found_folios == 0) {
/*
* Only if we're going to lock these pages, we can find
* nothing at @index.
@@ -300,23 +300,20 @@ static int __process_pages_contig(struct address_space *mapping,
goto out;
}
- for (i = 0; i < found_pages; i++) {
+ for (i = 0; i < found_folios; i++) {
int process_ret;
-
+ struct folio *folio = fbatch.folios[i];
process_ret = process_one_page(fs_info, mapping,
- pages[i], locked_page, page_ops,
+ &folio->page, locked_page, page_ops,
start, end);
if (process_ret < 0) {
- for (; i < found_pages; i++)
- put_page(pages[i]);
err = -EAGAIN;
+ folio_batch_release(&fbatch);
goto out;
}
- put_page(pages[i]);
- pages_processed++;
+ pages_processed += folio_nr_pages(folio);
}
- nr_pages -= found_pages;
- index += found_pages;
+ folio_batch_release(&fbatch);
cond_resched();
}
out:
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 6fc2b77ae5c3..9a176af847d7 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -337,7 +337,7 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
*
* Even with 0 returned, the page still need extra check to make sure
* it's really the correct page, as the caller is using
- * find_get_pages_contig(), which can race with page invalidating.
+ * filemap_get_folios_contig(), which can race with page invalidating.
*/
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index f69ec4d2d6eb..350da449db08 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -4,6 +4,7 @@
*/
#include <linux/pagemap.h>
+#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sizes.h>
@@ -20,39 +21,40 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
{
int ret;
- struct page *pages[16];
+ struct folio_batch fbatch;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
int i;
int count = 0;
int loops = 0;
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
+ folio_batch_init(&fbatch);
+
+ while (index <= end_index) {
+ ret = filemap_get_folios_contig(inode->i_mapping, &index,
+ end_index, &fbatch);
for (i = 0; i < ret; i++) {
+ struct folio *folio = fbatch.folios[i];
+
if (flags & PROCESS_TEST_LOCKED &&
- !PageLocked(pages[i]))
+ !folio_test_locked(folio))
count++;
- if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
- unlock_page(pages[i]);
- put_page(pages[i]);
+ if (flags & PROCESS_UNLOCK && folio_test_locked(folio))
+ folio_unlock(folio);
if (flags & PROCESS_RELEASE)
- put_page(pages[i]);
+ folio_put(folio);
}
- nr_pages -= ret;
- index += ret;
+ folio_batch_release(&fbatch);
cond_resched();
loops++;
if (loops > 100000) {
printk(KERN_ERR
- "stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
- start, end, nr_pages, ret);
+ "stuck in a loop, start %llu, end %llu, ret %d\n",
+ start, end, ret);
break;
}
}
+
return count;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 0a7ba84c1905..b927f6981ad1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -152,7 +152,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
/*
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
- * unlock the buffer. This is what ll_rw_block uses too.
+ * unlock the buffer.
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
@@ -491,8 +491,8 @@ int inode_has_buffers(struct inode *inode)
* all already-submitted IO to complete, but does not queue any new
* writes to the disk.
*
- * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
- * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
+ * as you dirty the buffers, and then use osync_inode_buffers to wait for
* completion. Any other dirty buffers which are not yet queued for
* write will not be flushed to disk by the osync.
*/
@@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev,
struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
- ll_rw_block(REQ_OP_WRITE, 1, &bh);
+ write_dirty_buffer(bh, 0);
put_bh(bh);
}
}
@@ -1342,23 +1342,12 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
+ bh_readahead(bh, REQ_RAHEAD);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead);
-void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
- gfp_t gfp)
-{
- struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
- if (likely(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
- brelse(bh);
- }
-}
-EXPORT_SYMBOL(__breadahead_gfp);
-
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
@@ -1817,7 +1806,7 @@ done:
/*
* The page was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
- * ll_rw_block/submit_bh. A rare case.
+ * write_dirty_buffer/submit_bh. A rare case.
*/
end_page_writeback(page);
@@ -2033,7 +2022,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(REQ_OP_READ, 1, &bh);
+ bh_read_nowait(bh, 0);
*wait_bh++=bh;
}
}
@@ -2352,7 +2341,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
int err;
err = inode_newsize_ok(inode, size);
@@ -2378,7 +2367,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
const struct address_space_operations *aops = mapping->a_ops;
unsigned int blocksize = i_blocksize(inode);
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
pgoff_t index, curidx;
loff_t curpos;
unsigned zerofrom, offset, len;
@@ -2593,11 +2582,9 @@ int block_truncate_page(struct address_space *mapping,
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
- err = -EIO;
- ll_rw_block(REQ_OP_READ, 1, &bh);
- wait_on_buffer(bh);
+ err = bh_read(bh, 0);
/* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
+ if (err < 0)
goto unlock;
}
@@ -2725,61 +2712,6 @@ void submit_bh(blk_opf_t opf, struct buffer_head *bh)
}
EXPORT_SYMBOL(submit_bh);
-/**
- * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @opf: block layer request operation and flags.
- * @nr: number of &struct buffer_heads in the array
- * @bhs: array of pointers to &struct buffer_head
- *
- * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
- * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
- * @opf contains flags modifying the detailed I/O behavior, most notably
- * %REQ_RAHEAD.
- *
- * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit), any buffer that appears to be clean when doing a write
- * request, and any buffer that appears to be up-to-date when doing read
- * request. Further it marks as clean buffers that are processed for
- * writing (the buffer cache won't assume that they are actually clean
- * until the buffer gets unlocked).
- *
- * ll_rw_block sets b_end_io to simple completion handler that marks
- * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
- * any waiters.
- *
- * All of the buffers must be for the same device, and must also be a
- * multiple of the current approved size for the device.
- */
-void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[])
-{
- const enum req_op op = opf & REQ_OP_MASK;
- int i;
-
- for (i = 0; i < nr; i++) {
- struct buffer_head *bh = bhs[i];
-
- if (!trylock_buffer(bh))
- continue;
- if (op == REQ_OP_WRITE) {
- if (test_clear_buffer_dirty(bh)) {
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
- submit_bh(opf, bh);
- continue;
- }
- } else {
- if (!buffer_uptodate(bh)) {
- bh->b_end_io = end_buffer_read_sync;
- get_bh(bh);
- submit_bh(opf, bh);
- continue;
- }
- }
- unlock_buffer(bh);
- }
-}
-EXPORT_SYMBOL(ll_rw_block);
-
void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
lock_buffer(bh);
@@ -3026,29 +2958,69 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
- * bh_submit_read - Submit a locked buffer for reading
+ * __bh_read - Submit read for a locked buffer
* @bh: struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @wait: wait until reading finish
*
- * Returns zero on success and -EIO on error.
+ * Returns zero on success or don't wait, and -EIO on error.
*/
-int bh_submit_read(struct buffer_head *bh)
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
- BUG_ON(!buffer_locked(bh));
+ int ret = 0;
- if (buffer_uptodate(bh)) {
- unlock_buffer(bh);
- return 0;
- }
+ BUG_ON(!buffer_locked(bh));
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, bh);
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return 0;
- return -EIO;
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ if (wait) {
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ ret = -EIO;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(__bh_read);
+
+/**
+ * __bh_read_batch - Submit read for a batch of unlocked buffers
+ * @nr: entry number of the buffer batch
+ * @bhs: a batch of struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @force_lock: force to get a lock on the buffer if set, otherwise drops any
+ * buffer that cannot lock.
+ *
+ * Returns zero on success or don't wait, and -EIO on error.
+ */
+void __bh_read_batch(int nr, struct buffer_head *bhs[],
+ blk_opf_t op_flags, bool force_lock)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (force_lock)
+ lock_buffer(bh);
+ else
+ if (!trylock_buffer(bh))
+ continue;
+
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+
+ bh->b_end_io = end_buffer_read_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ }
}
-EXPORT_SYMBOL(bh_submit_read);
+EXPORT_SYMBOL(__bh_read_batch);
void __init buffer_init(void)
{
diff --git a/fs/coredump.c b/fs/coredump.c
index 1897bc445062..7bad7785e8e6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1100,30 +1100,20 @@ whole:
return vma->vm_end - vma->vm_start;
}
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
- struct vm_area_struct *gate_vma)
-{
- struct vm_area_struct *ret = tsk->mm->mmap;
-
- if (ret)
- return ret;
- return gate_vma;
-}
-
/*
* Helper function for iterating across a vma list. It ensures that the caller
* will visit `gate_vma' prior to terminating the search.
*/
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
+ struct vm_area_struct *vma,
struct vm_area_struct *gate_vma)
{
- struct vm_area_struct *ret;
-
- ret = this_vma->vm_next;
- if (ret)
- return ret;
- if (this_vma == gate_vma)
+ if (gate_vma && (vma == gate_vma))
return NULL;
+
+ vma = mas_next(mas, ULONG_MAX);
+ if (vma)
+ return vma;
return gate_vma;
}
@@ -1147,9 +1137,10 @@ static void free_vma_snapshot(struct coredump_params *cprm)
*/
static bool dump_vma_snapshot(struct coredump_params *cprm)
{
- struct vm_area_struct *vma, *gate_vma;
+ struct vm_area_struct *gate_vma, *vma = NULL;
struct mm_struct *mm = current->mm;
- int i;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
+ int i = 0;
/*
* Once the stack expansion code is fixed to not change VMA bounds
@@ -1169,8 +1160,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
return false;
}
- for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
- vma = next_vma(vma, gate_vma), i++) {
+ while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
struct core_vma_metadata *m = cprm->vma_meta + i;
m->start = vma->vm_start;
@@ -1178,10 +1168,10 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
m->flags = vma->vm_flags;
m->dump_size = vma_dump_size(vma, cprm->mm_flags);
m->pgoff = vma->vm_pgoff;
-
m->file = vma->vm_file;
if (m->file)
get_file(m->file);
+ i++;
}
mmap_write_unlock(mm);
diff --git a/fs/exec.c b/fs/exec.c
index de084e485462..349a5da91efe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -28,7 +28,6 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
-#include <linux/vmacache.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
@@ -683,6 +682,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
+ VMA_ITERATOR(vmi, mm, new_start);
+ struct vm_area_struct *next;
struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -691,7 +692,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* ensure there are no vmas between where we want to go
* and where we are
*/
- if (vma != find_vma(mm, new_start))
+ if (vma != vma_next(&vmi))
return -EFAULT;
/*
@@ -710,12 +711,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
+ next = vma_next(&vmi);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -724,7 +726,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb);
@@ -1010,6 +1012,7 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
+ lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1022,9 +1025,8 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
- tsk->mm->vmacache_seqnum = 0;
- vmacache_flush(tsk);
task_unlock(tsk);
+ lru_gen_use_mm(mm);
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c17ccc19b938..5dc0a31f4a08 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -126,6 +126,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
struct ext2_group_desc * desc;
struct buffer_head * bh = NULL;
ext2_fsblk_t bitmap_blk;
+ int ret;
desc = ext2_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -139,10 +140,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
block_group, le32_to_cpu(desc->bg_block_bitmap));
return NULL;
}
- if (likely(bh_uptodate_or_lock(bh)))
+ ret = bh_read(bh, 0);
+ if (ret > 0)
return bh;
-
- if (bh_submit_read(bh) < 0) {
+ if (ret < 0) {
brelse(bh);
ext2_error(sb, __func__,
"Cannot read block bitmap - "
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51897427a534..b4a6e0a1b945 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *page)
1 << PG_active |
1 << PG_workingset |
1 << PG_reclaim |
- 1 << PG_waiters))) {
+ 1 << PG_waiters |
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
dump_page(page, "fuse: trying to steal weird page");
return 1;
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 7e70e0ba5a6c..6ed728aae9a5 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -525,8 +525,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
- if (!buffer_locked(first_bh))
- ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &first_bh);
+ bh_read_nowait(first_bh, REQ_META | REQ_PRIO);
dblock++;
extlen--;
@@ -534,9 +533,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
while (extlen) {
bh = gfs2_getbuf(gl, dblock, CREATE);
- if (!buffer_uptodate(bh) && !buffer_locked(bh))
- ll_rw_block(REQ_OP_READ | REQ_RAHEAD | REQ_META |
- REQ_PRIO, 1, &bh);
+ bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
brelse(bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index f201eaf59d0d..1ed17226d9ed 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -745,12 +745,8 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
}
if (PageUptodate(page))
set_buffer_uptodate(bh);
- if (!buffer_uptodate(bh)) {
- ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- goto unlock_out;
- }
+ if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
+ goto unlock_out;
if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);