summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-05-18 14:11:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-05-18 14:11:54 -0700
commit7991c92f4cc50b971fcb4d05087e490dc47a6857 (patch)
tree4f996bb9ce2aaa32784899f43bef25b4e95139e4 /fs
parent61ea647ed190af8ed5c5adece2fb5ee33eb3cd22 (diff)
parentc6a6c9694aadc4c3ab8d89bdd44aed3eab1e43c6 (diff)
downloadlinux-7991c92f4cc50b971fcb4d05087e490dc47a6857.tar.gz
linux-7991c92f4cc50b971fcb4d05087e490dc47a6857.tar.bz2
linux-7991c92f4cc50b971fcb4d05087e490dc47a6857.zip
Merge tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: - more folio conversion patches - add support for FS_IOC_GETFSSYSFSPATH - mballoc cleaups and add more kunit tests - sysfs cleanups and bug fixes - miscellaneous bug fixes and cleanups * tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits) ext4: fix error pointer dereference in ext4_mb_load_buddy_gfp() jbd2: add prefix 'jbd2' for 'shrink_type' jbd2: use shrink_type type instead of bool type for __jbd2_journal_clean_checkpoint_list() ext4: fix uninitialized ratelimit_state->lock access in __ext4_fill_super() ext4: remove calls to to set/clear the folio error flag ext4: propagate errors from ext4_sb_bread() in ext4_xattr_block_cache_find() ext4: fix mb_cache_entry's e_refcnt leak in ext4_xattr_block_cache_find() jbd2: remove redundant assignement to variable err ext4: remove the redundant folio_wait_stable() ext4: fix potential unnitialized variable ext4: convert ac_buddy_page to ac_buddy_folio ext4: convert ac_bitmap_page to ac_bitmap_folio ext4: convert ext4_mb_init_cache() to take a folio ext4: convert bd_buddy_page to bd_buddy_folio ext4: convert bd_bitmap_page to bd_bitmap_folio ext4: open coding repeated check in next_linear_group ext4: use correct criteria name instead stale integer number in comment ext4: call ext4_mb_mark_free_simple to free continuous bits in found chunk ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used ext4: keep "prefetch_grp" and "nr" consistent ...
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/acl.h5
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/extents.c3
-rw-r--r--fs/ext4/file.c5
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/ioctl.c3
-rw-r--r--fs/ext4/mballoc-test.c76
-rw-r--r--fs/ext4/mballoc.c322
-rw-r--r--fs/ext4/mballoc.h14
-rw-r--r--fs/ext4/move_extent.c4
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/ext4/readpage.c1
-rw-r--r--fs/ext4/super.c36
-rw-r--r--fs/ext4/sysfs.c174
-rw-r--r--fs/ext4/xattr.c145
-rw-r--r--fs/jbd2/checkpoint.c24
-rw-r--r--fs/jbd2/commit.c3
18 files changed, 478 insertions, 362 deletions
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index ef4c19e5f570..0c5a79c3b5d4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -68,11 +68,6 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
- /* usually, the umask is applied by posix_acl_create(), but if
- ext4 ACL support is disabled at compile time, we need to do
- it here, because posix_acl_create() will never be called */
- inode->i_mode &= ~current_umask();
-
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8d126654019e..983dad8c07ec 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -213,11 +213,14 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
-/* Large fragment size list lookup succeeded at least once for cr = 0 */
+/* Large fragment size list lookup succeeded at least once for
+ * CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
+/* Avg fragment size rb tree lookup succeeded at least once for
+ * CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
+/* Avg fragment size rb tree lookup succeeded at least once for
+ * CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
struct ext4_allocation_request {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e57054bdc5fd..e067f2dd0335 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3402,9 +3402,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
- int allocated = 0, max_zeroout = 0;
int err = 0;
int split_flag = EXT4_EXT_DATA_VALID2;
+ int allocated = 0;
+ unsigned int max_zeroout = 0;
ext_debug(inode, "logical block %llu, max_blocks %u\n",
(unsigned long long)map->m_lblk, map_len);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 28c51b0cc4db..c89e434db6b7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -844,8 +844,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (err)
goto out_journal;
lock_buffer(sbi->s_sbh);
- strncpy(sbi->s_es->s_last_mounted, cp,
- sizeof(sbi->s_es->s_last_mounted));
+ strtomem_pad(sbi->s_es->s_last_mounted, cp, 0);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
@@ -885,7 +884,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 537803250ca9..4bae9ccf5fe0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1865,7 +1865,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
len = folio_size(folio);
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(mpd->inode))
- len = size & ~PAGE_MASK;
+ len = size & (len - 1);
err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
if (!err)
mpd->wbc->nr_to_write--;
@@ -2334,7 +2334,7 @@ static int mpage_journal_page_buffers(handle_t *handle,
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(inode))
- len = size - folio_pos(folio);
+ len = size & (len - 1);
return ext4_journal_folio_buffers(handle, folio, len);
}
@@ -2887,9 +2887,6 @@ retry:
if (IS_ERR(folio))
return PTR_ERR(folio);
- /* In case writeback began while the folio was unlocked */
- folio_wait_stable(folio);
-
#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
@@ -3530,7 +3527,6 @@ static const struct address_space_operations ext4_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3547,7 +3543,6 @@ static const struct address_space_operations ext4_journalled_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3564,7 +3559,6 @@ static const struct address_space_operations ext4_da_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3573,7 +3567,6 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
- .direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
.bmap = ext4_bmap,
.swap_activate = ext4_iomap_swap_activate,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7160a71044c8..dab7acd49709 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1150,9 +1150,8 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
*/
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
- memset(label, 0, sizeof(label));
lock_buffer(sbi->s_sbh);
- strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ strscpy_pad(label, sbi->s_es->s_volume_name);
unlock_buffer(sbi->s_sbh);
if (copy_to_user(user_label, label, sizeof(label)))
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 044ca5238f41..bb2a223b207c 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -30,7 +30,31 @@ struct mbt_ext4_super_block {
#define MBT_CTX(_sb) (&MBT_SB(_sb)->mbt_ctx)
#define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group])
+static struct inode *mbt_alloc_inode(struct super_block *sb)
+{
+ struct ext4_inode_info *ei;
+
+ ei = kmalloc(sizeof(struct ext4_inode_info), GFP_KERNEL);
+ if (!ei)
+ return NULL;
+
+ INIT_LIST_HEAD(&ei->i_orphan);
+ init_rwsem(&ei->xattr_sem);
+ init_rwsem(&ei->i_data_sem);
+ inode_init_once(&ei->vfs_inode);
+ ext4_fc_init_inode(&ei->vfs_inode);
+
+ return &ei->vfs_inode;
+}
+
+static void mbt_free_inode(struct inode *inode)
+{
+ kfree(EXT4_I(inode));
+}
+
static const struct super_operations mbt_sops = {
+ .alloc_inode = mbt_alloc_inode,
+ .free_inode = mbt_free_inode,
};
static void mbt_kill_sb(struct super_block *sb)
@@ -859,6 +883,56 @@ static void test_mb_free_blocks(struct kunit *test)
ext4_mb_unload_buddy(&e4b);
}
+#define COUNT_FOR_ESTIMATE 100000
+static void test_mb_mark_used_cost(struct kunit *test)
+{
+ struct ext4_buddy e4b;
+ struct super_block *sb = (struct super_block *)test->priv;
+ struct ext4_free_extent ex;
+ int ret;
+ struct test_range ranges[TEST_RANGE_COUNT];
+ int i, j;
+ unsigned long start, end, all = 0;
+
+ /* buddy cache assumes that each page contains at least one block */
+ if (sb->s_blocksize > PAGE_SIZE)
+ kunit_skip(test, "blocksize exceeds pagesize");
+
+ ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
+ ex.fe_group = TEST_GOAL_GROUP;
+ for (j = 0; j < COUNT_FOR_ESTIMATE; j++) {
+ mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+ start = jiffies;
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ex.fe_start = ranges[i].start;
+ ex.fe_len = ranges[i].len;
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_mark_used(&e4b, &ex);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ end = jiffies;
+ all += (end - start);
+
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_free_blocks(NULL, &e4b, ranges[i].start,
+ ranges[i].len);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ }
+
+ kunit_info(test, "costed jiffies %lu\n", all);
+ ext4_mb_unload_buddy(&e4b);
+}
+
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
{
.blocksize_bits = 10,
@@ -901,6 +975,8 @@ static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
+ KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
+ { .speed = KUNIT_SPEED_SLOW }),
{}
};
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3f196010b..9dda9cd68ab2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
return 0;
if (order == MB_NUM_ORDERS(sb))
order--;
+ if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
+ order = MB_NUM_ORDERS(sb) - 1;
return order;
}
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* goal length.
*/
order = fls(ac->ac_g_ex.fe_len) - 1;
+ if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
+ order = MB_NUM_ORDERS(ac->ac_sb);
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;
@@ -1076,23 +1080,11 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
}
/*
- * Return next linear group for allocation. If linear traversal should not be
- * performed, this function just returns the same group
+ * Return next linear group for allocation.
*/
static ext4_group_t
-next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
- ext4_group_t ngroups)
+next_linear_group(ext4_group_t group, ext4_group_t ngroups)
{
- if (!should_optimize_scan(ac))
- goto inc_and_return;
-
- if (ac->ac_groups_linear_remaining) {
- ac->ac_groups_linear_remaining--;
- goto inc_and_return;
- }
-
- return group;
-inc_and_return:
/*
* Artificially restricted ngroups for non-extent
* files makes group > ngroups possible on first loop.
@@ -1118,8 +1110,19 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;
- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
- *group = next_linear_group(ac, *group, ngroups);
+ if (!should_optimize_scan(ac)) {
+ *group = next_linear_group(*group, ngroups);
+ return;
+ }
+
+ /*
+ * Optimized scanning can return non adjacent groups which can cause
+ * seek overhead for rotational disks. So try few linear groups before
+ * trying optimized scan.
+ */
+ if (ac->ac_groups_linear_remaining) {
+ *group = next_linear_group(*group, ngroups);
+ ac->ac_groups_linear_remaining--;
return;
}
@@ -1131,8 +1134,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
- * TODO: For CR=2, we can arrange groups in an rb tree sorted by
- * bb_free. But until that happens, we should never come here.
+ * TODO: For CR_GOAL_LEN_SLOW, we can arrange groups in an
+ * rb tree sorted by bb_free. But until that happens, we should
+ * never come here.
*/
WARN_ON(1);
}
@@ -1270,7 +1274,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
-static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
unsigned int blocksize;
@@ -1288,13 +1292,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
char *bitmap;
struct ext4_group_info *grinfo;
- inode = page->mapping->host;
+ inode = folio->mapping->host;
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
blocksize = i_blocksize(inode);
blocks_per_page = PAGE_SIZE / blocksize;
- mb_debug(sb, "init page %lu\n", page->index);
+ mb_debug(sb, "init folio %lu\n", folio->index);
groups_per_page = blocks_per_page >> 1;
if (groups_per_page == 0)
@@ -1309,9 +1313,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else
bh = &bhs;
- first_group = page->index * blocks_per_page / 2;
+ first_group = folio->index * blocks_per_page / 2;
- /* read all groups the page covers into the cache */
+ /* read all groups the folio covers into the cache */
for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
if (group >= ngroups)
break;
@@ -1322,10 +1326,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
- * we must skip all initialized uptodate buddies on the page,
+ * we must skip all initialized uptodate buddies on the folio,
* which may be currently in use by an allocating task.
*/
- if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
+ if (folio_test_uptodate(folio) &&
+ !EXT4_MB_GRP_NEED_INIT(grinfo)) {
bh[i] = NULL;
continue;
}
@@ -1349,7 +1354,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
err = err2;
}
- first_block = page->index * blocks_per_page;
+ first_block = folio->index * blocks_per_page;
for (i = 0; i < blocks_per_page; i++) {
group = (first_block + i) >> 1;
if (group >= ngroups)
@@ -1370,7 +1375,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* above
*
*/
- data = page_address(page) + (i * blocksize);
+ data = folio_address(folio) + (i * blocksize);
bitmap = bh[group - first_group]->b_data;
/*
@@ -1385,8 +1390,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
- mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
+ mb_debug(sb, "put buddy for group %u in folio %lu/%x\n",
+ group, folio->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
@@ -1404,8 +1409,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else {
/* this is block of bitmap */
BUG_ON(incore != NULL);
- mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
+ mb_debug(sb, "put bitmap for group %u in folio %lu/%x\n",
+ group, folio->index, i * blocksize);
trace_ext4_mb_bitmap_load(sb, group);
/* see comments in ext4_mb_put_pa() */
@@ -1423,7 +1428,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
incore = data;
}
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
out:
if (bh) {
@@ -1439,7 +1444,7 @@ out:
* Lock the buddy and bitmap pages. This make sure other parallel init_group
* on the same buddy page doesn't happen whild holding the buddy page lock.
* Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
- * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
+ * are on the same page e4b->bd_buddy_folio is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
@@ -1447,10 +1452,10 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
int blocks_per_page;
- struct page *page;
+ struct folio *folio;
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
+ e4b->bd_buddy_folio = NULL;
+ e4b->bd_bitmap_folio = NULL;
blocks_per_page = PAGE_SIZE / sb->s_blocksize;
/*
@@ -1461,12 +1466,13 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (!page)
- return -ENOMEM;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ BUG_ON(folio->mapping != inode->i_mapping);
+ e4b->bd_bitmap_folio = folio;
+ e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
if (blocks_per_page >= 2) {
/* buddy and bitmap are on the same page */
@@ -1474,23 +1480,24 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
}
/* blocks_per_page == 1, hence we need another page for the buddy */
- page = find_or_create_page(inode->i_mapping, block + 1, gfp);
- if (!page)
- return -ENOMEM;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_buddy_page = page;
+ folio = __filemap_get_folio(inode->i_mapping, block + 1,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ BUG_ON(folio->mapping != inode->i_mapping);
+ e4b->bd_buddy_folio = folio;
return 0;
}
static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
- if (e4b->bd_bitmap_page) {
- unlock_page(e4b->bd_bitmap_page);
- put_page(e4b->bd_bitmap_page);
+ if (e4b->bd_bitmap_folio) {
+ folio_unlock(e4b->bd_bitmap_folio);
+ folio_put(e4b->bd_bitmap_folio);
}
- if (e4b->bd_buddy_page) {
- unlock_page(e4b->bd_buddy_page);
- put_page(e4b->bd_buddy_page);
+ if (e4b->bd_buddy_folio) {
+ folio_unlock(e4b->bd_buddy_folio);
+ folio_put(e4b->bd_buddy_folio);
}
}
@@ -1505,7 +1512,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
struct ext4_group_info *this_grp;
struct ext4_buddy e4b;
- struct page *page;
+ struct folio *folio;
int ret = 0;
might_sleep();
@@ -1532,16 +1539,16 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
- page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL, gfp);
+ folio = e4b.bd_bitmap_folio;
+ ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret)
goto err;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- if (e4b.bd_buddy_page == NULL) {
+ if (e4b.bd_buddy_folio == NULL) {
/*
* If both the bitmap and buddy are in
* the same page we don't need to force
@@ -1551,11 +1558,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
/* init buddy cache */
- page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
+ folio = e4b.bd_buddy_folio;
+ ret = ext4_mb_init_cache(folio, e4b.bd_bitmap, gfp);
if (ret)
goto err;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
@@ -1577,7 +1584,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
int block;
int pnum;
int poff;
- struct page *page;
+ struct folio *folio;
int ret;
struct ext4_group_info *grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1595,8 +1602,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
e4b->bd_info = grp;
e4b->bd_sb = sb;
e4b->bd_group = group;
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
+ e4b->bd_buddy_folio = NULL;
+ e4b->bd_bitmap_folio = NULL;
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
/*
@@ -1617,102 +1624,103 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- /* we could use find_or_create_page(), but it locks page
- * what we'd like to avoid in fast path ... */
- page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
+ /* Avoid locking the folio in the fast path ... */
+ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (!IS_ERR(folio))
/*
- * drop the page reference and try
- * to get the page with lock. If we
+ * drop the folio reference and try
+ * to get the folio with lock. If we
* are not uptodate that implies
- * somebody just created the page but
- * is yet to initialize the same. So
+ * somebody just created the folio but
+ * is yet to initialize it. So
* wait for it to initialize.
*/
- put_page(page);
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (page) {
- if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
- "ext4: bitmap's paging->mapping != inode->i_mapping\n")) {
+ folio_put(folio);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (!IS_ERR(folio)) {
+ if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
+ "ext4: bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
- unlock_page(page);
+ folio_unlock(folio);
ret = -EINVAL;
goto err;
}
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL, gfp);
+ if (!folio_test_uptodate(folio)) {
+ ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret) {
- unlock_page(page);
+ folio_unlock(folio);
goto err;
}
- mb_cmp_bitmaps(e4b, page_address(page) +
+ mb_cmp_bitmaps(e4b, folio_address(folio) +
(poff * sb->s_blocksize));
}
- unlock_page(page);
+ folio_unlock(folio);
}
}
- if (page == NULL) {
- ret = -ENOMEM;
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
goto err;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- /* Pages marked accessed already */
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+ /* Folios marked accessed already */
+ e4b->bd_bitmap_folio = folio;
+ e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
- put_page(page);
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (page) {
- if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
- "ext4: buddy bitmap's page->mapping != inode->i_mapping\n")) {
+ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (!IS_ERR(folio))
+ folio_put(folio);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (!IS_ERR(folio)) {
+ if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
+ "ext4: buddy bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
- unlock_page(page);
+ folio_unlock(folio);
ret = -EINVAL;
goto err;
}
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+ if (!folio_test_uptodate(folio)) {
+ ret = ext4_mb_init_cache(folio, e4b->bd_bitmap,
gfp);
if (ret) {
- unlock_page(page);
+ folio_unlock(folio);
goto err;
}
}
- unlock_page(page);
+ folio_unlock(folio);
}
}
- if (page == NULL) {
- ret = -ENOMEM;
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
goto err;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- /* Pages marked accessed already */
- e4b->bd_buddy_page = page;
- e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
+ /* Folios marked accessed already */
+ e4b->bd_buddy_folio = folio;
+ e4b->bd_buddy = folio_address(folio) + (poff * sb->s_blocksize);
return 0;
err:
- if (page)
- put_page(page);
- if (e4b->bd_bitmap_page)
- put_page(e4b->bd_bitmap_page);
+ if (!IS_ERR_OR_NULL(folio))
+ folio_put(folio);
+ if (e4b->bd_bitmap_folio)
+ folio_put(e4b->bd_bitmap_folio);
e4b->bd_buddy = NULL;
e4b->bd_bitmap = NULL;
@@ -1727,10 +1735,10 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
- if (e4b->bd_bitmap_page)
- put_page(e4b->bd_bitmap_page);
- if (e4b->bd_buddy_page)
- put_page(e4b->bd_buddy_page);
+ if (e4b->bd_bitmap_folio)
+ folio_put(e4b->bd_bitmap_folio);
+ if (e4b->bd_buddy_folio)
+ folio_put(e4b->bd_buddy_folio);
}
@@ -2040,13 +2048,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
int ord;
int mlen = 0;
int max = 0;
- int cur;
int start = ex->fe_start;
int len = ex->fe_len;
unsigned ret = 0;
int len0 = len;
void *buddy;
- bool split = false;
+ int ord_start, ord_end;
BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
BUG_ON(e4b->bd_group != ex->fe_group);
@@ -2071,16 +2078,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
/* let's maintain buddy itself */
while (len) {
- if (!split)
- ord = mb_find_order_for_block(e4b, start);
+ ord = mb_find_order_for_block(e4b, start);
if (((start >> ord) << ord) == start && len >= (1 << ord)) {
/* the whole chunk may be allocated at once! */
mlen = 1 << ord;
- if (!split)
- buddy = mb_find_buddy(e4b, ord, &max);
- else
- split = false;
+ buddy = mb_find_buddy(e4b, ord, &max);
BUG_ON((start >> ord) >= max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
@@ -2094,20 +2097,29 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
if (ret == 0)
ret = len | (ord << 16);
- /* we have to split large buddy */
BUG_ON(ord <= 0);
buddy = mb_find_buddy(e4b, ord, &max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
- ord--;
- cur = (start >> ord) & ~1U;
- buddy = mb_find_buddy(e4b, ord, &max);
- mb_clear_bit(cur, buddy);
- mb_clear_bit(cur + 1, buddy);
- e4b->bd_info->bb_counters[ord]++;
- e4b->bd_info->bb_counters[ord]++;
- split = true;
+ ord_start = (start >> ord) << ord;
+ ord_end = ord_start + (1 << ord);
+ /* first chunk */
+ if (start > ord_start)
+ ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
+ ord_start, start - ord_start,
+ e4b->bd_info);
+
+ /* last chunk */
+ if (start + len < ord_end) {
+ ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
+ start + len,
+ ord_end - (start + len),
+ e4b->bd_info);
+ break;
+ }
+ len = start + len - ord_end;
+ start = ord_end;
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
@@ -2149,10 +2161,10 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
* double allocate blocks. The reference is dropped
* in ext4_mb_release_context
*/
- ac->ac_bitmap_page = e4b->bd_bitmap_page;
- get_page(ac->ac_bitmap_page);
- ac->ac_buddy_page = e4b->bd_buddy_page;
- get_page(ac->ac_buddy_page);
+ ac->ac_bitmap_folio = e4b->bd_bitmap_folio;
+ folio_get(ac->ac_bitmap_folio);
+ ac->ac_buddy_folio = e4b->bd_buddy_folio;
+ folio_get(ac->ac_buddy_folio);
/* store last allocated for subsequent stream allocation */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
spin_lock(&sbi->s_md_lock);
@@ -2675,7 +2687,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
int ret;
/*
- * cr=CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
+ * CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
* search to find large good chunks almost for free. If buddy
* data is not ready, then this optimization makes no sense. But
* we never skip the first block group in a flex_bg, since this
@@ -2856,6 +2868,7 @@ repeat:
group = ac->ac_g_ex.fe_group;
ac->ac_groups_linear_remaining =