diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-26 13:48:02 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-26 13:48:02 -0700 |
| commit | 55ba0fe059a577fa08f23223991b24564962620f (patch) | |
| tree | f3b4ccfd5105c44c4b398be496c8219a97365e35 | |
| parent | 2a19866b6e4cf554b57660549d12496ea84aa7d7 (diff) | |
| parent | 18bb8bbf13c1839b43c9e09e76d397b753989af2 (diff) | |
| download | linux-55ba0fe059a577fa08f23223991b24564962620f.tar.gz linux-55ba0fe059a577fa08f23223991b24564962620f.tar.bz2 linux-55ba0fe059a577fa08f23223991b24564962620f.zip | |
Merge tag 'for-5.13-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The updates this time are mostly stabilization, preparation and minor
improvements.
User visible improvements:
- readahead for send, improving run time of full send by 10% and for
incremental by 25%
- make reflinks respect O_SYNC, O_DSYNC and S_SYNC flags
- export supported sectorsize values in sysfs (currently only page
size, more once full subpage support lands)
- more graceful errors and warnings on 32bit systems when logical
addresses for metadata reach the limit posed by unsigned long in
page::index
- error: fail mount if there's a metadata block beyond the limit
- error: new metadata block would be at unreachable address
- warn when 5/8th of the limit is reached, for 4K page systems
it's 10T, for 64K page it's 160T
- zoned mode
- relocated zones get reset at the end instead of discard
- automatic background reclaim of zones that have 75%+ of unusable
space, the threshold is tunable in sysfs
Fixes:
- fsync and tree mod log fixes
- fix inefficient preemptive reclaim calculations
- fix exhaustion of the system chunk array due to concurrent
allocations
- fix fallback to no compression when racing with remount
- preemptive fix for dm-crypt on zoned device that does not properly
advertise zoned support
Core changes:
- add inode lock to synchronize mmap and other block updates (eg.
deduplication, fallocate, fsync)
- kmap conversions to new kmap_local API
- subpage support (continued)
- new helpers for page state/extent buffer tracking
- metadata changes now support read and write
- error handling through out relocation call paths
- many other cleanups and code simplifications"
* tag 'for-5.13-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (112 commits)
btrfs: zoned: automatically reclaim zones
btrfs: rename delete_unused_bgs_mutex to reclaim_bgs_lock
btrfs: zoned: reset zones of relocated block groups
btrfs: more graceful errors/warnings on 32bit systems when reaching limits
btrfs: zoned: fix unpaired block group unfreeze during device replace
btrfs: fix race when picking most recent mod log operation for an old root
btrfs: fix metadata extent leak after failure to create subvolume
btrfs: handle remount to no compress during compression
btrfs: zoned: fail mount if the device does not support zone append
btrfs: fix race between transaction aborts and fsyncs leading to use-after-free
btrfs: introduce submit_eb_subpage() to submit a subpage metadata page
btrfs: make lock_extent_buffer_for_io() to be subpage compatible
btrfs: introduce write_one_subpage_eb() function
btrfs: introduce end_bio_subpage_eb_writepage() function
btrfs: check return value of btrfs_commit_transaction in relocation
btrfs: do proper error handling in merge_reloc_roots
btrfs: handle extent corruption with select_one_root properly
btrfs: cleanup error handling in prepare_to_merge
btrfs: do not panic in __add_reloc_root
btrfs: handle __add_reloc_root failures in btrfs_recover_relocation
...
46 files changed, 2964 insertions, 1582 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b4fb997eda16..cec88a66bd6c 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ - subpage.o + subpage.o tree-mod-log.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f47c1528eb9a..117d423fdb93 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -14,6 +14,7 @@ #include "delayed-ref.h" #include "locking.h" #include "misc.h" +#include "tree-mod-log.h" /* Just an arbitrary number so we can be sure this happened */ #define BACKREF_FOUND_SHARED 6 @@ -452,7 +453,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (path->slots[0] >= btrfs_header_nritems(eb) || is_shared_data_backref(preftrees, eb->start) || ref->root_id != btrfs_header_owner(eb)) { - if (time_seq == SEQ_LAST) + if (time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_leaf(root, path); else ret = btrfs_next_old_leaf(root, path, time_seq); @@ -476,7 +477,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (slot == 0 && (is_shared_data_backref(preftrees, eb->start) || ref->root_id != btrfs_header_owner(eb))) { - if (time_seq == SEQ_LAST) + if (time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_leaf(root, path); else ret = btrfs_next_old_leaf(root, path, time_seq); @@ -514,7 +515,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie = NULL; } next: - if (time_seq == SEQ_LAST) + if (time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_item(root, path); else ret = btrfs_next_old_item(root, path, time_seq); @@ -574,7 +575,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, if (path->search_commit_root) root_level = btrfs_header_level(root->commit_root); - else if (time_seq == SEQ_LAST) + else if (time_seq == BTRFS_SEQ_LAST) root_level = btrfs_header_level(root->node); else root_level = btrfs_old_root_level(root, time_seq); @@ -605,7 +606,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, search_key.offset >= LLONG_MAX) search_key.offset = 0; path->lowest_level = level; - if (time_seq == SEQ_LAST) + if (time_seq == BTRFS_SEQ_LAST) ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); else ret = btrfs_search_old_slot(root, &search_key, path, time_seq); @@ -1147,8 +1148,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info, * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * - * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave - * much like trans == NULL case, the difference only lies in it will not + * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs, and + * behave much like trans == NULL case, the difference only lies in it will not * commit root. * The special case is for qgroup to search roots in commit_transaction(). * @@ -1199,7 +1200,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, path->skip_locking = 1; } - if (time_seq == SEQ_LAST) + if (time_seq == BTRFS_SEQ_LAST) path->skip_locking = 1; /* @@ -1217,9 +1218,9 @@ again: #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (trans && likely(trans->type != __TRANS_DUMMY) && - time_seq != SEQ_LAST) { + time_seq != BTRFS_SEQ_LAST) { #else - if (trans && time_seq != SEQ_LAST) { + if (trans && time_seq != BTRFS_SEQ_LAST) { #endif /* * look if there are updates for this ref queued and lock the @@ -1527,7 +1528,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, struct btrfs_trans_handle *trans; struct ulist_iterator uiter; struct ulist_node *node; - struct seq_list elem = SEQ_LIST_INIT(elem); + struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem); int ret = 0; struct share_check shared = { .root_objectid = root->root_key.objectid, @@ -1953,7 +1954,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, struct ulist *roots = NULL; struct ulist_node *ref_node = NULL; struct ulist_node *root_node = NULL; - struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); + struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem); struct ulist_iterator ref_uiter; struct ulist_iterator root_uiter; @@ -1971,12 +1972,12 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, } if (trans) - btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); + btrfs_get_tree_mod_seq(fs_info, &seq_elem); else down_read(&fs_info->commit_root_sem); ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, - tree_mod_seq_elem.seq, &refs, + seq_elem.seq, &refs, &extent_item_pos, ignore_offset); if (ret) goto out; @@ -1984,7 +1985,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, - tree_mod_seq_elem.seq, &roots, + seq_elem.seq, &roots, ignore_offset); if (ret) break; @@ -2007,7 +2008,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, free_leaf_list(refs); out: if (trans) { - btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + btrfs_put_tree_mod_seq(fs_info, &seq_elem); btrfs_end_transaction(trans); } else { up_read(&fs_info->commit_root_sem); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 744b99ddc28c..aa57bdc8fc89 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1289,7 +1289,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Long running balances can keep us blocked here for eternity, so * simply skip deletion if we're unable to get the mutex. */ - if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex)) + if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) return; spin_lock(&fs_info->unused_bgs_lock); @@ -1462,12 +1462,12 @@ next: spin_lock(&fs_info->unused_bgs_lock); } spin_unlock(&fs_info->unused_bgs_lock); - mutex_unlock(&fs_info->delete_unused_bgs_mutex); + mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); - mutex_unlock(&fs_info->delete_unused_bgs_mutex); + mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); } @@ -1485,6 +1485,97 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) spin_unlock(&fs_info->unused_bgs_lock); } +void btrfs_reclaim_bgs_work(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info = + container_of(work, struct btrfs_fs_info, reclaim_bgs_work); + struct btrfs_block_group *bg; + struct btrfs_space_info *space_info; + int ret; + + if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) + return; + + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) + return; + + mutex_lock(&fs_info->reclaim_bgs_lock); + spin_lock(&fs_info->unused_bgs_lock); + while (!list_empty(&fs_info->reclaim_bgs)) { + bg = list_first_entry(&fs_info->reclaim_bgs, + struct btrfs_block_group, + bg_list); + list_del_init(&bg->bg_list); + + space_info = bg->space_info; + spin_unlock(&fs_info->unused_bgs_lock); + + /* Don't race with allocators so take the groups_sem */ + down_write(&space_info->groups_sem); + + spin_lock(&bg->lock); + if (bg->reserved || bg->pinned || bg->ro) { + /* + * We want to bail if we made new allocations or have + * outstanding allocations in this block group. We do + * the ro check in case balance is currently acting on + * this block group. + */ + spin_unlock(&bg->lock); + up_write(&space_info->groups_sem); + goto next; + } + spin_unlock(&bg->lock); + + /* Get out fast, in case we're unmounting the filesystem */ + if (btrfs_fs_closing(fs_info)) { + up_write(&space_info->groups_sem); + goto next; + } + + ret = inc_block_group_ro(bg, 0); + up_write(&space_info->groups_sem); + if (ret < 0) + goto next; + + btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used", + bg->start, div_u64(bg->used * 100, bg->length)); + trace_btrfs_reclaim_block_group(bg); + ret = btrfs_relocate_chunk(fs_info, bg->start); + if (ret) + btrfs_err(fs_info, "error relocating chunk %llu", + bg->start); + +next: + btrfs_put_block_group(bg); + spin_lock(&fs_info->unused_bgs_lock); + } + spin_unlock(&fs_info->unused_bgs_lock); + mutex_unlock(&fs_info->reclaim_bgs_lock); + btrfs_exclop_finish(fs_info); +} + +void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) +{ + spin_lock(&fs_info->unused_bgs_lock); + if (!list_empty(&fs_info->reclaim_bgs)) + queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work); + spin_unlock(&fs_info->unused_bgs_lock); +} + +void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + trace_btrfs_add_reclaim_block_group(bg); + list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); +} + static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key, struct btrfs_path *path) { @@ -2267,29 +2358,33 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, struct btrfs_trans_handle *trans; u64 alloc_flags; int ret; + bool dirty_bg_running; -again: - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + do { + trans = btrfs_join_transaction(fs_info->extent_root); + if (IS_ERR(trans)) + return PTR_ERR(trans); - /* - * we're not allowed to set block groups readonly after the dirty - * block groups cache has started writing. If it already started, - * back off and let this transaction commit - */ - mutex_lock(&fs_info->ro_block_group_mutex); - if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { - u64 transid = trans->transid; + dirty_bg_running = false; - mutex_unlock(&fs_info->ro_block_group_mutex); - btrfs_end_transaction(trans); + /* + * We're not allowed to set block groups readonly after the dirty + * block group cache has started writing. If it already started, + * back off and let this transaction commit. + */ + mutex_lock(&fs_info->ro_block_group_mutex); + if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { + u64 transid = trans->transid; - ret = btrfs_wait_for_commit(fs_info, transid); - if (ret) - return ret; - goto again; - } + mutex_unlock(&fs_info->ro_block_group_mutex); + btrfs_end_transaction(trans); + + ret = btrfs_wait_for_commit(fs_info, transid); + if (ret) + return ret; + dirty_bg_running = true; + } + } while (dirty_bg_running); if (do_chunk_alloc) { /* @@ -3269,6 +3364,7 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) */ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) { + struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; @@ -3283,6 +3379,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) lockdep_assert_held(&fs_info->chunk_mutex); info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); +again: spin_lock(&info->lock); left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); @@ -3301,6 +3398,58 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (left < thresh) { u64 flags = btrfs_system_alloc_profile(fs_info); + u64 reserved = atomic64_read(&cur_trans->chunk_bytes_reserved); + + /* + * If there's not available space for the chunk tree (system + * space) and there are other tasks that reserved space for + * creating a new system block group, wait for them to complete + * the creation of their system block group and release excess + * reserved space. We do this because: + * + * *) We can end up allocating more system chunks than necessary + * when there are multiple tasks that are concurrently + * allocating block groups, which can lead to exhaustion of + * the system array in the superblock; + * + * *) If we allocate extra and unnecessary system block groups, + * despite being empty for a long time, and possibly forever, + * they end not being added to the list of unused block groups + * because that typically happens only when deallocating the + * last extent from a block group - which never happens since + * we never allocate from them in the first place. The few + * exceptions are when mounting a filesystem or running scrub, + * which add unused block groups to the list of unused block + * groups, to be deleted by the cleaner kthread. + * And even when they are added to the list of unused block + * groups, it can take a long time until they get deleted, + * since the cleaner kthread might be sleeping or busy with + * other work (deleting subvolumes, running delayed iputs, + * defrag scheduling, etc); + * + * This is rare in practice, but can happen when too many tasks + * are allocating blocks groups in parallel (via fallocate()) + * and before the one that reserved space for a new system block + * group finishes the block group creation and releases the space + * reserved in excess (at btrfs_create_pending_block_groups()), + * other tasks end up here and see free system space temporarily + * not enough for updating the chunk tree. + * + * We unlock the chunk mutex before waiting for such tasks and + * lock it again after the wait, otherwise we would deadlock. + * It is safe to do so because allocating a system chunk is the + * first thing done while allocating a new block group. + */ + if (reserved > trans->chunk_bytes_reserved) { + const u64 min_needed = reserved - thresh; + + mutex_unlock(&fs_info->chunk_mutex); + wait_event(cur_trans->chunk_reserve_wait, + atomic64_read(&cur_trans->chunk_bytes_reserved) <= + min_needed); + mutex_lock(&fs_info->chunk_mutex); + goto again; + } /* * Ignore failure to create system chunk. We might end up not @@ -3315,8 +3464,10 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) ret = btrfs_block_rsv_add(fs_info->chunk_root, &fs_info->chunk_block_rsv, thresh, BTRFS_RESERVE_NO_FLUSH); - if (!ret) + if (!ret) { + atomic64_add(thresh, &cur_trans->chunk_bytes_reserved); trans->chunk_bytes_reserved += thresh; + } } } @@ -3386,6 +3537,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) } spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->unused_bgs_lock); + while (!list_empty(&info->reclaim_bgs)) { + block_group = list_first_entry(&info->reclaim_bgs, + struct btrfs_block_group, + bg_list); + list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group, diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 3ecc3372a5ce..7b927425dc71 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -264,6 +264,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, u64 group_start, struct extent_map *em); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_mark_bg_unused(struct btrfs_block_group *bg); +void btrfs_reclaim_bgs_work(struct work_struct *work); +void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info); +void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg); int btrfs_read_block_groups(struct btrfs_fs_info *info); int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, u64 type, u64 chunk_offset, u64 size); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 28e202e89660..c652e19ad74e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -220,6 +220,7 @@ struct btrfs_inode { /* Hook into fs_info->delayed_iputs */ struct list_head delayed_iput; + struct rw_semaphore i_mmap_lock; struct inode vfs_inode; }; @@ -299,24 +300,30 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, mod); } -static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) +/* + * Called every time after doing a buffered, direct IO or memory mapped write. + * + * This is to ensure that if we write to a file that was previously fsynced in + * the current transaction, then try to fsync it again in the same transaction, + * we will know that there were changes in the file and that it needs to be + * logged. + */ +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) +{ + spin_lock(&inode->lock); + inode->last_sub_trans = inode->root->log_transid; + spin_unlock(&inode->lock); +} + +static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { - int ret = 0; + bool ret = false; spin_lock(&inode->lock); if (inode->logged_trans == generation && inode->last_sub_trans <= inode->last_log_commit && - inode->last_sub_trans <= inode->root->last_log_commit) { - /* - * After a ranged fsync we might have left some extent maps - * (that fall outside the fsync's range). So return false - * here if the list isn't empty, to make sure btrfs_log_inode() - * will be called and process those extent maps. - */ - smp_mb(); - if (list_empty(&inode->extent_tree.modified_extents)) - ret = 1; - } + inode->last_sub_trans <= inode->root->last_log_commit) + ret = true; spin_unlock(&inode->lock); return ret; } diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 113cb85c1fd4..169508609324 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1555,10 +1555,11 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) BUG_ON(!block_ctx->pagev); num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> PAGE_SHIFT; + /* Pages must be unmapped in reverse order */ while (num_pages > 0) { num_pages--; if (block_ctx->datav[num_pages]) { - kunmap(block_ctx->pagev[num_pages]); + kunmap_local(block_ctx->datav[num_pages]); block_ctx->datav[num_pages] = NULL; } if (block_ctx->pagev[num_pages]) { @@ -1637,7 +1638,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, i = j; } for (i = 0; i < num_pages; i++) - block_ctx->datav[i] = kmap(block_ctx->pagev[i]); + block_ctx->datav[i] = kmap_local_page(block_ctx->pagev[i]); return block_ctx->len; } @@ -2677,7 +2678,7 @@ static void __btrfsic_submit_bio(struct bio *bio) dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev); if (NULL != dev_state && (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) { - unsigned int i = 0; + int i = 0; u64 dev_bytenr; u64 cur_bytenr; struct bio_vec bvec; @@ -2702,7 +2703,7 @@ static void __btrfsic_submit_bio(struct bio *bio) bio_for_each_segment(bvec, bio, iter) { BUG_ON(bvec.bv_len != PAGE_SIZE); - mapped_datav[i] = kmap(bvec.bv_page); + mapped_datav[i] = kmap_local_page(bvec.bv_page); i++; if (dev_state->state->print_mask & @@ -2715,8 +2716,9 @@ static void __btrfsic_submit_bio(struct bio *bio) mapped_datav, segs, bio, &bio_is_patched, bio->bi_opf); - bio_for_each_segment(bvec, bio, iter) - kunmap(bvec.bv_page); + /* Unmap in reverse order */ + for (--i; i >= 0; i--) + kunmap_local(mapped_datav[i]); kfree(mapped_datav); } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { if (dev_state->state->print_mask & diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 3f4c832abfed..17f93fd28f7e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -80,10 +80,15 @@ static int compression_compress_pages(int type, struct list_head *ws, case BTRFS_COMPRESS_NONE: default: /* - * This can't happen, the type is validated several times - * before we get here. As a sane fallback, return what the - * callers will understand as 'no compression happened'. + * This can happen when compression races with remount setting + * it to 'no compress', while caller doesn't call + * inode_need_compress() to check if we really need to + * compress. + * + * Not a big deal, just need to inform caller that we + * haven't allocated any pages yet. */ + *out_pages = 0; return -E2BIG; } } @@ -1611 |
