diff options
Diffstat (limited to 'fs/btrfs')
52 files changed, 2518 insertions, 1796 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 66fa9ab2c046..3282adc84d52 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -49,9 +49,11 @@ config BTRFS_FS_POSIX_ACL If you don't know what Access Control Lists are, say N config BTRFS_FS_CHECK_INTEGRITY - bool "Btrfs with integrity check tool compiled in (DANGEROUS)" + bool "Btrfs with integrity check tool compiled in (DEPRECATED)" depends on BTRFS_FS help + This feature has been deprecated and will be removed in 6.7. + Adds code that examines all block write requests (including writes of the super block). The goal is to verify that the state of the filesystem on disk is always consistent, i.e., diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h index ceadfc5d6c66..8cfc8214109c 100644 --- a/fs/btrfs/accessors.h +++ b/fs/btrfs/accessors.h @@ -3,6 +3,8 @@ #ifndef BTRFS_ACCESSORS_H #define BTRFS_ACCESSORS_H +#include <linux/stddef.h> + struct btrfs_map_token { struct extent_buffer *eb; char *kaddr; @@ -34,13 +36,13 @@ static inline void put_unaligned_le8(u8 val, void *p) read_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ - sizeof(((type *)0)->member))) + sizeof_field(type, member))) #define write_eb_member(eb, ptr, type, member, result) (\ write_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ offsetof(type, member), \ - sizeof(((type *)0)->member))) + sizeof_field(type, member))) #define DECLARE_BTRFS_SETGET_BITS(bits) \ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ @@ -62,25 +64,25 @@ DECLARE_BTRFS_SETGET_BITS(64) static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ const type *s) \ { \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ return btrfs_get_##bits(eb, s, offsetof(type, member)); \ } \ static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ u##bits val) \ { \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ btrfs_set_##bits(eb, s, offsetof(type, member), val); \ } \ static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \ const type *s) \ { \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ return btrfs_get_token_##bits(token, s, offsetof(type, member));\ } \ static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\ type *s, u##bits val) \ { \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ btrfs_set_token_##bits(token, s, offsetof(type, member), val); \ } @@ -111,17 +113,14 @@ static inline void btrfs_set_##name(type *s, u##bits val) \ static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s) { - static_assert(sizeof(u64) == - sizeof(((struct btrfs_dev_item *)0))->total_bytes); - return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, - total_bytes)); + static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes)); + return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes)); } static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s, u64 val) { - static_assert(sizeof(u64) == - sizeof(((struct btrfs_dev_item *)0))->total_bytes); + static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes)); WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize)); btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val); } diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 79336fa853db..b7d54efb4728 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -3373,7 +3373,6 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, struct btrfs_key *node_key, struct btrfs_backref_node *cur) { - struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_backref_edge *edge; struct btrfs_backref_node *exist; int ret; @@ -3462,25 +3461,21 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, ret = handle_direct_tree_backref(cache, &key, cur); if (ret < 0) goto out; - continue; - } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { - ret = -EINVAL; - btrfs_print_v0_err(fs_info); - btrfs_handle_fs_error(fs_info, ret, NULL); - goto out; - } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { - continue; + } else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { + /* + * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref + * offset means the root objectid. We need to search + * the tree to get its parent bytenr. + */ + ret = handle_indirect_tree_backref(cache, path, &key, node_key, + cur); + if (ret < 0) + goto out; } - /* - * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset - * means the root objectid. We need to search the tree to get - * its parent bytenr. + * Unrecognized tree backref items (if it can pass tree-checker) + * would be ignored. */ - ret = handle_indirect_tree_backref(cache, path, &key, node_key, - cur); - if (ret < 0) - goto out; } ret = 0; cur->checked = 1; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 030ab44fce18..0cb1dee965a0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes) { struct btrfs_caching_control *caching_ctl; + int progress; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return; + /* + * We've already failed to allocate from this block group, so even if + * there's enough space in the block group it isn't contiguous enough to + * allow for an allocation, so wait for at least the next wakeup tick, + * or for the thing to be done. + */ + progress = atomic_read(&caching_ctl->progress); + wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || - (cache->free_space_ctl->free_space >= num_bytes)); + (progress != atomic_read(&caching_ctl->progress) && + (cache->free_space_ctl->free_space >= num_bytes))); btrfs_put_caching_control(caching_ctl); } @@ -494,13 +504,20 @@ static void fragment_free_space(struct btrfs_block_group *block_group) #endif /* - * This is only called by btrfs_cache_block_group, since we could have freed - * extents we need to check the pinned_extents for any extents that can't be - * used yet since their free space will be released as soon as the transaction - * commits. + * Add a free space range to the in memory free space cache of a block group. + * This checks if the range contains super block locations and any such + * locations are not added to the free space cache. + * + * @block_group: The target block group. + * @start: Start offset of the range. + * @end: End offset of the range (exclusive). + * @total_added_ret: Optional pointer to return the total amount of space + * added to the block group's free space cache. + * + * Returns 0 on success or < 0 on error. */ -int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end, - u64 *total_added_ret) +int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start, + u64 end, u64 *total_added_ret) { struct btrfs_fs_info *info = block_group->fs_info; u64 extent_start, extent_end, size; @@ -510,11 +527,10 @@ int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end *total_added_ret = 0; while (start < end) { - ret = find_first_extent_bit(&info->excluded_extents, start, - &extent_start, &extent_end, - EXTENT_DIRTY | EXTENT_UPTODATE, - NULL); - if (ret) + if (!find_first_extent_bit(&info->excluded_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY | EXTENT_UPTODATE, + NULL)) break; if (extent_start <= start) { @@ -789,8 +805,8 @@ next: key.type == BTRFS_METADATA_ITEM_KEY) { u64 space_added; - ret = add_new_free_space(block_group, last, key.objectid, - &space_added); + ret = btrfs_add_new_free_space(block_group, last, + key.objectid, &space_added); if (ret) goto out; total_found += space_added; @@ -802,21 +818,29 @@ next: if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; - if (wakeup) + if (wakeup) { + atomic_inc(&caching_ctl->progress); wake_up(&caching_ctl->wait); + } } } path->slots[0]++; } - ret = add_new_free_space(block_group, last, - block_group->start + block_group->length, - NULL); + ret = btrfs_add_new_free_space(block_group, last, + block_group->start + block_group->length, + NULL); out: btrfs_free_path(path); return ret; } +static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg) +{ + clear_extent_bits(&bg->fs_info->excluded_extents, bg->start, + bg->start + bg->length - 1, EXTENT_UPTODATE); +} + static noinline void caching_thread(struct btrfs_work *work) { struct btrfs_block_group *block_group; @@ -910,6 +934,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; refcount_set(&caching_ctl->count, 2); + atomic_set(&caching_ctl->progress, 0); btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); spin_lock(&cache->lock); @@ -2085,8 +2110,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) if (cache->start < BTRFS_SUPER_INFO_OFFSET) { stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start; cache->bytes_super += stripe_len; - ret = btrfs_add_excluded_extent(fs_info, cache->start, - stripe_len); + ret = set_extent_bit(&fs_info->excluded_extents, cache->start, + cache->start + stripe_len - 1, + EXTENT_UPTODATE, NULL); if (ret) return ret; } @@ -2112,8 +2138,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) cache->start + cache->length - logical[nr]); cache->bytes_super += len; - ret = btrfs_add_excluded_extent(fs_info, logical[nr], - len); + ret = set_extent_bit(&fs_info->excluded_extents, logical[nr], + logical[nr] + len - 1, + EXTENT_UPTODATE, NULL); if (ret) { kfree(logical); return ret; @@ -2306,8 +2333,8 @@ static int read_one_block_group(struct btrfs_fs_info *info, btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->cached = BTRFS_CACHE_FINISHED; - ret = add_new_free_space(cache, cache->start, - cache->start + cache->length, NULL); + ret = btrfs_add_new_free_space(cache, cache->start, + cache->start + cache->length, NULL); btrfs_free_excluded_extents(cache); if (ret) goto error; @@ -2754,7 +2781,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); + ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL); btrfs_free_excluded_extents(cache); if (ret) { btrfs_put_block_group(cache); @@ -4062,7 +4089,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, if (IS_ERR(ret_bg)) { ret = PTR_ERR(ret_bg); - } else if (from_extent_allocation) { + } else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) { /* * New block group is likely to be used soon. Try to activate * it now. Failure is OK for now. @@ -4260,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) struct btrfs_caching_control *caching_ctl; struct rb_node *n; + if (btrfs_is_zoned(info)) { + if (info->active_meta_bg) { + btrfs_put_block_group(info->active_meta_bg); + info->active_meta_bg = NULL; + } + if (info->active_system_bg) { + btrfs_put_block_group(info->active_system_bg); + info->active_system_bg = NULL; + } + } + write_lock(&info->block_group_cache_lock); while (!list_empty(&info->caching_block_groups)) { caching_ctl = list_entry(info->caching_block_groups.next, diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index aba5dff66c19..2bdbcb834f95 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -90,6 +90,8 @@ struct btrfs_caching_control { wait_queue_head_t wait; struct btrfs_work work; struct btrfs_block_group *block_group; + /* Track progress of caching during allocation. */ + atomic_t progress; refcount_t count; }; @@ -289,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); -int add_new_free_space(struct btrfs_block_group *block_group, - u64 start, u64 end, u64 *total_added_ret); +int btrfs_add_new_free_space(struct btrfs_block_group *block_group, + u64 start, u64 end, u64 *total_added_ret); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d47a927b3504..bda1fdbba666 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -498,12 +498,8 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, struct writeback_control *wbc); + u64 start, u64 end, struct writeback_control *wbc); int btrfs_writepage_cow_fixup(struct page *page); -void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, - struct page *page, u64 start, - u64 end, bool uptodate); int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info, int compress_type); int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f2d2b313bde5..9419f4e37a58 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -443,6 +443,7 @@ struct btrfs_drop_extents_args { struct btrfs_file_private { void *filldir_buf; + u64 last_index; struct extent_state *llseek_cached_state; }; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6b457b010cbc..53c1211dd60b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) } bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list) { @@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); @@ -1735,9 +1736,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, int over = 0; unsigned char d_type; - if (list_empty(ins_list)) - return 0; - /* * Changing the data of the delayed item is impossible. So * we needn't lock them. And we have held i_mutex of the @@ -1808,9 +1806,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, inode->i_mtime.tv_nsec); btrfs_set_stack_timespec_sec(&inode_item->ctime, - inode->i_ctime.tv_sec); + inode_get_ctime(inode).tv_sec); btrfs_set_stack_timespec_nsec(&inode_item->ctime, - inode->i_ctime.tv_nsec); + inode_get_ctime(inode).tv_nsec); btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime.tv_sec); @@ -1861,8 +1859,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime); inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime); + inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime), + btrfs_stack_timespec_nsec(&inode_item->ctime)); BTRFS_I(inode)->i_otime.tv_sec = btrfs_stack_timespec_sec(&inode_item->otime); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 4f21daa3dbc7..dc1085b2a397 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); /* Used for readdir() */ bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5f10965fd72b..fff22ed55c42 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -792,9 +792,9 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev, lockdep_assert_held(&srcdev->fs_info->chunk_mutex); - while (!find_first_extent_bit(&srcdev->alloc_state, start, - &found_start, &found_end, - CHUNK_ALLOCATED, &cached_state)) { + while (find_first_extent_bit(&srcdev->alloc_state, start, + &found_start, &found_end, + CHUNK_ALLOCATED, &cached_state)) { ret = set_extent_bit(&tgtdev->alloc_state, found_start, found_end, CHUNK_ALLOCATED, NULL); if (ret) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b9914e5f03d..0a96ea8c1d3a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -313,21 +313,16 @@ static bool check_tree_block_fsid(struct extent_buffer *eb) struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; u8 fsid[BTRFS_FSID_SIZE]; - u8 *metadata_uuid; read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE); + /* - * Checking the incompat flag is only valid for the current fs. For - * seed devices it's forbidden to have their uuid changed so reading - * ->fsid in this case is fine + * alloc_fs_devices() copies the fsid into metadata_uuid if the + * metadata_uuid is unset in the superblock, including for a seed device. + * So, we can use fs_devices->metadata_uuid. */ - if (btrfs_fs_incompat(fs_info, METADATA_UUID)) - metadata_uuid = fs_devices->metadata_uuid; - else - metadata_uuid = fs_devices->fsid; - - if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE)) + if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0) return false; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) @@ -1103,7 +1098,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) btrfs_drew_lock_init(&root->snapshot_lock); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - !btrfs_is_data_reloc_root(root)) { + !btrfs_is_data_reloc_root(root) && + is_fstree(root->root_key.objectid)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } @@ -1300,6 +1296,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, root = btrfs_get_global_root(fs_info, objectid); if (root) return root; + + /* + * If we're called for non-subvolume trees, and above function didn't + * find one, do not try to read it from disk. + * + * This is namely for free-space-tree and quota tree, which can change + * at runtime and should only be grabbed from fs_info. + */ + if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) + return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { @@ -2373,21 +2379,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info, ret = -EINVAL; } - if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid, - BTRFS_FSID_SIZE)) { + if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "superblock fsid doesn't match fsid of fs_devices: %pU != %pU", - fs_info->super_copy->fsid, fs_info->fs_devices->fsid); + sb->fsid, fs_info->fs_ |
