diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/block-group.c | 13 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/delayed-ref.c | 67 | ||||
| -rw-r--r-- | fs/btrfs/delayed-ref.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/direct-io.c | 38 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 54 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 14 | ||||
| -rw-r--r-- | fs/btrfs/extent_map.c | 24 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 22 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 28 | ||||
| -rw-r--r-- | fs/btrfs/print-tree.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 25 | ||||
| -rw-r--r-- | fs/btrfs/send.c | 52 | ||||
| -rw-r--r-- | fs/btrfs/space-info.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/space-info.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 15 | ||||
| -rw-r--r-- | fs/btrfs/tests/extent-map-tests.c | 99 | ||||
| -rw-r--r-- | fs/btrfs/tree-checker.c | 54 |
19 files changed, 435 insertions, 85 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c216..2e49d978f504 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4..75fa563e4cac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2ac9296edccb..06a9e0542d70 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ef15e998be03..05f634eb472d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4..67adbe9d294a 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c..feec49e6f9c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); @@ -5471,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 parent, int level) { + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *head; struct btrfs_path *path; struct btrfs_extent_inline_ref *iref; int ret; + bool exists = false; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: ret = lookup_extent_backref(trans, path, &iref, bytenr, root->fs_info->nodesize, parent, btrfs_root_id(root), level, 0); + if (ret != -ENOENT) { + /* + * If we get 0 then we found our reference, return 1, else + * return the error if it's not -ENOENT; + */ + btrfs_free_path(path); + return (ret < 0 ) ? ret : 1; + } + + /* + * We could have a delayed ref with this reference, so look it up while + * we're holding the path open to make sure we don't race with the + * delayed ref running. + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + if (!head) + goto out; + if (!mutex_trylock(&head->mutex)) { + /* + * We're contended, means that the delayed ref is running, get a + * reference and wait for the ref head to be complete and then + * try again. + */ + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref_head(head); + goto again; + } + + exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + mutex_unlock(&head->mutex); +out: + spin_unlock(&delayed_refs->lock); btrfs_free_path(path); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - return 1; + return exists ? 1 : 0; } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa7f8148cd0d..c73cd4f89015 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, free_extent_map(em); em = NULL; + /* + * Although the PageDirty bit might be cleared before entering + * this function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * page for range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); btrfs_set_range_writeback(inode, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(inode->root->fs_info, @@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, page->index, cur, end); } - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * page for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); submit_extent_page(bio_ctrl, disk_bytenr, page, iosize, cur - page_offset(page)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80..10ac5f657e38 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } @@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c return 0; /* - * We want to be fast because we can be called from any path trying to - * allocate memory, so if the lock is busy we don't want to spend time + * We want to be fast so if the lock is busy we don't want to spend time * waiting for it - either some task is about to do IO for the inode or * we may have another task shrinking extent maps, here in this code, so * skip this inode. @@ -1191,9 +1190,7 @@ next: /* * Stop if we need to reschedule or there's contention on the * lock. This is to avoid slowing other tasks trying to take the - * lock and because the shrinker might be called during a memory - * allocation path and we want to avoid taking a very long time - * and slowing down all sorts of tasks. + * lock. */ if (need_resched() || rwlock_needbreak(&tree->lock)) break; @@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx if (ctx->scanned >= ctx->nr_to_scan) break; - /* - * We may be called from memory allocation paths, so we don't - * want to take too much time and slowdown tasks. - */ - if (need_resched()) - break; + cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) ctx.last_ino); } - /* - * We may be called from memory allocation paths, so we don't want to - * take too much time and slowdown tasks, so stop if we need reschedule. - */ - while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { + while (ctx.scanned < ctx.nr_to_scan) { struct btrfs_root *root; unsigned long count; + cond_resched(); + spin_lock(&fs_info->fs_roots_radix_lock); count = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)&root, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6..9914419f3b7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -1857,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543a..f5996a43db24 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; + WARN_ON(block_group->zone_unusable > block_group->length); + } spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab6955647..333b0e8587a2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -1581,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1594,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2255,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; @@ -5660,7 +5667,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; @@ -7198,6 +7205,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10133,6 +10146,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da3..fc821aa446f0 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d7100018..0de9162ff481 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ca711a773ef..7fc692fc76e1 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 offset = key->offset; u64 end; u64 bs = sctx->send_root->fs_info->sectorsize; + struct btrfs_file_extent_item *ei; + u64 disk_byte; + u64 data_offset; + u64 num_bytes; + struct btrfs_inode_info info = { 0 }; end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); if (offset >= end) return 0; - if (clone_root && IS_ALIGNED(end, bs)) { - struct btrfs_file_extent_item *ei; - u64 disk_byte; - u64 data_offset; + num_bytes = end - offset; - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); - data_offset = btrfs_file_extent_offset(path->nodes[0], ei); - ret = clone_range(sctx, path, clone_root, disk_byte, - data_offset, offset, end - offset); - } else { - ret = send_extent_data(sctx, path, offset, end - offset); - } + if (!clone_root) + goto write_data; + + if (IS_ALIGNED(end, bs)) + goto clone_data; + + /* + * If the extent end is not aligned, we can clone if the extent ends at + * the i_size of the inode and the clone range ends at the i_size of the + * source inode, otherwise the clone operation fails with -EINVAL. + */ + if (end != sctx->cur_inode_size) + goto write_data; + + ret = get_inode_info(clone_root->root, clone_root->ino, &info); + if (ret < 0) + return ret; + + if (clone_root->offset + num_bytes == info.size) + goto clone_data; + +write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); + sctx->cur_inode_next_write_offset = end; + return ret; + +clone_data: + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); + data_offset = btrfs_file_extent_offset(path->nodes[0], ei); + ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset, + num_bytes); sctx->cur_inode_next_write_offset = end; return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8..68e14fd48638 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c16..88b44221ce97 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fb..11044e9e2cb1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,6 +28,7 @@ #include <linux/btrfs.h> #include <linux/security.h> #include <linux/fs_parser.h> +#include <linux/swap.h> #include "messages.h" #include "delayed-inode.h" #include "ctree.h" @@ -683,8 +684,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } @@ -2406,6 +2410,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); + /* + * We may be called from any task trying to allocate memory and we don't + * want to slow it down with scanning and dropping extent maps. It would + * also cause heavy lock contention if many tasks concurrently enter + * here. Therefore only allow kswapd tasks to scan and drop extent maps. + */ + if (!current_is_kswapd()) + return 0; + return btrfs_free_extent_maps(fs_info, nr_to_scan); } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b8..56e61ac1cc64 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlo |
