diff options
Diffstat (limited to 'fs')
76 files changed, 1000 insertions, 625 deletions
@@ -80,7 +80,7 @@ struct aio_ring { struct kioctx_table { struct rcu_head rcu; unsigned nr; - struct kioctx __rcu *table[]; + struct kioctx __rcu *table[] __counted_by(nr); }; struct kioctx_cpu { diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6a13cf00218b..9fe4ccca50a0 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -103,24 +103,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) * Transfer bytes to our delayed refs rsv. * * @fs_info: the filesystem - * @src: source block rsv to transfer from * @num_bytes: number of bytes to transfer * - * This transfers up to the num_bytes amount from the src rsv to the + * This transfers up to the num_bytes amount, previously reserved, to the * delayed_refs_rsv. Any extra bytes are returned to the space info. */ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes) { struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; u64 to_free = 0; - spin_lock(&src->lock); - src->reserved -= num_bytes; - src->size -= num_bytes; - spin_unlock(&src->lock); - spin_lock(&delayed_refs_rsv->lock); if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { u64 delta = delayed_refs_rsv->size - @@ -163,6 +156,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1); u64 num_bytes = 0; + u64 refilled_bytes; + u64 to_free; int ret = -ENOSPC; spin_lock(&block_rsv->lock); @@ -178,9 +173,38 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); if (ret) return ret; - btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); + + /* + * We may have raced with someone else, so check again if we the block + * reserve is still not full and release any excess space. + */ + spin_lock(&block_rsv->lock); + if (block_rsv->reserved < block_rsv->size) { + u64 needed = block_rsv->size - block_rsv->reserved; + + if (num_bytes >= needed) { + block_rsv->reserved += needed; + block_rsv->full = true; + to_free = num_bytes - needed; + refilled_bytes = needed; + } else { + block_rsv->reserved += num_bytes; + to_free = 0; + refilled_bytes = num_bytes; + } + } else { + to_free = num_bytes; + refilled_bytes = 0; + } + spin_unlock(&block_rsv->lock); + + if (to_free > 0) + btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info, + to_free); + + if (refilled_bytes > 0) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0, + refilled_bytes, 1); return 0; } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index b8e14b0ba5f1..fd9bf2b709c0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -407,7 +407,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f356f08b55cb..fc313fce5bbd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1514,15 +1514,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* now insert the actual backref */ - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - BUG_ON(refs_to_add != 1); + if (owner < BTRFS_FIRST_FREE_OBJECTID) ret = insert_tree_block_ref(trans, path, bytenr, parent, root_objectid); - } else { + else ret = insert_extent_data_ref(trans, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); - } + if (ret) btrfs_abort_transaction(trans, ret); out: @@ -1656,7 +1655,10 @@ again: goto again; } } else { - err = -EIO; + err = -EUCLEAN; + btrfs_err(fs_info, + "missing extent item for extent %llu num_bytes %llu level %d", + head->bytenr, head->num_bytes, extent_op->level); goto out; } } @@ -1699,12 +1701,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent = ref->parent; ref_root = ref->root; - if (node->ref_mod != 1) { + if (unlikely(node->ref_mod != 1)) { btrfs_err(trans->fs_info, - "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu", node->bytenr, node->ref_mod, node->action, ref_root, parent); - return -EIO; + return -EUCLEAN; } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ac3fca5a5e41..caccd0376342 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) bvec->bv_offset, bvec->bv_len); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); - if (error) { - btrfs_page_clear_uptodate(fs_info, page, start, len); + if (error) mapping_set_error(page->mapping, error); - } btrfs_page_clear_writeback(fs_info, page, start, len); } @@ -1456,8 +1454,6 @@ done: if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page, - page_start, PAGE_SIZE); mapping_set_error(page->mapping, ret); } unlock_page(page); @@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) struct page *page = bvec->bv_page; u32 len = bvec->bv_len; - if (!uptodate) - btrfs_page_clear_uptodate(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len); bio_offset += len; } @@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, cur, cur_len, !ret); - btrfs_page_clear_uptodate(fs_info, page, cur, cur_len); mapping_set_error(page->mapping, ret); } btrfs_page_unlock_writer(fs_info, page, cur, cur_len); @@ -4002,8 +3995,14 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; unsigned long i = get_eb_page_index(start); - if (check_eb_range(eb, start, len)) + if (check_eb_range(eb, start, len)) { + /* + * Invalid range hit, reset the memory, so callers won't get + * some random garbage for their uninitialzed memory. + */ + memset(dstv, 0, len); return; + } offset = get_eb_offset_in_page(eb, start); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ca46a529d56b..361535c71c0f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1106,6 +1106,25 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode) btrfs_drew_write_unlock(&inode->root->snapshot_lock); } +static void update_time_for_write(struct inode *inode) +{ + struct timespec64 now, ctime; + + if (IS_NOCMTIME(inode)) + return; + + now = current_time(inode); + if (!timespec64_equal(&inode->i_mtime, &now)) + inode->i_mtime = now; + + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, &now)) + inode_set_ctime_to_ts(inode, now); + + if (IS_I_VERSION(inode)) + inode_inc_iversion(inode); +} + static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count) { @@ -1137,10 +1156,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, * need to start yet another transaction to update the inode as we will * update the inode when we finish writing whatever data we write. */ - if (!IS_NOCMTIME(inode)) { - inode->i_mtime = inode_set_ctime_current(inode); - inode_inc_iversion(inode); - } + update_time_for_write(inode); start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); @@ -1451,8 +1467,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1460,6 +1481,13 @@ relock: if (err < 0) return err; + /* Shared lock cannot be used with security bits set. */ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f09fbdc43f0f..7814b9d654ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, btrfs_mark_ordered_io_finished(inode, locked_page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(inode->root->fs_info, - locked_page, page_start, - PAGE_SIZE); mapping_set_error(locked_page->mapping, ret); unlock_page(locked_page); } @@ -2791,7 +2788,6 @@ out_page: mapping_set_error(page->mapping, ret); btrfs_mark_ordered_io_finished(inode, page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE); clear_page_dirty_for_io(page); } btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); @@ -5769,20 +5765,24 @@ out: static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) { - if (dir->index_cnt == (u64)-1) { - int ret; + int ret = 0; + btrfs_inode_lock(dir, 0); + if (dir->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { ret = btrfs_set_inode_index_count(dir); if (ret) - return ret; + goto out; } } - *index = dir->index_cnt; + /* index_cnt is the index number of next new entry, so decrement it. */ + *index = dir->index_cnt - 1; +out: + btrfs_inode_unlock(dir, 0); - return 0; + return ret; } /* @@ -5817,6 +5817,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) return 0; } +static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct btrfs_file_private *private = file->private_data; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), + &private->last_index); + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + struct dir_entry { u64 ino; u64 offset; @@ -10868,7 +10881,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { }; static const struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_dir_llseek, .read = generic_read_dir, .iterate_shared = btrfs_real_readdir, .open = btrfs_opendir, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 09bfe68d2ea3..1a093ec0f7e3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2117,7 +2117,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && - total_free_meta - thresh < block_rsv->size) + (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; @@ -2150,7 +2150,7 @@ static struct file_system_type btrfs_fs_type = { .name = "btrfs", .mount = btrfs_mount, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; static struct file_system_type btrfs_root_fs_type = { @@ -2158,8 +2158,7 @@ static struct file_system_type btrfs_root_fs_type = { .name = "btrfs", .mount = btrfs_mount_root, .kill_sb = btrfs_kill_super, - .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | - FS_ALLOW_IDMAP | FS_MGTIME, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("btrfs"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0bf42dccb041..c780d3729463 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -631,14 +631,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush); if (ret) goto reserve_fail; if (delayed_refs_bytes) { - btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, - delayed_refs_bytes); + btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes); num_bytes -= delayed_refs_bytes; } + btrfs_block_rsv_add_bytes(rsv, num_bytes, true); if (rsv->space_info->force_alloc) do_chunk_alloc = true; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d1e46b839519..cbb17b542131 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4722,7 +4722,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans |
