// SPDX-License-Identifier: GPL-2.0
#include <linux/fsverity.h>
#include <linux/iomap.h>
#include "ctree.h"
#include "delalloc-space.h"
#include "direct-io.h"
#include "extent-tree.h"
#include "file.h"
#include "fs.h"
#include "transaction.h"
#include "volumes.h"
struct btrfs_dio_data {
ssize_t submitted;
struct extent_changeset *data_reserved;
struct btrfs_ordered_extent *ordered;
bool data_space_reserved;
bool nocow_done;
};
struct btrfs_dio_private {
/* Range of I/O */
u64 file_offset;
u32 bytes;
/* This must be last */
struct btrfs_bio bbio;
};
static struct bio_set btrfs_dio_bioset;
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
struct extent_state **cached_state,
unsigned int iomap_flags)
{
const bool writing = (iomap_flags & IOMAP_WRITE);
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
int ret = 0;
/* Direct lock must be taken before the extent lock. */
if (nowait) {
if (!btrfs_try_lock_dio_extent(io_tree, lockstart, lockend, cached_state))
return -EAGAIN;
} else {
btrfs_lock_dio_extent(io_tree, lockstart, lockend, cached_state);
}
while (1) {
if (nowait) {
if (!btrfs_try_lock_extent(io_tree, lockstart, lockend,
cached_state)) {
ret = -EAGAIN;
break;
}
} else {
btrfs_lock_extent(io_tree, lockstart, lockend, cached_state);
}
/*
* We're concerned with the entire range that we're going to be
* doing DIO to, so we need to make sure there's no ordered
* extents in this range.
*/
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
lockend - lockstart + 1);
/*
* We need to make sure there are no buffered pages in this
* range either, we could have raced between the invalidate in
* generic_file_direct_write and locking the extent. The
* invalidate needs to happen so that reads after a write do not
* get stale data.
*/
if (!ordered &&
(!writing || !filemap_range_has_page(inode->i_mapping,
lockstart, lockend)))
break;
btrfs_unlock_extent(io_tree, lockstart, lockend, cached_state);
if (ordered) {
if (nowait) {
btrfs_put_ordered_extent(ordered);
ret = -EAGAIN;
break;
}
/*
* If we are doing a DIO read and the ordered extent we
* found is for a buffered write, we can not wait for it
* to complete and retry, because if we do so we can
* deadlock with concurrent buffered writes on page
* locks. This happens only if our DIO read covers more
* than one extent map, if at this point has already
* created an ordered extent for a previous extent map
* and locked its range in the inode's io tree, and a
* concurrent write against that previous extent map's
* range and this range started (we unlock the ranges
* in the io tree only when the bios complete and
* buffered writes always lock pages before attempting
* to lock range in the io tree).
*/
if (writing ||
test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
btrfs_start_ordered_extent(ordered);
else
ret = nowait ? -EAGAIN : -ENOTBLK;
btrfs_put_ordered_extent(ordered);
} else {
/*
* We could trigger writeback for this range (and wait
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
* call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
* complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
ret = nowait ? -EAGAIN : -ENOTBLK;
}
if (ret)
break;
cond_resched();
}
if (ret)
btrfs_unlock_dio_extent(io_tree, lockstart, lockend, cached_state);
return ret;
}
static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
struct btrfs_dio_data *dio_data,
const u64 start,
const struct btrfs_file_extent *file_extent,
const int type)
{
struct extent_map *em = NULL;
struct btrfs_ordered_extent *ordered;
if (type != BTRFS_ORDERED_NOCOW) {
em = btrfs_create_io_em(inode, start, file_extent, type);
if (IS_ERR(em))
goto out;
}
ordered = btrfs_alloc_ordered_extent(inode, start, file_extent,
(1U << type) |
(1U << BTRFS_ORDERED_DIRECT));
if (IS_ERR(ordered)) {
if (em) {
btrfs_free_extent_map(em);
btrfs_drop_extent_map_range(inode, start,
start + file_extent->num_bytes - 1, false);
}
em = ERR_CAST(ordered);
} else {
ASSERT(!dio_data->ordered);
dio_data->ordered = ordered;
}
out:
return em;
}
static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
struct btrfs_dio_data *dio_data,
u64 start, u64 len)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_file_extent file_extent