// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (C) 2016-2019 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/migrate.h>
#include "trace.h"
#include "../internal.h"
#define IOEND_BATCH_SIZE 4096
/*
* Structure allocated for each folio when block size < folio size
* to track sub-folio uptodate status and I/O completions.
*/
struct iomap_page {
atomic_t read_bytes_pending;
atomic_t write_bytes_pending;
spinlock_t uptodate_lock;
unsigned long uptodate[];
};
static inline struct iomap_page *to_iomap_page(struct folio *folio)
{
if (folio_test_private(folio))
return folio_get_private(folio);
return NULL;
}
static struct bio_set iomap_ioend_bioset;
static struct iomap_page *
iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
{
struct iomap_page *iop = to_iomap_page(folio);
unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
gfp_t gfp;
if (iop || nr_blocks <= 1)
return iop;
if (flags & IOMAP_NOWAIT)
gfp = GFP_NOWAIT;
else
gfp = GFP_NOFS | __GFP_NOFAIL;
iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
gfp);
if (iop) {
spin_lock_init(&iop->uptodate_lock);
if (folio_test_uptodate(folio))
bitmap_fill(iop->uptodate, nr_blocks);
folio_attach_private(folio, iop);
}
return iop;
}
static void iomap_page_release(struct folio *folio)
{
struct iomap_page *iop = folio_detach_private(folio);
struct inode *inode = folio->mapping->host;
unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
if (!iop)
return;
WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
folio_test_uptodate(folio));
kfree(iop);
}
/*
* Calculate the range inside the folio that we actually need to read.
*/
static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
loff_t *pos, loff_t length, size_t *offp, size_t *lenp)
{
struct iomap_page *iop = to_iomap_page(folio);
loff_t orig_pos = *pos;
loff_t isize = i_size_read(inode);
unsigned block_bits = inode->i_blkbits;
unsigned block_size = (1 << block_bits);
size_t poff = offset_in_folio(folio, *pos);
size_t plen = min_t(loff_t, folio_size(folio) - poff, length);
unsigned first = poff >> block_bits;
unsigned last = (poff + plen - 1) >> block_bits;
/*
* If the block size is smaller than the page size, we need to check the
* per-block uptodate status and adjust the offset and length if needed
* to avoid reading in already uptodate ranges.
*/
if (iop) {
unsigned int i;
/* move forward for each leading block marked uptodate */
for (i = first; i <= last; i++) {
if (!test_bit(i, iop->uptodate))
break;
*pos += block_size;
poff += block_size;
plen -= block_size;
first++;
}
/* truncate len if we find any trailing uptodate block(s) */
for ( ; i <= last; i++) {
if (test_bit(i, iop->uptodate)) {
plen -= (last - i + 1) * block_size;
last = i - 1;
break;
}
}
}
/*
* If the extent spans the block that contains the i_size, we need to
* handle both halves separately so that we properly zero data in the
* page cache for blocks that are entirely outside of i_size.
*/
if (orig_pos <= isize && orig_pos + length > isize) {
unsigned end = offset_in_folio(folio, isize - 1) >> block_bits;
if (first <= end && last > end)
plen -= (last - end) * block_size;
}
*offp = poff;
*lenp = plen;
}
static void iomap_iop_set_range_uptodate(struct folio *folio,
struct iomap_page *iop, size_t off, size_t len)
{
struct inode *inode = folio->mapping->host;
unsigned first = off >> inode->i_blkbits;
unsigned last = (off + len - 1) >> inode->i_blkbits;
unsigned long flags;
spin_lock_irqsave(&iop->uptodate_lock, flags);
bitmap_set(iop->uptodate, first, last - first + 1);
if (bitmap_full(iop->uptodate, i_blocks_per_folio(inode, folio)))
folio_mark_uptodate(folio);
spin_unlock_irqrestore(&iop->uptodate_lock, flags);
}
static void iomap_set_range_uptodate(struct folio *folio,
struct iomap_page *iop, size_t off, size_t len)
{
if (iop)
iomap_iop_set_range_uptodate(folio, iop, off, len);
else
folio_mark_uptodate(folio);
}
static void iomap_finish_folio_read(struct folio *folio, size_t offset,
size_t len, int error)
{
struct iomap_page *iop = to_iomap_page(folio);
if (unlikely(error)) {
folio_clear_uptodate(folio);
folio_set_error(folio);
} else {
iomap_set_range_uptodate(folio, iop, offset, len);
}
if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending))
folio_unlock(folio);
}
static void iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
bio_for_each_folio_all(fi, bio)
iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
bio_put(bio);
}
struct iomap_readpage_ctx {
struct folio *cur_folio;
bool cur_folio_in_bio;
struct bio *bio;
struct readahead_control *rac;
};
/**
* iomap_read_inline_data - copy inline data into the page cache
* @iter: iteration structure
* @folio: folio to copy to
*
* Copy the inline data in @iter into @folio and zero out the rest of the folio.
* Only a single IOMAP_INLINE extent is allowed at the end of each file.
* Returns zero for success to complete the read, or the usual negative errno.
*/
static int iomap_read_inline_data(const struct iomap_iter *iter,
struct folio *folio)
{
struct iomap_page *iop;
const struct iomap *iomap = iomap_iter_srcmap(iter);
size_t size = i_size_read(iter->inode) - iomap->offset;
size_t poff = offset_in_page(iomap->offset);
size_t offset = offset_in_fo
|