// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/nfs/direct.c
*
* Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
*
* High-performance uncached I/O for the Linux NFS client
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
* (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
* system cache protocols. Applications that process datasets
* considerably larger than the client's memory do not always benefit
* from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
* are made directly to the server; data stored or fetched via these
* requests is not cached in the Linux page cache. The client does not
* correct unaligned requests from applications. All requested bytes are
* held on permanent storage before a direct write system call returns to
* an application.
*
* Solaris implements an uncached I/O facility called directio() that
* is used for backups and sequential I/O to very large files. Solaris
* also supports uncaching whole NFS partitions with "-o forcedirectio,"
* an undocumented mount option.
*
* Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
* help from Andrew Morton.
*
* 18 Dec 2001 Initial implementation for 2.4 --cel
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
* 04 May 2005 support O_DIRECT with aio --cel
*
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/module.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
#include "fscache.h"
#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static struct kmem_cache *nfs_direct_cachep;
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
static void nfs_direct_write_schedule_work(struct work_struct *work);
static inline void get_dreq(struct nfs_direct_req *dreq)
{
atomic_inc(&dreq->io_count);
}
static inline int put_dreq(struct nfs_direct_req *dreq)
{
return atomic_dec_and_test(&dreq->io_count);
}
static void
nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
const struct nfs_pgio_header *hdr,
ssize_t dreq_len)
{
if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
test_bit(NFS_IOHDR_EOF, &hdr->flags)))
return;
if (dreq->max_count >= dreq_len) {
dreq->max_count = dreq_len;
if (dreq->count > dreq_len)
dreq->count = dreq_len;
}
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
dreq->error = hdr->error;
}
static void
nfs_direct_count_bytes(struct nfs_direct_req *dreq,
const struct nfs_pgio_header *hdr)
{
loff_t hdr_end = hdr->io_start + hdr->good_bytes;
ssize_t dreq_len = 0;
if (hdr_end > dreq->io_start)
dreq_len = hdr_end - dreq->io_start;
nfs_direct_handle_truncated(dreq, hdr, dreq_len);
if (dreq_len > dreq->max_count)
dreq_len = dreq->max_count;
if (dreq->count < dreq_len)
dreq->count = dreq_len;
}
static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
struct nfs_page *req)
{
loff_t offs = req_offset(req);
size_t req_start = (size_t)(offs - dreq->io_start);
if (req_start < dreq->max_count)
dreq->max_count = req_start;
if (req_start < dreq->count)
dreq->count = req_start;
}
/**
* nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
* @iter: I/O buffer
*
* Perform IO to the swap-file. This is much like direct IO.
*/
int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
ret = nfs_file_direct_write(iocb, iter, true);
if (ret < 0)
return ret;
return 0;
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
unsigned int i;
for (i = 0; i < npages; i++)
put_page(pages[i]);
}
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq)
{
cinfo->inode = dreq->inode;
cinfo->mds = &dreq->mds_cinfo;
cinfo->ds = &dreq->ds_cinfo;
cinfo->dreq = dreq;
cinfo->completion_ops = &nfs_direct_commit_completion_ops;
}
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
pnfs_init_ds_commit_info(&dreq->ds_cinfo);
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);