/*
* linux/fs/nfs/direct.c
*
* Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
*
* High-performance uncached I/O for the Linux NFS client
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
* (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
* system cache protocols. Applications that process datasets
* considerably larger than the client's memory do not always benefit
* from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
* are made directly to the server; data stored or fetched via these
* requests is not cached in the Linux page cache. The client does not
* correct unaligned requests from applications. All requested bytes are
* held on permanent storage before a direct write system call returns to
* an application.
*
* Solaris implements an uncached I/O facility called directio() that
* is used for backups and sequential I/O to very large files. Solaris
* also supports uncaching whole NFS partitions with "-o forcedirectio,"
* an undocumented mount option.
*
* Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
* help from Andrew Morton.
*
* 18 Dec 2001 Initial implementation for 2.4 --cel
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
* 04 May 2005 support O_DIRECT with aio --cel
*
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/module.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static struct kmem_cache *nfs_direct_cachep;
/*
* This represents a set of asynchronous requests that we're waiting on
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
/* I/O parameters */
struct nfs_open_context *ctx; /* file open context info */
struct nfs_lock_context *l_ctx; /* Lock context info */
struct kiocb * iocb; /* controlling i/o request */
struct inode * inode; /* target file of i/o */
/* completion state */
atomic_t io_count; /* i/os we're waiting for */
spinlock_t lock; /* protect completion state */
ssize_t count, /* bytes actually processed */
bytes_left, /* bytes left to be sent */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
/* commit state */
struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work;
int flags;
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
struct nfs_writeverf verf; /* unstable write verifier */
};
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
static void nfs_direct_write_schedule_work(struct work_struct *work);
static inline void get_dreq(struct nfs_direct_req *dreq)
{
atomic_inc(&dreq->io_count);
}
static inline int put_dreq(struct nfs_direct_req *dreq)
{
return atomic_dec_and_test(&dreq->io_count);
}
/*
* nfs_direct_select_verf - select the right verifier
* @dreq - direct request possibly spanning multiple servers
* @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
* @ds_idx - index of data server in data server list, only valid if ds_clp set
*
* returns the correct verifier to use given the role of the server
*/
static struct nfs_writeverf *
nfs_direct_select_verf(struct nfs_direct_req *dreq,
struct nfs_client *ds_clp,
int ds_idx)
{
struct nfs_writeverf *verfp = &dreq->verf;
#ifdef CONFIG_NFS_V4_1
if (ds_clp) {
/* pNFS is in use, use the DS verf */
if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
else
WARN_ON_ONCE(1);
}
#endif
return verfp;
}
/*
* nfs_direct_set_hdr_verf - set the write/commit verifier
* @dreq - direct request possibly spanning multiple servers
* @hdr - pageio header to validate against previously seen verfs
*
* Set the server's (MDS or DS) "seen" verifier
*/
static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
struct nfs_pgio_header *hdr)
{
struct nfs_writeverf *verfp;
verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
hdr->data->ds_idx);
WARN_ON_ONCE(verfp->committed >= 0);
memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
WARN_ON_ONCE(verfp->committed < 0);
}
/*
* nfs_direct_cmp_hdr_verf - compare verifier for pgio header
* @dreq - direct request possibly spanning multiple servers
* @hdr - pageio header to validate against previously seen verf
*
* set the server's "seen" verf if not initialized.
* returns result of comparison between @hdr->verf and the "seen"
* verf of the server used by @hdr (DS or MDS)
*/
static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
struct nfs_pgio_header *hdr)
{
struct nfs_writeverf *verfp;
verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
hdr->data