/*
* linux/fs/nfs/write.c
*
* Writing file data over NFS.
*
* We do it like this: When a (user) process wishes to write data to an
* NFS file, a write request is allocated that contains the RPC task data
* plus some info on the page to be written, and added to the inode's
* write chain. If the process writes past the end of the page, an async
* RPC call to write the page is scheduled immediately; otherwise, the call
* is delayed for a few seconds.
*
* Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
*
* Write requests are kept on the inode's writeback list. Each entry in
* that list references the page (portion) to be written. When the
* cache timeout has expired, the RPC task is woken up, and tries to
* lock the page. As soon as it manages to do so, the request is moved
* from the writeback list to the writelock list.
*
* Note: we must make sure never to confuse the inode passed in the
* write_page request with the one in page->inode. As far as I understand
* it, these are different when doing a swap-out.
*
* To understand everything that goes on here and in the NFS read code,
* one should be aware that a page is locked in exactly one of the following
* cases:
*
* - A write request is in progress.
* - A user process is in generic_file_write/nfs_update_page
* - A user process is in generic_file_read
*
* Also note that because of the way pages are invalidated in
* nfs_revalidate_inode, the following assertions hold:
*
* - If a page is dirty, there will be no read requests (a page will
* not be re-read unless invalidated by nfs_revalidate_inode).
* - If the page is not uptodate, there will be no pending write
* requests, and no process will be in nfs_update_page.
*
* FIXME: Interaction with the vmscan routines is not optimal yet.
* Either vmscan must be made nfs-savvy, or we need a different page
* reclaim concept that supports something like FS-independent
* buffer_heads with a b_ops-> field.
*
* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#include "delegation.h"
#include "iostat.h"
/*
 * Select the page-cache debug facility for this file.
 * NOTE(review): presumably consumed by the dprintk() machinery - confirm.
 */
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
/*
 * NOTE(review): these look like the minimum number of reserved elements
 * for the write and commit mempools declared below; the code that creates
 * the pools is not visible here - confirm.
 */
#define MIN_POOL_WRITE (32)
#define MIN_POOL_COMMIT (4)
/*
* Local function declarations
*
* Forward declarations for helpers and RPC callback tables that are
* defined later in this file.
*/
static struct nfs_page * nfs_update_request(struct nfs_open_context*,
struct inode *,
struct page *,
unsigned int, unsigned int);
static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how);
/* RPC callback tables; initialisers are supplied further down the file. */
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;
/* NOTE(review): presumably the slab cache backing the pools below - confirm. */
static kmem_cache_t *nfs_wdata_cachep;
/* Pool used by nfs_writedata_alloc() / nfs_writedata_free(). */
static mempool_t *nfs_wdata_mempool;
/* Pool used by nfs_commit_alloc() / nfs_commit_free(). */
static mempool_t *nfs_commit_mempool;
/*
 * Wait queue for write congestion.
 * NOTE(review): presumably slept on by nfs_wait_on_write_congestion() - confirm.
 */
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
/*
 * Obtain a commit descriptor from the commit mempool.
 *
 * The descriptor is zero-filled and its ->pages list head initialised
 * before it is handed back. Returns NULL if the pool yields nothing.
 * Release with nfs_commit_free().
 */
struct nfs_write_data *nfs_commit_alloc(void)
{
	struct nfs_write_data *cdata;

	cdata = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
	if (!cdata)
		return NULL;

	/* Start from a clean slate; only the list head needs real setup. */
	memset(cdata, 0, sizeof(*cdata));
	INIT_LIST_HEAD(&cdata->pages);
	return cdata;
}
/*
 * Return a commit descriptor to the commit mempool.
 *
 * @p: descriptor obtained from nfs_commit_alloc(); NULL is accepted and
 *     ignored.
 *
 * If ->pagevec does not point at the embedded ->page_array it is
 * released with kfree() first.
 */
void nfs_commit_free(struct nfs_write_data *p)
{
	/*
	 * Bail out before touching the pool: mempool_free() is not
	 * guaranteed to tolerate a NULL element on every kernel version.
	 */
	if (!p)
		return;

	if (p->pagevec != &p->page_array[0])
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}
/*
 * Allocate a write descriptor able to reference enough pages for @len
 * bytes.
 *
 * @len: byte count of the write; rounded up to whole pages.
 *
 * The descriptor comes zero-filled from the write mempool. When the page
 * count fits in the embedded ->page_array that array is used as
 * ->pagevec; otherwise a zeroed vector is allocated with kcalloc().
 * Returns NULL if either allocation fails (the descriptor is handed back
 * to the pool in that case). Release with nfs_writedata_release().
 */
struct nfs_write_data *nfs_writedata_alloc(size_t len)
{
	unsigned int nr_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct nfs_write_data *wdata;

	wdata = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
	if (!wdata)
		return NULL;

	memset(wdata, 0, sizeof(*wdata));
	INIT_LIST_HEAD(&wdata->pages);
	wdata->npages = nr_pages;

	/* Small requests use the page vector embedded in the descriptor. */
	if (nr_pages <= ARRAY_SIZE(wdata->page_array)) {
		wdata->pagevec = wdata->page_array;
		return wdata;
	}

	/* Larger requests need a separately allocated vector. */
	wdata->pagevec = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
	if (wdata->pagevec)
		return wdata;

	mempool_free(wdata, nfs_wdata_mempool);
	return NULL;
}
/*
 * Return a write descriptor to the write mempool.
 *
 * @p: descriptor obtained from nfs_writedata_alloc(); NULL is accepted
 *     and ignored.
 *
 * A ->pagevec that was allocated separately (i.e. does not point at the
 * embedded ->page_array) is released with kfree() first.
 */
static void nfs_writedata_free(struct nfs_write_data *p)
{
	/*
	 * Bail out before touching the pool: mempool_free() is not
	 * guaranteed to tolerate a NULL element on every kernel version.
	 */
	if (!p)
		return;

	if (p->pagevec != &p->page_array[0])
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}
/*
 * Release a write descriptor through an untyped pointer.
 *
 * @wdata: a struct nfs_write_data passed as void *.
 *
 * NOTE(review): the void * signature suggests this is meant to be used
 * as a generic release callback - confirm against its callers.
 */
void nfs_writedata_release(void *wdata)
{
	struct nfs_write_data *data = wdata;

	nfs_writedata_free(data);
}
/*
 * Extend the cached file size when a write reaches past the current end.
 *
 * @page:   page being written
 * @offset: start of the written range within the page
 * @count:  number of bytes written
 *
 * Nothing happens if the page lies before the page holding the last byte
 * of the file, or if the written range ends at or before the current
 * size. Otherwise the NFSIOS_EXTENDWRITE counter is bumped and the inode
 * size is set to the end of the written range.
 */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *host = page->mapping->host;
	loff_t cur_size = i_size_read(host);
	loff_t new_end;

	/* A page strictly before the last one cannot extend the file. */
	if (cur_size > 0) {
		unsigned long last_index = (cur_size - 1) >> PAGE_CACHE_SHIFT;

		if (page->index < last_index)
			return;
	}

	new_end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (new_end <= cur_size)
		return;

	nfs_inc_stats(host, NFSIOS_EXTENDWRITE);
	i_size_write(host, new_end);
}
/*
 * Mark a page uptodate when a write request is known to cover all of
 * its valid data.
 *
 * @page:  page that was written to
 * @base:  start of the written range within the page
 * @count: length of the written range
 *
 * Two cases qualify, both requiring the range to start at offset 0:
 *  - the range spans the whole page;
 *  - the page holds the last byte of the file and the range ends exactly
 *    at end-of-file, in which case the remainder of the page is zeroed
 *    before the flag is set.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	loff_t last_byte;
	unsigned int valid_len;

	if (PageUptodate(page) || base != 0)
		return;

	if (count == PAGE_CACHE_SIZE) {
		SetPageUptodate(page);
		return;
	}

	/* Offset of the final byte of the file; negative for an empty file. */
	last_byte = i_size_read(page->mapping->host) - 1;
	if (last_byte < 0)
		return;

	/* Only the page containing that final byte can qualify from here on. */
	if (page->index != (unsigned long)(last_byte >> PAGE_CACHE_SHIFT))
		return;

	/* Number of bytes of file data that live in this last page. */
	valid_len = (unsigned int)(last_byte & (PAGE_CACHE_SIZE - 1)) + 1;
	if (count != valid_len)
		return;

	/* The write covers all file data in the page: clear the tail past EOF. */
	memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
	SetPageUptodate(page);
}
/*
* Write a page synchronously.
* Offset is the data offset within the page.
*/
static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
struct page *page, unsigned
|