summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/lockd/svc4proc.c 12
-rw-r--r-- fs/lockd/svclock.c 10
-rw-r--r-- fs/lockd/svcproc.c 5
-rw-r--r-- fs/lockd/xdr4.c 19
-rw-r--r-- fs/nfsd/acl.h 6
-rw-r--r-- fs/nfsd/filecache.c 749
-rw-r--r-- fs/nfsd/filecache.h 11
-rw-r--r-- fs/nfsd/netns.h 3
-rw-r--r-- fs/nfsd/nfs2acl.c 6
-rw-r--r-- fs/nfsd/nfs3acl.c 4
-rw-r--r-- fs/nfsd/nfs3proc.c 35
-rw-r--r-- fs/nfsd/nfs4acl.c 46
-rw-r--r-- fs/nfsd/nfs4callback.c 37
-rw-r--r-- fs/nfsd/nfs4proc.c 330
-rw-r--r-- fs/nfsd/nfs4state.c 127
-rw-r--r-- fs/nfsd/nfs4xdr.c 123
-rw-r--r-- fs/nfsd/nfsctl.c 21
-rw-r--r-- fs/nfsd/nfsd.h 6
-rw-r--r-- fs/nfsd/nfsfh.c 27
-rw-r--r-- fs/nfsd/nfsfh.h 58
-rw-r--r-- fs/nfsd/nfsproc.c 27
-rw-r--r-- fs/nfsd/state.h 1
-rw-r--r-- fs/nfsd/trace.h 327
-rw-r--r-- fs/nfsd/vfs.c 256
-rw-r--r-- fs/nfsd/vfs.h 33
-rw-r--r-- fs/nfsd/xdr4.h 60
26 files changed, 1403 insertions, 936 deletions
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 176b468a61c7..bf274f23969b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
if (!nlmsvc_ops)
return nlm_lck_denied_nolocks;
+ if (lock->lock_start > OFFSET_MAX ||
+ (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
+ return nlm4_fbig;
+
/* Obtain host handle */
if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
|| (argp->monitor && nsm_monitor(host) < 0))
@@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
+ lock->fl.fl_start = (loff_t)lock->lock_start;
+ lock->fl.fl_end = lock->lock_len ?
+ (loff_t)(lock->lock_start + lock->lock_len - 1) :
+ OFFSET_MAX;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
if (!lock->fl.fl_owner) {
@@ -87,6 +95,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
struct nlm_args *argp = rqstp->rq_argp;
struct nlm_host *host;
struct nlm_file *file;
+ struct nlm_lockowner *test_owner;
__be32 rc = rpc_success;
dprintk("lockd: TEST4 called\n");
@@ -96,6 +105,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ test_owner = argp->lock.fl.fl_owner;
/* Now check for conflicting locks */
resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
@@ -103,7 +113,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
- nlmsvc_release_lockowner(&argp->lock);
+ nlmsvc_put_lockowner(test_owner);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index cb3658ab9b7a..9c1aa75441e1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
return lockowner;
}
-static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
@@ -590,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
int error;
int mode;
__be32 ret;
- struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
nlmsvc_file_inode(file)->i_sb->s_id,
@@ -604,9 +603,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
- /* If there's a conflicting lock, remember to clean up the test lock */
- test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
-
mode = lock_to_openmode(&lock->fl);
error = vfs_test_lock(file->f_file[mode], &lock->fl);
if (error) {
@@ -635,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
- /* Clean up the test lock */
- lock->fl.fl_owner = NULL;
- nlmsvc_put_lockowner(test_owner);
-
ret = nlm_lck_denied;
out:
return ret;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4dc1b40a489a..b09ca35b527c 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -116,6 +116,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
struct nlm_args *argp = rqstp->rq_argp;
struct nlm_host *host;
struct nlm_file *file;
+ struct nlm_lockowner *test_owner;
__be32 rc = rpc_success;
dprintk("lockd: TEST called\n");
@@ -125,6 +126,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ test_owner = argp->lock.fl.fl_owner;
+
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
@@ -133,7 +136,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
- nlmsvc_release_lockowner(&argp->lock);
+ nlmsvc_put_lockowner(test_owner);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 856267c0864b..712fdfeb8ef0 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -20,13 +20,6 @@
#include "svcxdr.h"
-static inline loff_t
-s64_to_loff_t(__s64 offset)
-{
- return (loff_t)offset;
-}
-
-
static inline s64
loff_t_to_s64(loff_t offset)
{
@@ -70,8 +63,6 @@ static bool
svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
{
struct file_lock *fl = &lock->fl;
- u64 len, start;
- s64 end;
if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
return false;
@@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
return false;
if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
return false;
- if (xdr_stream_decode_u64(xdr, &start) < 0)
+ if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0)
return false;
- if (xdr_stream_decode_u64(xdr, &len) < 0)
+ if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0)
return false;
locks_init_lock(fl);
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK;
- end = start + len - 1;
- fl->fl_start = s64_to_loff_t(start);
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = s64_to_loff_t(end);
return true;
}
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index ba14d2f4b64f..4b7324458a94 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -38,6 +38,8 @@
struct nfs4_acl;
struct svc_fh;
struct svc_rqst;
+struct nfsd_attrs;
+enum nfs_ftype4;
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
@@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_acl **acl);
-__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfs4_acl *acl);
+__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
+ struct nfsd_attrs *attr);
#endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index a605c0e39b09..eeed4ae5b4ad 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -13,6 +13,7 @@
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
+#include <linux/rhashtable.h>
#include "vfs.h"
#include "nfsd.h"
@@ -21,28 +22,19 @@
#include "filecache.h"
#include "trace.h"
-#define NFSDDBG_FACILITY NFSDDBG_FH
-
-/* FIXME: dynamically size this for the machine somehow? */
-#define NFSD_FILE_HASH_BITS 12
-#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
-#define NFSD_FILE_SHUTDOWN (1)
-#define NFSD_FILE_LRU_THRESHOLD (4096UL)
-#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
+#define NFSD_FILE_CACHE_UP (0)
/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
-struct nfsd_fcache_bucket {
- struct hlist_head nfb_head;
- spinlock_t nfb_lock;
- unsigned int nfb_count;
- unsigned int nfb_maxcount;
-};
-
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
struct nfsd_fcache_disposal {
struct work_struct work;
@@ -54,21 +46,146 @@ static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
-static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
static struct list_lru nfsd_file_lru;
-static long nfsd_file_lru_flags;
+static unsigned long nfsd_file_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
-static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
+static struct rhashtable nfsd_file_rhash_tbl
+ ____cacheline_aligned_in_smp;
+
+enum nfsd_file_lookup_type {
+ NFSD_FILE_KEY_INODE,
+ NFSD_FILE_KEY_FULL,
+};
+
+struct nfsd_file_lookup_key {
+ struct inode *inode;
+ struct net *net;
+ const struct cred *cred;
+ unsigned char need;
+ enum nfsd_file_lookup_type type;
+};
+
+/*
+ * The returned hash value is based solely on the address of an in-core
+ * inode, a pointer to a slab-allocated object. The entropy in such a
+ * pointer is concentrated in its middle bits.
+ */
+static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
+{
+ unsigned long ptr = (unsigned long)inode;
+ u32 k;
+
+ k = ptr >> L1_CACHE_SHIFT;
+ k &= 0x00ffffff;
+ return jhash2(&k, 1, seed);
+}
+
+/**
+ * nfsd_file_key_hashfn - Compute the hash value of a lookup key
+ * @data: key on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ * Computed 32-bit hash value
+ */
+static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct nfsd_file_lookup_key *key = data;
+
+ return nfsd_file_inode_hash(key->inode, seed);
+}
+
+/**
+ * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
+ * @data: object on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ * Computed 32-bit hash value
+ */
+static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct nfsd_file *nf = data;
+
+ return nfsd_file_inode_hash(nf->nf_inode, seed);
+}
-static void nfsd_file_gc(void);
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+ int i;
+
+ if (!uid_eq(c1->fsuid, c2->fsuid))
+ return false;
+ if (!gid_eq(c1->fsgid, c2->fsgid))
+ return false;
+ if (c1->group_info == NULL || c2->group_info == NULL)
+ return c1->group_info == c2->group_info;
+ if (c1->group_info->ngroups != c2->group_info->ngroups)
+ return false;
+ for (i = 0; i < c1->group_info->ngroups; i++) {
+ if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+ return false;
+ }
+ return true;
+}
+
+/**
+ * nfsd_file_obj_cmpfn - Match a cache item against search criteria
+ * @arg: search criteria
+ * @ptr: cache item to check
+ *
+ * Return values:
+ * %0 - Item matches search criteria
+ * %1 - Item does not match search criteria
+ */
+static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nfsd_file_lookup_key *key = arg->key;
+ const struct nfsd_file *nf = ptr;
+
+ switch (key->type) {
+ case NFSD_FILE_KEY_INODE:
+ if (nf->nf_inode != key->inode)
+ return 1;
+ break;
+ case NFSD_FILE_KEY_FULL:
+ if (nf->nf_inode != key->inode)
+ return 1;
+ if (nf->nf_may != key->need)
+ return 1;
+ if (nf->nf_net != key->net)
+ return 1;
+ if (!nfsd_match_cred(nf->nf_cred, key->cred))
+ return 1;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ return 1;
+ break;
+ }
+ return 0;
+}
+
+static const struct rhashtable_params nfsd_file_rhash_params = {
+ .key_len = sizeof_field(struct nfsd_file, nf_inode),
+ .key_offset = offsetof(struct nfsd_file, nf_inode),
+ .head_offset = offsetof(struct nfsd_file, nf_rhash),
+ .hashfn = nfsd_file_key_hashfn,
+ .obj_hashfn = nfsd_file_obj_hashfn,
+ .obj_cmpfn = nfsd_file_obj_cmpfn,
+ /* Reduce resizing churn on light workloads */
+ .min_size = 512, /* buckets */
+ .automatic_shrinking = true,
+};
static void
nfsd_file_schedule_laundrette(void)
{
- long count = atomic_long_read(&nfsd_filecache_count);
-
- if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+ if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
+ test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
return;
queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
@@ -111,12 +228,11 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
}
static struct nfsd_file_mark *
-nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
{
int err;
struct fsnotify_mark *mark;
struct nfsd_file_mark *nfm = NULL, *new;
- struct inode *inode = nf->nf_inode;
do {
fsnotify_group_lock(nfsd_file_fsnotify_group);
@@ -167,31 +283,25 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
}
static struct nfsd_file *
-nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
- struct net *net)
+nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
{
struct nfsd_file *nf;
nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
if (nf) {
- INIT_HLIST_NODE(&nf->nf_node);
INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_birthtime = ktime_get();
nf->nf_file = NULL;
nf->nf_cred = get_current_cred();
- nf->nf_net = net;
+ nf->nf_net = key->net;
nf->nf_flags = 0;
- nf->nf_inode = inode;
- nf->nf_hashval = hashval;
- refcount_set(&nf->nf_ref, 1);
- nf->nf_may = may & NFSD_FILE_MAY_MASK;
- if (may & NFSD_MAY_NOT_BREAK_LEASE) {
- if (may & NFSD_MAY_WRITE)
- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
- if (may & NFSD_MAY_READ)
- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
- }
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ nf->nf_inode = key->inode;
+ /* nf_ref is pre-incremented for hash table */
+ refcount_set(&nf->nf_ref, 2);
+ nf->nf_may = key->need;
nf->nf_mark = NULL;
- trace_nfsd_file_alloc(nf);
}
return nf;
}
@@ -199,8 +309,12 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
static bool
nfsd_file_free(struct nfsd_file *nf)
{
+ s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
bool flush = false;
+ this_cpu_inc(nfsd_file_releases);
+ this_cpu_add(nfsd_file_total_age, age);
+
trace_nfsd_file_put_final(nf);
if (nf->nf_mark)
nfsd_file_mark_put(nf->nf_mark);
@@ -210,6 +324,14 @@ nfsd_file_free(struct nfsd_file *nf)
fput(nf->nf_file);
flush = true;
}
+
+ /*
+ * If this item is still linked via nf_lru, that's a bug.
+ * WARN and leak it to preserve system stability.
+ */
+ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
+ return flush;
+
call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
return flush;
}
@@ -240,31 +362,44 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
static void
nfsd_file_flush(struct nfsd_file *nf)
{
- if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
+ struct file *file = nf->nf_file;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return;
+ this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
+ if (vfs_fsync(file, 1) != 0)
nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}
-static void
-nfsd_file_do_unhash(struct nfsd_file *nf)
+static void nfsd_file_lru_add(struct nfsd_file *nf)
{
- lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
+ trace_nfsd_file_lru_add(nf);
+}
+static void nfsd_file_lru_remove(struct nfsd_file *nf)
+{
+ if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
+ trace_nfsd_file_lru_del(nf);
+}
+
+static void
+nfsd_file_hash_remove(struct nfsd_file *nf)
+{
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
- --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
- hlist_del_rcu(&nf->nf_node);
- atomic_long_dec(&nfsd_filecache_count);
+ rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+ nfsd_file_rhash_params);
}
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- nfsd_file_do_unhash(nf);
- if (!list_empty(&nf->nf_lru))
- list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+ nfsd_file_hash_remove(nf);
return true;
}
return false;
@@ -274,17 +409,16 @@ nfsd_file_unhash(struct nfsd_file *nf)
* Return true if the file was unhashed.
*/
static bool
-nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
{
- lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
-
- trace_nfsd_file_unhash_and_release_locked(nf);
+ trace_nfsd_file_unhash_and_dispose(nf);
if (!nfsd_file_unhash(nf))
return false;
/* keep final reference for nfsd_file_lru_dispose */
if (refcount_dec_not_one(&nf->nf_ref))
return true;
+ nfsd_file_lru_remove(nf);
list_add(&nf->nf_lru, dispose);
return true;
}
@@ -296,6 +430,7 @@ nfsd_file_put_noref(struct nfsd_file *nf)
if (refcount_dec_and_test(&nf->nf_ref)) {
WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+ nfsd_file_lru_remove(nf);
nfsd_file_free(nf);
}
}
@@ -305,7 +440,7 @@ nfsd_file_put(struct nfsd_file *nf)
{
might_sleep();
- set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ nfsd_file_lru_add(nf);
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
@@ -314,9 +449,24 @@ nfsd_file_put(struct nfsd_file *nf)
nfsd_file_schedule_laundrette();
} else
nfsd_file_put_noref(nf);
+}
- if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
- nfsd_file_gc();
+/**
+ * nfsd_file_close - Close an nfsd_file
+ * @nf: nfsd_file to close
+ *
+ * If this is the final reference for @nf, free it immediately.
+ * This reflects an on-the-wire CLOSE or DELEGRETURN into the
+ * VFS and exported filesystem.
+ */
+void nfsd_file_close(struct nfsd_file *nf)
+{
+ nfsd_file_put(nf);
+ if (refcount_dec_if_one(&nf->nf_ref)) {
+ nfsd_file_unhash(nf);
+ nfsd_file_lru_remove(nf);
+ nfsd_file_free(nf);
+ }
}
struct nfsd_file *
@@ -334,7 +484,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
- list_del(&nf->nf_lru);
+ list_del_init(&nf->nf_lru);
nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
}
@@ -348,7 +498,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
- list_del(&nf->nf_lru);
+ list_del_init(&nf->nf_lru);
nfsd_file_flush(nf);
if (!refcount_dec_and_test(&nf->nf_ref))
continue;
@@ -405,8 +555,19 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
}
}
-/*
+/**
+ * nfsd_file_lru_cb - Examine an entry on the LRU list
+ * @item: LRU entry to examine
+ * @lru: controlling LRU
+ * @lock: LRU list lock (unused)
+ * @arg: dispose list
+ *
* Note this can deadlock with nfsd_file_cache_purge.
+ *
+ * Return values:
+ * %LRU_REMOVED: @item was removed from the LRU
+ * %LRU_ROTATE: @item is to be moved to the LRU tail
+ * %LRU_SKIP: @item cannot be evicted
*/
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
@@ -427,55 +588,65 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
* counter. Here we check the counter and then test and clear the flag.
* That order is deliberate to ensure that we can do this locklessly.
*/
- if (refcount_read(&nf->nf_ref) > 1)
- goto out_skip;
+ if (refcount_read(&nf->nf_ref) > 1) {
+ list_lru_isolate(lru, &nf->nf_lru);
+ trace_nfsd_file_gc_in_use(nf);
+ return LRU_REMOVED;
+ }
/*
* Don't throw out files that are still undergoing I/O or
* that have uncleared errors pending.
*/
- if (nfsd_file_check_writeback(nf))
- goto out_skip;
+ if (nfsd_file_check_writeback(nf)) {
+ trace_nfsd_file_gc_writeback(nf);
+ return LRU_SKIP;
+ }
- if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
- goto out_skip;
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
+ trace_nfsd_file_gc_referenced(nf);
+ return LRU_ROTATE;
+ }
- if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
- goto out_skip;
+ if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ trace_nfsd_file_gc_hashed(nf);
+ return LRU_SKIP;
+ }
list_lru_isolate_move(lru, &nf->nf_lru, head);
+ this_cpu_inc(nfsd_file_evictions);
+ trace_nfsd_file_gc_disposed(nf);
return LRU_REMOVED;
-out_skip:
- return LRU_SKIP;
}
-static unsigned long
-nfsd_file_lru_walk_list(struct shrink_control *sc)
+/*
+ * Unhash items on @dispose immediately, then queue them on the
+ * disposal workqueue to finish releasing them in the background.
+ *
+ * cel: Note that between the time list_lru_shrink_walk runs and
+ * now, these items are in the hash table but marked unhashed.
+ * Why release these outside of lru_cb? There's no lock ordering
+ * problem since lru_cb currently takes no lock.
+ */
+static void nfsd_file_gc_dispose_list(struct list_head *dispose)
{
- LIST_HEAD(head);
struct nfsd_file *nf;
- unsigned long ret;
- if (sc)
- ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
- nfsd_file_lru_cb, &head);
- else
- ret = list_lru_walk(&nfsd_file_lru,
- nfsd_file_lru_cb,
- &head, LONG_MAX);
- list_for_each_entry(nf, &head, nf_lru) {
- spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
- nfsd_file_do_unhash(nf);
- spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
- }
- nfsd_file_dispose_list_delayed(&head);
- return ret;
+ list_for_each_entry(nf, dispose, nf_lru)
+ nfsd_file_hash_remove(nf);
+ nfsd_file_dispose_list_delayed(dispose);
}
static void
nfsd_file_gc(void)
{
- nfsd_file_lru_walk_list(NULL);
+ LIST_HEAD(dispose);
+ unsigned long ret;
+
+ ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
+ &dispose, list_lru_count(&nfsd_file_lru));
+ trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_gc_dispose_list(&dispose);
}
static void
@@ -494,7 +665,14 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
- return nfsd_file_lru_walk_list(sc);
+ LIST_HEAD(dispose);
+ unsigned long ret;
+
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &dispose);
+ trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_gc_dispose_list(&dispose);
+ return ret;
}
static struct shrinker nfsd_file_shrinker = {
@@ -503,39 +681,47 @@ static struct shrinker nfsd_file_shrinker = {
.seeks = 1,
};
-static void
-__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
- struct list_head *dispose)
+/*
+ * Find all cache items across all net namespaces that match @inode and
+ * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
+ */
+static unsigned int
+__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
{
- struct nfsd_file *nf;
- struct hlist_node *tmp;
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
+ };
+ unsigned int count = 0;
+ struct nfsd_file *nf;
- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
- hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
- if (inode == nf->nf_inode)
- nfsd_file_unhash_and_release_locked(nf, dispose);
- }
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ rcu_read_lock();
+ do {
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+ if (!nf)
+ break;
+ nfsd_file_unhash_and_dispose(nf, dispose);
+ count++;
+ } while (1);
+ rcu_read_unlock();
+ return count;
}
/**
* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
* @inode: inode of the file to attempt to remove
*
- * Walk the whole hash bucket, looking for any files that correspond to "inode".
- * If any do, then unhash them and put the hashtable reference to them and
- * destroy any that had their last reference put. Also ensure that any of the
- * fputs also have their final __fput done as well.
+ * Unhash and put, then flush and fput all cache items associated with @inode.
*/
void
nfsd_file_close_inode_sync(struct inode *inode)
{
- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
- NFSD_FILE_HASH_BITS);
LIST_HEAD(dispose);
+ unsigned int count;
- __nfsd_file_close_inode(inode, hashval, &dispose);
- trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+ count = __nfsd_file_close_inode(inode, &dispose);
+ trace_nfsd_file_close_inode_sync(inode, count);
nfsd_file_dispose_list_sync(&dispose);
}
@@ -543,19 +729,16 @@ nfsd_file_close_inode_sync(struct inode *inode)
* nfsd_file_close_inode - attempt a delayed close of a nfsd_file
* @inode: inode of the file to attempt to remove
*
- * Walk the whole hash bucket, looking for any files that correspond to "inode".
- * If any do, then unhash them and put the hashtable reference to them and
- * destroy any that had their last reference put.
+ * Unhash and put all cache items associated with @inode.
*/
static void
nfsd_file_close_inode(struct inode *inode)
{
- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
- NFSD_FILE_HASH_BITS);
LIST_HEAD(dispose);
+ unsigned int count;
- __nfsd_file_close_inode(inode, hashval, &dispose);
- trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+ count = __nfsd_file_close_inode(inode, &dispose);
+ trace_nfsd_file_close_inode(inode, count);
nfsd_file_dispose_list_delayed(&dispose);
}
@@ -630,25 +813,21 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
int
nfsd_file_cache_init(void)
{
- int ret = -ENOMEM;
- unsigned int i;
+ int ret;
- clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
-
- if (nfsd_file_hashtbl)
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
return 0;
+ ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
if (!nfsd_filecache_wq)
goto out;
- nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
- sizeof(*nfs