Diffstat (limited to 'fs')
-rw-r--r--  fs/lockd/svc4proc.c     |  12
-rw-r--r--  fs/lockd/svclock.c      |  10
-rw-r--r--  fs/lockd/svcproc.c      |   5
-rw-r--r--  fs/lockd/xdr4.c         |  19
-rw-r--r--  fs/nfsd/acl.h           |   6
-rw-r--r--  fs/nfsd/filecache.c     | 749
-rw-r--r--  fs/nfsd/filecache.h     |  11
-rw-r--r--  fs/nfsd/netns.h         |   3
-rw-r--r--  fs/nfsd/nfs2acl.c       |   6
-rw-r--r--  fs/nfsd/nfs3acl.c       |   4
-rw-r--r--  fs/nfsd/nfs3proc.c      |  35
-rw-r--r--  fs/nfsd/nfs4acl.c       |  46
-rw-r--r--  fs/nfsd/nfs4callback.c  |  37
-rw-r--r--  fs/nfsd/nfs4proc.c      | 330
-rw-r--r--  fs/nfsd/nfs4state.c     | 127
-rw-r--r--  fs/nfsd/nfs4xdr.c       | 123
-rw-r--r--  fs/nfsd/nfsctl.c        |  21
-rw-r--r--  fs/nfsd/nfsd.h          |   6
-rw-r--r--  fs/nfsd/nfsfh.c         |  27
-rw-r--r--  fs/nfsd/nfsfh.h         |  58
-rw-r--r--  fs/nfsd/nfsproc.c       |  27
-rw-r--r--  fs/nfsd/state.h         |   1
-rw-r--r--  fs/nfsd/trace.h         | 327
-rw-r--r--  fs/nfsd/vfs.c           | 256
-rw-r--r--  fs/nfsd/vfs.h           |  33
-rw-r--r--  fs/nfsd/xdr4.h          |  60
26 files changed, 1403 insertions, 936 deletions
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 176b468a61c7..bf274f23969b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	if (!nlmsvc_ops)
 		return nlm_lck_denied_nolocks;
 
+	if (lock->lock_start > OFFSET_MAX ||
+	    (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
+		return nlm4_fbig;
+
 	/* Obtain host handle */
 	if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
 	 || (argp->monitor && nsm_monitor(host) < 0))
@@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	/* Set up the missing parts of the file_lock structure */
 	lock->fl.fl_file = file->f_file[mode];
 	lock->fl.fl_pid = current->tgid;
+	lock->fl.fl_start = (loff_t)lock->lock_start;
+	lock->fl.fl_end = lock->lock_len ?
+			  (loff_t)(lock->lock_start + lock->lock_len - 1) :
+			  OFFSET_MAX;
 	lock->fl.fl_lmops = &nlmsvc_lock_operations;
 	nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
 	if (!lock->fl.fl_owner) {
@@ -87,6 +95,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
+	struct nlm_lockowner *test_owner;
 	__be32 rc = rpc_success;
 
 	dprintk("lockd: TEST4 called\n");
@@ -96,6 +105,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
+	test_owner = argp->lock.fl.fl_owner;
 	/* Now check for conflicting locks */
 	resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
 	if (resp->status == nlm_drop_reply)
@@ -103,7 +113,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	else
 		dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
 
-	nlmsvc_release_lockowner(&argp->lock);
+	nlmsvc_put_lockowner(test_owner);
 	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index cb3658ab9b7a..9c1aa75441e1 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
 	return lockowner;
 }
 
-static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
 {
 	if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
 		return;
@@ -590,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	int			error;
 	int			mode;
 	__be32			ret;
-	struct nlm_lockowner	*test_owner;
 
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
 				nlmsvc_file_inode(file)->i_sb->s_id,
@@ -604,9 +603,6 @@
 		goto out;
 	}
 
-	/* If there's a conflicting lock, remember to clean up the test lock */
-	test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
-
 	mode = lock_to_openmode(&lock->fl);
 	error = vfs_test_lock(file->f_file[mode], &lock->fl);
 	if (error) {
@@ -635,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	conflock->fl.fl_end = lock->fl.fl_end;
 	locks_release_private(&lock->fl);
 
-	/* Clean up the test lock */
-	lock->fl.fl_owner = NULL;
-	nlmsvc_put_lockowner(test_owner);
-
 	ret = nlm_lck_denied;
 out:
 	return ret;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4dc1b40a489a..b09ca35b527c 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -116,6 +116,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
+	struct nlm_lockowner *test_owner;
 	__be32 rc = rpc_success;
 
 	dprintk("lockd: TEST called\n");
@@ -125,6 +126,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
+	test_owner = argp->lock.fl.fl_owner;
+
 	/* Now check for conflicting locks */
 	resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
 	if (resp->status == nlm_drop_reply)
@@ -133,7 +136,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 		dprintk("lockd: TEST status %d vers %d\n",
 			ntohl(resp->status), rqstp->rq_vers);
 
-	nlmsvc_release_lockowner(&argp->lock);
+	nlmsvc_put_lockowner(test_owner);
 	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 856267c0864b..712fdfeb8ef0 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -20,13 +20,6 @@
 
 #include "svcxdr.h"
 
-static inline loff_t
-s64_to_loff_t(__s64 offset)
-{
-	return (loff_t)offset;
-}
-
-
 static inline s64
 loff_t_to_s64(loff_t offset)
 {
@@ -70,8 +63,6 @@ static bool
 svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
 {
 	struct file_lock *fl = &lock->fl;
-	u64 len, start;
-	s64 end;
 
 	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
 		return false;
@@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
 		return false;
 	if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
 		return false;
-	if (xdr_stream_decode_u64(xdr, &start) < 0)
+	if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0)
 		return false;
-	if (xdr_stream_decode_u64(xdr, &len) < 0)
+	if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0)
 		return false;
 
 	locks_init_lock(fl);
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type = F_RDLCK;
-	end = start + len - 1;
-	fl->fl_start = s64_to_loff_t(start);
-	if (len == 0 || end < 0)
-		fl->fl_end = OFFSET_MAX;
-	else
-		fl->fl_end = s64_to_loff_t(end);
 	return true;
 }
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index ba14d2f4b64f..4b7324458a94 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -38,6 +38,8 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
+struct nfsd_attrs;
+enum nfs_ftype4;
 
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
@@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
 
 int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
 		struct nfs4_acl **acl);
-__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		struct nfs4_acl *acl);
+__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
+		struct nfsd_attrs *attr);
 
 #endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index a605c0e39b09..eeed4ae5b4ad 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -13,6 +13,7 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/fsnotify.h>
 #include <linux/seq_file.h>
+#include <linux/rhashtable.h>
 
 #include "vfs.h"
 #include "nfsd.h"
@@ -21,28 +22,19 @@
 #include "filecache.h"
 #include "trace.h"
 
-#define NFSDDBG_FACILITY	NFSDDBG_FH
-
-/* FIXME: dynamically size this for the machine somehow? */
-#define NFSD_FILE_HASH_BITS		12
-#define NFSD_FILE_HASH_SIZE		(1 << NFSD_FILE_HASH_BITS)
 #define NFSD_LAUNDRETTE_DELAY		(2 * HZ)
 
-#define NFSD_FILE_SHUTDOWN		(1)
-#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
-#define NFSD_FILE_LRU_LIMIT		(NFSD_FILE_LRU_THRESHOLD << 2)
+#define NFSD_FILE_CACHE_UP		(0)
 
 /* We only care about NFSD_MAY_READ/WRITE for this cache */
 #define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
 
-struct nfsd_fcache_bucket {
-	struct hlist_head	nfb_head;
-	spinlock_t		nfb_lock;
-	unsigned int		nfb_count;
-	unsigned int		nfb_maxcount;
-};
-
 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
 
 struct nfsd_fcache_disposal {
 	struct work_struct work;
@@ -54,21 +46,146 @@ static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
 
 static struct kmem_cache		*nfsd_file_slab;
 static struct kmem_cache		*nfsd_file_mark_slab;
-static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
 static struct list_lru			nfsd_file_lru;
-static long				nfsd_file_lru_flags;
+static unsigned long			nfsd_file_flags;
 static struct fsnotify_group		*nfsd_file_fsnotify_group;
-static atomic_long_t			nfsd_filecache_count;
 static struct delayed_work		nfsd_filecache_laundrette;
+static struct rhashtable		nfsd_file_rhash_tbl
+						____cacheline_aligned_in_smp;
+
+enum nfsd_file_lookup_type {
+	NFSD_FILE_KEY_INODE,
+	NFSD_FILE_KEY_FULL,
+};
+
+struct nfsd_file_lookup_key {
+	struct inode			*inode;
+	struct net			*net;
+	const struct cred		*cred;
+	unsigned char			need;
+	enum nfsd_file_lookup_type	type;
+};
+
+/*
+ * The returned hash value is based solely on the address of an in-code
+ * inode, a pointer to a slab-allocated object. The entropy in such a
+ * pointer is concentrated in its middle bits.
+ */
+static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
+{
+	unsigned long ptr = (unsigned long)inode;
+	u32 k;
+
+	k = ptr >> L1_CACHE_SHIFT;
+	k &= 0x00ffffff;
+	return jhash2(&k, 1, seed);
+}
+
+/**
+ * nfsd_file_key_hashfn - Compute the hash value of a lookup key
+ * @data: key on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ *   Computed 32-bit hash value
+ */
+static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nfsd_file_lookup_key *key = data;
+
+	return nfsd_file_inode_hash(key->inode, seed);
+}
+
+/**
+ * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
+ * @data: object on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ *   Computed 32-bit hash value
+ */
+static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nfsd_file *nf = data;
+
+	return nfsd_file_inode_hash(nf->nf_inode, seed);
+}
 
-static void nfsd_file_gc(void);
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+	int i;
+
+	if (!uid_eq(c1->fsuid, c2->fsuid))
+		return false;
+	if (!gid_eq(c1->fsgid, c2->fsgid))
+		return false;
+	if (c1->group_info == NULL || c2->group_info == NULL)
+		return c1->group_info == c2->group_info;
+	if (c1->group_info->ngroups != c2->group_info->ngroups)
+		return false;
+	for (i = 0; i < c1->group_info->ngroups; i++) {
+		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+			return false;
+	}
+	return true;
+}
+
+/**
+ * nfsd_file_obj_cmpfn - Match a cache item against search criteria
+ * @arg: search criteria
+ * @ptr: cache item to check
+ *
+ * Return values:
+ *   %0 - Item matches search criteria
+ *   %1 - Item does not match search criteria
+ */
+static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+			       const void *ptr)
+{
+	const struct nfsd_file_lookup_key *key = arg->key;
+	const struct nfsd_file *nf = ptr;
+
+	switch (key->type) {
+	case NFSD_FILE_KEY_INODE:
+		if (nf->nf_inode != key->inode)
+			return 1;
+		break;
+	case NFSD_FILE_KEY_FULL:
+		if (nf->nf_inode != key->inode)
+			return 1;
+		if (nf->nf_may != key->need)
+			return 1;
+		if (nf->nf_net != key->net)
+			return 1;
+		if (!nfsd_match_cred(nf->nf_cred, key->cred))
+			return 1;
+		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
+static const struct rhashtable_params nfsd_file_rhash_params = {
+	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
+	.key_offset		= offsetof(struct nfsd_file, nf_inode),
+	.head_offset		= offsetof(struct nfsd_file, nf_rhash),
+	.hashfn			= nfsd_file_key_hashfn,
+	.obj_hashfn		= nfsd_file_obj_hashfn,
+	.obj_cmpfn		= nfsd_file_obj_cmpfn,
+	/* Reduce resizing churn on light workloads */
+	.min_size		= 512,		/* buckets */
+	.automatic_shrinking	= true,
+};
 
 static void
 nfsd_file_schedule_laundrette(void)
 {
-	long count = atomic_long_read(&nfsd_filecache_count);
-
-	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+	if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
+	    test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
 		return;
 
 	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
@@ -111,12 +228,11 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 }
 
 static struct nfsd_file_mark *
-nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
 {
 	int			err;
 	struct fsnotify_mark	*mark;
 	struct nfsd_file_mark	*nfm = NULL, *new;
-	struct inode *inode = nf->nf_inode;
 
 	do {
 		fsnotify_group_lock(nfsd_file_fsnotify_group);
@@ -167,31 +283,25 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
 }
 
 static struct nfsd_file *
-nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
-		struct net *net)
+nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
 {
 	struct nfsd_file *nf;
 
 	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
 	if (nf) {
-		INIT_HLIST_NODE(&nf->nf_node);
 		INIT_LIST_HEAD(&nf->nf_lru);
+		nf->nf_birthtime = ktime_get();
 		nf->nf_file = NULL;
 		nf->nf_cred = get_current_cred();
-		nf->nf_net = net;
+		nf->nf_net = key->net;
 		nf->nf_flags = 0;
-		nf->nf_inode = inode;
-		nf->nf_hashval = hashval;
-		refcount_set(&nf->nf_ref, 1);
-		nf->nf_may = may & NFSD_FILE_MAY_MASK;
-		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
-			if (may & NFSD_MAY_WRITE)
-				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
-			if (may & NFSD_MAY_READ)
-				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
-		}
+		__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+		__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+		nf->nf_inode = key->inode;
+		/* nf_ref is pre-incremented for hash table */
+		refcount_set(&nf->nf_ref, 2);
+		nf->nf_may = key->need;
 		nf->nf_mark = NULL;
-		trace_nfsd_file_alloc(nf);
 	}
 	return nf;
 }
@@ -199,8 +309,12 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
 static bool
 nfsd_file_free(struct nfsd_file *nf)
 {
+	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
 	bool flush = false;
 
+	this_cpu_inc(nfsd_file_releases);
+	this_cpu_add(nfsd_file_total_age, age);
+
 	trace_nfsd_file_put_final(nf);
 	if (nf->nf_mark)
 		nfsd_file_mark_put(nf->nf_mark);
@@ -210,6 +324,14 @@ nfsd_file_free(struct nfsd_file *nf)
 		fput(nf->nf_file);
 		flush = true;
 	}
+
+	/*
+	 * If this item is still linked via nf_lru, that's a bug.
+	 * WARN and leak it to preserve system stability.
+	 */
+	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
+		return flush;
+
 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
 	return flush;
 }
@@ -240,31 +362,44 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
 static void
 nfsd_file_flush(struct nfsd_file *nf)
 {
-	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
+	struct file *file = nf->nf_file;
+
+	if (!file || !(file->f_mode & FMODE_WRITE))
+		return;
+	this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
+	if (vfs_fsync(file, 1) != 0)
 		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
 }
 
-static void
-nfsd_file_do_unhash(struct nfsd_file *nf)
+static void nfsd_file_lru_add(struct nfsd_file *nf)
 {
-	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
+		trace_nfsd_file_lru_add(nf);
+}
 
+static void nfsd_file_lru_remove(struct nfsd_file *nf)
+{
+	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
+		trace_nfsd_file_lru_del(nf);
+}
+
+static void
+nfsd_file_hash_remove(struct nfsd_file *nf)
+{
 	trace_nfsd_file_unhash(nf);
 
 	if (nfsd_file_check_write_error(nf))
 		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
-	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
-	hlist_del_rcu(&nf->nf_node);
-	atomic_long_dec(&nfsd_filecache_count);
+	rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+			       nfsd_file_rhash_params);
 }
 
 static bool
 nfsd_file_unhash(struct nfsd_file *nf)
 {
 	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		nfsd_file_do_unhash(nf);
-		if (!list_empty(&nf->nf_lru))
-			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+		nfsd_file_hash_remove(nf);
 		return true;
 	}
 	return false;
@@ -274,17 +409,16 @@ nfsd_file_unhash(struct nfsd_file *nf)
  * Return true if the file was unhashed.
  */
 static bool
-nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
 {
-	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
-
-	trace_nfsd_file_unhash_and_release_locked(nf);
+	trace_nfsd_file_unhash_and_dispose(nf);
 	if (!nfsd_file_unhash(nf))
 		return false;
 	/* keep final reference for nfsd_file_lru_dispose */
 	if (refcount_dec_not_one(&nf->nf_ref))
 		return true;
 
+	nfsd_file_lru_remove(nf);
 	list_add(&nf->nf_lru, dispose);
 	return true;
 }
@@ -296,6 +430,7 @@ nfsd_file_put_noref(struct nfsd_file *nf)
 
 	if (refcount_dec_and_test(&nf->nf_ref)) {
 		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+		nfsd_file_lru_remove(nf);
 		nfsd_file_free(nf);
 	}
 }
@@ -305,7 +440,7 @@ nfsd_file_put(struct nfsd_file *nf)
 {
 	might_sleep();
 
-	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+	nfsd_file_lru_add(nf);
 	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
 		nfsd_file_flush(nf);
 		nfsd_file_put_noref(nf);
@@ -314,9 +449,24 @@ nfsd_file_put(struct nfsd_file *nf)
 		nfsd_file_schedule_laundrette();
 	} else
 		nfsd_file_put_noref(nf);
+}
 
-	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
-		nfsd_file_gc();
+/**
+ * nfsd_file_close - Close an nfsd_file
+ * @nf: nfsd_file to close
+ *
+ * If this is the final reference for @nf, free it immediately.
+ * This reflects an on-the-wire CLOSE or DELEGRETURN into the
+ * VFS and exported filesystem.
+ */
+void nfsd_file_close(struct nfsd_file *nf)
+{
+	nfsd_file_put(nf);
+	if (refcount_dec_if_one(&nf->nf_ref)) {
+		nfsd_file_unhash(nf);
+		nfsd_file_lru_remove(nf);
+		nfsd_file_free(nf);
+	}
 }
 
 struct nfsd_file *
@@ -334,7 +484,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
 
 	while(!list_empty(dispose)) {
 		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
-		list_del(&nf->nf_lru);
+		list_del_init(&nf->nf_lru);
 		nfsd_file_flush(nf);
 		nfsd_file_put_noref(nf);
 	}
@@ -348,7 +498,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
 
 	while(!list_empty(dispose)) {
 		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
-		list_del(&nf->nf_lru);
+		list_del_init(&nf->nf_lru);
 		nfsd_file_flush(nf);
 		if (!refcount_dec_and_test(&nf->nf_ref))
 			continue;
@@ -405,8 +555,19 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
 	}
 }
 
-/*
+/**
+ * nfsd_file_lru_cb - Examine an entry on the LRU list
+ * @item: LRU entry to examine
+ * @lru: controlling LRU
+ * @lock: LRU list lock (unused)
+ * @arg: dispose list
+ *
  * Note this can deadlock with nfsd_file_cache_purge.
+ *
+ * Return values:
+ *   %LRU_REMOVED: @item was removed from the LRU
+ *   %LRU_ROTATE: @item is to be moved to the LRU tail
+ *   %LRU_SKIP: @item cannot be evicted
  */
 static enum lru_status
 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
@@ -427,55 +588,65 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 	 * counter. Here we check the counter and then test and clear the flag.
 	 * That order is deliberate to ensure that we can do this locklessly.
 	 */
-	if (refcount_read(&nf->nf_ref) > 1)
-		goto out_skip;
+	if (refcount_read(&nf->nf_ref) > 1) {
+		list_lru_isolate(lru, &nf->nf_lru);
+		trace_nfsd_file_gc_in_use(nf);
+		return LRU_REMOVED;
+	}
 
 	/*
 	 * Don't throw out files that are still undergoing I/O or
 	 * that have uncleared errors pending.
 	 */
-	if (nfsd_file_check_writeback(nf))
-		goto out_skip;
+	if (nfsd_file_check_writeback(nf)) {
+		trace_nfsd_file_gc_writeback(nf);
+		return LRU_SKIP;
+	}
 
-	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
-		goto out_skip;
+	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
+		trace_nfsd_file_gc_referenced(nf);
+		return LRU_ROTATE;
+	}
 
-	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
-		goto out_skip;
+	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+		trace_nfsd_file_gc_hashed(nf);
+		return LRU_SKIP;
+	}
 
 	list_lru_isolate_move(lru, &nf->nf_lru, head);
+	this_cpu_inc(nfsd_file_evictions);
+	trace_nfsd_file_gc_disposed(nf);
 	return LRU_REMOVED;
-out_skip:
-	return LRU_SKIP;
 }
 
-static unsigned long
-nfsd_file_lru_walk_list(struct shrink_control *sc)
+/*
+ * Unhash items on @dispose immediately, then queue them on the
+ * disposal workqueue to finish releasing them in the background.
+ *
+ * cel: Note that between the time list_lru_shrink_walk runs and
+ * now, these items are in the hash table but marked unhashed.
+ * Why release these outside of lru_cb ? There's no lock ordering
+ * problem since lru_cb currently takes no lock.
+ */
+static void nfsd_file_gc_dispose_list(struct list_head *dispose)
 {
-	LIST_HEAD(head);
 	struct nfsd_file *nf;
-	unsigned long ret;
 
-	if (sc)
-		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
-				nfsd_file_lru_cb, &head);
-	else
-		ret = list_lru_walk(&nfsd_file_lru,
-				nfsd_file_lru_cb,
-				&head, LONG_MAX);
-	list_for_each_entry(nf, &head, nf_lru) {
-		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
-		nfsd_file_do_unhash(nf);
-		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
-	}
-	nfsd_file_dispose_list_delayed(&head);
-	return ret;
+	list_for_each_entry(nf, dispose, nf_lru)
+		nfsd_file_hash_remove(nf);
+	nfsd_file_dispose_list_delayed(dispose);
 }
 
 static void
 nfsd_file_gc(void)
 {
-	nfsd_file_lru_walk_list(NULL);
+	LIST_HEAD(dispose);
+	unsigned long ret;
+
+	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
+			    &dispose, list_lru_count(&nfsd_file_lru));
+	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+	nfsd_file_gc_dispose_list(&dispose);
 }
 
 static void
@@ -494,7 +665,14 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
 static unsigned long
 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 {
-	return nfsd_file_lru_walk_list(sc);
+	LIST_HEAD(dispose);
+	unsigned long ret;
+
+	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+				   nfsd_file_lru_cb, &dispose);
+	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
+	nfsd_file_gc_dispose_list(&dispose);
+	return ret;
 }
 
 static struct shrinker	nfsd_file_shrinker = {
@@ -503,39 +681,47 @@ static struct shrinker	nfsd_file_shrinker = {
 	.seeks = 1,
 };
 
-static void
-__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
-			struct list_head *dispose)
+/*
+ * Find all cache items across all net namespaces that match @inode and
+ * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
+ */
+static unsigned int
+__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
 {
-	struct nfsd_file	*nf;
-	struct hlist_node	*tmp;
+	struct nfsd_file_lookup_key key = {
+		.type	= NFSD_FILE_KEY_INODE,
+		.inode	= inode,
+	};
+	unsigned int count = 0;
+	struct nfsd_file *nf;
 
-	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
-	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
-		if (inode == nf->nf_inode)
-			nfsd_file_unhash_and_release_locked(nf, dispose);
-	}
-	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	rcu_read_lock();
+	do {
+		nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+				       nfsd_file_rhash_params);
+		if (!nf)
+			break;
+		nfsd_file_unhash_and_dispose(nf, dispose);
+		count++;
+	} while (1);
+	rcu_read_unlock();
+	return count;
 }
 
 /**
  * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
  * @inode: inode of the file to attempt to remove
  *
- * Walk the whole hash bucket, looking for any files that correspond to "inode".
- * If any do, then unhash them and put the hashtable reference to them and
- * destroy any that had their last reference put. Also ensure that any of the
- * fputs also have their final __fput done as well.
+ * Unhash and put, then flush and fput all cache items associated with @inode.
 */
 void
 nfsd_file_close_inode_sync(struct inode *inode)
 {
-	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
-						NFSD_FILE_HASH_BITS);
 	LIST_HEAD(dispose);
+	unsigned int count;
 
-	__nfsd_file_close_inode(inode, hashval, &dispose);
-	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+	count = __nfsd_file_close_inode(inode, &dispose);
+	trace_nfsd_file_close_inode_sync(inode, count);
 	nfsd_file_dispose_list_sync(&dispose);
 }
 
@@ -543,19 +729,16 @@ nfsd_file_close_inode_sync(struct inode *inode)
  * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
  * @inode: inode of the file to attempt to remove
  *
- * Walk the whole hash bucket, looking for any files that correspond to "inode".
- * If any do, then unhash them and put the hashtable reference to them and
- * destroy any that had their last reference put.
+ * Unhash and put all cache item associated with @inode.
 */
 static void
 nfsd_file_close_inode(struct inode *inode)
 {
-	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
-						NFSD_FILE_HASH_BITS);
 	LIST_HEAD(dispose);
+	unsigned int count;
 
-	__nfsd_file_close_inode(inode, hashval, &dispose);
-	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+	count = __nfsd_file_close_inode(inode, &dispose);
+	trace_nfsd_file_close_inode(inode, count);
 	nfsd_file_dispose_list_delayed(&dispose);
 }
 
@@ -630,25 +813,21 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
 int
 nfsd_file_cache_init(void)
 {
-	int	ret = -ENOMEM;
-	unsigned int i;
+	int ret;
 
-	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
-
-	if (nfsd_file_hashtbl)
+	lockdep_assert_held(&nfsd_mutex);
+	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
 		return 0;
 
+	ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
 	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
 	if (!nfsd_filecache_wq)
 		goto out;
 
-	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
-				sizeof(*nfs
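Aside on the lockd hunks above: the new checks in nlm4svc_retrieve_args() reject NLM4 ranges whose end would not fit in a signed 64-bit offset (replying nlm4_fbig), and map a wire (start, length) pair to an inclusive [fl_start, fl_end] range where length 0 means "to end of file". The following standalone userspace sketch (not the kernel code; OFFSET_MAX is approximated with INT64_MAX and the helper name nlm4_range_to_flock is made up for illustration) mirrors that arithmetic and shows why the overflow guard is safe to evaluate before the conversion.

/* Illustrative sketch of the NLM4 range math, assuming OFFSET_MAX == INT64_MAX. */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <inttypes.h>

#define OFFSET_MAX INT64_MAX	/* stand-in for the kernel's maximum loff_t */

/* Returns false when the range cannot be represented (the server would reply nlm4_fbig). */
static bool nlm4_range_to_flock(uint64_t lock_start, uint64_t lock_len,
				int64_t *fl_start, int64_t *fl_end)
{
	/*
	 * Reject ranges whose last byte would overflow a signed 64-bit
	 * offset. Because lock_start <= OFFSET_MAX is checked first,
	 * OFFSET_MAX - lock_start cannot underflow in the second test.
	 */
	if (lock_start > (uint64_t)OFFSET_MAX ||
	    (lock_len && (lock_len - 1) > (uint64_t)OFFSET_MAX - lock_start))
		return false;

	*fl_start = (int64_t)lock_start;
	/* A zero length means "lock to end of file". */
	*fl_end = lock_len ? (int64_t)(lock_start + lock_len - 1) : OFFSET_MAX;
	return true;
}

int main(void)
{
	int64_t start, end;

	/* A 100-byte lock at offset 0 becomes the inclusive range [0, 99]. */
	if (nlm4_range_to_flock(0, 100, &start, &end))
		printf("[%" PRId64 ", %" PRId64 "]\n", start, end);

	/* Zero length: locked through OFFSET_MAX. */
	if (nlm4_range_to_flock(42, 0, &start, &end))
		printf("[%" PRId64 ", %" PRId64 "]\n", start, end);

	/* End would pass OFFSET_MAX: rejected. */
	if (!nlm4_range_to_flock((uint64_t)OFFSET_MAX, 2, &start, &end))
		printf("rejected (nlm4_fbig)\n");
	return 0;
}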
