diff options
57 files changed, 2481 insertions, 2039 deletions
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index f033f3a69a3b..07b839560576 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -93,7 +93,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags) return nfs4_do_check_delegation(inode, flags, false); } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -108,7 +108,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file) != ctx) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); @@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode, int err; again: - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -147,15 +147,16 @@ again: continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; - get_nfs_open_context(ctx); - spin_unlock(&inode->i_lock); + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) - err = nfs_delegation_claim_locks(ctx, state, stateid); + err = nfs_delegation_claim_locks(state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); @@ -164,7 +165,7 @@ again: return err; goto again; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8bfaa658b2c1..71b2e390becf 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } +static int +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, + struct inode *inode, int error) +{ + switch (error) { + case 1: + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", + __func__, dentry); + return 1; + case 0: + nfs_mark_for_revalidate(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ + nfs_zap_caches(inode); + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (IS_ROOT(dentry)) + return 1; + } + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", + __func__, dentry); + return 0; + } + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", + __func__, dentry, error); + return error; +} + +static int +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + int ret = 1; + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; + ret = 0; + } + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret); +} + +static int +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); +} + +static int +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct nfs_fh *fhandle; + struct nfs_fattr *fattr; + struct nfs4_label *label; + int ret; + + ret = -ENOMEM; + fhandle = nfs_alloc_fhandle(); + fattr = nfs_alloc_fattr(); + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (fhandle == NULL || fattr == NULL || IS_ERR(label)) + goto out; + + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + if (ret < 0) { + if (ret == -ESTALE || ret == -ENOENT) + ret = 0; + goto out; + } + ret = 0; + if (nfs_compare_fh(NFS_FH(inode), fhandle)) + goto out; + if (nfs_refresh_inode(inode, fattr) < 0) + goto out; + + nfs_setsecurity(inode, fattr, label); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + + /* set a readdirplus hint that we had a cache miss */ + nfs_force_use_readdirplus(dir); + ret = 1; +out: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); + nfs4_label_free(label); + return nfs_lookup_revalidate_done(dir, dentry, inode, ret); +} + /* * This is called every time the dcache has a lookup hit, * and we should check whether we can really trust that @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, + unsigned int flags) { - struct inode *dir; struct inode *inode; - struct dentry *parent; - struct nfs_fh *fhandle = NULL; - struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; int error; - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - } nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = d_inode(dentry); - if (!inode) { - if (nfs_neg_need_reval(dir, dentry, flags)) { - if (flags & LOOKUP_RCU) - return -ECHILD; - goto out_bad; - } - goto out_valid; - } + if (!inode) + return nfs_lookup_revalidate_negative(dir, dentry, flags); if (is_bad_inode(inode)) { - if (flags & LOOKUP_RCU) - return -ECHILD; dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); goto out_bad; } if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) - goto out_set_verifier; + return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* Force a full look up iff the parent directory has changed */ if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { error = nfs_lookup_verify_inode(inode, flags); if (error) { - if (flags & LOOKUP_RCU) - return -ECHILD; if (error == -ESTALE) - goto out_zap_parent; - goto out_error; + nfs_zap_caches(dir); + goto out_bad; } nfs_advise_use_readdirplus(dir); goto out_valid; @@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (NFS_STALE(inode)) goto out_bad; - error = -ENOMEM; - fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - if (fhandle == NULL || fattr == NULL) - goto out_error; - - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(label)) - goto out_error; - trace_nfs_lookup_revalidate_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + error = nfs_lookup_revalidate_dentry(dir, dentry, inode); trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); - if (error == -ESTALE || error == -ENOENT) - goto out_bad; - if (error) - goto out_error; - if (nfs_compare_fh(NFS_FH(inode), fhandle)) - goto out_bad; - if ((error = nfs_refresh_inode(inode, fattr)) != 0) - goto out_bad; - - nfs_setsecurity(inode, fattr, label); - - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); + return error; +out_valid: + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); +out_bad: + if (flags & LOOKUP_RCU) + return -ECHILD; + return nfs_lookup_revalidate_done(dir, dentry, inode, 0); +} - /* set a readdirplus hint that we had a cache miss */ - nfs_force_use_readdirplus(dir); +static int +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, + int (*reval)(struct inode *, struct dentry *, unsigned int)) +{ + struct dentry *parent; + struct inode *dir; + int ret; -out_set_verifier: - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - out_valid: if (flags & LOOKUP_RCU) { + parent = READ_ONCE(dentry->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + ret = reval(dir, dentry, flags); if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; - } else + } else { + parent = dget_parent(dentry); + ret = reval(d_inode(parent), dentry, flags); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", - __func__, dentry); - return 1; -out_zap_parent: - nfs_zap_caches(dir); - out_bad: - WARN_ON(flags & LOOKUP_RCU); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); - nfs_mark_for_revalidate(dir); - if (inode && S_ISDIR(inode->i_mode)) { - /* Purge readdir caches. */ - nfs_zap_caches(inode); - /* - * We can't d_drop the root of a disconnected tree: - * its d_hash is on the s_anon list and d_drop() would hide - * it from shrink_dcache_for_unmount(), leading to busy - * inodes on unmount and further oopses. - */ - if (IS_ROOT(dentry)) - goto out_valid; } - dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", - __func__, dentry); - return 0; -out_error: - WARN_ON(flags & LOOKUP_RCU); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); - dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", - __func__, dentry, error); - return error; + return ret; +} + +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +{ + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); } /* @@ -1579,62 +1615,55 @@ no_open: } EXPORT_SYMBOL_GPL(nfs_atomic_open); -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, + unsigned int flags) { struct inode *inode; - int ret = 0; if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) - goto no_open; + goto full_reval; if (d_mountpoint(dentry)) - goto no_open; - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) - goto no_open; + goto full_reval; inode = d_inode(dentry); /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ - if (inode == NULL) { - struct dentry *parent; - struct inode *dir; - - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - } - if (!nfs_neg_need_reval(dir, dentry, flags)) - ret = 1; - else if (flags & LOOKUP_RCU) - ret = -ECHILD; - if (!(flags & LOOKUP_RCU)) - dput(parent); - else if (parent != READ_ONCE(dentry->d_parent)) - return -ECHILD; - goto out; - } + if (inode == NULL) + goto full_reval; + + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) + return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) - goto no_open; + goto full_reval; + /* We cannot do exclusive creation on a positive dentry */ - if (flags & LOOKUP_EXCL) - goto no_open; + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL)) + goto reval_dentry; + + /* Check if the directory changed */ + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) + goto reval_dentry; /* Let f_op->open() actually open (and revalidate) the file */ - ret = 1; + return 1; +reval_dentry: + if (flags & LOOKUP_RCU) + return -ECHILD; + return nfs_lookup_revalidate_dentry(dir, dentry, inode);; -out: - return ret; +full_reval: + return nfs_do_lookup_revalidate(dir, dentry, flags); +} -no_open: - return nfs_lookup_revalidate(dentry, flags); +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) +{ + return __nfs_lookup_revalidate(dentry, flags, + nfs4_do_lookup_revalidate); } #endif /* CONFIG_NFSV4 */ diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d175724ff566..61f46facb39c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", .owner = THIS_MODULE, + .max_layoutget_response = 4096, /* 1 page or so... */ .alloc_layout_hdr = filelayout_alloc_layout_hdr, .free_layout_hdr = filelayout_free_layout_hdr, .alloc_lseg = filelayout_alloc_lseg, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cae43333ef16..86bcba40ca61 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2356,6 +2356,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .name = "LAYOUT_FLEX_FILES", .owner = THIS_MODULE, .flags = PNFS_LAYOUTGET_ON_OPEN, + .max_layoutget_response = 4096, /* 1 page or so... */ .set_layoutdriver = ff_layout_set_layoutdriver, .alloc_layout_hdr = ff_layout_alloc_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 59aa04976331..74d8d5352438 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -453,7 +453,7 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); struct rpc_cred *cred; - if (mirror) { + if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); if (!cred) cred = get_rpccred(mdscred); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b65aee481d13..5b1eee4952b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *head = &ctx->lock_context; - struct nfs_lock_context *pos = head; + struct nfs_lock_context *pos; - do { + list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) { if (pos->lockowner != current->files) continue; - refcount_inc(&pos->count); - return pos; - } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); + if (refcount_inc_not_zero(&pos->count)) + return pos; + } return NULL; } @@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) struct nfs_lock_context *res, *new = NULL; struct inode *inode = d_inode(ctx->dentry); - spin_lock(&inode->i_lock); + rcu_read_lock(); res = __nfs_find_lock_context(ctx); + rcu_read_unlock(); if (res == NULL) { - spin_unlock(&inode->i_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) return ERR_PTR(-ENOMEM); @@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); if (res == NULL) { - list_add_tail(&new->list, &ctx->lock_context.list); + list_add_tail_rcu(&new->list, &ctx->lock_context.list); new->open_context = ctx; res = new; new = NULL; } + spin_unlock(&inode->i_lock); + kfree(new); } - spin_unlock(&inode->i_lock); - kfree(new); return res; } EXPORT_SYMBOL_GPL(nfs_get_lock_context); @@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; - list_del(&l_ctx->list); + list_del_rcu(&l_ctx->list); spin_unlock(&inode->i_lock); - kfree(l_ctx); + kfree_rcu(l_ctx, rcu_head); } EXPORT_SYMBOL_GPL(nfs_put_lock_context); @@ -978,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { - if (ctx != NULL) - refcount_inc(&ctx->lock_context.count); - return ctx; + if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count)) + return ctx; + return NULL; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -989,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct inode *inode = d_inode(ctx->dentry); struct super_block *sb = ctx->dentry->d_sb; + if (!refcount_dec_and_test(&ctx->lock_context.count)) + return; if (!list_empty(&ctx->list)) { - if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) - return; - list_del(&ctx->list); + spin_lock(&inode->i_lock); + list_del_rcu(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!refcount_dec_and_test(&ctx->lock_context.count)) - return; + } if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) @@ -1003,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) dput(ctx->dentry); nfs_sb_deactive(sb); kfree(ctx->mdsthreshold); - kfree(ctx); + kfree_rcu(ctx, rcu_head); } void put_nfs_open_context(struct nfs_open_context *ctx) @@ -1027,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - if (ctx->mode & FMODE_WRITE) - list_add(&ctx->list, &nfsi->open_files); - else - list_add_tail(&ctx->list, &nfsi->open_files); + list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); @@ -1051,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; - spin_lock(&inode->i_lock); - list_for_each_entry(pos, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; ctx = get_nfs_open_context(pos); - break; + if (ctx) + break; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ctx; } @@ -1078,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx->error < 0) invalidate_inode_pages2(inode->i_mapping); filp->private_data = NULL; - spin_lock(&inode->i_lock); - list_move_tail(&ctx->list, &NFS_I(inode)->open_files); - spin_unlock(&inode->i_lock); put_nfs_open_context_sync(ctx); } } @@ -1329,19 +1323,11 @@ static bool nfs_file_has_writers(struct nfs_inode *nfsi) { struct inode *inode = &nfsi->vfs_inode; - assert_spin_locked(&inode->i_lock); - if (!S_ISREG(inode->i_mode)) return false; if (list_empty(&nfsi->open_files)) return false; - /* Note: This relies on nfsi->open_files being ordered with writers - * being placed at the head of the list. - * See nfs_inode_attach_open_context() - */ - return (list_first_entry(&nfsi->open_files, - struct nfs_open_context, - list)->mode & FMODE_WRITE) == FMODE_WRITE; + return inode_is_open_for_write(inode); } static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ec8a9efa268f..71bc16225b98 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; + struct nfs_server *server = NFS_SERVER(inode); if (hdr->pgio_done_cb != NULL) return hdr->pgio_done_cb(task, hdr); @@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) |
