| field | value | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-26 10:33:33 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-26 10:33:33 -0700 |
| commit | ea8ea737c46cffa5d0ee74309f81e55a7e5e9c2a (patch) | |
| tree | ae159b2c5968fa3c2a5a4ab7176584bc9a17b889 | |
| parent | 0b9210c9c86e46a7a62bbc7b69b84001315072ff (diff) | |
| parent | c7d73af2d249f0323f5cdb171a59497ce80011fb (diff) | |
Merge tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Features:
   - Add support for the NFS v4.2 COPY operation
   - Add support for NFS/RDMA over IPv6

  Bugfixes and cleanups:
   - Avoid race that crashes nfs_init_commit()
   - Fix oops in callback path
   - Fix LOCK/OPEN race when unlinking an open file
   - Choose correct stateids when using delegations in setattr, read and write
   - Don't send empty SETATTR after OPEN_CREATE
   - xprtrdma: Prevent server from writing a reply into memory client has released
   - xprtrdma: Support using Read list and Reply chunk in one RPC call"
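The NFS v4.2 COPY support listed under Features is the piece that lets a copy be offloaded to the server instead of round-tripping the data through the client. Below is a minimal userspace sketch of how such a copy is usually driven, assuming a kernel and glibc where copy_file_range(2) is available and an NFSv4.2 mount whose server supports COPY; the paths are placeholders.

```c
/* Illustrative only: copy src to dst with copy_file_range(2).  On an
 * NFSv4.2 mount whose server supports COPY the client can offload the
 * copy; otherwise the syscall is designed to fall back to an ordinary
 * data copy.  Paths are placeholders. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	int in = open("/mnt/nfs/src.img", O_RDONLY);
	int out = open("/mnt/nfs/dst.img", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	struct stat st;
	off_t left;

	if (in < 0 || out < 0 || fstat(in, &st) < 0) {
		perror("setup");
		return 1;
	}

	/* Let the kernel pick the offsets; loop until the whole file is copied. */
	for (left = st.st_size; left > 0; ) {
		ssize_t n = copy_file_range(in, NULL, out, NULL, left, 0);
		if (n <= 0) {
			perror("copy_file_range");
			return 1;
		}
		left -= n;
	}
	close(in);
	close(out);
	return 0;
}
```

Whether the copy is actually performed server-side depends on the mount and the server; from userspace the call looks the same either way.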
* tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (61 commits)
pnfs: pnfs_update_layout needs to consider if strict iomode checking is on
nfs/flexfiles: Use the layout segment for reading unless it is IOMODE_RW and reading is disabled
nfs/flexfiles: Helper function to detect FF_FLAGS_NO_READ_IO
nfs: avoid race that crashes nfs_init_commit
NFS: checking for NULL instead of IS_ERR() in nfs_commit_file()
pnfs: make pnfs_layout_process more robust
pnfs: rework LAYOUTGET retry handling
pnfs: lift retry logic from send_layoutget to pnfs_update_layout
pnfs: fix bad error handling in send_layoutget
flexfiles: add kerneldoc header to nfs4_ff_layout_prepare_ds
flexfiles: remove pointless setting of NFS_LAYOUT_RETURN_REQUESTED
pnfs: only tear down lsegs that precede seqid in LAYOUTRETURN args
pnfs: keep track of the return sequence number in pnfs_layout_hdr
pnfs: record sequence in pnfs_layout_segment when it's created
pnfs: don't merge new ff lsegs with ones that have LAYOUTRETURN bit set
pNFS/flexfiles: When initing reads or writes, we might have to retry connecting to DSes
pNFS/flexfiles: When checking for available DSes, conditionally check for MDS io
pNFS/flexfile: Fix erroneous fall back to read/write through the MDS
NFS: Reclaim writes via writepage are opportunistic
NFSv4: Use the right stateid for delegations in setattr, read and write
...
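Several of the pnfs commits above ("record sequence in pnfs_layout_segment when it's created", "keep track of the return sequence number in pnfs_layout_hdr", "only tear down lsegs that precede seqid in LAYOUTRETURN args") boil down to one comparison: a recall or LAYOUTRETURN should only invalidate segments whose stateid seqid precedes the seqid carried in its arguments, so segments handed out by a newer LAYOUTGET survive the race. Here is a standalone sketch of that wrap-safe check; the struct and helper names are simplified stand-ins, not the kernel's.

```c
/* Simplified model of seqid-gated layout segment teardown.  Only the
 * wrap-safe comparison mirrors the kernel; everything else is a stand-in. */
#include <stdint.h>
#include <stdio.h>

struct lseg {
	uint32_t seqid;  /* seqid of the layout stateid that created this segment */
	int      valid;
};

/* Sequence numbers wrap at 2^32, so compare the signed difference. */
static int seqid_is_newer(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

/* Invalidate only the segments that precede the returned seqid. */
static void return_matching_lsegs(struct lseg *segs, int n, uint32_t return_seqid)
{
	for (int i = 0; i < n; i++)
		if (seqid_is_newer(return_seqid, segs[i].seqid))
			segs[i].valid = 0;
}

int main(void)
{
	struct lseg segs[] = {
		{ .seqid = 3, .valid = 1 },  /* precedes the return: torn down */
		{ .seqid = 7, .valid = 1 },  /* newer: left alone */
	};

	return_matching_lsegs(segs, 2, 5);
	printf("seqid 3 valid=%d, seqid 7 valid=%d\n", segs[0].valid, segs[1].valid);
	return 0;
}
```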
49 files changed, 1764 insertions, 899 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c (index 618ced381a14..aaa2e8d3df6f)
- initiate_file_draining(): pass the layout stateid's sequence number, be32_to_cpu(args->cbl_stateid.seqid), as an extra argument to pnfs_mark_matching_lsegs_return().
- nfs4_callback_sequence(): when args->csa_cachethis != 0, set status = htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE) and goto out_unlock instead of returning directly.

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c (index 976c90608e56..d81f96aacd51)
- decode_stateid(): copy the raw NFS4_STATEID_SIZE bytes into stateid->data rather than over the whole nfs4_stateid, leaving the new type field untouched.
- Add decode_delegation_stateid() and decode_layout_stateid() helpers that set stateid->type to NFS4_DELEGATION_STATEID_TYPE / NFS4_LAYOUT_STATEID_TYPE before calling decode_stateid(); decode_recall_args() and decode_layoutrecall_args() now use them.

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c (index 5166adcfc0fb..322c2585bc34)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h (index 333063e032f0..64724d252a79)
- nfs4_copy_delegation_stateid() changes signature to (struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); when a delegation matches and cred is non-NULL, the delegation's credential is returned through *cred via get_rpccred(). The kerneldoc header and the declaration in delegation.h are updated to match.

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c (index 741a92c470bb..979b3c4dee6a)
- struct nfs_direct_req gains a max_count field ("max expected count"); nfs_file_direct_read() and nfs_file_direct_write() initialize it alongside bytes_left, and nfs_direct_good_bytes() adds WARN_ON_ONCE(dreq->count >= dreq->max_count).
- The commit info no longer carries a lock pointer: nfs_init_cinfo_from_dreq() sets cinfo->inode = dreq->inode, and nfs_direct_write_scan_commit_list() / nfs_direct_write_reschedule() lock &cinfo->inode->i_lock (or &cinfo.inode->i_lock) instead of cinfo->lock.

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c (index 3384dc8e6683..aa59757389dc)
- filelayout_alloc_commit_info() likewise locks &cinfo->inode->i_lock.
- filelayout_pg_init_read() and filelayout_pg_init_write() pass the new strict_iomode argument (false) to pnfs_update_layout().
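The callback_xdr.c change above is a small pattern worth seeing in one place: the generic decoder copies only the raw bytes, and thin per-operation wrappers stamp the stateid with its type before delegating. Below is a userspace model of that pattern; the enum values, struct layout and "wire" buffer are simplified stand-ins, not the kernel's nfs4_stateid or xdr_stream.

```c
/* Userspace model of the typed-stateid decode pattern from
 * fs/nfs/callback_xdr.c: decode_stateid() fills in the raw bytes only,
 * and typed wrappers set the type field first.  Types and sizes here
 * are simplified stand-ins. */
#include <stdio.h>
#include <string.h>

#define STATEID_SIZE 16

enum stateid_type {
	STATEID_TYPE_INVALID = 0,
	STATEID_TYPE_DELEGATION,
	STATEID_TYPE_LAYOUT,
};

struct stateid {
	enum stateid_type type;
	unsigned char data[STATEID_SIZE];
};

/* Generic decode: fill in data[] only, never the type. */
static int decode_stateid(const unsigned char *wire, struct stateid *sid)
{
	memcpy(sid->data, wire, STATEID_SIZE);
	return 0;
}

/* Per-operation wrappers tag the stateid before decoding, so later
 * consumers can tell a delegation stateid from a layout stateid. */
static int decode_delegation_stateid(const unsigned char *wire, struct stateid *sid)
{
	sid->type = STATEID_TYPE_DELEGATION;
	return decode_stateid(wire, sid);
}

static int decode_layout_stateid(const unsigned char *wire, struct stateid *sid)
{
	sid->type = STATEID_TYPE_LAYOUT;
	return decode_stateid(wire, sid);
}

int main(void)
{
	unsigned char wire[STATEID_SIZE] = { 0xde, 0xad };
	struct stateid sid;

	decode_delegation_stateid(wire, &sid);
	printf("type=%d data[0]=%#x\n", sid.type, sid.data[0]);
	decode_layout_stateid(wire, &sid);
	printf("type=%d data[0]=%#x\n", sid.type, sid.data[0]);
	return 0;
}
```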
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c (index 0cb1abd535e3..0e8018bc9880)
- decode_stateid() is renamed decode_pnfs_stateid(); it sets stateid->type = NFS4_PNFS_DS_STATEID_TYPE and copies into stateid->data.
- Mirrors stop carrying raw uid/gid: ff_layout_alloc_lseg() decodes the ids into an auth_cred (make_kuid()/make_kgid(), with a static ff_zero_group as group_info), looks up an RPC credential via rpc_lookup_generic_cred(), and stores it in the mirror's RCU-protected ro_cred or rw_cred depending on lgr->range.iomode. ff_layout_free_mirror() puts both credentials, and when a duplicate mirror is found the credential pointers are swapped so the stale one is released.
- ff_lseg_merge() refuses to merge with a segment that has NFS_LSEG_LAYOUTRETURN set, instead of copying that bit onto the new segment.
- ff_layout_alloc_commit_info() switches from cinfo->lock to &cinfo->inode->i_lock.
- New ff_layout_pg_get_read() helper: call pnfs_update_layout(..., IOMODE_READ, strict_iomode, GFP_KERNEL) and, if the returned segment is one on which reads should be avoided (ff_layout_avoid_read_on_rw()), retry once with strict_iomode = true.
- ff_layout_pg_init_read() / ff_layout_pg_init_write(): when no usable data server is found and the layout forbids falling back to the MDS (ff_layout_no_fallback_to_mds()), fail the layout through a new out_pnfs label (pnfs_set_lo_fail()) rather than resetting to the MDS; the remaining pnfs_update_layout() callers (write init, mirror count) pass false for the new strict_iomode argument.
- ff_layout_async_handle_error_v4() uses the new ff_layout_avoid_mds_available_ds() helper, and ff_layout_read_done_cb() no longer sets NFS_LAYOUT_RETURN_REQUESTED before resending through pNFS.
- ff_layout_reset_to_mds() is renamed ff_layout_device_unavailable(); ff_layout_read_prepare_common() and ff_layout_write_prepare_common() exit the task with -EHOSTDOWN when the device is unavailable instead of resending inline.
- ff_layout_read_pagelist() treats a NULL ds_cred (rather than IS_ERR()) as failure and sets hdr->pgio_done_cb = ff_layout_read_done_cb only once a data server has been chosen.
(The diff is truncated at this point; the merge touches 49 files in total, per the diffstat above.)
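The ff_layout_pg_get_read() retry described above is easy to get backwards, so here is a self-contained model of the control flow: ask for a layout without strict iomode checking first, and only if the server hands back an RW segment on which reads are disallowed, retry demanding a READ segment. All names below, and the fake update_layout() behaviour, are stand-ins rather than kernel code.

```c
/* Standalone model of the "loose lookup, then strict retry" read path. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

enum iomode { IOMODE_READ, IOMODE_RW };

struct layout_segment {
	enum iomode iomode;
	bool no_read_io;        /* server marked this RW segment "no reads" */
};

/* Stand-in for pnfs_update_layout(): pretend the server prefers to hand
 * out an RW segment flagged no_read_io unless strict checking is asked for. */
static struct layout_segment *update_layout(enum iomode mode, bool strict_iomode)
{
	struct layout_segment *lseg = malloc(sizeof(*lseg));

	if (!lseg)
		return NULL;
	if (strict_iomode) {
		lseg->iomode = mode;
		lseg->no_read_io = false;
	} else {
		lseg->iomode = IOMODE_RW;
		lseg->no_read_io = true;
	}
	return lseg;
}

static bool avoid_read_on_rw(const struct layout_segment *lseg)
{
	return lseg->iomode == IOMODE_RW && lseg->no_read_io;
}

static struct layout_segment *get_read_lseg(void)
{
	bool strict_iomode = false;
	struct layout_segment *lseg;

retry_strict:
	lseg = update_layout(IOMODE_READ, strict_iomode);
	/* First pass was loose; if the result forbids reads, retry strictly. */
	if (lseg && !strict_iomode && avoid_read_on_rw(lseg)) {
		free(lseg);
		strict_iomode = true;
		goto retry_strict;
	}
	return lseg;
}

int main(void)
{
	struct layout_segment *lseg = get_read_lseg();

	if (lseg)
		printf("got %s segment\n", lseg->iomode == IOMODE_READ ? "READ" : "RW");
	free(lseg);
	return 0;
}
```

The kernel version has the same shape, looping back through a retry_strict label after dropping the unusable segment.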
