summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-03 14:20:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-03 14:20:40 -0700
commit070a542f08acb7e8cf197287f5c44658c715d2d1 (patch)
treed89287e9db5c585db61da8b11f4f16ff4ece42e3
parent63e62baaa72e1aceb422f64a50408bc9b02a6022 (diff)
parent1f0d4ab0f5326ab6f940482b1941d2209d61285a (diff)
downloadlinux-070a542f08acb7e8cf197287f5c44658c715d2d1.tar.gz
linux-070a542f08acb7e8cf197287f5c44658c715d2d1.tar.bz2
linux-070a542f08acb7e8cf197287f5c44658c715d2d1.zip
Merge tag 'nfs-for-6.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "New Features: - Add a Kconfig option to redirect dfprintk() to the trace buffer - Enable use of the RWF_DONTCACHE flag on the NFS client - Add striped layout handling to pNFS flexfiles - Add proper localio handling for READ and WRITE O_DIRECT Bugfixes: - Handle NFS4ERR_GRACE errors during delegation recall - Fix NFSv4.1 backchannel max_resp_sz verification check - Fix mount hang after CREATE_SESSION failure - Fix d_parent->d_inode locking in nfs4_setup_readdir() Other Cleanups and Improvements: - Improvements to write handling tracepoints - Fix a few trivial spelling mistakes - Cleanups to the rpcbind cleanup call sites - Convert the SUNRPC xdr_buf to use a scratch folio instead of scratch page - Remove unused NFS_WBACK_BUSY() macro - Remove __GFP_NOWARN flags - Unexport rpc_malloc() and rpc_free()" * tag 'nfs-for-6.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (46 commits) NFS: add basic STATX_DIOALIGN and STATX_DIO_READ_ALIGN support nfs/localio: add tracepoints for misaligned DIO READ and WRITE support nfs/localio: add proper O_DIRECT support for READ and WRITE nfs/localio: refactor iocb initialization nfs/localio: refactor iocb and iov_iter_bvec initialization nfs/localio: avoid issuing misaligned IO using O_DIRECT nfs/localio: make trace_nfs_local_open_fh more useful NFSD: filecache: add STATX_DIOALIGN and STATX_DIO_READ_ALIGN support sunrpc: unexport rpc_malloc() and rpc_free() NFSv4/flexfiles: Add support for striped layouts NFSv4/flexfiles: Update layout stats & error paths for striped layouts NFSv4/flexfiles: Write path updates for striped layouts NFSv4/flexfiles: Commit path updates for striped layouts NFSv4/flexfiles: Read path updates for striped layouts NFSv4/flexfiles: Update low level helper functions to be DS stripe aware. NFSv4/flexfiles: Add data structure support for striped layouts NFSv4/flexfiles: Use ds_commit_idx when marking a write commit NFSv4/flexfiles: Remove cred local variable dependency nfs4_setup_readdir(): insufficient locking for ->d_parent->d_inode dereferencing NFS: Enable use of the RWF_DONTCACHE flag on the NFS client ...
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/nfs/blocklayout/blocklayout.c8
-rw-r--r--fs/nfs/blocklayout/dev.c8
-rw-r--r--fs/nfs/callback.c10
-rw-r--r--fs/nfs/dir.c8
-rw-r--r--fs/nfs/file.c29
-rw-r--r--fs/nfs/filelayout/filelayout.c10
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c10
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c786
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h64
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c115
-rw-r--r--fs/nfs/inode.c15
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/localio.c403
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs3xdr.c2
-rw-r--r--fs/nfs/nfs42proc.c4
-rw-r--r--fs/nfs/nfs42xdr.c2
-rw-r--r--fs/nfs/nfs4file.c1
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfs/nfs4state.c3
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfstrace.h215
-rw-r--r--fs/nfs/write.c34
-rw-r--r--fs/nfsd/filecache.c34
-rw-r--r--fs/nfsd/filecache.h4
-rw-r--r--fs/nfsd/localio.c11
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfssvc.c7
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--include/linux/nfs_page.h2
-rw-r--r--include/linux/nfs_xdr.h4
-rw-r--r--include/linux/nfslocalio.h2
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/sunrpc/debug.h30
-rw-r--r--include/linux/sunrpc/svc.h4
-rw-r--r--include/linux/sunrpc/svc_xprt.h3
-rw-r--r--include/linux/sunrpc/xdr.h8
-rw-r--r--include/trace/misc/fs.h22
-rw-r--r--mm/filemap.c34
-rw-r--r--net/sunrpc/Kconfig14
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c8
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/svc.c11
-rw-r--r--net/sunrpc/svc_xprt.c7
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c2
48 files changed, 1460 insertions, 547 deletions
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index e80262a51884..d68afa196535 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -216,8 +216,7 @@ out_err:
if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
- svc_xprt_destroy_all(serv, net);
- svc_rpcb_cleanup(serv, net);
+ svc_xprt_destroy_all(serv, net, true);
return err;
}
@@ -255,8 +254,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
nlm_shutdown_hosts_net(net);
cancel_delayed_work_sync(&ln->grace_period_end);
locks_end_grace(&ln->lockd_manager);
- svc_xprt_destroy_all(serv, net);
- svc_rpcb_cleanup(serv, net);
+ svc_xprt_destroy_all(serv, net, true);
}
} else {
pr_err("%s: no users! net=%x\n",
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 5d6edafbed20..0e4c67373e4f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -676,7 +676,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
struct pnfs_layout_segment *lseg;
struct xdr_buf buf;
struct xdr_stream xdr;
- struct page *scratch;
+ struct folio *scratch;
int status, i;
uint32_t count;
__be32 *p;
@@ -689,13 +689,13 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
return ERR_PTR(-ENOMEM);
status = -ENOMEM;
- scratch = alloc_page(gfp_mask);
+ scratch = folio_alloc(gfp_mask, 0);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf,
lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_page(&xdr, scratch);
+ xdr_set_scratch_folio(&xdr, scratch);
status = -EIO;
p = xdr_inline_decode(&xdr, 4);
@@ -744,7 +744,7 @@ process_extents:
}
out_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out:
dprintk("%s returns %d\n", __func__, status);
switch (status) {
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 44306ac22353..ab76120705e2 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -541,16 +541,16 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct pnfs_block_dev *top;
struct xdr_stream xdr;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
int nr_volumes, ret, i;
__be32 *p;
- scratch = alloc_page(gfp_mask);
+ scratch = folio_alloc(gfp_mask, 0);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_page(&xdr, scratch);
+ xdr_set_scratch_folio(&xdr, scratch);
p = xdr_inline_decode(&xdr, sizeof(__be32));
if (!p)
@@ -582,7 +582,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
out_free_volumes:
kfree(volumes);
out_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out:
return node;
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 86bdc7d23fb9..c8b837006bb2 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -136,7 +136,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
return;
dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
- svc_xprt_destroy_all(serv, net);
+ svc_xprt_destroy_all(serv, net, false);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
@@ -153,7 +153,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
ret = svc_bind(serv, net);
if (ret < 0) {
printk(KERN_WARNING "NFS: bind callback service failed\n");
- goto err_bind;
+ goto err;
}
ret = 0;
@@ -166,13 +166,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
- goto err_socks;
+ goto err;
}
return 0;
-err_socks:
- svc_rpcb_cleanup(serv, net);
-err_bind:
+err:
nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %x\n", ret, net->ns.inum);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5f7d9be6f022..46d9c65d50f8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -829,17 +829,17 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc,
struct address_space *mapping = desc->file->f_mapping;
struct folio *new, *folio = *arrays;
struct xdr_stream stream;
- struct page *scratch;
+ struct folio *scratch;
struct xdr_buf buf;
u64 cookie;
int status;
- scratch = alloc_page(GFP_KERNEL);
+ scratch = folio_alloc(GFP_KERNEL, 0);
if (scratch == NULL)
return -ENOMEM;
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
do {
status = nfs_readdir_entry_decode(desc, entry, &stream);
@@ -891,7 +891,7 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc,
if (folio != *arrays)
nfs_readdir_folio_unlock_and_put(folio);
- put_page(scratch);
+ folio_put(scratch);
return status;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8059ece82468..d020aab40c64 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -161,6 +161,8 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
+ trace_nfs_file_read(iocb, to);
+
if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_read(iocb, to, false);
@@ -361,6 +363,8 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
if (pnfs_ld_read_whole_page(file_inode(file)))
return true;
+ if (folio_test_dropbehind(folio))
+ return false;
/* Open for reading too? */
if (file->f_mode & FMODE_READ)
return true;
@@ -380,22 +384,23 @@ static int nfs_write_begin(const struct kiocb *iocb,
loff_t pos, unsigned len, struct folio **foliop,
void **fsdata)
{
- fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
struct file *file = iocb->ki_filp;
int once_thru = 0;
int ret;
+ trace_nfs_write_begin(file_inode(file), pos, len);
+
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos);
- fgp |= fgf_set_order(len);
start:
- folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
- mapping_gfp_mask(mapping));
- if (IS_ERR(folio))
- return PTR_ERR(folio);
+ folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
+ goto out;
+ }
*foliop = folio;
ret = nfs_flush_incompatible(file, folio);
@@ -405,11 +410,14 @@ start:
} else if (!once_thru &&
nfs_want_read_modify_write(file, folio, pos, len)) {
once_thru = 1;
+ folio_clear_dropbehind(folio);
ret = nfs_read_folio(file, folio);
folio_put(folio);
if (!ret)
goto start;
}
+out:
+ trace_nfs_write_begin_done(file_inode(file), pos, len, ret);
return ret;
}
@@ -423,6 +431,7 @@ static int nfs_write_end(const struct kiocb *iocb,
unsigned offset = offset_in_folio(folio, pos);
int status;
+ trace_nfs_write_end(file_inode(file), pos, len);
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
@@ -451,13 +460,16 @@ static int nfs_write_end(const struct kiocb *iocb,
folio_unlock(folio);
folio_put(folio);
- if (status < 0)
+ if (status < 0) {
+ trace_nfs_write_end_done(file_inode(file), pos, len, status);
return status;
+ }
NFS_I(mapping->host)->write_io += copied;
if (nfs_ctx_key_to_expire(ctx, mapping->host))
nfs_wb_all(mapping->host);
+ trace_nfs_write_end_done(file_inode(file), pos, len, copied);
return copied;
}
@@ -690,6 +702,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
errseq_t since;
int error;
+ trace_nfs_file_write(iocb, from);
+
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
@@ -949,5 +963,6 @@ const struct file_operations nfs_file_operations = {
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
+ .fop_flags = FOP_DONTCACHE,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d39a1f58e18d..5c4551117c58 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -646,19 +646,19 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
{
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
__be32 *p;
uint32_t nfl_util;
int i;
dprintk("%s: set_layout_map Begin\n", __func__);
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
return -ENOMEM;
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
* num_fh (4) */
@@ -724,11 +724,11 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
fl->fh_array[i]->size);
}
- __free_page(scratch);
+ folio_put(scratch);
return 0;
out_err:
- __free_page(scratch);
+ folio_put(scratch);
return -EIO;
}
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 29d9234d5c08..df79aeb68db4 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -73,18 +73,18 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_file_layout_dsaddr *dsaddr = NULL;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
goto out_err;
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* Get the stripe count (number of stripe index) */
p = xdr_inline_decode(&stream, 4);
@@ -186,7 +186,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
}
}
- __free_page(scratch);
+ folio_put(scratch);
return dsaddr;
out_err_drain_dsaddrs:
@@ -204,7 +204,7 @@ out_err_free_deviceid:
out_err_free_stripe_indices:
kfree(stripe_indices);
out_err_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out_err:
dprintk("%s ERROR: returning NULL\n", __func__);
return NULL;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 9edb5f9b0c4e..df01d2876b68 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -47,7 +47,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
int dev_limit, enum nfs4_ff_op_type type);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
const struct nfs42_layoutstat_devinfo *devinfo,
- struct nfs4_ff_layout_mirror *mirror);
+ struct nfs4_ff_layout_ds_stripe *dss_info);
static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
@@ -164,31 +164,32 @@ decode_name(struct xdr_stream *xdr, u32 *id)
}
static struct nfsd_file *
-ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
struct nfs_client *clp, const struct cred *cred,
struct nfs_fh *fh, fmode_t mode)
{
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
- return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode);
+ return nfs_local_open_fh(clp, cred, fh, &mirror->dss[dss_id].nfl, mode);
#else
return NULL;
#endif
}
-static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
- const struct nfs4_ff_layout_mirror *m2)
+static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
+ const struct nfs4_ff_layout_ds_stripe *dss2)
{
int i, j;
- if (m1->fh_versions_cnt != m2->fh_versions_cnt)
+ if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
return false;
- for (i = 0; i < m1->fh_versions_cnt; i++) {
+
+ for (i = 0; i < dss1->fh_versions_cnt; i++) {
bool found_fh = false;
- for (j = 0; j < m2->fh_versions_cnt; j++) {
- if (nfs_compare_fh(&m1->fh_versions[i],
- &m2->fh_versions[j]) == 0) {
+ for (j = 0; j < dss2->fh_versions_cnt; j++) {
+ if (nfs_compare_fh(&dss1->fh_versions[i],
+ &dss2->fh_versions[j]) == 0) {
found_fh = true;
break;
}
@@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
return true;
}
+static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
+ const struct nfs4_ff_layout_mirror *m2)
+{
+ u32 dss_id;
+
+ if (m1->dss_count != m2->dss_count)
+ return false;
+
+ for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
+ if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id]))
+ return false;
+
+ return true;
+}
+
+static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
+ const struct nfs4_ff_layout_mirror *m2)
+{
+ u32 dss_id;
+
+ if (m1->dss_count != m2->dss_count)
+ return false;
+
+ for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
+ if (memcmp(&m1->dss[dss_id].devid,
+ &m2->dss[dss_id].devid,
+ sizeof(m1->dss[dss_id].devid)) != 0)
+ return false;
+
+ return true;
+}
+
static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
struct nfs4_ff_layout_mirror *mirror)
@@ -209,7 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
spin_lock(&inode->i_lock);
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
- if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)
+ if (!ff_mirror_match_devid(mirror, pos))
continue;
if (!ff_mirror_match_fh(mirror, pos))
continue;
@@ -240,29 +273,37 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
+ u32 dss_id;
mirror = kzalloc(sizeof(*mirror), gfp_flags);
if (mirror != NULL) {
spin_lock_init(&mirror->lock);
refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
- nfs_localio_file_init(&mirror->nfl);
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ nfs_localio_file_init(&mirror->dss[dss_id].nfl);
}
return mirror;
}
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ const struct cred *cred;
+ u32 dss_id;
ff_layout_remove_mirror(mirror);
- kfree(mirror->fh_versions);
- nfs_close_local_fh(&mirror->nfl);
- cred = rcu_access_pointer(mirror->ro_cred);
- put_cred(cred);
- cred = rcu_access_pointer(mirror->rw_cred);
- put_cred(cred);
- nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
+
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
+ kfree(mirror->dss[dss_id].fh_versions);
+ cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
+ put_cred(cred);
+ cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
+ put_cred(cred);
+ nfs_close_local_fh(&mirror->dss[dss_id].nfl);
+ nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
+ }
+
+ kfree(mirror->dss);
kfree(mirror);
}
@@ -366,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
free_me);
}
+static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
+{
+ u32 dss_id, sum = 0;
+
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ sum += mirror->dss[dss_id].efficiency;
+
+ return sum;
+}
+
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++)
- if (fls->mirror_array[i]->efficiency <
- fls->mirror_array[j]->efficiency)
+ if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
+ ff_mirror_efficiency_sum(fls->mirror_array[j]))
swap(fls->mirror_array[i],
fls->mirror_array[j]);
}
@@ -388,20 +439,21 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_ff_layout_segment *fls = NULL;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
u64 stripe_unit;
u32 mirror_array_cnt;
__be32 *p;
int i, rc;
+ struct nfs4_ff_layout_ds_stripe *dss_info;
dprintk("--> %s\n", __func__);
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
return ERR_PTR(-ENOMEM);
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
lgr->layoutp->len);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* stripe unit and mirror_array_cnt */
rc = -EIO;
@@ -427,23 +479,32 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
fls->mirror_array_cnt = mirror_array_cnt;
fls->stripe_unit = stripe_unit;
+ u32 dss_count = 0;
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
struct cred *kcred;
const struct cred __rcu *cred;
kuid_t uid;
kgid_t gid;
- u32 ds_count, fh_count, id;
- int j;
+ u32 fh_count, id;
+ int j, dss_id;
rc = -EIO;
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
- ds_count = be32_to_cpup(p);
- /* FIXME: allow for striping? */
- if (ds_count != 1)
+ // Ensure all mirrors have same stripe count.
+ if (dss_count == 0)
+ dss_count = be32_to_cpup(p);
+ else if (dss_count != be32_to_cpup(p))
+ goto out_err_free;
+
+ if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
+ dss_count == 0)
+ goto out_err_free;
+
+ if (dss_count > 1 && stripe_unit == 0)
goto out_err_free;
fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
@@ -452,91 +513,105 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;
}
- fls->mirror_array[i]->ds_count = ds_count;
+ fls->mirror_array[i]->dss_count = dss_count;
+ fls->mirror_array[i]->dss =
+ kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
+ gfp_flags);
- /* deviceid */
- rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);
- if (rc)
- goto out_err_free;
+ for (dss_id = 0; dss_id < dss_count; dss_id++) {
+ dss_info = &fls->mirror_array[i]->dss[dss_id];
+ dss_info->mirror = fls->mirror_array[i];
- /* efficiency */
-