summaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-24 15:44:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-24 15:44:18 -0700
commit684a64bf32b6e488004e0ad7f0d7e922798f65b6 (patch)
tree61bd3035879b2b1548f30b8bf8d8f40a79a91985 /fs/nfs
parentf7fccaa772718f6d2e798dece4a5210fe4c406ec (diff)
parent68898131d2df70d1a9ad5c2f93f0f54dd6d5c336 (diff)
downloadlinux-684a64bf32b6e488004e0ad7f0d7e922798f65b6.tar.gz
linux-684a64bf32b6e488004e0ad7f0d7e922798f65b6.tar.bz2
linux-684a64bf32b6e488004e0ad7f0d7e922798f65b6.zip
Merge tag 'nfs-for-6.12-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "New Features: - Add a 'noalignwrite' mount option for lock-less 'lost writes' prevention - Add support for the LOCALIO protocol extention Bugfixes: - Fix memory leak in error path of nfs4_do_reclaim() - Simplify and guarantee lock owner uniqueness - Fix -Wformat-truncation warning - Fix folio refcounts by using folio_attach_private() - Fix failing the mount system call when the server is down - Fix detection of "Proxying of Times" server support Cleanups: - Annotate struct nfs_cache_array with __counted_by() - Remove unnecessary NULL checks before kfree() - Convert RPC_TASK_* constants to an enum - Remove obsolete or misleading comments and declerations" * tag 'nfs-for-6.12-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (41 commits) nfs: Fix `make htmldocs` warnings in the localio documentation nfs: add "NFS Client and Server Interlock" section to localio.rst nfs: add FAQ section to Documentation/filesystems/nfs/localio.rst nfs: add Documentation/filesystems/nfs/localio.rst nfs: implement client support for NFS_LOCALIO_PROGRAM nfs/localio: use dedicated workqueues for filesystem read and write pnfs/flexfiles: enable localio support nfs: enable localio for non-pNFS IO nfs: add LOCALIO support nfs: pass struct nfsd_file to nfs_init_pgio and nfs_init_commit nfsd: implement server support for NFS_LOCALIO_PROGRAM nfsd: add LOCALIO support nfs_common: prepare for the NFS client to use nfsd_file for LOCALIO nfs_common: add NFS LOCALIO auxiliary protocol enablement SUNRPC: replace program list with program array SUNRPC: add svcauth_map_clnt_to_svc_cred_local SUNRPC: remove call_allocate() BUG_ONs nfsd: add nfsd_serv_try_get and nfsd_serv_put nfsd: add nfsd_file_acquire_local() nfsd: factor out __fh_verify to allow NULL rqstp to be passed ...
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/client.c21
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/filelayout/filelayout.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c56
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c6
-rw-r--r--fs/nfs/fs_context.c8
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c53
-rw-r--r--fs/nfs/internal.h54
-rw-r--r--fs/nfs/localio.c757
-rw-r--r--fs/nfs/nfs2xdr.c70
-rw-r--r--fs/nfs/nfs3xdr.c108
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/nfs4state.c22
-rw-r--r--fs/nfs/nfs4xdr.c90
-rw-r--r--fs/nfs/nfstrace.h61
-rw-r--r--fs/nfs/pagelist.c16
-rw-r--r--fs/nfs/pnfs_nfs.c2
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfs/write.c21
24 files changed, 1073 insertions, 312 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 57249f040dfc..0eb20012792f 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -4,6 +4,7 @@ config NFS_FS
depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD
select SUNRPC
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 5f6db37f461e..9fb2f2cac87e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o
+nfs-$(CONFIG_NFS_LOCALIO) += localio.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8286edd6062d..a1d21c4be0ac 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -178,6 +178,14 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
clp->cl_net = get_net(cl_init->net);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ seqlock_init(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ clp->cl_uuid.net = NULL;
+ clp->cl_uuid.dom = NULL;
+ spin_lock_init(&clp->cl_localio_lock);
+#endif /* CONFIG_NFS_LOCALIO */
+
clp->cl_principal = "*";
clp->cl_xprtsec = cl_init->xprtsec;
return clp;
@@ -233,6 +241,8 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
+ nfs_local_disable(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
@@ -424,7 +434,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- return rpc_ops->init_client(new, cl_init);
+ new = rpc_ops->init_client(new, cl_init);
+ if (!IS_ERR(new))
+ nfs_local_probe(new);
+ return new;
}
spin_unlock(&nn->nfs_client_lock);
@@ -997,8 +1010,8 @@ struct nfs_server *nfs_alloc_server(void)
init_waitqueue_head(&server->write_congestion_wait);
atomic_long_set(&server->writeback, 0);
- ida_init(&server->openowner_id);
- ida_init(&server->lockowner_id);
+ atomic64_set(&server->owner_ctr, 0);
+
pnfs_init_server(server);
rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
@@ -1037,8 +1050,6 @@ void nfs_free_server(struct nfs_server *server)
}
ida_free(&s_sysfs_ids, server->s_sysfs_id);
- ida_destroy(&server->lockowner_id);
- ida_destroy(&server->openowner_id);
put_cred(server->cred);
nfs_release_automount_timer();
call_rcu(&server->rcu, delayed_free);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4cb97ef41350..492cffd9d3d8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -151,7 +151,7 @@ struct nfs_cache_array {
unsigned char folio_full : 1,
folio_is_eof : 1,
cookies_are_ordered : 1;
- struct nfs_cache_array_entry array[];
+ struct nfs_cache_array_entry array[] __counted_by(size);
};
struct nfs_readdir_descriptor {
@@ -328,7 +328,8 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
goto out;
}
- cache_entry = &array->array[array->size];
+ array->size++;
+ cache_entry = &array->array[array->size - 1];
cache_entry->cookie = array->last_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
@@ -337,7 +338,6 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
array->last_cookie = entry->cookie;
if (array->last_cookie <= cache_entry->cookie)
array->cookies_are_ordered = 0;
- array->size++;
if (entry->eof != 0)
nfs_readdir_array_set_eof(array);
out:
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index b6e9aeaf4ce2..d39a1f58e18d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -1011,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
data->args.fh = fh;
return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
&filelayout_commit_call_ops, how,
- RPC_TASK_SOFTCONN);
+ RPC_TASK_SOFTCONN, NULL);
out_err:
pnfs_generic_prepare_to_resend_writes(data);
pnfs_generic_commit_release(data);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 39ba9f4208aa..f78115c6c2c1 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,21 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+static struct nfsd_file *
+ff_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+ if (mode & FMODE_WRITE) {
+ /*
+ * Always request read and write access since this corresponds
+ * to a rw layout.
+ */
+ mode |= FMODE_READ;
+ }
+
+ return nfs_local_open_fh(clp, cred, fh, mode);
+}
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -237,7 +253,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
@@ -1756,6 +1772,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1802,11 +1819,18 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, FMODE_READ);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1826,6 +1850,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1870,11 +1895,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1908,6 +1941,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1946,10 +1980,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN);
+ how, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return ret;
out_err:
@@ -2087,12 +2129,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
}
static void
-encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
-static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
const nfs4_stateid *stateid,
const struct nfs42_layoutstat_devinfo *devinfo)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..e58bedfb1dcc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * keep ds_clp even if DS is local, so that if local IO cannot
+ * proceed somehow, we can fall back to NFS whenever we want.
+ */
+ nfs_local_probe(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 6c9f3f6645dd..7e000d782e28 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -49,6 +49,7 @@ enum nfs_param {
Opt_bsize,
Opt_clientaddr,
Opt_cto,
+ Opt_alignwrite,
Opt_fg,
Opt_fscache,
Opt_fscache_flag,
@@ -149,6 +150,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("bsize", Opt_bsize),
fsparam_string("clientaddr", Opt_clientaddr),
fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag_no("alignwrite", Opt_alignwrite),
fsparam_flag ("fg", Opt_fg),
fsparam_flag_no("fsc", Opt_fscache_flag),
fsparam_string("fsc", Opt_fscache),
@@ -592,6 +594,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
break;
+ case Opt_alignwrite:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
+ else
+ ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 11ff2b2e060f..f13d25d95b85 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
}
/*
- * get an NFS2/NFS3 root dentry from the root filehandle
+ * get a root dentry from the root filehandle
*/
int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4914a11c3c2..542c7d97b235 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2461,35 +2461,54 @@ static void nfs_destroy_inodecache(void)
kmem_cache_destroy(nfs_inode_cachep);
}
+struct workqueue_struct *nfslocaliod_workqueue;
struct workqueue_struct *nfsiod_workqueue;
EXPORT_SYMBOL_GPL(nfsiod_workqueue);
/*
- * start up the nfsiod workqueue
+ * Destroy the nfsiod workqueues
*/
-static int nfsiod_start(void)
+static void nfsiod_stop(void)
{
struct workqueue_struct *wq;
- dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (wq == NULL)
- return -ENOMEM;
- nfsiod_workqueue = wq;
- return 0;
+
+ wq = nfsiod_workqueue;
+ if (wq != NULL) {
+ nfsiod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ wq = nfslocaliod_workqueue;
+ if (wq != NULL) {
+ nfslocaliod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#endif /* CONFIG_NFS_LOCALIO */
}
/*
- * Destroy the nfsiod workqueue
+ * Start the nfsiod workqueues
*/
-static void nfsiod_stop(void)
+static int nfsiod_start(void)
{
- struct workqueue_struct *wq;
-
- wq = nfsiod_workqueue;
- if (wq == NULL)
- return;
- nfsiod_workqueue = NULL;
- destroy_workqueue(wq);
+ dprintk("RPC: creating workqueue nfsiod\n");
+ nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (nfsiod_workqueue == NULL)
+ return -ENOMEM;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /*
+ * localio writes need to use a normal (non-memreclaim) workqueue.
+ * When we start getting low on space, XFS goes and calls flush_work() on
+ * a non-memreclaim work queue, which causes a priority inversion problem.
+ */
+ dprintk("RPC: creating workqueue nfslocaliod\n");
+ nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0);
+ if (unlikely(nfslocaliod_workqueue == NULL)) {
+ nfsiod_stop();
+ return -ENOMEM;
+ }
+#endif /* CONFIG_NFS_LOCALIO */
+ return 0;
}
unsigned int nfs_net_id;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5902a9beca1f..430733e3eff2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -9,6 +9,7 @@
#include <linux/crc32.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
+#include <linux/nfslocalio.h>
#include <linux/wait_bit.h>
#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
@@ -308,7 +309,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags);
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio);
void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
@@ -438,6 +440,7 @@ int nfs_check_flags(int);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
+extern struct workqueue_struct *nfslocaliod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_free_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
@@ -449,6 +452,51 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/* localio.c */
+extern void nfs_local_disable(struct nfs_client *);
+extern void nfs_local_probe(struct nfs_client *);
+extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
+ const struct cred *,
+ struct nfs_fh *,
+ const fmode_t);
+extern int nfs_local_doio(struct nfs_client *,
+ struct nfsd_file *,
+ struct nfs_pgio_header *,
+ const struct rpc_call_ops *);
+extern int nfs_local_commit(struct nfsd_file *,
+ struct nfs_commit_data *,
+ const struct rpc_call_ops *, int);
+extern bool nfs_server_is_local(const struct nfs_client *clp);
+
+#else /* CONFIG_NFS_LOCALIO */
+static inline void nfs_local_disable(struct nfs_client *clp) {}
+static inline void nfs_local_probe(struct nfs_client *clp) {}
+static inline struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, const fmode_t mode)
+{
+ return NULL;
+}
+static inline int nfs_local_doio(struct nfs_client *clp,
+ struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ return -EINVAL;
+}
+static inline int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return -EINVAL;
+}
+static inline bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return false;
+}
+#endif /* CONFIG_NFS_LOCALIO */
+
/* super.c */
extern const struct super_operations nfs_sops;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
@@ -505,7 +553,6 @@ extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio);
extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio);
-extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
@@ -528,7 +575,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags);
+ int how, int flags,
+ struct nfsd_file *localio);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
new file mode 100644
index 000000000000..c29cdf51c458
--- /dev/null
+++ b/fs/nfs/localio.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS client support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/vfs.h>
+#include <linux/file.h>
+#include <linux/inet.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/module.h>
+#include <linux/bvec.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "internal.h"
+#include "pnfs.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+struct nfs_local_kiocb {
+ struct kiocb kiocb;
+ struct bio_vec *bvec;
+ struct nfs_pgio_header *hdr;
+ struct work_struct work;
+ struct nfsd_file *localio;
+};
+
+struct nfs_local_fsync_ctx {
+ struct nfsd_file *localio;
+ struct nfs_commit_data *data;
+ struct work_struct work;
+ struct kref kref;
+ struct completion *done;
+};
+static void nfs_local_fsync_work(struct work_struct *work);
+
+static bool localio_enabled __read_mostly = true;
+module_param(localio_enabled, bool, 0644);
+
+static inline bool nfs_client_is_local(const struct nfs_client *clp)
+{
+ return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+}
+
+bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return nfs_client_is_local(clp) && localio_enabled;
+}
+EXPORT_SYMBOL_GPL(nfs_server_is_local);
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const u8 *uuid = data;
+
+ encode_opaque_fixed(xdr, uuid, UUID_SIZE);
+}
+
+static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ void *result)
+{
+ /* void return */
+ return 0;
+}
+
+static const struct rpc_procinfo nfs_localio_procedures[] = {
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_encode = localio_xdr_enc_uuidargs,
+ .p_decode = localio_xdr_dec_uuidres,
+ .p_arglen = XDR_QUADLEN(UUID_SIZE),
+ .p_replen = 0,
+ .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_name = "UUID_IS_LOCAL",
+ },
+};
+
+static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
+static const struct rpc_version nfslocalio_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
+ .procs = nfs_localio_procedures,
+ .counts = nfs_localio_counts,
+};
+
+static const struct rpc_version *nfslocalio_version[] = {
+ [1] = &nfslocalio_version1,
+};
+
+extern const struct rpc_program nfslocalio_program;
+static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
+
+const struct rpc_program nfslocalio_program = {
+ .name = "nfslocalio",
+ .number = NFS_LOCALIO_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfslocalio_version),
+ .version = nfslocalio_version,
+ .stats = &nfslocalio_rpcstat,
+};
+
+/*
+ * nfs_local_enable - enable local i/o for an nfs_client
+ */
+static void nfs_local_enable(struct nfs_client *clp)
+{
+ spin_lock(&clp->cl_localio_lock);
+ set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+ trace_nfs_local_enable(clp);
+ spin_unlock(&clp->cl_localio_lock);
+}
+
+/*
+ * nfs_local_disable - disable local i/o for an nfs_client
+ */
+void nfs_local_disable(struct nfs_client *clp)
+{
+ spin_lock(&clp->cl_localio_lock);
+ if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
+ trace_nfs_local_disable(clp);
+ nfs_uuid_invalidate_one_client(&clp->cl_uuid);
+ }
+ spin_unlock(&clp->cl_localio_lock);
+}
+
+/*
+ * nfs_init_localioclient - Initialise an NFS localio client connection
+ */
+static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
+{
+ struct rpc_clnt *rpcclient_localio;
+
+ rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
+ &nfslocalio_program, 1);
+
+ dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
+ __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
+
+ return rpcclient_localio;
+}
+
+static bool nfs_server_uuid_is_local(struct nfs_client *clp)
+{
+ u8 uuid[UUID_SIZE];
+ struct rpc_message msg = {
+ .rpc_argp = &uuid,
+ };
+ struct rpc_clnt *rpcclient_localio;
+ int status;
+
+ rpcclient_localio = nfs_init_localioclient(clp);
+ if (IS_ERR(rpcclient_localio))
+ return false;
+
+ export_uuid(uuid, &clp->cl_uuid.uuid);
+
+ msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
+ status = rpc_call_sync(rpcclient_localio, &msg, 0);
+ dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
+ __func__, status);
+ rpc_shutdown_client(rpcclient_localio);
+
+ /* Server is only local if it initialized required struct members */
+ if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
+ return false;
+
+ return true;
+}
+
+/*
+ * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
+ * - called after alloc_client and init_client (so cl_rpcclient exists)
+ * - this function is idempotent, it can be called for old or new clients
+ */
+void nfs_local_probe(struct nfs_client *clp)
+{
+ /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
+ if (!localio_enabled ||
+ clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
+ nfs_local_disable(clp);
+ return;
+ }
+
+ if (nfs_client_is_local(clp)) {
+ /* If already enabled, disable and re-enable */
+ nfs_local_disable(clp);
+ }
+
+ nfs_uuid_begin(&clp->cl_uuid);
+ if (nfs_server_uuid_is_local(clp))
+ nfs_local_enable(clp);
+ nfs_uuid_end(&clp->cl_uuid);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe);
+
+/*
+ * nfs_local_open_fh - open a local filehandle in terms of nfsd_file
+ *
+ * Returns a pointer to a struct nfsd_file or NULL
+ */
+struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, const fmode_t mode)
+{
+ struct nfsd_file *localio;
+ int status;
+
+ if (!nfs_server_is_local(clp))
+ return NULL;
+ if (mode & ~(FMODE_READ | FMODE_WRITE))
+ return NULL;
+
+ localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
+ cred, fh, mode);
+ if (IS_ERR(localio)) {
+ status = PTR_ERR(localio);
+ trace_nfs_local_open_fh(fh, mode, status);
+ switch (status) {
+ case -ENOMEM:
+ case -ENXIO:
+ case -ENOENT:
+ /* Revalidate localio, will disable if unsupported */
+ nfs_local_probe(clp);
+ }
+ return NULL;
+ }
+ return localio;
+}
+EXPORT_SYMBOL_GPL(nfs_local_open_fh);
+
+static struct bio_vec *
+nfs_bvec_alloc_and_import_pagevec(struct page **pagevec,
+ unsigned int npages, gfp_t flags)
+{
+ struct bio_vec *bvec, *p;
+
+ bvec = kmalloc_array(npages, sizeof(*bvec), flags);
+ if (bvec != NULL) {
+ for (p = bvec; npages > 0; p++, pagevec++, npages--) {
+ p->bv_page = *pagevec;
+ p->bv_len = PAGE_SIZE;
+ p->bv_offset = 0;
+ }
+ }
+ return bvec;
+}
+
+static void
+nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
+{
+ kfree(iocb->bvec);
+ kfree(iocb);
+}
+
+static struct nfs_local_kiocb *
+nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
+ struct nfsd_file *localio, gfp_t flags)
+{
+ struct nfs_local_kiocb *iocb;
+
+ iocb = kmalloc(sizeof(*iocb), flags);
+ if (iocb == NULL)
+ return NULL;
+ iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec,
+ hdr->page_array.npages, flags);
+ if (iocb->bvec == NULL) {
+ kfree(iocb);
+ return NULL;
+ }
+ init_sync_kiocb(&iocb->kiocb, nfs_to->nfsd_f