summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorBenjamin Coddington <bcodding@redhat.com>2024-11-22 10:11:12 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-12-05 14:03:09 +0100
commitbff8460376e53de3f549e90666f9c0e9d2c6c981 (patch)
treea7e881843e9b76ed4fd8b8409e647eb45a86dbd8 /fs
parentfaa4bacfaeed827a4ca8cb8529a3ce65a9e8ef46 (diff)
downloadlinux-bff8460376e53de3f549e90666f9c0e9d2c6c981.tar.gz
linux-bff8460376e53de3f549e90666f9c0e9d2c6c981.tar.bz2
linux-bff8460376e53de3f549e90666f9c0e9d2c6c981.zip
nfs/blocklayout: Limit repeat device registration on failure
[ Upstream commit 614733f9441ed53bb442d4734112ec1e24bd6da7 ] Every pNFS SCSI IO wants to do LAYOUTGET, then within the layout find the device which can drive GETDEVINFO, then finally may need to prep the device with a reservation. This slow work makes a mess of IO latencies if one of the later steps is going to fail for awhile. If we're unable to register a SCSI device, ensure we mark the device as unavailable so that it will timeout and be re-added via GETDEVINFO. This avoids repeated doomed attempts to register a device in the IO path. Add some clarifying comments as well. Fixes: d869da91cccb ("nfs/blocklayout: Fix premature PR key unregistration") Signed-off-by: Benjamin Coddington <bcodding@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0becdec12970..47189476b553 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -571,19 +571,32 @@ retry:
if (!node)
return ERR_PTR(-ENODEV);
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
unsigned long end = jiffies;
unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
}
goto out_put;
}
- if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
goto out_put;
+ }
return node;