From 3f19b2ab97a97b413c24b66c67ae16daa4f56c35 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:16 +0000 Subject: vfs, afs, ext4: Make the inode hash table RCU searchable Make the inode hash table RCU searchable so that searches that want to access or modify an inode without taking a ref on that inode can do so without taking the inode hash table lock. The main thing this requires is some RCU annotation on the list manipulation operations. Inodes are already freed by RCU in most cases. Users of this interface must take care as the inode may be still under construction or may be being torn down around them. There are at least three instances where this can be of use: (1) Testing whether the inode number iunique() is going to return is currently unique (the iunique_lock is still held). (2) Ext4 date stamp updating. (3) AFS callback breaking. Signed-off-by: David Howells Acked-by: Konstantin Khlebnikov cc: linux-ext4@vger.kernel.org cc: linux-afs@lists.infradead.org --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 45cc10cdf6dd..5f9b2bb4b44f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3070,6 +3070,9 @@ extern struct inode *find_inode_nowait(struct super_block *, int (*match)(struct inode *, unsigned long, void *), void *data); +extern struct inode *find_inode_rcu(struct super_block *, unsigned long, + int (*)(struct inode *, void *), void *); +extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From 977e5f8ed0ab2786755f8d2a96b78a3c7320f7c4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Apr 2020 17:31:26 +0100 Subject: afs: Split the usage count on struct afs_server Split the usage count on the afs_server struct to have an active count that registers who's actually using it separately from the reference count on the object. This allows a future patch to dispatch polling probes without advancing the "unuse" time into the future each time we emit a probe, which would otherwise prevent unused server records from expiring. Included in this: (1) The latter part of afs_destroy_server() in which the RCU destruction of afs_server objects is invoked and the outstanding server count is decremented is split out into __afs_put_server(). (2) afs_put_server() now calls __afs_put_server() rather then setting the management timer. (3) The calls begun by afs_fs_give_up_all_callbacks() and afs_fs_get_capabilities() can now take a ref on the server record, so afs_destroy_server() can just drop its ref and needn't wait for the completion of these calls. They'll put the ref when they're done. (4) Because of (3), afs_fs_probe_done() no longer needs to wake up afs_destroy_server() with server->probe_outstanding. (5) afs_gc_servers can be simplified. It only needs to check if server->active is 0 rather than playing games with the refcount. (6) afs_manage_servers() can propose a server for gc if usage == 0 rather than if ref == 1. The gc is effected by (5). Signed-off-by: David Howells --- include/trace/events/afs.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index c612cabbc378..f9691f69b2d6 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -33,6 +33,7 @@ enum afs_server_trace { afs_server_trace_destroy, afs_server_trace_free, afs_server_trace_gc, + afs_server_trace_get_by_addr, afs_server_trace_get_by_uuid, afs_server_trace_get_caps, afs_server_trace_get_install, @@ -241,6 +242,7 @@ enum afs_cb_break_reason { EM(afs_server_trace_destroy, "DESTROY ") \ EM(afs_server_trace_free, "FREE ") \ EM(afs_server_trace_gc, "GC ") \ + EM(afs_server_trace_get_by_addr, "GET addr ") \ EM(afs_server_trace_get_by_uuid, "GET uuid ") \ EM(afs_server_trace_get_caps, "GET caps ") \ EM(afs_server_trace_get_install, "GET inst ") \ @@ -1271,26 +1273,30 @@ TRACE_EVENT(afs_cb_miss, ); TRACE_EVENT(afs_server, - TP_PROTO(struct afs_server *server, int usage, enum afs_server_trace reason), + TP_PROTO(struct afs_server *server, int ref, int active, + enum afs_server_trace reason), - TP_ARGS(server, usage, reason), + TP_ARGS(server, ref, active, reason), TP_STRUCT__entry( __field(unsigned int, server ) - __field(int, usage ) + __field(int, ref ) + __field(int, active ) __field(int, reason ) ), TP_fast_assign( __entry->server = server->debug_id; - __entry->usage = usage; + __entry->ref = ref; + __entry->active = active; __entry->reason = reason; ), - TP_printk("s=%08x %s u=%d", + TP_printk("s=%08x %s u=%d a=%d", __entry->server, __print_symbolic(__entry->reason, afs_server_traces), - __entry->usage) + __entry->ref, + __entry->active) ); #endif /* _TRACE_AFS_H */ -- cgit v1.2.3 From f6cbb368bcb0bc4fa7c11554d5293658bb4b26a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Apr 2020 15:10:00 +0100 Subject: afs: Actively poll fileservers to maintain NAT or firewall openings When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch Signed-off-by: David Howells --- include/trace/events/afs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f9691f69b2d6..19a07fbf35df 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -38,10 +38,12 @@ enum afs_server_trace { afs_server_trace_get_caps, afs_server_trace_get_install, afs_server_trace_get_new_cbi, + afs_server_trace_get_probe, afs_server_trace_give_up_cb, afs_server_trace_put_call, afs_server_trace_put_cbi, afs_server_trace_put_find_rsq, + afs_server_trace_put_probe, afs_server_trace_put_slist, afs_server_trace_put_slist_isort, afs_server_trace_put_uuid_rsq, @@ -247,10 +249,12 @@ enum afs_cb_break_reason { EM(afs_server_trace_get_caps, "GET caps ") \ EM(afs_server_trace_get_install, "GET inst ") \ EM(afs_server_trace_get_new_cbi, "GET cbi ") \ + EM(afs_server_trace_get_probe, "GET probe") \ EM(afs_server_trace_give_up_cb, "giveup-cb") \ EM(afs_server_trace_put_call, "PUT call ") \ EM(afs_server_trace_put_cbi, "PUT cbi ") \ EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \ + EM(afs_server_trace_put_probe, "PUT probe") \ EM(afs_server_trace_put_slist, "PUT slist") \ EM(afs_server_trace_put_slist_isort, "PUT isort") \ EM(afs_server_trace_put_uuid_rsq, "PUT u-req") \ -- cgit v1.2.3 From 7126ead910aa9fcc9e16e9e7a8c9179658261f1d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2020 16:49:08 +0100 Subject: afs: Remove the error argument from afs_protocol_error() Remove the error argument from afs_protocol_error() as it's always -EBADMSG. Signed-off-by: David Howells --- include/trace/events/afs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 19a07fbf35df..a6d8a9891164 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -994,24 +994,22 @@ TRACE_EVENT(afs_edit_dir, ); TRACE_EVENT(afs_protocol_error, - TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause), + TP_PROTO(struct afs_call *call, enum afs_eproto_cause cause), - TP_ARGS(call, error, cause), + TP_ARGS(call, cause), TP_STRUCT__entry( __field(unsigned int, call ) - __field(int, error ) __field(enum afs_eproto_cause, cause ) ), TP_fast_assign( __entry->call = call ? call->debug_id : 0; - __entry->error = error; __entry->cause = cause; ), - TP_printk("c=%08x r=%d %s", - __entry->call, __entry->error, + TP_printk("c=%08x %s", + __entry->call, __print_symbolic(__entry->cause, afs_eproto_causes)) ); -- cgit v1.2.3 From e49c7b2f6de7ff81ca34c56e4eeb4fa740c099f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Apr 2020 20:51:51 +0100 Subject: afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells --- include/trace/events/afs.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index a6d8a9891164..f4d66919fb22 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -642,7 +642,7 @@ TRACE_EVENT(afs_make_fs_calli, TRACE_EVENT(afs_make_fs_call1, TP_PROTO(struct afs_call *call, const struct afs_fid *fid, - const char *name), + const struct qstr *name), TP_ARGS(call, fid, name), @@ -654,8 +654,7 @@ TRACE_EVENT(afs_make_fs_call1, ), TP_fast_assign( - int __len = strlen(name); - __len = min(__len, 23); + unsigned int __len = min_t(unsigned int, name->len, 23); __entry->call = call->debug_id; __entry->op = call->operation_ID; if (fid) { @@ -665,7 +664,7 @@ TRACE_EVENT(afs_make_fs_call1, __entry->fid.vnode = 0; __entry->fid.unique = 0; } - memcpy(__entry->name, name, __len); + memcpy(__entry->name, name->name, __len); __entry->name[__len] = 0; ), @@ -680,7 +679,7 @@ TRACE_EVENT(afs_make_fs_call1, TRACE_EVENT(afs_make_fs_call2, TP_PROTO(struct afs_call *call, const struct afs_fid *fid, - const char *name, const char *name2), + const struct qstr *name, const struct qstr *name2), TP_ARGS(call, fid, name, name2), @@ -693,10 +692,8 @@ TRACE_EVENT(afs_make_fs_call2, ), TP_fast_assign( - int __len = strlen(name); - int __len2 = strlen(name2); - __len = min(__len, 23); - __len2 = min(__len2, 23); + unsigned int __len = min_t(unsigned int, name->len, 23); + unsigned int __len2 = min_t(unsigned int, name2->len, 23); __entry->call = call->debug_id; __entry->op = call->operation_ID; if (fid) { @@ -706,9 +703,9 @@ TRACE_EVENT(afs_make_fs_call2, __entry->fid.vnode = 0; __entry->fid.unique = 0; } - memcpy(__entry->name, name, __len); + memcpy(__entry->name, name->name, __len); __entry->name[__len] = 0; - memcpy(__entry->name2, name2, __len2); + memcpy(__entry->name2, name2->name, __len2); __entry->name2[__len2] = 0; ), -- cgit v1.2.3 From c3e9f888263bb4df11cbd623ceced02081cb2f9f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Apr 2020 17:26:41 +0100 Subject: afs: Implement client support for the YFSVL.GetCellName RPC op Implement client support for the YFSVL.GetCellName RPC operation by which YFS permits the canonical cell name to be queried from a VL server. Signed-off-by: David Howells --- include/trace/events/afs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f4d66919fb22..f320b3ad54da 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -111,6 +111,7 @@ enum afs_vl_operation { afs_VL_GetEntryByNameU = 527, /* AFS Get Vol Entry By Name operation ID */ afs_VL_GetAddrsU = 533, /* AFS Get FS server addresses */ afs_YFSVL_GetEndpoints = 64002, /* YFS Get FS & Vol server addresses */ + afs_YFSVL_GetCellName = 64014, /* YFS Get actual cell name */ afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */ }; @@ -143,6 +144,7 @@ enum afs_eproto_cause { afs_eproto_bad_status, afs_eproto_cb_count, afs_eproto_cb_fid_count, + afs_eproto_cellname_len, afs_eproto_file_type, afs_eproto_ibulkst_cb_count, afs_eproto_ibulkst_count, @@ -316,6 +318,7 @@ enum afs_cb_break_reason { EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \ EM(afs_VL_GetAddrsU, "VL.GetAddrsU") \ EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \ + EM(afs_YFSVL_GetCellName, "YFSVL.GetCellName") \ E_(afs_VL_GetCapabilities, "VL.GetCapabilities") #define afs_edit_dir_ops \ @@ -345,6 +348,7 @@ enum afs_cb_break_reason { EM(afs_eproto_bad_status, "BadStatus") \ EM(afs_eproto_cb_count, "CbCount") \ EM(afs_eproto_cb_fid_count, "CbFidCount") \ + EM(afs_eproto_cellname_len, "CellNameLen") \ EM(afs_eproto_file_type, "FileTYpe") \ EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \ EM(afs_eproto_ibulkst_count, "IBS.FidCount") \ -- cgit v1.2.3 From cca37d45d547434144409ae648a19b7eb6db5eb4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Apr 2020 17:02:04 +0100 Subject: afs: Add a tracepoint to track the lifetime of the afs_volume struct Add a tracepoint to track the lifetime of the afs_volume struct. Signed-off-by: David Howells --- include/trace/events/afs.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f320b3ad54da..5f0c1cf1ea13 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -50,6 +50,23 @@ enum afs_server_trace { afs_server_trace_update, }; +enum afs_volume_trace { + afs_volume_trace_alloc, + afs_volume_trace_free, + afs_volume_trace_get_alloc_sbi, + afs_volume_trace_get_cell_insert, + afs_volume_trace_get_new_op, + afs_volume_trace_get_query_alias, + afs_volume_trace_put_cell_dup, + afs_volume_trace_put_cell_root, + afs_volume_trace_put_destroy_sbi, + afs_volume_trace_put_free_fc, + afs_volume_trace_put_put_op, + afs_volume_trace_put_query_alias, + afs_volume_trace_put_validate_fc, + afs_volume_trace_remove, +}; + enum afs_fs_operation { afs_FS_FetchData = 130, /* AFS Fetch file data */ afs_FS_FetchACL = 131, /* AFS Fetch file ACL */ @@ -262,6 +279,22 @@ enum afs_cb_break_reason { EM(afs_server_trace_put_uuid_rsq, "PUT u-req") \ E_(afs_server_trace_update, "UPDATE") +#define afs_volume_traces \ + EM(afs_volume_trace_alloc, "ALLOC ") \ + EM(afs_volume_trace_free, "FREE ") \ + EM(afs_volume_trace_get_alloc_sbi, "GET sbi-alloc ") \ + EM(afs_volume_trace_get_cell_insert, "GET cell-insrt") \ + EM(afs_volume_trace_get_new_op, "GET op-new ") \ + EM(afs_volume_trace_get_query_alias, "GET cell-alias") \ + EM(afs_volume_trace_put_cell_dup, "PUT cell-dup ") \ + EM(afs_volume_trace_put_cell_root, "PUT cell-root ") \ + EM(afs_volume_trace_put_destroy_sbi, "PUT sbi-destry") \ + EM(afs_volume_trace_put_free_fc, "PUT fc-free ") \ + EM(afs_volume_trace_put_put_op, "PUT op-put ") \ + EM(afs_volume_trace_put_query_alias, "PUT cell-alias") \ + EM(afs_volume_trace_put_validate_fc, "PUT fc-validat") \ + E_(afs_volume_trace_remove, "REMOVE ") + #define afs_fs_operations \ EM(afs_FS_FetchData, "FS.FetchData") \ EM(afs_FS_FetchStatus, "FS.FetchStatus") \ @@ -1302,6 +1335,29 @@ TRACE_EVENT(afs_server, __entry->active) ); +TRACE_EVENT(afs_volume, + TP_PROTO(afs_volid_t vid, int ref, enum afs_volume_trace reason), + + TP_ARGS(vid, ref, reason), + + TP_STRUCT__entry( + __field(afs_volid_t, vid ) + __field(int, ref ) + __field(enum afs_volume_trace, reason ) + ), + + TP_fast_assign( + __entry->vid = vid; + __entry->ref = ref; + __entry->reason = reason; + ), + + TP_printk("V=%llx %s u=%d", + __entry->vid, + __print_symbolic(__entry->reason, afs_volume_traces), + __entry->ref) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3