From d927a595ab2f6de4e10b3e3962bc70ab61d8f907 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 25 Sep 2025 12:45:12 +0200 Subject: ceph: add trace points to the MDS client This patch adds trace points to the Ceph filesystem MDS client: - request submission (CEPH_MSG_CLIENT_REQUEST) and completion (CEPH_MSG_CLIENT_REPLY) - capabilities (CEPH_MSG_CLIENT_CAPS) These are the central pieces that are useful for analyzing MDS latency/performance problems from the client's perspective. In the long run, all doutc() calls should be replaced with tracepoints. This way, the Ceph filesystem can be traced at any time (without spamming the kernel log). Additionally, trace points can be used in BPF programs (which can even dereference the pointer parameters and extract more values). Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 ++++ fs/ceph/mds_client.c | 20 ++++++++++++++++++-- fs/ceph/super.c | 3 +++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b1a8ff612c41..2f663972da99 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -18,6 +18,7 @@ #include "crypto.h" #include #include +#include /* * Capability management @@ -4452,6 +4453,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, session->s_mds, ceph_cap_op_name(op), vino.ino, vino.snap, inode, seq, issue_seq, mseq); + trace_ceph_handle_caps(mdsc, session, op, &vino, ceph_inode(inode), + seq, issue_seq, mseq); + mutex_lock(&session->s_mutex); if (!inode) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1740047aef0f..7e4eab824dae 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -24,6 +24,7 @@ #include #include #include +#include #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) @@ -3288,6 +3289,8 @@ static void complete_request(struct ceph_mds_client *mdsc, { req->r_end_latency = ktime_get(); + trace_ceph_mdsc_complete_request(mdsc, req); + if 
(req->r_callback) req->r_callback(mdsc, req); complete_all(&req->r_completion); @@ -3419,6 +3422,8 @@ static int __send_request(struct ceph_mds_session *session, { int err; + trace_ceph_mdsc_send_request(session, req); + err = __prepare_send_request(session, req, drop_cap_releases); if (!err) { ceph_msg_get(req->r_request); @@ -3470,6 +3475,8 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (mdsc->mdsmap->m_epoch == 0) { doutc(cl, "no mdsmap, waiting for map\n"); + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_no_mdsmap); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3491,6 +3498,8 @@ static void __do_request(struct ceph_mds_client *mdsc, goto finish; } doutc(cl, "no mds or not active, waiting for map\n"); + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_no_active_mds); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3536,9 +3545,11 @@ static void __do_request(struct ceph_mds_client *mdsc, * it to the mdsc queue. 
*/ if (session->s_state == CEPH_MDS_SESSION_REJECTED) { - if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER)) + if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER)) { + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_rejected); list_add(&req->r_wait, &mdsc->waiting_for_map); - else + } else err = -EACCES; goto out_session; } @@ -3552,6 +3563,8 @@ static void __do_request(struct ceph_mds_client *mdsc, if (random) req->r_resend_mds = mds; } + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_session); list_add(&req->r_wait, &session->s_waiting); goto out_session; } @@ -3652,6 +3665,7 @@ static void __wake_requests(struct ceph_mds_client *mdsc, list_del_init(&req->r_wait); doutc(cl, " wake request %p tid %llu\n", req, req->r_tid); + trace_ceph_mdsc_resume_request(mdsc, req); __do_request(mdsc, req); } } @@ -3678,6 +3692,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) req->r_session->s_mds == mds) { doutc(cl, " kicking tid %llu\n", req->r_tid); list_del_init(&req->r_wait); + trace_ceph_mdsc_resume_request(mdsc, req); __do_request(mdsc, req); } } @@ -3724,6 +3739,7 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, doutc(cl, "submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); + trace_ceph_mdsc_submit_request(mdsc, req); __do_request(mdsc, req); err = req->r_err; mutex_unlock(&mdsc->mutex); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f6bf24b5c683..7c1c1dac320d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -30,6 +30,9 @@ #include +#define CREATE_TRACE_POINTS +#include + static DEFINE_SPINLOCK(ceph_fsc_lock); static LIST_HEAD(ceph_fsc_list); -- cgit v1.2.3 From 87327d4eaaeafd3a2f6a1ffe84d6d25a96a2495d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Nov 2025 15:44:04 +0100 Subject: ceph: Amend checking to fix `make W=1` build breakage In a few cases the code compares 32-bit value 
to a SIZE_MAX-derived constant which is much higher than that value on 64-bit platforms. Clang, in particular, is not happy about this: fs/ceph/snap.c:377:10: error: result of comparison of constant 2305843009213693948 with expression of type 'u32' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 377 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) | ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by casting to size_t. Note that a possible replacement of SIZE_MAX by U32_MAX may lead to behaviour changes in corner cases. Signed-off-by: Andy Shevchenko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c65f2b202b2b..521507ea8260 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -374,7 +374,7 @@ static int build_snap_context(struct ceph_mds_client *mdsc, /* alloc new snap context */ err = -ENOMEM; - if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) + if ((size_t)num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) goto fail; snapc = ceph_create_snap_context(num, GFP_NOFS); if (!snapc) -- cgit v1.2.3 From 3680fc138e31d8a9e8e344d72c6692e921dbb4a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Dec 2025 22:51:04 -0800 Subject: ceph: stop selecting CRC32, CRYPTO, and CRYPTO_AES None of the CEPH_FS code directly requires CRC32, CRYPTO, or CRYPTO_AES. These options do get selected indirectly anyway via CEPH_LIB, which does need them, but there is no need for CEPH_FS to select them too. 
Signed-off-by: Eric Biggers Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 3e7def3d31c1..3d64a316ca31 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,9 +3,6 @@ config CEPH_FS tristate "Ceph distributed file system" depends on INET select CEPH_LIB - select CRC32 - select CRYPTO_AES - select CRYPTO select NETFS_SUPPORT select FS_ENCRYPTION_ALGS if FS_ENCRYPTION default n -- cgit v1.2.3