diff options
41 files changed, 7224 insertions, 2192 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2ed79b09439a..59cfe71d0b3a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev, sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); for (i = 0; i < num_watchers; i++) { - if (!memcmp(&watchers[i].addr, &locker->info.addr, - sizeof(locker->info.addr)) && + /* + * Ignore addr->type while comparing. This mimics + * entity_addr_t::get_legacy_str() + strcmp(). + */ + if (ceph_addr_equal_no_type(&watchers[i].addr, + &locker->info.addr) && watchers[i].cookie == cookie) { struct rbd_client_id cid = { .gid = le64_to_cpu(watchers[i].name.num), diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 35c83f65475b..950552944436 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { if (ci->i_wrbuffer_ref > 0) { pr_warn_ratelimited( "writepage_start %p %lld forced umount\n", @@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page) struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); - if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { dout(" page %p forced umount\n", page); return ERR_PTR(-EIO); } @@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); for (;;) { - page = grab_cache_page_write_begin(mapping, index, 0); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { r = -ENOMEM; break; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ded4229c314a..255a512f1277 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; - struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc; int removed = 0; + /* 'ci' being NULL means the remove have already occurred */ + if (!ci) { + dout("%s: cap inode is NULL\n", __func__); + return; + } + dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc; + /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); if (ci->i_auth_cap == cap) { - WARN_ON_ONCE(!list_empty(&ci->i_dirty_item)); + WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) && + !mdsc->fsc->blocklisted); ci->i_auth_cap = NULL; } @@ -2746,7 +2754,7 @@ again: goto out_unlock; } - if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { dout("get_cap_refs %p forced umount\n", inode); ret = -EIO; goto out_unlock; @@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session, } if (msg_version >= 8) { - u64 flush_tid; - u32 caller_uid, caller_gid; u32 pool_ns_len; /* version >= 6 */ - ceph_decode_64_safe(&p, end, flush_tid, bad); + ceph_decode_skip_64(&p, end, bad); // flush_tid /* version >= 7 */ - ceph_decode_32_safe(&p, end, caller_uid, bad); - ceph_decode_32_safe(&p, end, caller_gid, bad); + ceph_decode_skip_32(&p, end, bad); // caller_uid + ceph_decode_skip_32(&p, end, bad); // caller_gid /* version >= 8 */ ceph_decode_32_safe(&p, end, pool_ns_len, bad); if (pool_ns_len > 0) { @@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, } if (msg_version >= 11) { - u32 flags; /* version >= 10 */ - ceph_decode_32_safe(&p, end, flags, bad); + ceph_decode_skip_32(&p, end, bad); // flags /* version >= 11 */ extra_info.dirstat_valid = true; ceph_decode_64_safe(&p, end, extra_info.nfiles, bad); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 7a8fbe3e4751..66989c880adb 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr) return 0; } +static int status_show(struct seq_file *s, void *p) +{ + struct ceph_fs_client *fsc = s->private; + struct ceph_entity_inst *inst = &fsc->client->msgr.inst; + struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client); + + seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name), + ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce)); + seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false"); + + return 0; +} + DEFINE_SHOW_ATTRIBUTE(mdsmap); DEFINE_SHOW_ATTRIBUTE(mdsc); DEFINE_SHOW_ATTRIBUTE(caps); DEFINE_SHOW_ATTRIBUTE(mds_sessions); DEFINE_SHOW_ATTRIBUTE(metric); +DEFINE_SHOW_ATTRIBUTE(status); /* @@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) fsc->client->debugfs_dir, fsc, &caps_fops); + + fsc->debugfs_status = debugfs_create_file("status", + 0400, + fsc->client->debugfs_dir, + fsc, + &status_fops); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a4d48370b2b3..858ee7362ff5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, op = CEPH_MDS_OP_RENAMESNAP; else return -EROFS; - } else if (old_dir != new_dir) { - err = ceph_quota_check_rename(mdsc, d_inode(old_dentry), - new_dir); - if (err) - return err; } + /* don't allow cross-quota renames */ + if ((old_dir != new_dir) && + (!ceph_quota_is_same_realm(old_dir, new_dir))) + return -EXDEV; dout("rename dir %p dentry %p to dir %p dentry %p\n", old_dir, old_dentry, new_dir, new_dentry); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 526faf4778ce..adc8fc3c5d85 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1315,15 +1315,10 @@ retry_lookup: } if (rinfo->head->is_target) { - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - - in = ceph_get_inode(sb, tvino); - if (IS_ERR(in)) { - err = PTR_ERR(in); - goto done; - } + /* Should be filled in by handle_reply */ + BUG_ON(!req->r_target_inode); + in = req->r_target_inode; err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && @@ -1333,11 +1328,13 @@ retry_lookup: if (err < 0) { pr_err("ceph_fill_inode badness %p %llx.%llx\n", in, ceph_vinop(in)); + req->r_target_inode = NULL; if (in->i_state & I_NEW) discard_new_inode(in); + else + iput(in); goto done; } - req->r_target_inode = in; if (in->i_state & I_NEW) unlock_new_inode(in); } @@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct dentry *dn; struct inode *in; int err = 0, skipped = 0, ret, i; - struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; - u32 frag = le32_to_cpu(rhead->args.readdir.frag); + u32 frag = le32_to_cpu(req->r_args.readdir.frag); u32 last_hash = 0; u32 fpos_offset; struct ceph_readdir_cache_control cache_ctl = {}; @@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, } else if (rinfo->offset_hash) { /* mds understands offset_hash */ WARN_ON_ONCE(req->r_readdir_offset != 2); - last_hash = le32_to_cpu(rhead->args.readdir.offset_hash); + last_hash = le32_to_cpu(req->r_args.readdir.offset_hash); } } @@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode) mutex_lock(&ci->i_truncate_mutex); - if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n", inode, ceph_ino(inode)); mapping_set_error(inode->i_mapping, -EIO); @@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask) } /* Craft a mask of needed caps given a set of requested statx attrs. */ -static int statx_to_caps(u32 want) +static int statx_to_caps(u32 want, umode_t mode) { int mask = 0; if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) mask |= CEPH_CAP_AUTH_SHARED; - if (want & (STATX_NLINK|STATX_CTIME)) - mask |= CEPH_CAP_LINK_SHARED; + if (want & (STATX_NLINK|STATX_CTIME)) { + /* + * The link count for directories depends on inode->i_subdirs, + * and that is only updated when Fs caps are held. + */ + if (S_ISDIR(mode)) + mask |= CEPH_CAP_FILE_SHARED; + else + mask |= CEPH_CAP_LINK_SHARED; + } if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| STATX_BLOCKS)) @@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat, /* Skip the getattr altogether if we're asked not to sync */ if (!(flags & AT_STATX_DONT_SYNC)) { - err = ceph_do_getattr(inode, statx_to_caps(request_mask), - flags & AT_STATX_FORCE_SYNC); + err = ceph_do_getattr(inode, + statx_to_caps(request_mask, inode->i_mode), + flags & AT_STATX_FORCE_SYNC); if (err) return err; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 048a435a29be..fa8a847743d0 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = { .fl_release_private = ceph_fl_release_lock, }; -/** +/* * Implement fcntl and flock locking functions. */ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, @@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl) return 1; } -/** +/* * Attempt to set an fcntl lock. * For now, this just goes away to the server. Later it may be more awesome. */ @@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, return err; } -/** +/* * Encode the flock and fcntl locks for the given inode into the ceph_filelock * array. Must be called with inode->i_lock already held. * If we encounter more of a specific lock type than expected, return -ENOSPC. @@ -458,7 +458,7 @@ fail: return err; } -/** +/* * Copy the encoded flock and fcntl locks into the pagelist. * Format is: #fcntl locks, sequential fcntl locks, #flock locks, * sequential flock locks. diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8f1d7500a7ec..98c15ff2e599 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end, /* Malformed reply? */ info->has_create_ino = false; } else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) { - u8 struct_v, struct_compat; - u32 len; - info->has_create_ino = true; - ceph_decode_8_safe(p, end, struct_v, bad); - ceph_decode_8_safe(p, end, struct_compat, bad); - ceph_decode_32_safe(p, end, len, bad); + /* struct_v, struct_compat, and len */ + ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad); ceph_decode_64_safe(p, end, info->ino, bad); ret = ceph_parse_deleg_inos(p, end, s); if (ret) @@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref) } kfree(req->r_path1); kfree(req->r_path2); + put_cred(req->r_cred); if (req->r_pagelist) ceph_pagelist_release(req->r_pagelist); put_request_session(req); @@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc, ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); - req->r_uid = current_fsuid(); - req->r_gid = current_fsgid(); + req->r_cred = get_current_cred(); if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) mdsc->oldest_tid = req->r_tid; @@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 { struct ceph_msg *msg; struct ceph_mds_session_head *h; - int i = -1; + int i; int extra_bytes = 0; int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; @@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_cap_flush *cf; struct ceph_mds_client *mdsc = fsc->mdsc; - if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) { if (inode->i_data.nrpages > 0) invalidate = true; if (ci->i_wrbuffer_ref > 0) @@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, /* * called under mdsc->mutex */ -static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, +static struct ceph_msg *create_request_message(struct ceph_mds_session *session, struct ceph_mds_request *req, - int mds, bool drop_cap_releases) + bool drop_cap_releases) { + int mds = session->s_mds; + struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_msg *msg; - struct ceph_mds_request_head *head; + struct ceph_mds_request_head_old *head; const char *path1 = NULL; const char *path2 = NULL; u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; bool freepath1 = false, freepath2 = false; - int len; + int len, i; u16 releases; void *p, *end; int ret; + bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); ret = set_request_path_attr(req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, @@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free1; } - len = sizeof(*head) + - pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + + if (legacy) { + /* Old style */ + len = sizeof(*head); + } else { + /* New style: add gid_list and any later fields */ + len = sizeof(struct ceph_mds_request_head) + sizeof(u32) + + (sizeof(u64) * req->r_cred->group_info->ngroups); + } + + len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * (!!req->r_inode_drop + !!req->r_dentry_drop + !!req->r_old_inode_drop + !!req->r_old_dentry_drop); + if (req->r_dentry_drop) len += pathlen1; if (req->r_old_dentry_drop) @@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } - msg->hdr.version = cpu_to_le16(2); msg->hdr.tid = cpu_to_le64(req->r_tid); - head = msg->front.iov_base; - p = msg->front.iov_base + sizeof(*head); + /* + * The old ceph_mds_request_header didn't contain a version field, and + * one was added when we moved the message version from 3->4. + */ + if (legacy) { + msg->hdr.version = cpu_to_le16(3); + head = msg->front.iov_base; + p = msg->front.iov_base + sizeof(*head); + } else { + struct ceph_mds_request_head *new_head = msg->front.iov_base; + + msg->hdr.version = cpu_to_le16(4); + new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid; + p = msg->front.iov_base + sizeof(*new_head); + } + end = msg->front.iov_base + msg->front.iov_len; head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); head->op = cpu_to_le32(req->r_op); - head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid)); - head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid)); + head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, + req->r_cred->fsuid)); + head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, + req->r_cred->fsgid)); head->ino = cpu_to_le64(req->r_deleg_ino); head->args = req->r_args; @@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ceph_encode_copy(&p, &ts, sizeof(ts)); } + /* gid list */ + if (!legacy) { + ceph_encode_32(&p, req->r_cred->group_info->ngroups); + for (i = 0; i < req->r_cred->group_info->ngroups; i++) + ceph_encode_64(&p, from_kgid(&init_user_ns, + req->r_cred->group_info->gid[i])); + } + if (WARN_ON_ONCE(p > end)) { ceph_msg_put(msg); msg = ERR_PTR(-ERANGE); @@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc, complete_all(&req->r_completion); } +static struct ceph_mds_request_head_old * +find_old_request_head(void *p, u64 features) +{ + bool legacy = !(features & CEPH_FEATURE_FS_BTIME); + struct ceph_mds_request_head *new_head; + + if (legacy) + return (struct ceph_mds_request_head_old *)p; + new_head = (struct ceph_mds_request_head *)p; + return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid; +} + /* * called under mdsc->mutex */ -static int __prepare_send_request(struct ceph_mds_client *mdsc, +static int __prepare_send_request(struct ceph_mds_session *session, struct ceph_mds_request *req, - int mds, bool drop_cap_releases) + bool drop_cap_releases) { - struct ceph_mds_request_head *rhead; + int mds = session->s_mds; + struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_mds_request_head_old *rhead; struct ceph_msg *msg; int flags = 0; @@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; + /* * Replay. Do not regenerate message (and rebuild * paths, etc.); just use the original message. @@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, * d_move mangles the src name. */ msg = req->r_request; - rhead = msg->front.iov_base; + rhead = find_old_request_head(msg->front.iov_base, + session->s_con.peer_features); flags = le32_to_cpu(rhead->flags); flags |= CEPH_MDS_FLAG_REPLAY; @@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, ceph_msg_put(req->r_request); req->r_request = NULL; } - msg = create_request_message(mdsc, req, mds, drop_cap_releases); + msg = create_request_message(session, req, drop_cap_releases); if (IS_ERR(msg)) { req->r_err = PTR_ERR(msg); return PTR_ERR(msg); } req->r_request = msg; - rhead = msg->front.iov_base; + rhead = find_old_request_head(msg->front.iov_base, + session->s_con.peer_features); rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_REPLAY; @@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, /* * called under mdsc->mutex */ -static int __send_request(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, +static int __send_request(struct ceph_mds_session *session, struct ceph_mds_request *req, bool drop_cap_releases) { int err; - err = __prepare_send_request(mdsc, req, session->s_mds, - drop_cap_releases); + err = __prepare_send_request(session, req, drop_cap_releases); if (!err) { ceph_msg_get(req->r_request); ceph_con_send(&session->s_con, req->r_request); @@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc, ceph_session_state_name(session->s_state)); if (session->s_state != CEPH_MDS_SESSION_OPEN && session->s_state != CEPH_MDS_SESSION_HUNG) { - if (session->s_state == CEPH_MDS_SESSION_REJECTED) { - err = -EACCES; - goto out_session; - } /* * We cannot queue async requests since the caps and delegated * inodes are bound to the session. Just return -EJUKEBOX and @@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc, err = -EJUKEBOX; goto out_session; } + + /* + * If the session has been REJECTED, then return a hard error, + * unless it's a CLEANRECOVER mount, in which case we'll queue + * it to the mdsc queue. + */ + if (session->s_state == CEPH_MDS_SESSION_REJECTED) { + if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER)) + list_add(&req->r_wait, &mdsc->waiting_for_map); + else + err = -EACCES; + goto out_session; + } + if (session->s_state == CEPH_MDS_SESSION_NEW || session->s_state == CEPH_MDS_SESSION_CLOSING) { err = __open_session(mdsc, session); @@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc, if (req->r_request_started == 0) /* note request start time */ req->r_request_started = jiffies; - err = __send_request(mdsc, session, req, false); + err = __send_request(session, req, false); out_session: ceph_put_mds_session(session); @@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features); mutex_unlock(&mdsc->mutex); + /* Must find target inode outside of mutexes to avoid deadlocks */ + if ((err >= 0) && rinfo->head->is_target) { + struct inode *in; + struct ceph_vino tvino = { + .ino = le64_to_cpu(rinfo->targeti.in->ino), + .snap = le64_to_cpu(rinfo->targeti.in->snapid) + }; + + in = ceph_get_inode(mdsc->fsc->sb, tvino); + if (IS_ERR(in)) { + err = PTR_ERR(in); + mutex_lock(&session->s_mutex); + goto out_err; + } + req->r_target_inode = in; + } + mutex_lock(&session->s_mutex); if (err < 0) { pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); @@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) - __send_request(mdsc, session, req, true); + __send_request(session, req, true); /* * also re-send old requests when MDS enters reconnect stage. So that MDS @@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, ceph_mdsc_release_dir_caps_no_check(req); - __send_request(mdsc, session, req, true); + __send_request(session, req, true); } mutex_unlock(&mdsc->mutex); } @@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (!READ_ONCE(fsc->blocklisted)) return; - if (fsc->last_auto_reconnect && - time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30)) - return; - pr_info("auto reconnect after blocklisted\n"); - fsc->last_auto_reconnect = jiffies; ceph_force_reconnect(fsc->sb); } @@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; - if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; u32 epoch; - u32 map_len; u32 num_fs; u32 mount_fscid = (u32)-1; - u8 struct_v, struct_cv; int err = -EINVAL; ceph_decode_need(&p, end, sizeof(u32), bad); @@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) dout("handle_fsmap epoch %u\n", epoch); - ceph_decode_need(&p, end, 2 + sizeof(u32), bad); - struct_v = ceph_decode_8(&p); - struct_cv = ceph_decode_8(&p); - map_len = ceph_decode_32(&p); - - ceph_decode_need(&p, end, sizeof(u32) * 3, bad); - p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */ + /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */ + ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad); - num_fs = ceph_decode_32(&p); + ceph_decode_32_safe(&p, end, num_fs, bad); while (num_fs-- > 0) { void *info_p, *info_end; u32 info_len; - u8 info_v, info_cv; u32 fscid, namelen; ceph_decode_need(&p, end, 2 + sizeof(u32), bad); - info_v = ceph_decode_8(&p); - info_cv = ceph_decode_8(&p); + p += 2; // info_v, info_cv info_len = ceph_decode_32(&p); ceph_decode_need(&p, end, info_len, bad); info_p = p; @@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; } - newmap = ceph_mdsmap_decode(&p, end); + newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; @@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con |
