summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/block/rbd.c 8
-rw-r--r-- fs/ceph/addr.c 6
-rw-r--r-- fs/ceph/caps.c 27
-rw-r--r-- fs/ceph/debugfs.c 20
-rw-r--r-- fs/ceph/dir.c 9
-rw-r--r-- fs/ceph/inode.c 41
-rw-r--r-- fs/ceph/locks.c 8
-rw-r--r-- fs/ceph/mds_client.c 280
-rw-r--r-- fs/ceph/mds_client.h 3
-rw-r--r-- fs/ceph/mdsmap.c 25
-rw-r--r-- fs/ceph/metric.c 18
-rw-r--r-- fs/ceph/metric.h 14
-rw-r--r-- fs/ceph/quota.c 58
-rw-r--r-- fs/ceph/super.c 14
-rw-r--r-- fs/ceph/super.h 7
-rw-r--r-- fs/ceph/xattr.c 81
-rw-r--r-- include/linux/ceph/auth.h 68
-rw-r--r-- include/linux/ceph/ceph_features.h 11
-rw-r--r-- include/linux/ceph/ceph_fs.h 44
-rw-r--r-- include/linux/ceph/decode.h 8
-rw-r--r-- include/linux/ceph/libceph.h 11
-rw-r--r-- include/linux/ceph/mdsmap.h 2
-rw-r--r-- include/linux/ceph/messenger.h 285
-rw-r--r-- include/linux/ceph/msgr.h 66
-rw-r--r-- include/linux/ceph/osdmap.h 4
-rw-r--r-- net/ceph/Kconfig 3
-rw-r--r-- net/ceph/Makefile 3
-rw-r--r-- net/ceph/auth.c 408
-rw-r--r-- net/ceph/auth_none.c 5
-rw-r--r-- net/ceph/auth_x.c 298
-rw-r--r-- net/ceph/auth_x_protocol.h 3
-rw-r--r-- net/ceph/ceph_common.c 63
-rw-r--r-- net/ceph/ceph_strings.c 28
-rw-r--r-- net/ceph/crypto.h 3
-rw-r--r-- net/ceph/decode.c 101
-rw-r--r-- net/ceph/messenger.c 1958
-rw-r--r-- net/ceph/messenger_v1.c 1506
-rw-r--r-- net/ceph/messenger_v2.c 3443
-rw-r--r-- net/ceph/mon_client.c 320
-rw-r--r-- net/ceph/osd_client.c 111
-rw-r--r-- net/ceph/osdmap.c 45
41 files changed, 7224 insertions, 2192 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 2ed79b09439a..59cfe71d0b3a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
- if (!memcmp(&watchers[i].addr, &locker->info.addr,
- sizeof(locker->info.addr)) &&
+ /*
+ * Ignore addr->type while comparing. This mimics
+ * entity_addr_t::get_legacy_str() + strcmp().
+ */
+ if (ceph_addr_equal_no_type(&watchers[i].addr,
+ &locker->info.addr) &&
watchers[i].cookie == cookie) {
struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num),
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 35c83f65475b..950552944436 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
}
@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
- page = grab_cache_page_write_begin(mapping, index, 0);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ded4229c314a..255a512f1277 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc;
int removed = 0;
+ /* 'ci' being NULL means the removal has already occurred */
+ if (!ci) {
+ dout("%s: cap inode is NULL\n", __func__);
+ return;
+ }
+
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) {
- WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
+ WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
+ !mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL;
}
@@ -2746,7 +2754,7 @@ again:
goto out_unlock;
}
- if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
goto out_unlock;
@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 8) {
- u64 flush_tid;
- u32 caller_uid, caller_gid;
u32 pool_ns_len;
/* version >= 6 */
- ceph_decode_64_safe(&p, end, flush_tid, bad);
+ ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */
- ceph_decode_32_safe(&p, end, caller_uid, bad);
- ceph_decode_32_safe(&p, end, caller_gid, bad);
+ ceph_decode_skip_32(&p, end, bad); // caller_uid
+ ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) {
@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 11) {
- u32 flags;
/* version >= 10 */
- ceph_decode_32_safe(&p, end, flags, bad);
+ ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */
extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7a8fbe3e4751..66989c880adb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0;
}
+static int status_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
+ struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
+
+ seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
+ ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
+ seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
+
+ return 0;
+}
+
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric);
+DEFINE_SHOW_ATTRIBUTE(status);
/*
@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir,
fsc,
&caps_fops);
+
+ fsc->debugfs_status = debugfs_create_file("status",
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &status_fops);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a4d48370b2b3..858ee7362ff5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
- } else if (old_dir != new_dir) {
- err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
- new_dir);
- if (err)
- return err;
}
+ /* don't allow cross-quota renames */
+ if ((old_dir != new_dir) &&
+ (!ceph_quota_is_same_realm(old_dir, new_dir)))
+ return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 526faf4778ce..adc8fc3c5d85 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1315,15 +1315,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
- tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
- in = ceph_get_inode(sb, tvino);
- if (IS_ERR(in)) {
- err = PTR_ERR(in);
- goto done;
- }
+ /* Should be filled in by handle_reply */
+ BUG_ON(!req->r_target_inode);
+ in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@@ -1333,11 +1328,13 @@ retry_lookup:
if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in));
+ req->r_target_inode = NULL;
if (in->i_state & I_NEW)
discard_new_inode(in);
+ else
+ iput(in);
goto done;
}
- req->r_target_inode = in;
if (in->i_state & I_NEW)
unlock_new_inode(in);
}
@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn;
struct inode *in;
int err = 0, skipped = 0, ret, i;
- struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
- u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+ u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 last_hash = 0;
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
- last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+ last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
}
}
@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex);
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
}
/* Craft a mask of needed caps given a set of requested statx attrs. */
-static int statx_to_caps(u32 want)
+static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED;
- if (want & (STATX_NLINK|STATX_CTIME))
- mask |= CEPH_CAP_LINK_SHARED;
+ if (want & (STATX_NLINK|STATX_CTIME)) {
+ /*
+ * The link count for directories depends on inode->i_subdirs,
+ * and that is only updated when Fs caps are held.
+ */
+ if (S_ISDIR(mode))
+ mask |= CEPH_CAP_FILE_SHARED;
+ else
+ mask |= CEPH_CAP_LINK_SHARED;
+ }
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS))
@@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) {
- err = ceph_do_getattr(inode, statx_to_caps(request_mask),
- flags & AT_STATX_FORCE_SYNC);
+ err = ceph_do_getattr(inode,
+ statx_to_caps(request_mask, inode->i_mode),
+ flags & AT_STATX_FORCE_SYNC);
if (err)
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 048a435a29be..fa8a847743d0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock,
};
-/**
+/*
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1;
}
-/**
+/*
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
*/
@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err;
}
-/**
+/*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
@@ -458,7 +458,7 @@ fail:
return err;
}
-/**
+/*
* Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks.
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8f1d7500a7ec..98c15ff2e599 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
/* Malformed reply? */
info->has_create_ino = false;
} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
- u8 struct_v, struct_compat;
- u32 len;
-
info->has_create_ino = true;
- ceph_decode_8_safe(p, end, struct_v, bad);
- ceph_decode_8_safe(p, end, struct_compat, bad);
- ceph_decode_32_safe(p, end, len, bad);
+ /* struct_v, struct_compat, and len */
+ ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
ceph_decode_64_safe(p, end, info->ino, bad);
ret = ceph_parse_deleg_inos(p, end, s);
if (ret)
@@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
kfree(req->r_path1);
kfree(req->r_path2);
+ put_cred(req->r_cred);
if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
@@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req);
insert_request(&mdsc->request_tree, req);
- req->r_uid = current_fsuid();
- req->r_gid = current_fsgid();
+ req->r_cred = get_current_cred();
if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
mdsc->oldest_tid = req->r_tid;
@@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
{
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
- int i = -1;
+ int i;
int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
@@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0)
invalidate = true;
if (ci->i_wrbuffer_ref > 0)
@@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
/*
* called under mdsc->mutex
*/
-static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
+static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_mds_request *req,
- int mds, bool drop_cap_releases)
+ bool drop_cap_releases)
{
+ int mds = session->s_mds;
+ struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg;
- struct ceph_mds_request_head *head;
+ struct ceph_mds_request_head_old *head;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
- int len;
+ int len, i;
u16 releases;
void *p, *end;
int ret;
+ bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free1;
}
- len = sizeof(*head) +
- pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+ if (legacy) {
+ /* Old style */
+ len = sizeof(*head);
+ } else {
+ /* New style: add gid_list and any later fields */
+ len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
+ (sizeof(u64) * req->r_cred->group_info->ngroups);
+ }
+
+ len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+
if (req->r_dentry_drop)
len += pathlen1;
if (req->r_old_dentry_drop)
@@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
- msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
- head = msg->front.iov_base;
- p = msg->front.iov_base + sizeof(*head);
+ /*
+ * The old ceph_mds_request_head didn't contain a version field, and
+ * one was added when we moved the message version from 3->4.
+ */
+ if (legacy) {
+ msg->hdr.version = cpu_to_le16(3);
+ head = msg->front.iov_base;
+ p = msg->front.iov_base + sizeof(*head);
+ } else {
+ struct ceph_mds_request_head *new_head = msg->front.iov_base;
+
+ msg->hdr.version = cpu_to_le16(4);
+ new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+ head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+ p = msg->front.iov_base + sizeof(*new_head);
+ }
+
end = msg->front.iov_base + msg->front.iov_len;
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
- head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
- head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+ head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+ req->r_cred->fsuid));
+ head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+ req->r_cred->fsgid));
head->ino = cpu_to_le64(req->r_deleg_ino);
head->args = req->r_args;
@@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_copy(&p, &ts, sizeof(ts));
}
+ /* gid list */
+ if (!legacy) {
+ ceph_encode_32(&p, req->r_cred->group_info->ngroups);
+ for (i = 0; i < req->r_cred->group_info->ngroups; i++)
+ ceph_encode_64(&p, from_kgid(&init_user_ns,
+ req->r_cred->group_info->gid[i]));
+ }
+
if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg);
msg = ERR_PTR(-ERANGE);
@@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
complete_all(&req->r_completion);
}
+static struct ceph_mds_request_head_old *
+find_old_request_head(void *p, u64 features)
+{
+ bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+ struct ceph_mds_request_head *new_head;
+
+ if (legacy)
+ return (struct ceph_mds_request_head_old *)p;
+ new_head = (struct ceph_mds_request_head *)p;
+ return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+}
+
/*
* called under mdsc->mutex
*/
-static int __prepare_send_request(struct ceph_mds_client *mdsc,
+static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
- int mds, bool drop_cap_releases)
+ bool drop_cap_releases)
{
- struct ceph_mds_request_head *rhead;
+ int mds = session->s_mds;
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg;
int flags = 0;
@@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
+
/*
* Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message.
@@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
* d_move mangles the src name.
*/
msg = req->r_request;
- rhead = msg->front.iov_base;
+ rhead = find_old_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
flags = le32_to_cpu(rhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
- msg = create_request_message(mdsc, req, mds, drop_cap_releases);
+ msg = create_request_message(session, req, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
return PTR_ERR(msg);
}
req->r_request = msg;
- rhead = msg->front.iov_base;
+ rhead = find_old_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* called under mdsc->mutex
*/
-static int __send_request(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
+static int __send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
bool drop_cap_releases)
{
int err;
- err = __prepare_send_request(mdsc, req, session->s_mds,
- drop_cap_releases);
+ err = __prepare_send_request(session, req, drop_cap_releases);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
- if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
- err = -EACCES;
- goto out_session;
- }
/*
* We cannot queue async requests since the caps and delegated
* inodes are bound to the session. Just return -EJUKEBOX and
@@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
err = -EJUKEBOX;
goto out_session;
}
+
+ /*
+ * If the session has been REJECTED, then return a hard error,
+ * unless it's a CLEANRECOVER mount, in which case we'll queue
+ * it to the mdsc queue.
+ */
+ if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
+ if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
+ list_add(&req->r_wait, &mdsc->waiting_for_map);
+ else
+ err = -EACCES;
+ goto out_session;
+ }
+
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING) {
err = __open_session(mdsc, session);
@@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
- err = __send_request(mdsc, session, req, false);
+ err = __send_request(session, req, false);
out_session:
ceph_put_mds_session(session);
@@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
+ /* Must find target inode outside of mutexes to avoid deadlocks */
+ if ((err >= 0) && rinfo->head->is_target) {
+ struct inode *in;
+ struct ceph_vino tvino = {
+ .ino = le64_to_cpu(rinfo->targeti.in->ino),
+ .snap = le64_to_cpu(rinfo->targeti.in->snapid)
+ };
+
+ in = ceph_get_inode(mdsc->fsc->sb, tvino);
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
+ mutex_lock(&session->s_mutex);
+ goto out_err;
+ }
+ req->r_target_inode = in;
+ }
+
mutex_lock(&session->s_mutex);
if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
- __send_request(mdsc, session, req, true);
+ __send_request(session, req, true);
/*
* also re-send old requests when MDS enters reconnect stage. So that MDS
@@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
ceph_mdsc_release_dir_caps_no_check(req);
- __send_request(mdsc, session, req, true);
+ __send_request(session, req, true);
}
mutex_unlock(&mdsc->mutex);
}
@@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
if (!READ_ONCE(fsc->blocklisted))
return;
- if (fsc->last_auto_reconnect &&
- time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
- return;
-
pr_info("auto reconnect after blocklisted\n");
- fsc->last_auto_reconnect = jiffies;
ceph_force_reconnect(fsc->sb);
}
@@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
- if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
u32 epoch;
- u32 map_len;
u32 num_fs;
u32 mount_fscid = (u32)-1;
- u8 struct_v, struct_cv;
int err = -EINVAL;
ceph_decode_need(&p, end, sizeof(u32), bad);
@@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_fsmap epoch %u\n", epoch);
- ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
- struct_v = ceph_decode_8(&p);
- struct_cv = ceph_decode_8(&p);
- map_len = ceph_decode_32(&p);
-
- ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
- p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
+ /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
+ ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
- num_fs = ceph_decode_32(&p);
+ ceph_decode_32_safe(&p, end, num_fs, bad);
while (num_fs-- > 0) {
void *info_p, *info_end;
u32 info_len;
- u8 info_v, info_cv;
u32 fscid, namelen;
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
- info_v = ceph_decode_8(&p);
- info_cv = ceph_decode_8(&p);
+ p += 2; // info_v, info_cv
info_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, info_len, bad);
info_p = p;
@@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return;
}
- newmap = ceph_mdsmap_decode(&p, end);
+ newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad_unlock;
@@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con