// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/debugfs.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
/*
* Interact with Ceph monitor cluster. Handle requests for new map
* versions, and periodically resend as needed. Also implement
* statfs() and umount().
*
* A small cluster of Ceph "monitors" is responsible for managing critical
* cluster configuration and state information. An odd number (e.g., 3, 5)
* of cmon daemons use a modified version of the Paxos part-time parliament
* algorithm to manage the MDS map (mds cluster membership), OSD map, and
* list of clients who have mounted the file system.
*
* We maintain an open, active session with a monitor at all times in order to
* receive timely MDSMap updates. We periodically send a keepalive byte on the
* TCP socket to ensure we detect a failure. If the connection does break, we
* randomly hunt for a new monitor. Once the connection is reestablished, we
* resend any outstanding requests.
*/
/*
* Forward declarations: both symbols are referenced before their
* definitions, which are expected further down in this file.
*/
static const struct ceph_connection_operations mon_con_ops;
static int __validate_auth(struct ceph_mon_client *monc);
/*
 * Decode a single "mon_info_t" entry, keeping only its address.
 *
 * @p:     in/out decode cursor; on success it is moved to the end of the
 *         entry, so any fields after the address vector are skipped
 * @end:   end of the buffer being decoded
 * @msgr2: passed through to ceph_decode_entity_addrvec()
 * @addr:  filled in by ceph_decode_entity_addrvec()
 *
 * Returns 0 on success, -EINVAL if the leading string cannot be skipped,
 * or the error reported by ceph_start_decoding() /
 * ceph_decode_entity_addrvec().
 */
static int decode_mon_info(void **p, void *end, bool msgr2,
			   struct ceph_entity_addr *addr)
{
	u32 info_len;
	u8 info_v;
	void *next;
	int err;

	err = ceph_start_decoding(p, end, 1, "mon_info_t", &info_v,
				  &info_len);
	if (err)
		goto out;

	/* remember where this entry ends before consuming any of it */
	next = *p + info_len;

	ceph_decode_skip_string(p, end, bad);  /* skip mon name */

	err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
	if (!err)
		*p = next;  /* jump over whatever follows the addrvec */
out:
	return err;

bad:
	return -EINVAL;
}
/*
 * Decode a monmap blob (e.g., during mount).
 *
 * Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
 *
 * @p:     in/out decode cursor, advanced as the blob is consumed
 * @end:   end of the buffer being decoded
 * @msgr2: passed through to decode_mon_info() for struct_v >= 6 maps
 *
 * Only the fsid, the epoch and one address per monitor are kept; the
 * timestamps and feature sets of struct_v >= 6 maps are skipped.
 *
 * Returns a kmalloc()ed monmap, to be released with kfree(), or an
 * ERR_PTR(): -EINVAL for a truncated blob or a monitor count outside
 * [0, CEPH_MAX_MON], -ENOMEM if the allocation fails, or the error
 * reported by one of the decode helpers.
 */
static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
{
	struct ceph_monmap *monmap = NULL;
	struct ceph_fsid fsid;
	u32 struct_len;
	int blob_len;
	int num_mon;
	u8 struct_v;
	u32 epoch;
	int ret;
	int i;

	ceph_decode_32_safe(p, end, blob_len, e_inval);
	ceph_decode_need(p, end, blob_len, e_inval);

	ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
	if (ret)
		goto fail;

	dout("%s struct_v %d\n", __func__, struct_v);
	ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
	ceph_decode_32_safe(p, end, epoch, e_inval);
	if (struct_v >= 6) {
		u32 feat_struct_len;
		u8 feat_struct_v;

		/* both timestamps must be present before we step over them */
		ceph_decode_need(p, end, 2 * sizeof(struct ceph_timespec),
				 e_inval);
		*p += sizeof(struct ceph_timespec);  /* skip last_changed */
		*p += sizeof(struct ceph_timespec);  /* skip created */

		ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
					  &feat_struct_v, &feat_struct_len);
		if (ret)
			goto fail;

		*p += feat_struct_len;  /* skip persistent_features */

		ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
					  &feat_struct_v, &feat_struct_len);
		if (ret)
			goto fail;

		*p += feat_struct_len;  /* skip optional_features */
	}
	ceph_decode_32_safe(p, end, num_mon, e_inval);

	dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
	     num_mon);
	/*
	 * num_mon is a signed int filled from a 32-bit wire value, so a
	 * count >= 2^31 shows up negative here and would slip past an
	 * upper-bound-only check straight into the allocation size.
	 */
	if (num_mon < 0 || num_mon > CEPH_MAX_MON)
		goto e_inval;

	monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
	if (!monmap) {
		ret = -ENOMEM;
		goto fail;
	}
	monmap->fsid = fsid;
	monmap->epoch = epoch;
	monmap->num_mon = num_mon;

	/* legacy_mon_addr map or mon_info map */
	for (i = 0; i < num_mon; i++) {
		struct ceph_entity_inst *inst = &monmap->mon_inst[i];

		ceph_decode_skip_string(p, end, e_inval);  /* skip mon name */
		inst->name.type = CEPH_ENTITY_TYPE_MON;
		inst->name.num = cpu_to_le64(i);

		if (struct_v >= 6)
			ret = decode_mon_info(p, end, msgr2, &inst->addr);
		else
			ret = ceph_decode_entity_addr(p, end, &inst->addr);
		if (ret)
			goto fail;

		dout("%s mon%d addr %s\n", __func__, i,
		     ceph_pr_addr(&inst->addr));
	}

	return monmap;

e_inval:
	ret = -EINVAL;
fail:
	kfree(monmap);  /* monmap is NULL until allocated; kfree(NULL) is a no-op */
	return ERR_PTR(ret);
}
/*
 * Check whether *addr matches the address of any monitor listed in the
 * monmap, using ceph_addr_equal_no_type() for the comparison.
 *
 * Returns 1 if a match is found, 0 otherwise.
 */
int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
{
	int idx = 0;

	while (idx < m->num_mon) {
		if (ceph_addr_equal_no_type(addr, &m->mon_inst[idx].addr))
			return 1;
		idx++;
	}

	return 0;
}
/*
 * Send an auth request.
 *
 * @len is the size of the front of monc->m_auth to send; it is recorded
 * both in the front iovec and, as little-endian, in the message header.
 * The request is flagged as pending, and the message is revoked before
 * being handed to ceph_con_send().
 */
static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
{
	/* size the message: wire header first, then the in-memory iovec */
	monc->m_auth->hdr.front_len = cpu_to_le32(len);
	monc->m_auth->front.iov_len = len;
	monc->pending_auth = 1;

	ceph_msg_revoke(monc->m_auth);

	/* keep our ref */
	ceph_msg_get(monc->m_auth);
	ceph_con_send(&monc->con, monc->m_auth);
}
/*
* Close monitor session, if any.
*
* Revokes m_auth and m_subscribe with ceph_msg_revoke() and m_auth_reply
* and m_subscribe_ack with ceph_msg_revoke_incoming(), closes the
* connection, then clears pending_auth and resets the auth state via
* ceph_auth_reset().
*/
static void __close_session(struct ceph_mon_client *monc)
{
dout("__close_session closing mon%d\n", monc->cur_mon);
ceph_msg_revoke(monc->m_auth);
ceph_msg_revoke_incoming(monc->m_auth_reply);
ceph_msg_revoke(monc->m_subscribe);
ceph_msg_revoke_incoming(monc->m_subscribe_ack);
ceph_con_close(&monc->con);
/* no auth request is outstanding once the connection is closed */
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
}
/*
* Pick a new monitor at random and set cur_mon. If we are repicking
* (i.e. cur_mon is already set), be sure to pick a different one.
*/
static void
|