// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <net/sock.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
/*
 * Static tag bytes (protocol control messages).
 *
 * Each variable holds a single CEPH_MSGR_TAG_* value.
 * NOTE(review): presumably these are addressable variables (rather than
 * bare constants) so that a pointer to the tag byte can be queued for
 * sending -- confirm against the users of these variables.
 */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
/*
 * Non-blocking receive of up to @len bytes from @sock into @buf.
 *
 * If @buf is NULL, up to @len bytes are discarded instead: MSG_TRUNC is
 * added to the receive flags.
 *
 * Returns whatever sock_recvmsg() returned, except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	struct kvec iov = {buf, len};
	int ret;

	/* No destination buffer: ask the socket to drop the data. */
	if (!buf)
		msg.msg_flags |= MSG_TRUNC;

	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len);
	ret = sock_recvmsg(sock, &msg, msg.msg_flags);

	/* "Would block" is not an error for the caller, just no progress. */
	return ret == -EAGAIN ? 0 : ret;
}
/*
 * Non-blocking receive of up to @length bytes from @sock into @page,
 * starting at @page_offset within the page.
 *
 * The requested range must not extend past PAGE_SIZE; this is enforced
 * with BUG_ON().
 *
 * Returns whatever sock_recvmsg() returned, except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
			     int page_offset, size_t length)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	struct bio_vec bv;
	int ret;

	BUG_ON(page_offset + length > PAGE_SIZE);

	/* Describe the target range of the page as a one-entry bvec. */
	bvec_set_page(&bv, page, length, page_offset);
	iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bv, 1, length);

	ret = sock_recvmsg(sock, &msg, msg.msg_flags);
	if (ret != -EAGAIN)
		return ret;

	return 0;
}
/*
 * Non-blocking write of a kvec array to @sock.
 *
 * @iov, @kvlen and @len are handed to kernel_sendmsg() unchanged.
 * @more is true if the caller will be sending more data shortly; it
 * selects MSG_MORE, otherwise MSG_EOR is set.
 *
 * Returns whatever kernel_sendmsg() returned, except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
			    size_t kvlen, size_t len, bool more)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	int ret;

	/* MSG_EOR is superfluous, but kept for the "no more data" case. */
	msg.msg_flags |= more ? MSG_MORE : MSG_EOR;

	ret = kernel_sendmsg(sock, &msg, iov, kvlen, len);

	return ret == -EAGAIN ? 0 : ret;
}
/*
 * Non-blocking send of @size bytes of @page, starting at @offset, on
 * @sock.
 *
 * @more: MSG_MORE or 0; it is OR-ed directly into the message flags.
 *
 * Returns whatever sock_sendmsg() returned, except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
			     int offset, size_t size, int more)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	struct bio_vec bv;
	int rc;

	msg.msg_flags |= more;

	/*
	 * Only splice the page when sendpage_ok() approves of it.
	 *
	 * MSG_SPLICE_PAGES cannot properly handle pages with
	 * page_count == 0, so those must go through the regular sendmsg
	 * path.  The same applies to slab pages: skb_can_coalesce()
	 * allows coalescing neighboring slab objects into a single frag,
	 * which triggers one of the hardened usercopy checks.
	 */
	if (sendpage_ok(page))
		msg.msg_flags |= MSG_SPLICE_PAGES;

	bvec_set_page(&bv, page, size, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size);

	rc = sock_sendmsg(sock, &msg);
	if (rc != -EAGAIN)
		return rc;

	return 0;
}
/*
 * Empty the outgoing kvec state of @con: rewind the cursor to the first
 * slot of the out_kvec array and clear the pending entry and byte counts.
 *
 * Must not be called while con->v1.out_skip is non-zero (BUG_ON).
 */
static void con_out_kvec_reset(struct ceph_connection *con)
{
	BUG_ON(con->v1.out_skip);

	con->v1.out_kvec_cur = con->v1.out_kvec;
	con->v1.out_kvec_bytes = 0;
	con->v1.out_kvec_left = 0;
}
/*
 * Append one entry (@data, @size) to the outgoing kvec array of @con.
 *
 * The entry is stored in slot out_kvec_left; the entry count and the
 * pending byte total are then bumped accordingly.
 *
 * BUGs if con->v1.out_skip is non-zero or if the array is already full.
 */
static void con_out_kvec_add(struct ceph_connection *con,
			     size_t size, void *data)
{
	int slot = con->v1.out_kvec_left;

	BUG_ON(con->v1.out_skip);
	BUG_ON(slot >= ARRAY_SIZE(con->v1.out_kvec));

	con->v1.out_kvec[slot].iov_base = data;
	con->v1.out_kvec[slot].iov_len = size;

	con->v1.out_kvec_bytes += size;
	con->v1.out_kvec_left++;
}
/*
 * Chop off a kvec from the end.
 *
 * Does nothing if no bytes are pending (out_kvec_bytes == 0).  Otherwise
 * the last of the out_kvec_left entries, counted from out_kvec_cur, is
 * dropped and both the entry count and the pending byte total are
 * reduced.
 *
 * Return residual number of bytes for that kvec, i.e. how many bytes
 * would have been written if the kvec hadn't been nuked (0 if nothing
 * was pending).
 */
static int con_out_kvec_skip(struct ceph_connection *con)
{
	int skip = 0;

	if (con->v1.out_kvec_bytes > 0) {
		/*
		 * Pending bytes imply at least one pending entry.  Check
		 * this before out_kvec_left - 1 is used as an index, so a
		 * corrupted count is caught instead of reading the slot in
		 * front of out_kvec_cur.
		 */
		BUG_ON(!con->v1.out_kvec_left);

		skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len;
		BUG_ON(con->v1.out_kvec_bytes < skip);

		con->v1.out_kvec_bytes -= skip;
		con->v1.out_kvec_left--;
	}

	return skip;
}
/*
 * Size of the message footer used on @con.
 *
 * If the peer advertises CEPH_FEATURE_MSG_AUTH, this is the size of
 * struct ceph_msg_footer; otherwise it is the size of
 * struct ceph_msg_footer_old.
 */
static size_t sizeof_footer(struct ceph_connection *con)
{
	if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
		return sizeof(struct ceph_msg_footer);

	return sizeof(struct ceph_msg_footer_old);
}
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
/* Initialize data cursor if it's not a sparse read */
u64 len = msg->sparse_read_total ? : data_len;
ceph_msg_data_cursor_init(&ms
|