/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* AF_SMC protocol family socket handler keeping the AF_INET sock address type
* applies to SOCK_STREAM sockets only
* offers an alternative communication option for TCP-protocol sockets
* applicable with RoCE-cards only
*
* Initial restrictions:
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
* Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
*/
#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
/* Only one link-group creation (CLC handshake) may be in flight at a
 * time; later connections wait here and may then reuse the new group.
 */
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
					      * creation
					      */
/* Global registry of established link groups, guarded by its own lock. */
struct smc_lgr_list smc_lgr_list = { /* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

/* forward declaration: listen worker is wired up in smc_sock_alloc() */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_set_keepalive(struct sock *sk, int val)
{
struct smc_sock *smc = smc_sk(sk);
smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
/* Per-protocol socket hash tables (IPv4 and IPv6), each protected by a
 * reader/writer lock; used by smc_hash_sk()/smc_unhash_sk() below.
 */
static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};
/* sk_prot->hash() hook: insert @sk into this protocol's SMC hash table
 * and bump the per-netns "inuse" counter.  Cannot fail; returns 0 to
 * satisfy the hash() interface.
 */
int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	sk_add_node(sk, &h->ht);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);
/* sk_prot->unhash() hook: remove @sk from this protocol's SMC hash
 * table.  The "inuse" counter is only decremented when the socket was
 * actually hashed (sk_del_node_init() returns true on removal).
 */
void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);
/* proto ops for AF_SMC over IPv4 CLC sockets.  SLAB_TYPESAFE_BY_RCU:
 * sock slabs may be reused under RCU, so lookups must re-validate.
 */
struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
/* proto ops for AF_SMC over IPv6 CLC sockets; identical to smc_proto
 * except for the name and the v6 hash table.
 */
struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
/* Close an SMC socket: run the active close protocol (or just drop the
 * fallback TCP socket), detach it from its struct socket, and release
 * the references taken at creation.  Returns the close result code.
 */
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		/* native SMC path: run the close handshake first */
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	/* the internal CLC/TCP socket is released in either mode */
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}
	if (smc->use_fallback) {
		/* fallback mode: no SMC close protocol; drop the
		 * passive-closing reference and go straight to CLOSED
		 */
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	/* free the SMC connection only once fully closed (still locked) */
	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
		smc_conn_free(&smc->conn);
	release_sock(sk);

	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
out:
	return rc;
}
/* sk_destruct callback: final refcount debug accounting.  Only runs the
 * accounting once the socket is both CLOSED and marked DEAD; all real
 * cleanup already happened in smc_release().
 */
static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state == SMC_CLOSED && sock_flag(sk, SOCK_DEAD))
		sk_refcnt_debug_dec(sk);
}
/* Allocate and initialize an SMC sock for @sock in namespace @net.
 * @protocol selects the v4 or v6 proto ops.  The new sock starts in
 * SMC_INIT, is hashed, and carries one reference; returns NULL on
 * allocation failure.
 */
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct proto *proto = (protocol == SMCPROTO_SMC6) ? &smc_proto6
							  : &smc_proto;
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;

	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);

	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}
/* Bind the SMC socket by validating the address here and then binding
 * the internal CLC/TCP socket.  Returns 0 or a negative errno.
 */
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	/* hand SO_REUSEADDR down before binding the CLC socket */
	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}
/* Copy socket settings from @osk to @nsk that are not delivered through
 * setsockopt(), plus the subset of sk_flags selected by @mask; flag
 * bits outside @mask keep @nsk's current values.
 */
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;
	nsk->sk_flags = (nsk->sk_flags & ~mask) | (osk->sk_flags & mask);
}
/* sk_flags that must be mirrored from the SMC socket to the CLC socket */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}
/* sk_flags mirrored back from the CLC socket to the SMC socket */
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
int rest;
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
/* receive CONFIRM LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(
|