/*
* Unix SMB/CIFS implementation.
* Wrap Infiniband calls.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "replace.h"
#include "system/network.h"
#include <assert.h>
#include <talloc.h>
#include <tevent.h>
#include "lib/util/dlinklist.h"
#include "lib/util/debug.h"
#include "common/logging.h"
#include <rdma/rdma_cma_abi.h>
#include <rdma/rdma_cma.h>
#include "ibwrapper.h"
#include "ibwrapper_internal.h"
/* Size in bytes of the ibw_lasterr buffer declared right below. */
#define IBW_LASTERR_BUFSIZE 512
/*
 * File-wide buffer holding the text of the most recent failure; the
 * functions in this file fill it with snprintf() before returning an
 * error to their caller.
 */
static char ibw_lasterr[IBW_LASTERR_BUFSIZE];
/*
 * NOTE(review): the four limits below are not referenced in the part of
 * the file reviewed here. Presumably they are the defaults for the
 * corresponding struct ibw_opts fields (max_send_wr, max_recv_wr,
 * recv_bufsize, ...) -- confirm against the option parsing code.
 */
#define IBW_MAX_SEND_WR 256
#define IBW_MAX_RECV_WR 1024
#define IBW_RECV_BUFSIZE 256
#define IBW_RECV_THRESHOLD (1 * 1024 * 1024)
/*
 * Forward declarations of file-local functions that are used before
 * their definition. ibw_event_handler_verbs() is the tevent fd callback
 * registered on the verbs completion channel by ibw_setup_cq_qp().
 */
static void ibw_event_handler_verbs(struct tevent_context *ev,
struct tevent_fd *fde, uint16_t flags, void *private_data);
static int ibw_fill_cq(struct ibw_conn *conn);
static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc);
static int ibw_wc_send(struct ibw_conn *conn, struct ibv_wc *wc);
static int ibw_send_packet(struct ibw_conn *conn, void *buf, struct ibw_wr *p, uint32_t len);
/*
 * Allocate a buffer of n bytes aligned to pctx->pagesize and register it
 * as a memory region (local write access) in the protection domain of
 * pconn.
 *
 * On success the buffer is returned and *ppmr holds the registration.
 * On failure NULL is returned and ibw_lasterr describes the problem; if
 * the registration step failed, the buffer has already been released
 * and *ppmr is NULL.
 */
static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn,
	uint32_t n, struct ibv_mr **ppmr)
{
	void *region;

	DEBUG(DEBUG_DEBUG, ("ibw_alloc_mr(cmid=%p, n=%u)\n", pconn->cm_id, n));

	region = memalign(pctx->pagesize, n);
	if (region == NULL) {
		snprintf(ibw_lasterr,
			 sizeof(ibw_lasterr),
			 "couldn't allocate memory\n");
		return NULL;
	}

	*ppmr = ibv_reg_mr(pconn->pd, region, n, IBV_ACCESS_LOCAL_WRITE);
	if (*ppmr != NULL) {
		return region;
	}

	/* registration failed: give the buffer back before reporting */
	snprintf(ibw_lasterr,
		 sizeof(ibw_lasterr),
		 "couldn't allocate mr\n");
	free(region);
	return NULL;
}
/*
 * Release a buffer/memory-region pair.
 *
 * The region *ppmr is deregistered first, then the buffer *ppbuf is
 * freed. Either may already be NULL, in which case that step is
 * skipped. Both pointers are reset to NULL afterwards.
 */
static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr)
{
	DEBUG(DEBUG_DEBUG, ("ibw_free_mr(%p %p)\n", *ppbuf, *ppmr));

	/* the registration has to go before the memory it covers */
	if (*ppmr) {
		ibv_dereg_mr(*ppmr);
		*ppmr = NULL;
	}

	if (*ppbuf != NULL) {
		free(*ppbuf);
		*ppbuf = NULL;
	}
}
/*
 * Set up the per-connection send/receive memory of conn.
 *
 * Two registered buffers are allocated through ibw_alloc_mr():
 *  - buf_send: max_send_wr slots of recv_bufsize bytes each
 *  - buf_recv: max_recv_wr slots of recv_bufsize bytes each
 * Then one struct ibw_wr per send slot is created as a talloc child of
 * pconn, recorded in pconn->wr_index[] (wr_id is the index into that
 * array) and put on the pconn->wr_list_avail list.
 *
 * Returns 0 on success, -1 if one of the registered buffers could not
 * be allocated (ibw_lasterr is set). talloc allocation failures abort
 * via assert(), like the other talloc allocations in this file.
 */
static int ibw_init_memory(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	struct ibw_opts *opts = &pctx->opts;
	int i;
	struct ibw_wr *p;

	DEBUG(DEBUG_DEBUG, ("ibw_init_memory(cmid: %p)\n", pconn->cm_id));

	pconn->buf_send = ibw_alloc_mr(pctx, pconn,
		opts->max_send_wr * opts->recv_bufsize, &pconn->mr_send);
	if (!pconn->buf_send) {
		snprintf(ibw_lasterr,
			 sizeof(ibw_lasterr),
			 "couldn't allocate work send buf\n");
		return -1;
	}

	pconn->buf_recv = ibw_alloc_mr(pctx, pconn,
		opts->max_recv_wr * opts->recv_bufsize, &pconn->mr_recv);
	if (!pconn->buf_recv) {
		/* buf_send stays allocated; ibw_conn_priv_destruct() frees it */
		snprintf(ibw_lasterr,
			 sizeof(ibw_lasterr),
			 "couldn't allocate work recv buf\n");
		return -1;
	}

	pconn->wr_index = talloc_size(pconn, opts->max_send_wr * sizeof(struct ibw_wr *));
	assert(pconn->wr_index!=NULL);

	for(i=0; i<opts->max_send_wr; i++) {
		p = talloc_zero(pconn, struct ibw_wr);
		/*
		 * The work request is dereferenced right below, so a failed
		 * allocation must not get past this point.
		 */
		assert(p!=NULL);
		pconn->wr_index[i] = p;

		/* every work request owns one fixed slice of the send buffer */
		p->buf = pconn->buf_send + (i * opts->recv_bufsize);
		p->wr_id = i;

		DLIST_ADD(pconn->wr_list_avail, p);
	}

	return 0;
}
/*
 * Tear down the connection-manager resources held by pctx: the tevent
 * fd watcher, the rdma_cm_id and the rdma event channel.
 *
 * NOTE(review): presumably installed as the talloc destructor of
 * struct ibw_ctx_priv -- the registration is not in the part of the
 * file reviewed here.
 *
 * Always returns 0.
 */
static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx)
{
	DEBUG(DEBUG_DEBUG, ("ibw_ctx_priv_destruct(%p)\n", pctx));

	/*
	 * tevent_fd must be removed before the fd is closed
	 */
	TALLOC_FREE(pctx->cm_channel_event);

	/*
	 * destroy cm
	 *
	 * The id goes first: librdmacm requires every rdma_cm_id that is
	 * associated with an event channel to be destroyed before
	 * rdma_destroy_event_channel() is called on that channel.
	 * (pctx->cm_id is presumably bound to pctx->cm_channel; if it is
	 * not, this order is harmless.)
	 */
	if (pctx->cm_id) {
		rdma_destroy_id(pctx->cm_id);
		pctx->cm_id = NULL;
	}
	if (pctx->cm_channel) {
		rdma_destroy_event_channel(pctx->cm_channel);
		pctx->cm_channel = NULL;
	}

	return 0;
}
/*
 * Destructor-style hook for struct ibw_ctx.
 *
 * It only traces the call at debug level; nothing is released here.
 * NOTE(review): presumably registered with talloc_set_destructor()
 * elsewhere in the file -- confirm.
 *
 * Always returns 0.
 */
static int ibw_ctx_destruct(struct ibw_ctx *ctx)
{
	DEBUG(DEBUG_DEBUG,
	      ("ibw_ctx_destruct(%p)\n", ctx));

	return 0;
}
/*
 * talloc destructor of struct ibw_conn_priv (installed by
 * ibw_conn_new()).
 *
 * Releases, in this order: the tevent watcher of the verbs channel, the
 * queue pair, the completion queue, the completion channel, both
 * registered buffers, the protection domain and finally the cm_id.
 * Every step is skipped when the corresponding resource was never set
 * up. The wr_index array and its entries are talloc memory and are
 * released by talloc itself.
 *
 * Always returns 0.
 */
static int ibw_conn_priv_destruct(struct ibw_conn_priv *pconn)
{
	DEBUG(DEBUG_DEBUG, ("ibw_conn_priv_destruct(%p, cmid: %p)\n",
		pconn, pconn->cm_id));

	/*
	 * The tevent_fd has to be gone before the completion channel
	 * (and with it the watched fd) is destroyed further down.
	 */
	TALLOC_FREE(pconn->verbs_channel_event);

	/* verbs objects: queue pair, completion queue, channel */
	if (pconn->cm_id) {
		if (pconn->cm_id->qp) {
			rdma_destroy_qp(pconn->cm_id);
			pconn->cm_id->qp = NULL;
		}
	}

	if (pconn->cq) {
		ibv_destroy_cq(pconn->cq);
		pconn->cq = NULL;
	}

	if (pconn->verbs_channel) {
		ibv_destroy_comp_channel(pconn->verbs_channel);
		pconn->verbs_channel = NULL;
	}

	/* registered send and receive buffers */
	ibw_free_mr(&pconn->buf_send, &pconn->mr_send);
	ibw_free_mr(&pconn->buf_recv, &pconn->mr_recv);

	/* protection domain, after the regions registered in it */
	if (pconn->pd != NULL) {
		ibv_dealloc_pd(pconn->pd);
		pconn->pd = NULL;
		DEBUG(DEBUG_DEBUG, ("pconn=%p pd deallocated\n", pconn));
	}

	/* the connection manager id goes last */
	if (pconn->cm_id != NULL) {
		rdma_destroy_id(pconn->cm_id);
		pconn->cm_id = NULL;
		DEBUG(DEBUG_DEBUG, ("pconn=%p cm_id destroyed\n", pconn));
	}

	return 0;
}
/*
 * Destructor-style hook for struct ibw_wr: releases the optional large
 * buffer of the work request together with its memory region.
 *
 * Nothing is done when no large buffer is attached.
 * NOTE(review): presumably registered with talloc_set_destructor()
 * elsewhere in the file -- confirm.
 *
 * Always returns 0.
 */
static int ibw_wr_destruct(struct ibw_wr *wr)
{
	if (wr->buf_large == NULL) {
		return 0;
	}

	ibw_free_mr(&wr->buf_large, &wr->mr_large);
	return 0;
}
static int ibw_conn_destruct(struct ibw_conn *conn)
{
DEBUG(DEBUG_DEBUG, ("ibw_conn_destruct(%p)\n", conn));
/* important here: ctx is a talloc _parent_ */
DLIST_REMOVE(conn->ctx->conn_list, conn);
return 0;
}
/*
 * Create a new, zeroed connection object belonging to ctx.
 *
 * The connection is allocated on mem_ctx, its private part as a talloc
 * child of the connection. Both get their destructor installed, and the
 * connection is linked into ctx->conn_list.
 *
 * ctx must not be NULL. Allocation failures abort via assert(), so the
 * returned pointer is never NULL.
 */
struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx, TALLOC_CTX *mem_ctx)
{
	struct ibw_conn *conn = NULL;
	struct ibw_conn_priv *pconn = NULL;

	assert(ctx!=NULL);

	/* public part */
	conn = talloc_zero(mem_ctx, struct ibw_conn);
	assert(conn!=NULL);
	talloc_set_destructor(conn, ibw_conn_destruct);

	/* private part, owned by the public one */
	pconn = talloc_zero(conn, struct ibw_conn_priv);
	assert(pconn!=NULL);
	talloc_set_destructor(pconn, ibw_conn_priv_destruct);

	/* wire everything together and register with the context */
	conn->internal = (void *)pconn;
	conn->ctx = ctx;
	DLIST_ADD(ctx->conn_list, conn);

	return conn;
}
static int ibw_setup_cq_qp(struct ibw_conn *conn)
{
struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
struct ibv_qp_init_attr init_attr;
struct ibv_qp_attr attr;
int rc;
DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));
/* init verbs */
pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
if (!pconn->verbs_channel) {
snprintf(ibw_lasterr,
sizeof(ibw_lasterr),
"ibv_create_comp_channel failed %d\n",
errno);
return -1;
}
DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));
pconn->verbs_channel_event = tevent_add_fd(pctx->ectx, NULL, /* not pconn or conn */
pconn->verbs_channel->fd, TEVENT_FD_READ, ibw_event_handler_verbs, conn);
pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
if (!pconn->pd) {
snprintf(ibw_lasterr,
sizeof(ibw_lasterr),
"ibv_alloc_pd failed %d\n",
errno);
return -1;
}
DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));
/* init mr */
if (ibw_init_memory(conn))
return -1;
/* init cq */
|