// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018 Oracle. All rights reserved.
*
* Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
*/
#include <rdma/rw.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
/* Forward declarations of the static Write and Read completion handlers. */
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
/* Each R/W context contains state for one chain of RDMA Read or
* Write Work Requests.
*
* Each WR chain handles a single contiguous server-side buffer,
* because scatterlist entries after the first have to start on
* page alignment. xdr_buf iovecs cannot guarantee alignment.
*
* Each WR chain handles only one R_key. Each RPC-over-RDMA segment
* from a client may contain a unique R_key, so each WR chain moves
* up to one segment at a time.
*
* The scatterlist makes this data structure over 4KB in size. To
* make it less likely to fail, and to handle the allocation for
* smaller I/O requests without disabling bottom-halves, these
* contexts are created on demand, but cached and reused until the
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
/* Linkage on a lock-less list, e.g. the transport's sc_rw_ctxts cache */
struct llist_node rw_node;
/* Linkage on a list_head; initialized empty when the context is allocated */
struct list_head rw_list;
/* Core R/W API state, set up by rdma_rw_ctx_init() */
struct rdma_rw_ctx rw_ctx;
/* Number of scatterlist entries handed to rdma_rw_ctx_init() */
unsigned int rw_nents;
/* Capacity of rw_first_sgl[], recorded when the context is allocated */
unsigned int rw_first_sgl_nents;
/* Chained scatterlist table; its first chunk is rw_first_sgl[] */
struct sg_table rw_sg_table;
/* Inline first chunk of the scatterlist, sized at allocation time */
struct scatterlist rw_first_sgl[];
};
/*
 * Return the first R/W context linked on @list through its rw_list
 * member, or NULL when @list has no entries.
 */
static inline struct svc_rdma_rw_ctxt *
svc_rdma_next_ctxt(struct list_head *list)
{
	struct svc_rdma_rw_ctxt *first;

	first = list_first_entry_or_null(list, struct svc_rdma_rw_ctxt,
					 rw_list);
	return first;
}
/*
 * Obtain an R/W context whose scatterlist table has room for @sges
 * entries.
 *
 * A context is taken from @rdma's cache when one is available.
 * Otherwise a new one is allocated with kmalloc_node(), with an
 * inline scatterlist sized from the device's max_send_sge attribute.
 *
 * Returns the context, or NULL if either the context or its chained
 * scatterlist table could not be allocated. When the table allocation
 * fails, the context is freed rather than returned to the cache.
 */
static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
	struct ib_device *ibdev = rdma->sc_cm_id->device;
	unsigned int inline_nents = ibdev->attrs.max_send_sge;
	struct svc_rdma_rw_ctxt *ctxt;
	struct llist_node *cached;

	/* Removal from the cache is done under sc_rw_ctxt_lock */
	spin_lock(&rdma->sc_rw_ctxt_lock);
	cached = llist_del_first(&rdma->sc_rw_ctxts);
	spin_unlock(&rdma->sc_rw_ctxt_lock);

	if (!cached) {
		/* Cache miss: build a fresh context */
		ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, inline_nents),
				    GFP_KERNEL, ibdev_to_node(ibdev));
		if (!ctxt)
			goto out_noctx;

		ctxt->rw_first_sgl_nents = inline_nents;
		INIT_LIST_HEAD(&ctxt->rw_list);
	} else {
		ctxt = llist_entry(cached, struct svc_rdma_rw_ctxt, rw_node);
	}

	/* The inline array always serves as the table's first chunk */
	ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
	if (!sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
				    ctxt->rw_first_sgl, inline_nents))
		return ctxt;

	/* No scatterlist table could be set up: discard the context */
	kfree(ctxt);
out_noctx:
	trace_svcrdma_rwctx_empty(rdma, sges);
	return NULL;
}
/*
 * Release @ctxt's chained scatterlist table, then push the context
 * onto @list.
 */
static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
				   struct llist_head *list)
{
	struct sg_table *table = &ctxt->rw_sg_table;
	unsigned int inline_nents = ctxt->rw_first_sgl_nents;

	/* Free the table before the context becomes visible on the list */
	sg_free_table_chained(table, inline_nents);
	llist_add(&ctxt->rw_node, list);
}
/* Return @ctxt to @rdma's cache of R/W contexts. */
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
				 struct svc_rdma_rw_ctxt *ctxt)
{
	struct llist_head *cache = &rdma->sc_rw_ctxts;

	__svc_rdma_put_rw_ctxt(ctxt, cache);
}
/**
 * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts
 * @rdma: transport about to be destroyed
 *
 * Removes every context from @rdma's cache and releases it with
 * kfree(). No lock is taken while draining the cache.
 */
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
	struct llist_node *pos;

	for (pos = llist_del_first(&rdma->sc_rw_ctxts); pos;
	     pos = llist_del_first(&rdma->sc_rw_ctxts))
		kfree(llist_entry(pos, struct svc_rdma_rw_ctxt, rw_node));
}
/**
 * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O
 * @rdma: controlling transport instance
 * @ctxt: R/W context to prepare
 * @offset: RDMA offset
 * @handle: RDMA tag/handle
 * @direction: I/O direction
 *
 * On failure, @ctxt is handed back to @rdma's context cache before
 * returning, so the caller must not use or release it again.
 *
 * Returns on success, the number of WQEs that will be needed
 * on the workqueue, or a negative errno.
 */
static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
				struct svc_rdma_rw_ctxt *ctxt,
				u64 offset, u32 handle,
				enum dma_data_direction direction)
{
	int nr_wqes;

	nr_wqes = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
				   rdma->sc_port_num, ctxt->rw_sg_table.sgl,
				   ctxt->rw_nents, 0, offset, handle,
				   direction);
	if (unlikely(nr_wqes < 0))
		goto out_initerr;
	return nr_wqes;

out_initerr:
	/* Record the failure, then give the context back to the cache */
	trace_svcrdma_dma_map_rw_err(rdma, offset, handle,
				     ctxt->rw_nents, nr_wqes);
	svc_rdma_put_rw_ctxt(rdma, ctxt);
	return nr_wqes;
}
/**
* svc_rdma_cc_init - Initialize an svc_rdma_chunk_ctxt
* @rdma: controlling transport instance
* @cc: svc_rdma_chunk_ctxt to be initialized
*/
void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc)
{
struct rpc_rdma_cid *cid = &cc->cc_cid;
if (unlikely(!