summaryrefslogtreecommitdiff
path: root/drivers/net
diff options
context:
space:
mode:
authorRam Amrani <Ram.Amrani@caviumnetworks.com>2016-10-01 21:59:57 +0300
committerDavid S. Miller <davem@davemloft.net>2016-10-03 23:22:46 -0400
commit51ff17251c9c2c2e71974149d22bc73ea09c27cc (patch)
tree8f6a9a8e6644567eab832e7d7ca8c7b0b3b5dd8a /drivers/net
parentcee9fbd8e2e9e713cd8bf227c6492fd8854de74b (diff)
downloadlinux-51ff17251c9c2c2e71974149d22bc73ea09c27cc.tar.gz
linux-51ff17251c9c2c2e71974149d22bc73ea09c27cc.tar.bz2
linux-51ff17251c9c2c2e71974149d22bc73ea09c27cc.zip
qed: Add support for RoCE hw init
This adds the backbone required for the various HW initalizations which are necessary for the qedr driver - FW notification, resource initializations, etc. Signed-off-by: Ram Amrani <Ram.Amrani@caviumnetworks.com> Signed-off-by: Yuval Mintz <Yuval.Mintz@caviumnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/ethernet/qlogic/qed/Makefile1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h33
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.h6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c154
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c44
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_reg_addr.h8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_roce.c886
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_roce.h123
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sp.h1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_spq.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.c2
13 files changed, 1269 insertions, 7 deletions
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index e067098f10a9..cda0af7fbc20 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -5,3 +5,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
qed_selftest.o qed_dcbx.o qed_debug.o
qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
qed-$(CONFIG_QED_LL2) += qed_ll2.o
+qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 91b571a3670b..c5a098a2ca2c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -35,6 +35,9 @@ extern const struct qed_common_ops qed_common_ops_pass;
#define QED_WFQ_UNIT 100
+#define QED_WID_SIZE (1024)
+#define QED_PF_DEMS_SIZE (4)
+
/* cau states */
enum qed_coalescing_mode {
QED_COAL_MODE_DISABLE,
@@ -50,6 +53,14 @@ enum qed_mcp_protocol_type;
static inline u32 qed_db_addr(u32 cid, u32 DEMS)
{
u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
+ (cid * QED_PF_DEMS_SIZE);
+
+ return db_addr;
+}
+
+static inline u32 qed_db_addr_vf(u32 cid, u32 DEMS)
+{
+ u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid);
return db_addr;
@@ -152,14 +163,17 @@ enum QED_RESOURCES {
QED_RL,
QED_MAC,
QED_VLAN,
+ QED_RDMA_CNQ_RAM,
QED_ILT,
QED_LL2_QUEUE,
+ QED_RDMA_STATS_QUEUE,
QED_MAX_RESC,
};
enum QED_FEATURE {
QED_PF_L2_QUE,
QED_VF,
+ QED_RDMA_CNQ,
QED_MAX_FEATURES,
};
@@ -364,6 +378,7 @@ struct qed_hwfn {
/* Protocol related */
bool using_ll2;
struct qed_ll2_info *p_ll2_info;
+ struct qed_rdma_info *p_rdma_info;
struct qed_pf_params pf_params;
bool b_rdma_enabled_in_prs;
@@ -402,6 +417,17 @@ struct qed_hwfn {
struct dbg_tools_data dbg_info;
+ /* PWM region specific data */
+ u32 dpi_size;
+ u32 dpi_count;
+
+ /* This is used to calculate the doorbell address */
+ u32 dpi_start_offset;
+
+ /* If one of the following is set then EDPM shouldn't be used */
+ u8 dcbx_no_edpm;
+ u8 db_bar_no_edpm;
+
struct qed_simd_fp_handler simd_proto_handler[64];
#ifdef CONFIG_QED_SRIOV
@@ -435,6 +461,8 @@ struct qed_int_params {
bool fp_initialized;
u8 fp_msix_base;
u8 fp_msix_cnt;
+ u8 rdma_msix_base;
+ u8 rdma_msix_cnt;
};
struct qed_dbg_feature {
@@ -541,7 +569,6 @@ struct qed_dev {
bool b_is_vf;
u32 drv_type;
-
struct qed_eth_stats *reset_stats;
struct qed_fw_data *fw_data;
@@ -574,6 +601,10 @@ struct qed_dev {
#endif
const struct firmware *firmware;
+
+ u32 rdma_max_sge;
+ u32 rdma_max_inline;
+ u32 rdma_max_srq_sge;
};
#define NUM_OF_VFS(dev) MAX_NUM_VFS_BB
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index d9bea2a9c9f7..82370a1a59ad 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -48,7 +48,13 @@
#define TM_ELEM_SIZE 4
/* ILT constants */
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */
+#define ILT_DEFAULT_HW_P_SIZE 4
+#else
#define ILT_DEFAULT_HW_P_SIZE 3
+#endif
+
#define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12))
#define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index c6f6f2e8192d..d00ad055802b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -170,6 +170,12 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
*/
void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
u32 cid);
+int qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
+ enum qed_cxt_elem_type elem_type, u32 iid);
+u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn,
+ enum protocol_type type);
+u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn,
+ enum protocol_type type);
#define QED_CTX_WORKING_MEM 0
#define QED_CTX_FL_MEM 1
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 9a8e153df841..754f6a908858 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -35,9 +35,13 @@
#include "qed_sp.h"
#include "qed_sriov.h"
#include "qed_vf.h"
+#include "qed_roce.h"
static DEFINE_SPINLOCK(qm_lock);
+#define QED_MIN_DPIS (4)
+#define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS)
+
/* API common to all protocols */
enum BAR_ID {
BAR_ID_0, /* used for GRC */
@@ -787,6 +791,136 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
return rc;
}
+static int
+qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus)
+{
+ u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size;
+ u32 dpi_bit_shift, dpi_count;
+ u32 min_dpis;
+
+ /* Calculate DPI size */
+ dpi_page_size_1 = QED_WID_SIZE * n_cpus;
+ dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE);
+ dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2);
+ dpi_page_size = roundup_pow_of_two(dpi_page_size);
+ dpi_bit_shift = ilog2(dpi_page_size / 4096);
+
+ dpi_count = pwm_region_size / dpi_page_size;
+
+ min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis;
+ min_dpis = max_t(u32, QED_MIN_DPIS, min_dpis);
+
+ p_hwfn->dpi_size = dpi_page_size;
+ p_hwfn->dpi_count = dpi_count;
+
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPI_BIT_SHIFT, dpi_bit_shift);
+
+ if (dpi_count < min_dpis)
+ return -EINVAL;
+
+ return 0;
+}
+
+enum QED_ROCE_EDPM_MODE {
+ QED_ROCE_EDPM_MODE_ENABLE = 0,
+ QED_ROCE_EDPM_MODE_FORCE_ON = 1,
+ QED_ROCE_EDPM_MODE_DISABLE = 2,
+};
+
+static int
+qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 pwm_regsize, norm_regsize;
+ u32 non_pwm_conn, min_addr_reg1;
+ u32 db_bar_size, n_cpus;
+ u32 roce_edpm_mode;
+ u32 pf_dems_shift;
+ int rc = 0;
+ u8 cond;
+
+ db_bar_size = qed_hw_bar_size(p_hwfn, BAR_ID_1);
+ if (p_hwfn->cdev->num_hwfns > 1)
+ db_bar_size /= 2;
+
+ /* Calculate doorbell regions */
+ non_pwm_conn = qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_CORE) +
+ qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE,
+ NULL) +
+ qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+ NULL);
+ norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096);
+ min_addr_reg1 = norm_regsize / 4096;
+ pwm_regsize = db_bar_size - norm_regsize;
+
+ /* Check that the normal and PWM sizes are valid */
+ if (db_bar_size < norm_regsize) {
+ DP_ERR(p_hwfn->cdev,
+ "Doorbell BAR size 0x%x is too small (normal region is 0x%0x )\n",
+ db_bar_size, norm_regsize);
+ return -EINVAL;
+ }
+
+ if (pwm_regsize < QED_MIN_PWM_REGION) {
+ DP_ERR(p_hwfn->cdev,
+ "PWM region size 0x%0x is too small. Should be at least 0x%0x (Doorbell BAR size is 0x%x and normal region size is 0x%0x)\n",
+ pwm_regsize,
+ QED_MIN_PWM_REGION, db_bar_size, norm_regsize);
+ return -EINVAL;
+ }
+
+ /* Calculate number of DPIs */
+ roce_edpm_mode = p_hwfn->pf_params.rdma_pf_params.roce_edpm_mode;
+ if ((roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE) ||
+ ((roce_edpm_mode == QED_ROCE_EDPM_MODE_FORCE_ON))) {
+ /* Either EDPM is mandatory, or we are attempting to allocate a
+ * WID per CPU.
+ */
+ n_cpus = num_active_cpus();
+ rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus);
+ }
+
+ cond = (rc && (roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE)) ||
+ (roce_edpm_mode == QED_ROCE_EDPM_MODE_DISABLE);
+ if (cond || p_hwfn->dcbx_no_edpm) {
+ /* Either EDPM is disabled from user configuration, or it is
+ * disabled via DCBx, or it is not mandatory and we failed to
+ * allocated a WID per CPU.
+ */
+ n_cpus = 1;
+ rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus);
+
+ if (cond)
+ qed_rdma_dpm_bar(p_hwfn, p_ptt);
+ }
+
+ DP_INFO(p_hwfn,
+ "doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
+ norm_regsize,
+ pwm_regsize,
+ p_hwfn->dpi_size,
+ p_hwfn->dpi_count,
+ ((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
+ "disabled" : "enabled");
+
+ if (rc) {
+ DP_ERR(p_hwfn,
+ "Failed to allocate enough DPIs. Allocated %d but the current minimum is %d.\n",
+ p_hwfn->dpi_count,
+ p_hwfn->pf_params.rdma_pf_params.min_dpis);
+ return -EINVAL;
+ }
+
+ p_hwfn->dpi_start_offset = norm_regsize;
+
+ /* DEMS size is configured log2 of DWORDs, hence the division by 4 */
+ pf_dems_shift = ilog2(QED_PF_DEMS_SIZE / 4);
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_ICID_BIT_SHIFT_NORM, pf_dems_shift);
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_MIN_ADDR_REG1, min_addr_reg1);
+
+ return 0;
+}
+
static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, int hw_mode)
{
@@ -860,6 +994,10 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
/* Pure runtime initializations - directly to the HW */
qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true);
+ rc = qed_hw_init_pf_doorbell_bar(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+
if (b_hw_start) {
/* enable interrupts */
qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
@@ -1284,6 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
u32 *feat_num = p_hwfn->hw_info.feat_num;
int num_features = 1;
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+ /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the
+ * status blocks equally between L2 / RoCE but with consideration as
+ * to how many l2 queues / cnqs we have
+ */
+ if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+ num_features++;
+
+ feat_num[QED_RDMA_CNQ] =
+ min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features,
+ RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM));
+ }
+#endif
feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) /
num_features,
RESC_NUM(p_hwfn, QED_L2_QUEUE));
@@ -1325,6 +1476,9 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
num_funcs;
resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+ resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
+ resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB /
+ num_funcs;
for (i = 0; i < QED_MAX_RESC; i++)
resc_start[i] = resc_num[i] * enabled_func_idx;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 48cdf62c025b..4ee3151e80c2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -33,6 +33,11 @@
#include "qed_hw.h"
#include "qed_selftest.h"
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+#define QED_ROCE_QPS (8192)
+#define QED_ROCE_DPIS (8)
+#endif
+
static char version[] =
"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -206,8 +211,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
dev_info->pci_mem_start = cdev->pci_params.mem_start;
dev_info->pci_mem_end = cdev->pci_params.mem_end;
dev_info->pci_irq = cdev->pci_params.irq;
- dev_info->rdma_supported =
- (cdev->hwfns[0].hw_info.personality == QED_PCI_ETH_ROCE);
+ dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
+ QED_PCI_ETH_ROCE);
dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
@@ -677,6 +682,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
enum qed_int_mode int_mode)
{
struct qed_sb_cnt_info sb_cnt_info;
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+ int num_l2_queues;
+#endif
int rc;
int i;
@@ -707,6 +715,31 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors -
cdev->num_hwfns;
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+ num_l2_queues = 0;
+ for_each_hwfn(cdev, i)
+ num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE);
+
+ DP_VERBOSE(cdev, QED_MSG_RDMA,
+ "cdev->int_params.fp_msix_cnt=%d num_l2_queues=%d\n",
+ cdev->int_params.fp_msix_cnt, num_l2_queues);
+
+ if (cdev->int_params.fp_msix_cnt > num_l2_queues) {
+ cdev->int_params.rdma_msix_cnt =
+ (cdev->int_params.fp_msix_cnt - num_l2_queues)
+ / cdev->num_hwfns;
+ cdev->int_params.rdma_msix_base =
+ cdev->int_params.fp_msix_base + num_l2_queues;
+ cdev->int_params.fp_msix_cnt = num_l2_queues;
+ } else {
+ cdev->int_params.rdma_msix_cnt = 0;
+ }
+
+ DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n",
+ cdev->int_params.rdma_msix_cnt,
+ cdev->int_params.rdma_msix_base);
+#endif
+
return 0;
}
@@ -810,6 +843,13 @@ static void qed_update_pf_params(struct qed_dev *cdev,
{
int i;
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+ params->rdma_pf_params.num_qps = QED_ROCE_QPS;
+ params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
+ /* divide by 3 the MRs to avoid MF ILT overflow */
+ params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
+ params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
+#endif
for (i = 0; i < cdev->num_hwfns; i++) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index e75738d21783..b414a0542177 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -1448,5 +1448,11 @@
0x620000UL
#define PHY_PCIE_REG_PHY1 \
0x624000UL
-
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
+#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL
+#define DORQ_REG_PF_DPM_ENABLE 0x100510UL
+#define DORQ_REG_PF_ICID_BIT_SHIFT_NORM 0x100448UL
+#define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL
+#define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
new file mode 100644
index 000000000000..4c53b857cc1c
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -0,0 +1,886 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2016 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/io.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tcp.h>
+#include <linux/bitops.h>
+#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_roce_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_init_ops.h"
+#include "qed_int.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+#include "qed_roce.h"
+
+void qed_async_roce_event(struct qed_hwfn *p_hwfn,
+ struct event_ring_entry *p_eqe)
+{
+ struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+
+ p_rdma_info->events.affiliated_event(p_rdma_info->events.context,
+ p_eqe->opcode, &p_eqe->data);
+}
+
+static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn,
+ struct qed_bmap *bmap, u32 max_count)
+{
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count);
+
+ bmap->max_count = max_count;
+
+ bmap->bitmap = kzalloc(BITS_TO_LONGS(max_count) * sizeof(long),
+ GFP_KERNEL);
+ if (!bmap->bitmap) {
+ DP_NOTICE(p_hwfn,
+ "qed bmap alloc failed: cannot allocate memory (bitmap)\n");
+ return -ENOMEM;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n",
+ bmap->bitmap);
+ return 0;
+}
+
+static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn,
+ struct qed_bmap *bmap, u32 *id_num)
+{
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap);
+
+ *id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count);
+
+ if (*id_num >= bmap->max_count) {
+ DP_NOTICE(p_hwfn, "no id available max_count=%d\n",
+ bmap->max_count);
+ return -EINVAL;
+ }
+
+ __set_bit(*id_num, bmap->bitmap);
+
+ return 0;
+}
+
+static void qed_bmap_release_id(struct qed_hwfn *p_hwfn,
+ struct qed_bmap *bmap, u32 id_num)
+{
+ bool b_acquired;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num);
+ if (id_num >= bmap->max_count)
+ return;
+
+ b_acquired = test_and_clear_bit(id_num, bmap->bitmap);
+ if (!b_acquired) {
+ DP_NOTICE(p_hwfn, "ID %d already released\n", id_num);
+ return;
+ }
+}
+
+u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
+{
+ /* First sb id for RoCE is after all the l2 sb */
+ return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id;
+}
+
+u32 qed_rdma_query_cau_timer_res(void *rdma_cxt)
+{
+ return QED_CAU_DEF_RX_TIMER_RES;
+}
+
+static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_rdma_start_in_params *params)
+{
+ struct qed_rdma_info *p_rdma_info;
+ u32 num_cons, num_tasks;
+ int rc = -ENOMEM;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n");
+
+ /* Allocate a struct with current pf rdma info */
+ p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL);
+ if (!p_rdma_info) {
+ DP_NOTICE(p_hwfn,
+ "qed rdma alloc failed: cannot allocate memory (rdma info). rc = %d\n",
+ rc);
+ return rc;
+ }
+
+ p_hwfn->p_rdma_info = p_rdma_info;
+ p_rdma_info->proto = PROTOCOLID_ROCE;
+
+ num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0);
+
+ p_rdma_info->num_qps = num_cons / 2;
+
+ num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
+
+ /* Each MR uses a single task */
+ p_rdma_info->num_mrs = num_tasks;
+
+ /* Queue zone lines are shared between RoCE and L2 in such a way that
+ * they can be used by each without obstructing the other.
+ */
+ p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE);
+
+ /* Allocate a struct with device params and fill it */
+ p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL);
+ if (!p_rdma_info->dev) {
+ DP_NOTICE(p_hwfn,
+ "qed rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n",
+ rc);
+ goto free_rdma_info;
+ }
+
+ /* Allocate a struct with port params and fill it */
+ p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL);
+ if (!p_rdma_info->port) {
+ DP_NOTICE(p_hwfn,
+ "qed rdma alloc failed: cannot allocate memory (rdma info port). rc = %d\n",
+ rc);
+ goto free_rdma_dev;
+ }
+
+ /* Allocate bit map for pd's */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate pd_map, rc = %d\n",
+ rc);
+ goto free_rdma_port;
+ }
+
+ /* Allocate DPI bitmap */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map,
+ p_hwfn->dpi_count);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate DPI bitmap, rc = %d\n", rc);
+ goto free_pd_map;
+ }
+
+ /* Allocate bitmap for cq's. The maximum number of CQs is bounded to
+ * twice the number of QPs.
+ */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map,
+ p_rdma_info->num_qps * 2);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate cq bitmap, rc = %d\n", rc);
+ goto free_dpi_map;
+ }
+
+ /* Allocate bitmap for toggle bit for cq icids
+ * We toggle the bit every time we create or resize cq for a given icid.
+ * The maximum number of CQs is bounded to twice the number of QPs.
+ */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits,
+ p_rdma_info->num_qps * 2);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate toogle bits, rc = %d\n", rc);
+ goto free_cq_map;
+ }
+
+ /* Allocate bitmap for itids */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map,
+ p_rdma_info->num_mrs);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate itids bitmaps, rc = %d\n", rc);
+ goto free_toggle_map;
+ }
+
+ /* Allocate bitmap for cids used for qps. */
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to allocate cid bitmap, rc = %d\n", rc);
+ goto free_tid_map;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
+ return 0;
+
+free_tid_map:
+ kfree(p_rdma_info->tid_map.bitmap);
+free_toggle_map:
+ kfree(p_rdma_info->toggle_bits.bitmap);
+free_cq_map:
+ kfree(p_rdma_info->cq_map.bitmap);
+free_dpi_map:
+ kfree(p_rdma_info->dpi_map.bitmap);
+free_pd_map:
+ kfree(p_rdma_info->pd_map.bitmap);
+free_rdma_port:
+ kfree(p_rdma_info->port);
+free_rdma_dev:
+ kfree(p_rdma_info->dev);
+free_rdma_info:
+ kfree(p_rdma_info);
+
+ return rc;
+}
+
+void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
+{
+ struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+
+ kfree(p_rdma_info->cid_map.bitmap);
+ kfree(p_rdma_info->tid_map.bitmap);
+ kfree(p_rdma_info->toggle_bits.bitmap);
+ kfree(p_rdma_info->cq_map.bitmap);
+ kfree(p_rdma_info->dpi_map.bitmap);
+ kfree(p_rdma_info->pd_map.bitmap);
+
+ kfree(p_rdma_info->port);
+ kfree(p_rdma_info->dev);
+
+ kfree(p_rdma_info);
+}
+
+static void qed_rdma_free(struct qed_hwfn *p_hwfn)
+{
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
+
+ qed_rdma_resc_free(p_hwfn);
+}
+
+static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid)
+{
+ guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2;
+ guid[1] = p_hwfn->hw_info.hw_mac_addr[1];
+ guid[2] = p_hwfn->hw_info.hw_mac_addr[2];
+ guid[3] = 0xff;
+ guid[4] = 0xfe;
+ guid[5] = p_hwfn->hw_info.hw_mac_addr[3];
+ guid[6] = p_hwfn->hw_info.hw_mac_addr[4];
+ guid[7] = p_hwfn->hw_info.hw_mac_addr[5];
+}
+
+static void qed_rdma_init_events(struct qed_hwfn *p_hwfn,
+ struct qed_rdma_start_in_params *params)
+{
+ struct qed_rdma_events *events;
+
+ events = &p_hwfn->p_rdma_info->events;
+
+ events->unaffiliated_event = params->events->unaffiliated_event;
+ events->affiliated_event = params->events->affiliated_event;
+ events->context = params->events->context;
+}
+
+static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
+ struct qed_rdma_start_in_params *params)
+{
+ struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u32 pci_status_control;
+ u32 num_qps;
+
+ /* Vendor specific information */
+ dev->vendor_id = cdev->vendor_id;
+ dev->vendor_part_id = cdev->device_id;
+ dev->hw_ver = 0;
+ dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) |
+ (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION);
+
+ qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid);
+ dev->node_guid = dev->sys_image_guid;
+
+ dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE,
+ RDMA_MAX_SGE_PER_RQ_WQE);
+
+ if (cdev->rdma_max_sge)
+ dev->max_sge = min_t(u32, cdev->rdma_max_sge, dev->max_sge);
+
+ dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE;
+
+ dev->max_inline = (cdev->rdma_max_inline) ?
+ min_t(u32, cdev->rdma_max_inline, dev->max_inline) :
+ dev->max_inline;
+
+ dev->max_wqe = QED_RDMA_MAX_WQE;
+ dev->max_cnq = (u8)FEAT_NUM(p_hwfn, QED_RDMA_CNQ);
+
+ /* The number of QPs may be higher than QED_ROCE_MAX_QPS, because
+ * it is up-aligned to 16 and then to ILT page size within qed cxt.
+ * This is OK in terms of ILT but we don't want to configure the FW
+ * above its abilities
+ */
+ num_qps = ROCE_MAX_QPS;
+ num_qps = min_t(u64, num_qps, p_hwfn->p_rdma_info->num_qps);
+ dev->max_qp = num_qps;
+
+ /* CQs uses the same icids that QPs use hence they are limited by the
+ * number of icids. There are two icids per QP.
+ */
+ dev->max_cq = num_qps * 2;
+
+ /* The number of mrs is smaller by 1 since the first is reserved */
+ dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1;
+ dev->max_mr_size = QED_RDMA_MAX_MR_SIZE;
+
+ /* The maximum CQE capacity per CQ supported.
+ * max number of cqes will be in two layer pbl,
+ * 8 is the pointer size in bytes
+ * 32 is the size of cq element in bytes
+ */
+ if (params->cq_mode == QED_RDMA_CQ_MODE_32_BITS)
+ dev->max_cqe = QED_RDMA_MAX_CQE_32_BIT;
+ else
+ dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT;
+
+ dev->max_mw = 0;
+ dev->max_fmr = QED_RDMA_MAX_FMR;
+ dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8);
+ dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE;
+ dev->max_pkey = QED_RDMA_MAX_P_KEY;
+
+ dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE /
+ (RDMA_RESP_RD_ATOMIC_ELM_SIZE * 2);
+ dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE /
+ RDMA_REQ_RD_ATOMIC_ELM_SIZE;
+ dev->max_dev_resp_rd_atomic_resc = dev->max_qp_resp_rd_atomic_resc *
+ p_hwfn->p_rdma_info->num_qps;
+ dev->page_size_caps = QED_RDMA_PAGE_SIZE_CAPS;
+ dev->dev_ack_delay = QED_RDMA_ACK_DELAY;
+ dev->max_pd = RDMA_MAX_PDS;
+ dev->max_ah = p_hwfn->p_rdma_info->num_qps;
+ dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, QED_RDMA_STATS_QUEUE);
+
+ /* Set capablities */
+ dev->dev_caps = 0;
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RNR_NAK, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RESIZE_CQ, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ZBVA, 1);
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
+
+ /* Check atomic operations support in PCI configuration space. */
+ pci_read_config_dword(cdev->pdev,
+ cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
+ &pci_status_control);
+
+ if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
+ SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+}
+
+static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
+{
+ struct qed_rdma_port *port = p_hwfn->p_rdma_info->port;
+ struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+ port->port_state = p_hwfn->mcp_info->link_output.link_up ?
+ QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+
+ port->max_msg_size = min_t(u64,
+ (dev->max_mr_mw_fmr_size *
+ p_hwfn->cdev->rdma_max_sge),
+ BIT(31));
+
+ port->pkey_bad_counter = 0;
+}
+
+static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 ll2_ethertype_en;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
+ p_hwfn->b_rdma_enabled_in_prs = false;
+
+ qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+ p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+ /* We delay writing to this reg until first cid is allocated. See
+ * qed_cxt_dynamic_ilt_alloc function for more details
+ */
+ ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+ qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+ (ll2_ethertype_en | 0x01));
+
+ if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+ DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+ return -EINVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+ return 0;
+}
+
+static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
+ struct qed_rdma_start_in_params *params,
+ struct qed_ptt *p_ptt)
+{
+ struct rdma_init_func_ramrod_data *p_ramrod;
+ struct qed_rdma_cnq_params *p_cnq_pbl_list;
+ struct rdma_init_func_hdr *p_params_header;
+ struct rdma_cnq_params *p_cnq_params;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ u32 cnq_id, sb_id;
+ int rc;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Starting FW\n");
+
+ /* Save the number of cnqs for the function close ramrod */