From de8f847a5114ff7cfcdfc114af8485c431dec703 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:16 +0300 Subject: RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. 
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 5b74d6534899..106276a4cce7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -274,6 +274,10 @@ enum mlx5_ib_create_cq_attrs { MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW, }; +enum mlx5_ib_reg_dmabuf_mr_attrs { + MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT), +}; + #define MLX5_IB_DW_MATCH_PARAM 0xA0 struct mlx5_ib_match_params { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 3189c7f08d17..7c233df475e7 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -54,6 +54,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; +enum mlx5_ib_uapi_reg_dmabuf_flags { + MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0, +}; + struct mlx5_ib_uapi_devx_async_cmd_hdr { __aligned_u64 wr_id; __u8 out_data[]; -- cgit v1.2.3 From ec7ad6530909983c8736c80af46e3529ce7bab55 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2024 15:05:17 +0300 Subject: RDMA/mlx5: Introduce GET_DATA_DIRECT_SYSFS_PATH ioctl Introduce the 'GET_DATA_DIRECT_SYSFS_PATH' ioctl to return the sysfs path of the affiliated 'data direct' device for a given device. 
Signed-off-by: Yishai Hadas Link: https://patch.msgid.link/403745463e0ef52adbef681ff09aa6a29a756352.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 106276a4cce7..fd2e4a3a56b3 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -348,6 +348,7 @@ enum mlx5_ib_pd_methods { enum mlx5_ib_device_methods { MLX5_IB_METHOD_QUERY_PORT = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH, }; enum mlx5_ib_query_port_attrs { @@ -355,4 +356,8 @@ enum mlx5_ib_query_port_attrs { MLX5_IB_ATTR_QUERY_PORT, }; +enum mlx5_ib_get_data_direct_sysfs_path_attrs { + MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif -- cgit v1.2.3 From d8ea645d6984c84a87032063a0941f15a323831f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:26 -0700 Subject: RDMA/bnxt_re: Handle variable WQE support for user applications User library calculates the number of slots required for user applications and it can pass that information to the driver. Driver can use this value and update the HW directly. This mechanism is currently used only for the newly introduced variable size WQEs. Extend the bnxt_re_qp_req structure to pass the Send Queue slot count. Reorganize the code to get the sq_slots before initializing the Send Queue attributes. 
Link: https://patch.msgid.link/r/1724042847-1481-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index e61104f35d73..71140618700a 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -118,10 +118,16 @@ struct bnxt_re_resize_cq_req { __aligned_u64 cq_va; }; +enum bnxt_re_qp_mask { + BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1, +}; + struct bnxt_re_qp_req { __aligned_u64 qpsva; __aligned_u64 qprva; __aligned_u64 qp_handle; + __aligned_u64 comp_mask; + __u32 sq_slots; }; struct bnxt_re_qp_resp { -- cgit v1.2.3 From 10a104c0debbb19a1e45193d5670510216e339ff Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:27 -0700 Subject: RDMA/bnxt_re: Enable variable size WQEs for user space applications Add backward compatibility code to enable variable size WQEs only if the user lib supports it. 
Link: https://patch.msgid.link/r/1724042847-1481-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/bnxt_re-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 71140618700a..6821002931c8 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -66,6 +66,7 @@ enum bnxt_re_wqe_mode { enum { BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01, + BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT = 0x02, }; struct bnxt_re_uctx_req { -- cgit v1.2.3 From 181028a0d84cdcc7ac86d05cc49eaa416ce85c8b Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Thu, 29 Aug 2024 08:34:05 -0700 Subject: RDMA/bnxt_re: Share a page to expose per SRQ info with userspace Gen P7 adapters need to share the toggle-bit information received in the kernel driver with user space. User space needs this info to arm the SRQ. A user space application can get this page using the UAPI routines. The library will mmap this page and get the toggle bits to be used in the next ARM Doorbell. A hash list is used to look up the SRQ structure from the SRQ ID. The SRQ structure is retrieved from the hash list when the library calls the UAPI routine to get the toggle page mapping. Currently the full page is mapped per SRQ. 
This can be optimized to enable multiple SRQs from the same application to share the same page at different offsets in the page. Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1724945645-14989-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/bnxt_re-abi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index 6821002931c8..faa9d62b3b30 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -141,8 +141,14 @@ struct bnxt_re_srq_req { __aligned_u64 srq_handle; }; +enum bnxt_re_srq_mask { + BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT = 0x1, +}; + struct bnxt_re_srq_resp { __u32 srqid; + __u32 rsvd; /* padding */ + __aligned_u64 comp_mask; }; enum bnxt_re_shpg_offt { -- cgit v1.2.3 From 9cbed5aab5aeea420d0aa945733bf608449d44fb Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:24 +0300 Subject: RDMA/nldev: Add support for RDMA monitoring Introduce a new netlink command to allow rdma event monitoring. The rdma events supported now are IB device registration/unregistration and net device attachment/detachment. 
Example output of rdma monitor and the commands which trigger the events: $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_0 [NETDEV_ATTACH] dev 6 mlx5_0 port 1 netdev 12 eth9 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs [UNREGISTER] dev 5 rocep8s0f0v0 [UNREGISTER] dev 6 rocep8s0f0v1 [UNREGISTER] dev 7 rocep8s0f0v2 [UNREGISTER] dev 8 rocep8s0f0v3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 2 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 3 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 4 [NETDEV_DETACH] dev 4 rdmap8s0f0 port 5 Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-7-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2f37568f5556..5f9636d26050 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -15,6 +15,7 @@ enum { enum { RDMA_NL_GROUP_IWPM = 2, RDMA_NL_GROUP_LS, + RDMA_NL_GROUP_NOTIFY, 
RDMA_NL_NUM_GROUPS }; @@ -305,6 +306,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_DELDEV, + RDMA_NLDEV_CMD_MONITOR, + RDMA_NLDEV_NUM_OPS }; @@ -574,6 +577,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + /* * Always the end */ @@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type { RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */ }; +/* + * Supported rdma monitoring event types. + */ +enum rdma_nl_notify_event_type { + RDMA_REGISTER_EVENT, + RDMA_UNREGISTER_EVENT, + RDMA_NETDEV_ATTACH_EVENT, + RDMA_NETDEV_DETACH_EVENT, +}; + #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 12fb1153c53bf9b53e299c9775b84fa7838640f7 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Mon, 9 Sep 2024 20:30:25 +0300 Subject: RDMA/nldev: Expose whether RDMA monitoring is supported Extend the "rdma sys" command to display whether RDMA monitoring is supported. RDMA monitoring is not supported in mlx4 because it does not use the ib_device_set_netdev() API, which sends the RDMA events. Example output for kernel where monitoring is supported: $ rdma sys show netns shared privileged-qkey off monitor on copy-on-fork on Example output for kernel where monitoring is not supported: $ rdma sys show netns shared privileged-qkey off monitor off copy-on-fork on Signed-off-by: Chiara Meiohas Signed-off-by: Michael Guralnik Link: https://patch.msgid.link/20240909173025.30422-8-michaelgur@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5f9636d26050..39be09c0ffbb 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -579,6 +579,7 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ /* * Always the end */ -- cgit v1.2.3