summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-29 11:12:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-29 11:12:28 -0700
commit092e335082f22880207384ad736729c67d784665 (patch)
tree6f737fedfa5ad5d53f622cc7fc531683cba1a0f8
parent0ccff074d6aa45835ccb7c0e4a995a32e4c90b5a (diff)
parent37826f0a8c2f6b6add5179003b8597e32a445362 (diff)
downloadlinux-092e335082f22880207384ad736729c67d784665.tar.gz
linux-092e335082f22880207384ad736729c67d784665.tar.bz2
linux-092e335082f22880207384ad736729c67d784665.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: - Usual minor updates and fixes for bnxt_re, hfi1, rxe, mana, iser, mlx5, vmw_pvrdma, hns - Make rxe work on tun devices - mana gains more standard verbs as it moves toward supporting in-kernel verbs - DMABUF support for mana - Fix page size calculations when memory registration exceeds 4G - On Demand Paging support for rxe - mlx5 support for RDMA TRANSPORT flow tables and a new ucap mechanism to access control use of them - Optional RDMA_TX/RX counters per QP in mlx5 * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (73 commits) IB/mad: Check available slots before posting receive WRs RDMA/mana_ib: Fix integer overflow during queue creation RDMA/mlx5: Fix calculation of total invalidated pages RDMA/mlx5: Fix mlx5_poll_one() cur_qp update flow RDMA/mlx5: Fix page_size variable overflow RDMA/mlx5: Drop access_flags from _mlx5_mr_cache_alloc() RDMA/mlx5: Fix cache entry update on dereg error RDMA/mlx5: Fix MR cache initialization error flow RDMA/mlx5: Support optional-counters binding for QPs RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config RDMA/core: Pass port to counter bind/unbind operations RDMA/core: Add support to optional-counters binding configuration RDMA/core: Create and destroy rdma_counter using rdma_zalloc_drv_obj() RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes RDMA/core: Fix use-after-free when rename device name RDMA/bnxt_re: Support perf management counters RDMA/rxe: Fix incorrect return value of rxe_odp_atomic_op() RDMA/uverbs: Propagate errors from rdma_lookup_get_uobject() RDMA/mana_ib: Handle net event for pointing to the current netdev net: mana: Change the function signature of mana_get_primary_netdev_rcu ...
-rw-r--r--Documentation/infiniband/index.rst1
-rw-r--r--Documentation/infiniband/ucaps.rst71
-rw-r--r--drivers/infiniband/core/Makefile3
-rw-r--r--drivers/infiniband/core/cache.c6
-rw-r--r--drivers/infiniband/core/cma.c24
-rw-r--r--drivers/infiniband/core/counters.c52
-rw-r--r--drivers/infiniband/core/device.c20
-rw-r--r--drivers/infiniband/core/iwcm.c4
-rw-r--r--drivers/infiniband/core/mad.c38
-rw-r--r--drivers/infiniband/core/nldev.c18
-rw-r--r--drivers/infiniband/core/sysfs.c15
-rw-r--r--drivers/infiniband/core/ucaps.c267
-rw-r--r--drivers/infiniband/core/ucma.c4
-rw-r--r--drivers/infiniband/core/umem.c36
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c163
-rw-r--r--drivers/infiniband/core/uverbs_main.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c4
-rw-r--r--drivers/infiniband/core/verbs.c13
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h6
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.c215
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.h15
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c92
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c36
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h6
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c1
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.c1
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c18
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h1
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c2
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c4
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c20
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c2
-rw-r--r--drivers/infiniband/hw/irdma/Kconfig1
-rw-r--r--drivers/infiniband/hw/irdma/main.h1
-rw-r--r--drivers/infiniband/hw/irdma/osdep.h6
-rw-r--r--drivers/infiniband/hw/irdma/puda.c19
-rw-r--r--drivers/infiniband/hw/irdma/puda.h5
-rw-r--r--drivers/infiniband/hw/irdma/utils.c47
-rw-r--r--drivers/infiniband/hw/mana/Makefile2
-rw-r--r--drivers/infiniband/hw/mana/ah.c58
-rw-r--r--drivers/infiniband/hw/mana/counters.c105
-rw-r--r--drivers/infiniband/hw/mana/counters.h44
-rw-r--r--drivers/infiniband/hw/mana/cq.c228
-rw-r--r--drivers/infiniband/hw/mana/device.c82
-rw-r--r--drivers/infiniband/hw/mana/main.c103
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h210
-rw-r--r--drivers/infiniband/hw/mana/mr.c105
-rw-r--r--drivers/infiniband/hw/mana/qp.c245
-rw-r--r--drivers/infiniband/hw/mana/shadow_queue.h115
-rw-r--r--drivers/infiniband/hw/mana/wr.c168
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c195
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h15
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c41
-rw-r--r--drivers/infiniband/hw/mlx5/devx.h5
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c637
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h17
-rw-r--r--drivers/infiniband/hw/mlx5/main.c77
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h23
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c52
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c10
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c28
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h2
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig3
-rw-r--r--drivers/infiniband/sw/rxe/Makefile2
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c36
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h38
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c40
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_odp.c326
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c24
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h42
-rw-r--r--drivers/infiniband/sw/siw/Kconfig4
-rw-r--r--drivers/infiniband/sw/siw/siw.h37
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c22
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c54
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c23
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c44
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c3
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c8
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c7
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c22
-rw-r--r--include/linux/mlx5/device.h4
-rw-r--r--include/net/mana/gdma.h7
-rw-r--r--include/net/mana/mana.h4
-rw-r--r--include/rdma/ib_ucaps.h30
-rw-r--r--include/rdma/ib_verbs.h30
-rw-r--r--include/rdma/rdma_counter.h7
-rw-r--r--include/rdma/uverbs_std_types.h2
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h1
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h1
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h2
-rw-r--r--include/uapi/rdma/rdma_netlink.h2
100 files changed, 4052 insertions, 656 deletions
diff --git a/Documentation/infiniband/index.rst b/Documentation/infiniband/index.rst
index 9cd7615438b9..5b4c24125f66 100644
--- a/Documentation/infiniband/index.rst
+++ b/Documentation/infiniband/index.rst
@@ -12,6 +12,7 @@ InfiniBand
opa_vnic
sysfs
tag_matching
+ ucaps
user_mad
user_verbs
diff --git a/Documentation/infiniband/ucaps.rst b/Documentation/infiniband/ucaps.rst
new file mode 100644
index 000000000000..b8b6927742f4
--- /dev/null
+++ b/Documentation/infiniband/ucaps.rst
@@ -0,0 +1,71 @@
+=================================
+InfiniBand Userspace Capabilities
+=================================
+
+ User CAPabilities (UCAPs) provide fine-grained control over specific
+ firmware features in InfiniBand (IB) devices. This approach offers
+ more granular capabilities than the existing Linux capabilities,
+ which may be too generic for certain FW features.
+
+ Each user capability is represented as a character device with root
+ read-write access. Root processes can grant users special privileges
+ by allowing access to these character devices (e.g., using chown).
+
+Usage
+=====
+
+ UCAPs allow control over specific features of an IB device using file
+ descriptors of UCAP character devices. Here is how a user enables
+ specific features of an IB device:
+
+ * A root process grants the user access to the UCAP files that
+ represent the capabilities (e.g., using chown).
+ * The user opens the UCAP files, obtaining file descriptors.
+ * When opening an IB device, include an array of the UCAP file
+ descriptors as an attribute.
+ * The ib_uverbs driver recognizes the UCAP file descriptors and enables
+ the corresponding capabilities for the IB device.
+
+Creating UCAPs
+==============
+
+ To create a new UCAP, drivers must first define a type in the
+ rdma_user_cap enum in rdma/ib_ucaps.h. The name of the UCAP character
+ device should be added to the ucap_names array in
+ drivers/infiniband/core/ucaps.c. Then, the driver can create the UCAP
+ character device by calling the ib_create_ucap API with the UCAP
+ type.
+
+ A reference count is stored for each UCAP to track creations and
+ removals of the UCAP device. If multiple creation calls are made with
+ the same type (e.g., for two IB devices), the UCAP character device
+ is created during the first call and subsequent calls increment the
+ reference count.
+
+ The UCAP character device is created under /dev/infiniband, and its
+ permissions are set to allow root read and write access only.
+
+Removing UCAPs
+==============
+
+ Each removal decrements the reference count of the UCAP. The UCAP
+ character device is removed from the filesystem only when the
+ reference count is decreased to 0.
+
+/dev and /sys/class files
+=========================
+
+ The class::
+
+ /sys/class/infiniband_ucaps
+
+ is created when the first UCAP character device is created.
+
+ The UCAP character device is created under /dev/infiniband.
+
+ For example, if mlx5_ib adds the rdma_user_cap
+ RDMA_UCAP_MLX5_CTRL_LOCAL with name "mlx5_perm_ctrl_local", this will
+ create the device node::
+
+ /dev/infiniband/mlx5_perm_ctrl_local
+
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 8ab4eea5a0a5..d49ded7e95f0 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -39,6 +39,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_async_fd.o \
uverbs_std_types_srq.o \
uverbs_std_types_wq.o \
- uverbs_std_types_qp.o
+ uverbs_std_types_qp.o \
+ ucaps.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index f8413f8a9f26..9979a351577f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1501,6 +1501,12 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
device->port_data[port].cache.pkey = pkey_cache;
}
device->port_data[port].cache.lmc = tprops->lmc;
+
+ if (device->port_data[port].cache.port_state != IB_PORT_NOP &&
+ device->port_data[port].cache.port_state != tprops->state)
+ ibdev_info(device, "Port: %d Link %s\n", port,
+ ib_port_state_to_str(tprops->state));
+
device->port_data[port].cache.port_state = tprops->state;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 91db10515d74..fedcdb56fb6b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -739,12 +739,26 @@ cma_validate_port(struct ib_device *device, u32 port,
goto out;
}
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(dev_addr->net, bound_if_index);
- if (!ndev)
- goto out;
+ /*
+ * An RXE device should work with both TUN devices and normal Ethernet
+ * devices. Use driver_id to check whether a device is an RXE device.
+ * ARPHRD_NONE means a TUN device.
+ */
+ if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+ if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+ && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ }
} else {
- gid_type = IB_GID_TYPE_IB;
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
}
sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index af59486fe418..e6ec7b7a40af 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -12,7 +12,8 @@
static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
- enum rdma_nl_counter_mask new_mask)
+ enum rdma_nl_counter_mask new_mask,
+ bool bind_opcnt)
{
if (new_mode == RDMA_COUNTER_MODE_AUTO) {
if (new_mask & (~ALL_AUTO_MODE_MASKS))
@@ -23,6 +24,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
port_counter->mode.mode = new_mode;
port_counter->mode.mask = new_mask;
+ port_counter->mode.bind_opcnt = bind_opcnt;
return 0;
}
@@ -41,6 +43,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
*/
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mask mask,
+ bool bind_opcnt,
struct netlink_ext_ack *extack)
{
struct rdma_port_counter *port_counter;
@@ -59,12 +62,13 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
RDMA_COUNTER_MODE_NONE;
if (port_counter->mode.mode == mode &&
- port_counter->mode.mask == mask) {
+ port_counter->mode.mask == mask &&
+ port_counter->mode.bind_opcnt == bind_opcnt) {
ret = 0;
goto out;
}
- ret = __counter_set_mode(port_counter, mode, mask);
+ ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
out:
mutex_unlock(&port_counter->lock);
@@ -89,7 +93,7 @@ static void auto_mode_init_counter(struct rdma_counter *counter,
}
static int __rdma_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
+ struct ib_qp *qp, u32 port)
{
int ret;
@@ -100,7 +104,7 @@ static int __rdma_counter_bind_qp(struct r