diff options
100 files changed, 4052 insertions, 656 deletions
diff --git a/Documentation/infiniband/index.rst b/Documentation/infiniband/index.rst index 9cd7615438b9..5b4c24125f66 100644 --- a/Documentation/infiniband/index.rst +++ b/Documentation/infiniband/index.rst @@ -12,6 +12,7 @@ InfiniBand opa_vnic sysfs tag_matching + ucaps user_mad user_verbs diff --git a/Documentation/infiniband/ucaps.rst b/Documentation/infiniband/ucaps.rst new file mode 100644 index 000000000000..b8b6927742f4 --- /dev/null +++ b/Documentation/infiniband/ucaps.rst @@ -0,0 +1,71 @@ +================================= +Infiniband Userspace Capabilities +================================= + + User CAPabilities (UCAPs) provide fine-grained control over specific + firmware features in Infiniband (IB) devices. This approach offers + more granular capabilities than the existing Linux capabilities, + which may be too generic for certain FW features. + + Each user capability is represented as a character device with root + read-write access. Root processes can grant users special privileges + by allowing access to these character devices (e.g., using chown). + +Usage +===== + + UCAPs allow control over specific features of an IB device using file + descriptors of UCAP character devices. Here is how a user enables + specific features of an IB device: + + * A root process grants the user access to the UCAP files that + represents the capabilities (e.g., using chown). + * The user opens the UCAP files, obtaining file descriptors. + * When opening an IB device, include an array of the UCAP file + descriptors as an attribute. + * The ib_uverbs driver recognizes the UCAP file descriptors and enables + the corresponding capabilities for the IB device. + +Creating UCAPs +============== + + To create a new UCAP, drivers must first define a type in the + rdma_user_cap enum in rdma/ib_ucaps.h. The name of the UCAP character + device should be added to the ucap_names array in + drivers/infiniband/core/ucaps.c. Then, the driver can create the UCAP + character device by calling the ib_create_ucap API with the UCAP + type. + + A reference count is stored for each UCAP to track creations and + removals of the UCAP device. If multiple creation calls are made with + the same type (e.g., for two IB devices), the UCAP character device + is created during the first call and subsequent calls increment the + reference count. + + The UCAP character device is created under /dev/infiniband, and its + permissions are set to allow root read and write access only. + +Removing UCAPs +============== + + Each removal decrements the reference count of the UCAP. The UCAP + character device is removed from the filesystem only when the + reference count is decreased to 0. + +/dev and /sys/class files +========================= + + The class:: + + /sys/class/infiniband_ucaps + + is created when the first UCAP character device is created. + + The UCAP character device is created under /dev/infiniband. + + For example, if mlx5_ib adds the rdma_user_cap + RDMA_UCAP_MLX5_CTRL_LOCAL with name "mlx5_perm_ctrl_local", this will + create the device node:: + + /dev/infiniband/mlx5_perm_ctrl_local + diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 8ab4eea5a0a5..d49ded7e95f0 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -39,6 +39,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_async_fd.o \ uverbs_std_types_srq.o \ uverbs_std_types_wq.o \ - uverbs_std_types_qp.o + uverbs_std_types_qp.o \ + ucaps.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index f8413f8a9f26..9979a351577f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1501,6 +1501,12 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids, device->port_data[port].cache.pkey = pkey_cache; } device->port_data[port].cache.lmc = tprops->lmc; + + if (device->port_data[port].cache.port_state != IB_PORT_NOP && + device->port_data[port].cache.port_state != tprops->state) + ibdev_info(device, "Port: %d Link %s\n", port, + ib_port_state_to_str(tprops->state)); + device->port_data[port].cache.port_state = tprops->state; device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 91db10515d74..fedcdb56fb6b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -739,12 +739,26 @@ cma_validate_port(struct ib_device *device, u32 port, goto out; } - if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { - ndev = dev_get_by_index(dev_addr->net, bound_if_index); - if (!ndev) - goto out; + /* + * For a RXE device, it should work with TUN device and normal ethernet + * devices. Use driver_id to check if a device is a RXE device or not. + * ARPHDR_NONE means a TUN device. + */ + if (device->ops.driver_id == RDMA_DRIVER_RXE) { + if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER) + && rdma_protocol_roce(device, port)) { + ndev = dev_get_by_index(dev_addr->net, bound_if_index); + if (!ndev) + goto out; + } } else { - gid_type = IB_GID_TYPE_IB; + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { + ndev = dev_get_by_index(dev_addr->net, bound_if_index); + if (!ndev) + goto out; + } else { + gid_type = IB_GID_TYPE_IB; + } } sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index af59486fe418..e6ec7b7a40af 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -12,7 +12,8 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter, enum rdma_nl_counter_mode new_mode, - enum rdma_nl_counter_mask new_mask) + enum rdma_nl_counter_mask new_mask, + bool bind_opcnt) { if (new_mode == RDMA_COUNTER_MODE_AUTO) { if (new_mask & (~ALL_AUTO_MODE_MASKS)) @@ -23,6 +24,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter, port_counter->mode.mode = new_mode; port_counter->mode.mask = new_mask; + port_counter->mode.bind_opcnt = bind_opcnt; return 0; } @@ -41,6 +43,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter, */ int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mask mask, + bool bind_opcnt, struct netlink_ext_ack *extack) { |