// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include "cmd.h"
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
int err;
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
/*
* In case PRE_COPY is used, saving_migf is exposed while the device is
* running. Make sure to run only once there is no active save command.
* Running both in parallel, might end-up with a failure in the save
* command once it will try to turn on 'tracking' on a suspended device.
*/
if (migf) {
err = wait_for_completion_interruptible(&migf->save_comp);
if (err)
return err;
}
MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
if (migf)
complete(&migf->save_comp);
return err;
}
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u8 query_flags)
{
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
int ret;
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(query_vhca_migration_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
MLX5_SET(query_vhca_migration_state_in, in, incremental,
query_flags & MLX5VF_QUERY_INC);
ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
out);
if (ret)
return ret;
*state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size);
return 0;
}
static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
/* Mark the tracker under an error and wake it up if it's running */
mvdev->tracker.is_err = true;
complete(&mvdev->tracker_comp);
}
static int mlx5fv_vf_event(struct notifier_block *nb,
unsigned long event, void *data)
{
struct mlx5vf_pci_core_device *mvdev =
container_of(nb, struct mlx5vf_pci_core_device, nb);
switch (event) {
case MLX5_PF_NOTIFY_ENABLE_VF:
mutex_lock(&mvdev->state_mutex);
mvdev->mdev_detach = false;
mlx5vf_state_mutex_unlock(mvdev);
break;
case MLX5_PF_NOTIFY_DISABLE_VF:
mlx5vf_cmd_close_migratable(mvdev);
mutex_lock(&mvdev->state_mutex);
mvdev->mdev_detach = true;
mlx5vf_state_mutex_unlock(mvdev);
break;
default:
break;
}
return 0;
}
void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
if (!mvdev->migrate_cap)
return;
/* Must be done outside the lock to let it progress */
set_tracker_error(mvdev);
mutex_lock(&mvdev->state_mutex);
mlx5vf_disable_fds(mvdev);
_mlx5vf_free_page_tracker_resources(mvdev);
mlx5vf_state_mutex_unlock(mvdev);
}
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
if (!mvdev->migrate_cap)
return;
mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
&mvdev->nb);
destroy_workqueue(mvdev->cb_wq);
}
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
const struct vfio_migration_ops *mig_ops,
const struct vfio_log_ops *log_ops)
{
struct pci_dev *pdev = mvdev->core_device.pdev;
int ret;
if (!pdev->is_virtfn)
return;
mvdev->mdev = mlx5_vf_get_core_dev(pdev);
if (!mvdev->mdev)
return;
if (!MLX5_CAP_GEN(mvdev->mdev, migration))
goto end;
mvdev->vf_id = pci_iov_vf_id(pdev);
if (mvdev->vf_id < 0)
goto end;
if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
&mvdev->vhca_id))
goto end;
mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
if (!mvdev->cb_wq)
goto end;
mutex_init(&mvdev->state_mutex);
spin_lock_init(&mvdev->reset_lock);
mvdev->nb.notifier_call = mlx5fv_vf_event;
ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
&mvdev->nb);
if (ret) {
destroy_workqueue(mvdev->cb_wq);
goto end;
}
mvdev->migrate_cap = 1;
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
VFIO_MIGRATION_P2P;
mvdev->core_device.vdev.mig_ops = mig_ops;
init_completion(&mvdev->tracker_comp);
if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
mvdev->core_device.vdev.log_ops = log_ops;
end:
mlx5_vf_put_core_dev(mvdev->mdev);
}
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id)
{
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
int out_size;
void *out;
int ret;
out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
out = kzalloc(out_size, GFP_KERNEL);
if (!out)
return -ENOMEM;
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, 1);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, op_mod,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
HCA_CAP_OPMOD_GET_CUR);
ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
if (ret)
goto err_exec;
*vhca_id = MLX5_GET(query_hca_cap_out, out,
capability.cmd_hca_cap.vhca_id);
err_exec:
kfree(out);
return ret;
}
static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
struct mlx5_vhca_data_buffer *buf,
struct mlx5_vhca_recv_buf *recv_buf,
u32 *mkey)
{
size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
recv_buf->npages;
int err = 0, inlen;
__be64 *mtt;
void *mkc;
u32 *in;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
sizeof(*mtt) * round_up(npages, 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(npages, 2));
mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
if (buf) {
struct sg_dma_page_iter dma_iter;
for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
} else {
int i;
for (i = 0; i < npages; i++)
*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
}
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, rr, 1);
MLX5_SET(mkc, mkc, rw, 1);
MLX5_SET(mkc, mkc,
|