author		Linus Torvalds <torvalds@linux-foundation.org>	2023-04-27 17:05:34 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-04-27 17:05:34 -0700
commit		8ccd54fe45713cd458015b5b08d6098545e70543 (patch)
tree		12a034b2ee42d681b2e915815c4529d6f246bcc4 /drivers/vdpa
parent		0835b5ee8704aef4e19b369237a762c52c7b6fb1 (diff)
parent		c82729e06644f4e087f5ff0f91b8fb15e03b8890 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
 "virtio,vhost,vdpa: features, fixes, and cleanups:

  - reduction in interrupt rate in virtio

  - perf improvement for VDUSE

  - scalability for vhost-scsi

  - non power of 2 ring support for packed rings

  - better management for mlx5 vdpa

  - suspend for snet

  - VIRTIO_F_NOTIFICATION_DATA

  - shared backend with vdpa-sim-blk

  - user VA support in vdpa-sim

  - better struct packing for virtio

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (52 commits)
  vhost_vdpa: fix unmap process in no-batch mode
  MAINTAINERS: make me a reviewer of VIRTIO CORE AND NET DRIVERS
  tools/virtio: fix build caused by virtio_ring changes
  virtio_ring: add a struct device forward declaration
  vdpa_sim_blk: support shared backend
  vdpa_sim: move buffer allocation in the devices
  vdpa/snet: use likely/unlikely macros in hot functions
  vdpa/snet: implement kick_vq_with_data callback
  virtio-vdpa: add VIRTIO_F_NOTIFICATION_DATA feature support
  virtio: add VIRTIO_F_NOTIFICATION_DATA feature support
  vdpa/snet: support the suspend vDPA callback
  vdpa/snet: support getting and setting VQ state
  MAINTAINERS: add vringh.h to Virtio Core and Net Drivers
  vringh: address kdoc warnings
  vdpa: address kdoc warnings
  virtio_ring: don't update event idx on get_buf
  vdpa_sim: add support for user VA
  vdpa_sim: replace the spinlock with a mutex to protect the state
  vdpa_sim: use kthread worker
  vdpa_sim: make devices agnostic for work management
  ...
Diffstat (limited to 'drivers/vdpa')
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.c     | 261
-rw-r--r--  drivers/vdpa/solidrun/Makefile        |   1
-rw-r--r--  drivers/vdpa/solidrun/snet_ctrl.c     | 330
-rw-r--r--  drivers/vdpa/solidrun/snet_hwmon.c    |   2
-rw-r--r--  drivers/vdpa/solidrun/snet_main.c     | 146
-rw-r--r--  drivers/vdpa/solidrun/snet_vdpa.h     |  20
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.c      | 168
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.h      |  14
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c  |  93
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_net.c  |  38
-rw-r--r--  drivers/vdpa/vdpa_user/vduse_dev.c    | 414
11 files changed, 1178 insertions, 309 deletions
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 195963b82b636..e29e32b306adc 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -778,12 +778,28 @@ static bool vq_is_tx(u16 idx)
return idx % 2;
}
-static u16 get_features_12_3(u64 features)
+enum {
+ MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
+ MLX5_VIRTIO_NET_F_HOST_ECN = 4,
+ MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
+ MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
+ MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
+ MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
+ MLX5_VIRTIO_NET_F_CSUM = 10,
+ MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
+ MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
+};
+
+static u16 get_features(u64 features)
{
- return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
+ return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
@@ -797,6 +813,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
void *obj_context;
+ u16 mlx_features;
void *cmd_hdr;
void *vq_ctx;
void *in;
@@ -812,6 +829,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
goto err_alloc;
}
+ mlx_features = get_features(ndev->mvdev.actual_features);
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
@@ -822,7 +840,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
- get_features_12_3(ndev->mvdev.actual_features));
+ mlx_features >> 3);
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
+ mlx_features & 7);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
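[Editor's example] The two MLX5_SET() calls above split the translated feature mask across firmware fields: bits 3..12 land in queue_feature_bit_mask_12_3 and bits 0..2 in queue_feature_bit_mask_2_0. A minimal standalone sketch of that arithmetic, using local copies of the bit positions (the VIRTIO_NET_F_* values follow the virtio spec; nothing below is the driver's actual API):

#include <stdio.h>
#include <stdint.h>

/* Local copies for illustration only */
enum { VIRTIO_NET_F_CSUM = 0, VIRTIO_NET_F_MRG_RXBUF = 15 };
enum { MLX5_F_MRG_RXBUF = 2, MLX5_F_CSUM = 10 };

int main(void)
{
	uint64_t features = (1ULL << VIRTIO_NET_F_CSUM) |
			    (1ULL << VIRTIO_NET_F_MRG_RXBUF);
	/* Same translation as get_features() above */
	uint16_t mlx = (!!(features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) << MLX5_F_MRG_RXBUF) |
		       (!!(features & (1ULL << VIRTIO_NET_F_CSUM)) << MLX5_F_CSUM);

	printf("bit_mask_12_3 = 0x%x\n", mlx >> 3); /* 0x80: CSUM (bit 10 - 3) */
	printf("bit_mask_2_0  = 0x%x\n", mlx & 7);  /* 0x4:  MRG_RXBUF (bit 2) */
	return 0;
}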
@@ -2171,23 +2191,27 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
return MLX5_VDPA_DATAVQ_GROUP;
}
-enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
- MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
- MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
- MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
-};
-
static u64 mlx_to_vritio_features(u16 dev_features)
{
u64 result = 0;
- if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
+ result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
return result;
@@ -2298,6 +2322,113 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
}
}
+static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
+
+ MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
+{
+ if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
+ VPORT_STATE_UP)
+ return true;
+
+ return false;
+}
+
+static void update_carrier(struct work_struct *work)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_vdpa_net *ndev;
+
+ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+ mvdev = wqent->mvdev;
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+ if (ndev->config_cb.callback)
+ ndev->config_cb.callback(ndev->config_cb.private);
+
+ kfree(wqent);
+}
+
+static int queue_link_work(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+
+ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+ if (!wqent)
+ return -ENOMEM;
+
+ wqent->mvdev = &ndev->mvdev;
+ INIT_WORK(&wqent->work, update_carrier);
+ queue_work(ndev->mvdev.wq, &wqent->work);
+ return 0;
+}
+
+static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
+{
+ struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
+ struct mlx5_eqe *eqe = param;
+ int ret = NOTIFY_DONE;
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ if (queue_link_work(ndev))
+ return NOTIFY_DONE;
+
+ ret = NOTIFY_OK;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return ret;
+ }
+ return ret;
+}
+
+static void register_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
+ return;
+
+ ndev->nb.notifier_call = event_handler;
+ mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
+ ndev->nb_registered = true;
+ queue_link_work(ndev);
+}
+
+static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!ndev->nb_registered)
+ return;
+
+ ndev->nb_registered = false;
+ mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
+ if (ndev->mvdev.wq)
+ flush_workqueue(ndev->mvdev.wq);
+}
+
static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2567,10 +2698,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
goto err_setup;
}
+ register_link_notifier(ndev);
err = setup_driver(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
- goto err_setup;
+ goto err_driver;
}
} else {
mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -2582,6 +2714,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock);
return;
+err_driver:
+ unregister_link_notifier(ndev);
err_setup:
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
@@ -2607,6 +2741,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
mlx5_vdpa_info(mvdev, "performing device reset\n");
down_write(&ndev->reslock);
+ unregister_link_notifier(ndev);
teardown_driver(ndev);
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
@@ -2861,9 +2996,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
mlx5_vdpa_info(mvdev, "suspending device\n");
down_write(&ndev->reslock);
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- flush_workqueue(ndev->mvdev.wq);
+ unregister_link_notifier(ndev);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
suspend_vq(ndev, mvq);
@@ -3000,84 +3133,6 @@ struct mlx5_vdpa_mgmtdev {
struct mlx5_vdpa_net *ndev;
};
-static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
- int err;
-
- MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
- MLX5_SET(query_vport_state_in, in, op_mod, opmod);
- MLX5_SET(query_vport_state_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vport_state_in, in, other_vport, 1);
-
- err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
- if (err)
- return 0;
-
- return MLX5_GET(query_vport_state_out, out, state);
-}
-
-static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
-{
- if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
- VPORT_STATE_UP)
- return true;
-
- return false;
-}
-
-static void update_carrier(struct work_struct *work)
-{
- struct mlx5_vdpa_wq_ent *wqent;
- struct mlx5_vdpa_dev *mvdev;
- struct mlx5_vdpa_net *ndev;
-
- wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
- mvdev = wqent->mvdev;
- ndev = to_mlx5_vdpa_ndev(mvdev);
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
-
- if (ndev->nb_registered && ndev->config_cb.callback)
- ndev->config_cb.callback(ndev->config_cb.private);
-
- kfree(wqent);
-}
-
-static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
-{
- struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
- struct mlx5_eqe *eqe = param;
- int ret = NOTIFY_DONE;
- struct mlx5_vdpa_wq_ent *wqent;
-
- if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
- if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
- return NOTIFY_DONE;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent)
- return NOTIFY_DONE;
-
- wqent->mvdev = &ndev->mvdev;
- INIT_WORK(&wqent->work, update_carrier);
- queue_work(ndev->mvdev.wq, &wqent->work);
- ret = NOTIFY_OK;
- break;
- default:
- return NOTIFY_DONE;
- }
- return ret;
- }
- return ret;
-}
-
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -3127,6 +3182,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -EINVAL;
}
device_features &= add_config->device_features;
+ } else {
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
}
if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
@@ -3258,9 +3315,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_res2;
}
- ndev->nb.notifier_call = event_handler;
- mlx5_notifier_register(mdev, &ndev->nb);
- ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
@@ -3294,10 +3348,7 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
mlx5_vdpa_remove_debugfs(ndev->debugfs);
ndev->debugfs = NULL;
- if (ndev->nb_registered) {
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- }
+ unregister_link_notifier(ndev);
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
diff --git a/drivers/vdpa/solidrun/Makefile b/drivers/vdpa/solidrun/Makefile
index c0aa3415bf7b5..9116252cd5fa2 100644
--- a/drivers/vdpa/solidrun/Makefile
+++ b/drivers/vdpa/solidrun/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o
+snet_vdpa-$(CONFIG_SNET_VDPA) += snet_ctrl.o
ifdef CONFIG_HWMON
snet_vdpa-$(CONFIG_SNET_VDPA) += snet_hwmon.o
endif
diff --git a/drivers/vdpa/solidrun/snet_ctrl.c b/drivers/vdpa/solidrun/snet_ctrl.c
new file mode 100644
index 0000000000000..3858738643b40
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_ctrl.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+
+#include <linux/iopoll.h>
+
+#include "snet_vdpa.h"
+
+enum snet_ctrl_opcodes {
+ SNET_CTRL_OP_DESTROY = 1,
+ SNET_CTRL_OP_READ_VQ_STATE,
+ SNET_CTRL_OP_SUSPEND,
+};
+
+#define SNET_CTRL_TIMEOUT 2000000
+
+#define SNET_CTRL_DATA_SIZE_MASK 0x0000FFFF
+#define SNET_CTRL_IN_PROCESS_MASK 0x00010000
+#define SNET_CTRL_CHUNK_RDY_MASK 0x00020000
+#define SNET_CTRL_ERROR_MASK 0x0FFC0000
+
+#define SNET_VAL_TO_ERR(val) (-(((val) & SNET_CTRL_ERROR_MASK) >> 18))
+#define SNET_EMPTY_CTRL(val) (((val) & SNET_CTRL_ERROR_MASK) || \
+ !((val) & SNET_CTRL_IN_PROCESS_MASK))
+#define SNET_DATA_READY(val) ((val) & (SNET_CTRL_ERROR_MASK | SNET_CTRL_CHUNK_RDY_MASK))
+
+/* Control register used to read data from the DPU */
+struct snet_ctrl_reg_ctrl {
+ /* Chunk size in 4B words */
+ u16 data_size;
+ /* We are in the middle of a command */
+ u16 in_process:1;
+ /* A data chunk is ready and can be consumed */
+ u16 chunk_ready:1;
+ /* Error code */
+ u16 error:10;
+ /* Saved for future usage */
+ u16 rsvd:4;
+};
+
+/* Opcode register */
+struct snet_ctrl_reg_op {
+ u16 opcode;
+ /* Only if VQ index is relevant for the command */
+ u16 vq_idx;
+};
+
+struct snet_ctrl_regs {
+ struct snet_ctrl_reg_op op;
+ struct snet_ctrl_reg_ctrl ctrl;
+ u32 rsvd;
+ u32 data[];
+};
+
+static struct snet_ctrl_regs __iomem *snet_get_ctrl(struct snet *snet)
+{
+ return snet->bar + snet->psnet->cfg.ctrl_off;
+}
+
+static int snet_wait_for_empty_ctrl(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_EMPTY_CTRL(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_empty_op(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->op, val, !val, 10, SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_data(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_DATA_READY(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static u32 snet_read32_word(struct snet_ctrl_regs __iomem *ctrl_regs, u16 word_idx)
+{
+ return ioread32(&ctrl_regs->data[word_idx]);
+}
+
+static u32 snet_read_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ return ioread32(&ctrl_regs->ctrl);
+}
+
+static void snet_write_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->ctrl);
+}
+
+static void snet_write_op(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->op);
+}
+
+static int snet_wait_for_dpu_completion(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ /* Wait until the DPU finishes completely.
+ * It will clear the opcode register.
+ */
+ return snet_wait_for_empty_op(ctrl_regs);
+}
+
+/* Reading ctrl from the DPU:
+ * buf_size must be 4B aligned
+ *
+ * Steps:
+ *
+ * (1) Verify that the DPU is not in the middle of another operation by
+ * reading the in_process and error bits in the control register.
+ * (2) Write the request opcode and the VQ idx in the opcode register
+ * and write the buffer size in the control register.
+ * (3) Start reading chunks of data; the chunk_ready bit indicates that a
+ * data chunk is available, we signal that we read the data by clearing the bit.
+ * (4) Detect that the transfer is completed when the in_process bit
+ * in the control register is cleared or when an error appears.
+ */
+static int snet_ctrl_read_from_dpu(struct snet *snet, u16 opcode, u16 vq_idx, void *buffer,
+ u32 buf_size)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 *bfr_ptr = (u32 *)buffer;
+ u32 val;
+ u16 buf_words;
+ int ret;
+ u16 words, i, tot_words = 0;
+
+ /* Supported for config 2+ */
+ if (!SNET_CFG_VER(snet, 2))
+ return -EOPNOTSUPP;
+
+ if (!IS_ALIGNED(buf_size, 4))
+ return -EINVAL;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ buf_words = buf_size / 4;
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to write the buffer size in the control register, and the opcode + vq index in
+ * the opcode register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, buf_words);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ while (buf_words != tot_words) {
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control data\n");
+ goto exit;
+ }
+
+ val = snet_read_ctrl(regs);
+
+ /* Error? */
+ if (val & SNET_CTRL_ERROR_MASK) {
+ ret = SNET_VAL_TO_ERR(val);
+ SNET_WARN(pdev, "Error while reading control data from DPU, err %d\n", ret);
+ goto exit;
+ }
+
+ words = min_t(u16, val & SNET_CTRL_DATA_SIZE_MASK, buf_words - tot_words);
+
+ for (i = 0; i < words; i++) {
+ *bfr_ptr = snet_read32_word(regs, i);
+ bfr_ptr++;
+ }
+
+ tot_words += words;
+
+ /* Is the job completed? */
+ if (!(val & SNET_CTRL_IN_PROCESS_MASK))
+ break;
+
+ /* Clear the chunk ready bit and continue */
+ val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+ snet_write_ctrl(regs, val);
+ }
+
+ ret = snet_wait_for_dpu_completion(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for the DPU to complete a control command\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU using the old mechanism
+ * used with config version 1.
+ */
+static int snet_send_ctrl_msg_old(struct snet *snet, u32 opcode)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ int ret;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Old mechanism uses just 1 register, the opcode register.
+ * Make sure that the opcode register is empty, and that the DPU isn't
+ * processing an old message.
+ */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control message to be ACKed\n");
+ goto exit;
+ }
+
+ /* Write the message */
+ snet_write_op(regs, opcode);
+
+ /* DPU ACKs the message by clearing the opcode register */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for a control message to be ACKed\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU.
+ * A control message is a message without payload.
+ */
+static int snet_send_ctrl_msg(struct snet *snet, u16 opcode, u16 vq_idx)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 val;
+ int ret;
+
+ /* If config version is not 2+, use the old mechanism */
+ if (!SNET_CFG_VER(snet, 2))
+ return snet_send_ctrl_msg_old(snet, opcode);
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to clear the control register and write the opcode + vq index in the opcode
+ * register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, 0);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ /* The DPU ACKs control messages by setting the chunk ready bit
+ * without data.
+ */
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control message to be ACKed\n");
+ goto exit;
+ }
+
+ /* Check for errors */
+ val = snet_read_ctrl(regs);
+ ret = SNET_VAL_TO_ERR(val);
+
+ /* Clear the chunk ready bit */
+ val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+ snet_write_ctrl(regs, val);
+
+ ret = snet_wait_for_dpu_completion(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for DPU to complete a control command, err %d\n",
+ ret);
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+void snet_ctrl_clear(struct snet *snet)
+{
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+
+ snet_write_op(regs, 0);
+}
+
+int snet_destroy_dev(struct snet *snet)
+{
+ return snet_send_ctrl_msg(snet, SNET_CTRL_OP_DESTROY, 0);
+}
+
+int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state)
+{
+ return snet_ctrl_read_from_dpu(snet, SNET_CTRL_OP_READ_VQ_STATE, idx, state,
+ sizeof(*state));
+}
+
+int snet_suspend_dev(struct snet *snet)
+{
+ return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0);
+}
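[Editor's example] The SNET_CTRL_* masks near the top of this file pack the DPU's 32-bit control word into size, state, and error fields. A hedged, standalone sketch of how a raw register value decodes under those masks (constants copied locally; the sample value is made up):

#include <stdio.h>
#include <stdint.h>

#define SNET_CTRL_DATA_SIZE_MASK  0x0000FFFF
#define SNET_CTRL_IN_PROCESS_MASK 0x00010000
#define SNET_CTRL_CHUNK_RDY_MASK  0x00020000
#define SNET_CTRL_ERROR_MASK      0x0FFC0000

int main(void)
{
	uint32_t val = 0x00030008; /* example: 8 words ready, still in process */

	printf("data_size  = %u words\n", val & SNET_CTRL_DATA_SIZE_MASK);
	printf("in_process = %u\n", !!(val & SNET_CTRL_IN_PROCESS_MASK));
	printf("chunk_rdy  = %u\n", !!(val & SNET_CTRL_CHUNK_RDY_MASK));
	/* Same sign convention as SNET_VAL_TO_ERR() */
	printf("error      = %d\n", -(int)((val & SNET_CTRL_ERROR_MASK) >> 18));
	return 0;
}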
diff --git a/drivers/vdpa/solidrun/snet_hwmon.c b/drivers/vdpa/solidrun/snet_hwmon.c
index e695e36ff7534..42c87387a0f1a 100644
--- a/drivers/vdpa/solidrun/snet_hwmon.c
+++ b/drivers/vdpa/solidrun/snet_hwmon.c
@@ -2,7 +2,7 @@
/*
* SolidRun DPU driver for control plane
*
- * Copyright (C) 2022 SolidRun
+ * Copyright (C) 2022-2023 SolidRun
*
* Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
*
diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c
index 68de727398ede..cdcd84ce4f5a9 100644
--- a/drivers/vdpa/solidrun/snet_main.c
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -2,7 +2,7 @@
/*
* SolidRun DPU driver for control plane
*
- * Copyright (C) 2022 SolidRun
+ * Copyright (C) 2022-2023 SolidRun
*
* Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
*
@@ -16,14 +16,12 @@
/* SNET signature */
#define SNET_SIGNATURE 0xD0D06363
/* Max. config version that we can work with */
-#define SNET_CFG_VERSION 0x1
+#define SNET_CFG_VERSION 0x2
/* Queue align */
#define SNET_QUEUE_ALIGNMENT PAGE_SIZE
/* Kick value to notify that new data is available */
#define SNET_KICK_VAL 0x1
#define SNET_CONFIG_OFF 0x0
-/* ACK timeout for a message */
-#define SNET_ACK_TIMEOUT 2000000
/* How long we are willing to wait for a SNET device */
#define SNET_DETECT_TIMEOUT 5000000
/* How long should we wait for the DPU to read our config */
@@ -32,61 +30,16 @@
#define SNET_GENERAL_CFG_LEN 36
#define SNET_GENERAL_CFG_VQ_LEN 40
-enum snet_msg {
- SNET_MSG_DESTROY = 1,
-};
-
static struct snet *vdpa_to_snet(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct snet, vdpa);
}
-static int snet_wait_for_msg_ack(struct snet *snet)
-{
- struct pci_dev *pdev = snet->pdev;
- int ret;
- u32 val;
-
- /* The DPU will clear the messages offset once messages
- * are processed.
- */
- ret = readx_poll_timeout(ioread32, snet->bar + snet->psnet->cfg.msg_off,
- val, !val, 10, SNET_ACK_TIMEOUT);
- if (ret)
- SNET_WARN(pdev, "Timeout waiting for message ACK\n");
-
- return ret;
-}
-
-/* Sends a message to the DPU.
- * If blocking is set, the function will return once the
- * message was processed by the DPU (or timeout).
- */
-static int snet_send_msg(struct snet *snet, u32 msg, bool blocking)
-{
- int ret = 0;
-
- /* Make sure the DPU acked last message before issuing a new one */
- ret = snet_wait_for_msg_ack(snet);
- if (ret)
- return ret;
-
- /* Write the message */
- snet_write32(snet, snet->psnet->cfg.msg_off, msg);
-
- if (blocking)
- ret = snet_wait_for_msg_ack(snet);
- else /* If non-blocking, flush the write by issuing a read */
- snet_read32(snet, snet->psnet->cfg.msg_off);
-
- return ret;
-}
-
static irqreturn_t snet_cfg_irq_hndlr(int irq, void *data)
{
struct snet *snet = data;
/* Call callback if any */
- if (snet->cb.callback)
+ if (likely(snet->cb.callback))
return snet->cb.callback(snet->cb.private);
return IRQ_HANDLED;
@@ -96,7 +49,7 @@ static irqreturn_t snet_vq_irq_hndlr(int irq, void *data)
{
struct snet_vq *vq = data;
/* Call callback if any */
- if (vq->cb.callback)
+ if (likely(vq->cb.callback))
return vq->cb.callback(vq->cb.private);
return IRQ_HANDLED;
@@ -153,12 +106,24 @@ static void snet_kick_vq(struct vdpa_device *vdev, u16 idx)
{
struct snet *snet = vdpa_to_snet(vdev);
/* not ready - ignore */
- if (!snet->vqs[idx]->ready)
+ if (unlikely(!snet->vqs[idx]->ready))
return;
iowrite32(SNET_KICK_VAL, snet->vqs[idx]->kick_ptr);
}
+static void snet_kick_vq_with_data(struct vdpa_device *vdev, u32 data)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ u16 idx = data & 0xFFFF;
+
+ /* not ready - ignore */
+ if (unlikely(!snet->vqs[idx]->ready))
+ return;
+
+ iowrite32((data & 0xFFFF0000) | SNET_KICK_VAL, snet->vqs[idx]->kick_ptr);
+}
+
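+/* [Editor's example] The 32-bit value consumed above follows the
+ * VIRTIO_F_NOTIFICATION_DATA layout from the virtio spec: bits 0-15
+ * carry the vq index, bits 16-30 the next avail offset, and bit 31
+ * the wrap counter (packed rings only; split rings use all of bits
+ * 16-31 for the avail index). A sketch of how a driver-side notifier
+ * might compose that word; the helper name is hypothetical:
+ *
+ *	static uint32_t notification_data(uint16_t vq_idx,
+ *					  uint16_t next_off,
+ *					  int wrap_counter)
+ *	{
+ *		return vq_idx |
+ *		       ((uint32_t)(next_off & 0x7FFF) << 16) |
+ *		       ((uint32_t)(wrap_counter ? 1 : 0) << 31);
+ *	}
+ *
+ * snet_kick_vq_with_data() keeps the upper half of that word and
+ * replaces the low half with SNET_KICK_VAL before writing the doorbell.
+ */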
static void snet_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
struct snet *snet = vdpa_to_snet(vdev);
@@ -181,33 +146,48 @@ static bool snet_get_vq_ready(struct vdpa_device *vdev, u16 idx)
return snet->vqs[idx]->ready;
}
-static int snet_set_vq_state(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state)
+static bool snet_vq_state_is_initial(struct snet *snet, const struct vdpa_vq_state *state)
{
- struct snet *snet = vdpa_to_snet(vdev);
- /* Setting the VQ state is not supported.
- * If the asked state is the same as the initial one
- * we can ignore it.
- */
if (SNET_HAS_FEATURE(snet, VIRTIO_F_RING_PACKED)) {
const struct vdpa_vq_state_packed *p = &state->packed;
if (p->last_avail_counter == 1 && p->last_used_counter == 1 &&
p->last_avail_idx == 0 && p->last_used_idx == 0)
- return 0;
+ return true;
} else {
const struct vdpa_vq_state_split *s = &state->split;
if (s->avail_index == 0)
- return 0;
+ return true;
}
+ return false;
+}
+
+static int snet_set_vq_state(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ /* We can set any state for config version 2+ */
+ if (SNET_CFG_VER(snet, 2)) {
+ memcpy(&snet->vqs[idx]->vq_state, state, sizeof(*state));
+ return 0;
+ }
+
+ /* Older config - we can't set the VQ state.
+ * Return 0 only if this is the initial state we use in the DPU.
+ */
+ if (snet_vq_state_is_initial(snet, state))
+ return 0;
+
return -EOPNOTSUPP;
}
static int snet_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
- /* Not supported */
- return -EOPNOTSUPP;
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet_read_vq_state(snet, idx, state);
}
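[Editor's example] For pre-v2 configs, snet_set_vq_state() only accepts the device's initial state. Assuming the upstream struct vdpa_vq_state from include/linux/vdpa.h (an anonymous union of split and packed variants), the accepted packed-ring initial state looks like this; wrap counters start at 1 per the virtio spec:

/* The only state accepted on config v1 devices,
 * matching snet_vq_state_is_initial() above. */
struct vdpa_vq_state initial = {
	.packed = {
		.last_avail_counter = 1,
		.last_avail_idx     = 0,
		.last_used_counter  = 1,
		.last_used_idx      = 0,
	},
};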
static int snet_get_vq_irq(struct vdpa_device *vdev, u16 idx)
@@ -232,9 +212,9 @@ static int snet_reset_dev(struct snet *snet)
if (!snet->status)
return 0;
- /* If DPU started, send a destroy message */
+ /* If DPU started, destroy it */
if (snet->status & VIRTIO_CONFIG_S_DRIVER_OK)
- ret = snet_send_msg(snet, SNET_MSG_DESTROY, true);
+ ret = snet_destroy_dev(snet);
/* Clear VQs */
for (i = 0; i < snet->cfg->vq_num; i++) {
@@ -258,7 +238,7 @@ static int snet_reset_dev(struct snet *snet)
snet->dpu_ready = false;
if (ret)
- SNET_WARN(pdev, "Incomplete reset to SNET[%u] device\n", snet->sid);
+ SNET_WARN(pdev, "Incomplete reset to SNET[%u] device, err: %d\n", snet->sid, ret);
else
SNET_DBG(pdev, "Reset SNET[%u] device\n", snet->sid);
@@ -356,7 +336,7 @@ static int snet_write_conf(struct snet *snet)
* | DESC AREA |
* | DEVICE AREA |
* | DRIVER AREA |
- * | RESERVED |
+ * | VQ STATE (CFG 2+) | RSVD |
*
* Magic number should be written last, this is the DPU indication that the data is ready
*/
@@ -391,12 +371,15 @@ static int snet_write_conf(struct snet *snet)
off += 8;
snet_write64(snet, off, snet->vqs[i]->driver_area);
off += 8;
+ /* Write VQ state if config version is 2+ */
+ if (SNET_CFG_VER(snet, 2))
+ snet_write32(snet, off, *(u32 *)&snet->vqs[i]->vq_state);
+ off += 4;
+
/* Ignore reserved */
- off += 8;
+ off += 4;
}
- /* Clear snet messages address for this device */
- snet_write32(snet, snet->psnet->cfg.msg_off, 0);
/* Write magic number - data is ready */
snet_write32(snet, snet->psnet->cfg.host_cfg_off, SNET_SIGNATURE);
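[Editor's example] The layout comment above now ends each per-VQ record with a 4-byte VQ state followed by 4 reserved bytes, so the record size is unchanged from config v1. An illustrative C view of the tail of one record (the driver writes raw words with snet_write32()/snet_write64(); this struct only documents the byte layout and is not part of the driver):

#include <stdint.h>

struct snet_vq_cfg_record_tail {
	/* ... earlier ring fields precede these ... */
	uint64_t desc_area;
	uint64_t device_area;
	uint64_t driver_area;
	uint32_t vq_state;	/* written only when SNET_CFG_VER(snet, 2) */
	uint32_t rsvd;
};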
@@ -512,10 +495,25 @@ static void snet_set_config(struct vdpa_device *vdev, unsigned int offset,
iowrite8(*buf_ptr++, cfg_ptr + i);
}
+static int snet_suspend(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ int ret;
+
+ ret = snet_suspend_dev(snet);
+ if (ret)
+ SNET_ERR(snet->pdev, "SNET[%u] suspend failed, err: %d\n", snet->sid, ret);
+ else
+ SNET_DBG(snet->pdev, "Suspend SNET[%u] device\n", snet->sid);
+
+ return ret;
+}
+
static const struct vdpa_config_ops snet_config_ops = {
.set_vq_address = snet_set_vq_address,
.set_vq_num = snet_set_vq_num,
.kick_vq = snet_kick_vq,
+ .kick_vq_with_data = snet_kick_vq_with_data,
.set_vq_cb = snet_set_vq_cb,
.set_vq_ready = snet_set_vq_ready,
.get_vq_ready = snet_get_vq_ready,
@@ -537,6 +535,7 @@ static const struct vdpa_config_ops snet_config_ops = {
.set_status = snet_set_status,
.get_config = snet_get_config,
.set_config = snet_set_config,
+ .suspend = snet_suspend,
};
static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
@@ -697,7 +696,7 @@ static int psnet_read_cfg(struct pci_dev *pdev, struct psnet *psnet)
off += 4;
cfg->hwmon_off = psnet_read32(psnet, off);
off += 4;
- cfg->msg_off = psnet_read32(psnet, off);
+ cfg->ctrl_off = psnet_read32(psnet, off);
off += 4;
cfg->flags = psnet_read32(psnet, off);
off += 4;
@@ -997,6 +996,10 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev)
goto free_irqs;
}
+ /* Init control mutex and spinlock */
+ mutex_init(&snet->ctrl_lock);
+ spin_lock_init(&snet->ctrl_spinlock);
+
/* Save pci device pointer */
snet->pdev = pdev;
snet->psnet = psnet;
@@ -1013,6 +1016,9 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev)
/* Create a VirtIO config pointer */
snet->cfg->virtio_cfg = snet->bar + snet->psnet->cfg.virtio_cfg_off;
+ /* Clear control registers */
+ snet_ctrl_clear(snet);
+
pci_set_master(pdev);
pci_set_drvdata(pdev, snet);
diff --git a/drivers/vdpa/solidrun/snet_vdpa.h b/drivers/vdpa/solidrun/snet_vdpa.h
index b7f34169053fb..3c78d4e7d4857 100644
--- a/drivers/vdpa/solidrun/snet_vdpa.h
+++ b/drivers/vdpa/solidrun/snet_vdpa.h
@@ -2,7 +2,7 @@
/*
* SolidRun DPU driver for control plane
*
- * Copyright (C) 2022 SolidRun
+ * Copyright (C) 2022-2023 SolidRun
*
* Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
*
@@ -20,10 +20,15 @@
#define SNET_INFO(pdev, fmt, ...) dev_info(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
#define SNET_DBG(pdev, fmt, ...) dev_dbg(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
#define SNET_HAS_FEATURE(s, f) ((s)->negotiated_features & BIT_ULL(f))
+/* Check if negotiated config version is at least @ver */
+#define SNET_CFG_VER(snet, ver) ((snet)->psnet->negotiated_cfg_ver >= (ver))
+
/* VQ struct */
struct snet_vq {
/* VQ callback */
struct vdpa_callback cb;
+ /* VQ state received from bus */
+ struct vdpa_vq_state vq_state;
/* desc base address */
u64 desc_area;
/* device base address */
@@ -51,6 +56,10 @@ struct snet {
struct vdpa_device vdpa;
/* Config callback */
struct vdpa_callback cb;
+ /* To lock the control mechanism */
+ struct mutex ctrl_lock;
+ /* Spinlock to protect critical parts in the control mechanism */
+ spinlock_t ctrl_spinlock;
/* array of virtqueues */
struct snet_vq **vqs;
/* Used features */
@@ -117,8 +126,8 @@ struct snet_cfg {
u32 kick_off;
/* Offset in PCI BAR for HW monitoring */
u32 hwmon_off;
- /* Offset in PCI BAR for SNET messages */
- u32 msg_off;
+ /* Offset in PCI BAR for Control mechanism */
+ u32 ctrl_off;
/* Config general flags - enum snet_cfg_flags */
u32 flags;
/* Reserved for future usage */
@@ -191,4 +200,9 @@ static inline void snet_write64(struct snet *snet, u32 off, u64 val)
void psnet_create_hwmon(struct pci_dev *pdev);
#endif
+void snet_ctrl_clear(struct snet *snet);
+int snet_destroy_dev(struct snet *snet);
+int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state);
+int snet_suspend_dev(struct snet *snet);
+
#endif //_SNET_VDPA_H_
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index eea23c630f7c0..d343af4fa60ee 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -11,8 +11,8 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/kthread.h>
#include <linux/slab.h>
-#include <linux/sched.h>
#include <linux/dma-map-ops.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
@@ -35,10 +35,44 @@ module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
+static bool use_va = true;
+module_param(use_va, bool, 0444);
+MODULE_PARM_DESC(use_va, "Enable/disable the device's ability to use VA");
+
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
#define VDPASIM_VENDOR_ID 0
+struct vdpasim_mm_work {
+ struct kthread_work work;
+ struct vdpasim *vdpasim;
+ struct mm_struct *mm_to_bind;
+ int ret;
+};
+
+static void vdpasim_mm_work_fn(struct kthread_work *work)
+{
+ struct vdpasim_mm_work *mm_work =
+ container_of(work, struct vdpasim_mm_work, work);
+ struct vdpasim *vdpasim = mm_work->vdpasim;
+
+ mm_work->ret = 0;
+
+ //TODO: should we attach the cgroup of the mm owner?
+ vdpasim->mm_bound = mm_work->mm_to_bind;
+}
+
+static void vdpasim_worker_change_mm_sync(struct vdpasim *vdpasim,
+ struct vdpasim_mm_work *mm_work)
+{
+ struct kthread_work *work = &mm_work->work;
+
+ kthread_init_work(work, vdpasim_mm_work_fn);
+ kthread_queue_work(vdpasim->worker, work);
+
+ kthread_flush_work(work);
+}
+
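+/* [Editor's example] vdpasim_worker_change_mm_sync() queues the mm
+ * switch on the simulator's own kthread worker and waits for it, so
+ * the work item can safely live on the caller's stack. The same
+ * synchronous-handoff pattern in isolation, a sketch against the
+ * in-kernel kthread_work API (names are illustrative):
+ *
+ *	struct demo_work {
+ *		struct kthread_work work;
+ *		int ret;
+ *	};
+ *
+ *	static void demo_fn(struct kthread_work *work)
+ *	{
+ *		struct demo_work *dw =
+ *			container_of(work, struct demo_work, work);
+ *
+ *		dw->ret = 0;	// runs on the worker thread
+ *	}
+ *
+ *	static int demo_run_sync(struct kthread_worker *worker)
+ *	{
+ *		struct demo_work dw;
+ *
+ *		kthread_init_work(&dw.work, demo_fn);
+ *		kthread_queue_work(worker, &dw.work);
+ *		kthread_flush_work(&dw.work);	// wait for completion
+ *		return dw.ret;	// stack storage is safe: we waited
+ *	}
+ */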
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct vdpasim, vdpa);
@@ -59,13 +93,20 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
uint16_t last_avail_idx = vq->vring.last_avail_idx;
-
- vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, true,
- (struct vring_desc *)(uintptr_t)vq->desc_addr,
- (struct vring_avail *)
- (uintptr_t)vq->driver_addr,
- (struct vring_used *)
- (uintptr_t)vq->device_addr);
+ struct vring_desc *desc = (struct vring_desc *)
+ (uintptr_t)vq->desc_addr;
+ struct vring_avail *avail = (struct vring_avail *)
+ (uintptr_t)vq->driver_addr;
+ struct vring_used *used = (struct vring_used *)
+ (uintptr_t)vq->device_addr;
+
+ if (use_va && vdpasim->mm_bound) {
+ vringh_init_iotlb_va(&vq->vring, vdpasim->features, vq->num,
+ true, desc, avail, used);
+ } else {
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num,
+ true, desc, avail, used);
+ }
vq->vring.last_avail_idx = last_avail_idx;
@@ -127,6 +168,25 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
static const struct vdpa_config_ops vdpasim_config_ops;
static const struct vdpa_config_ops vdpasim_batch_config_ops;
+static void vdpasim_work_fn(struct kthread_work *work)
+{
+ struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ struct mm_struct *mm = vdpasim->mm_bound;
+
+ if (use_va && mm) {
+ if (!mmget_not_zero(mm))
+ return;
+ kthread_use_mm(mm);
+ }
+
+ vdpasim->dev_attr.work_fn(vdpasim);
+
+ if (use_va && mm) {
+ kthread_unuse_mm(mm);
+ mmput(mm);
+ }
+}
+
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
const struct vdpa_dev_set_config *config)
{
@@ -155,7 +215,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
vdpa = __vdpa_alloc_device(NULL, ops,
dev_attr->ngroups, dev_attr->nas,
dev_attr->alloc_size,
- dev_attr->name, false);
+ dev_attr->name, use_va);
if (IS_ERR(vdpa)) {
ret = PTR_ERR(vdpa);
goto err_alloc;
@@ -163,11 +223,17 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
vdpasim = vdpa_to_sim(vdpa);
vdpasim->dev_attr = *dev_attr;
- INIT_WORK(&vdpasim->work, dev_attr->work_fn);
- spin_lock_init(&vdpasim->lock);
+ dev = &vdpasim->vdpa.dev;
+
+ kthread_init_work(&vdpasim->work, vdpasim_work_fn);
+ vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s",
+ dev_attr->name);
+ if (IS_ERR(vdpasim->worker))
+ goto err_iommu;
+
+ mutex_init(&vdpasim->mutex);
spin_lock_init(&vdpasim->iommu_lock);
- dev = &vdpasim->vdpa.dev;
dev->dma_mask = &dev->coherent_dma_mask;
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
goto err_iommu;
@@ -195,10 +261,6 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
- vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
- if (!vdpasim->buffer)
- goto err_iommu;
-
for (i = 0; i < dev_attr->nvqs; i++)
vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
&vdpasim->iommu_lock);
@@ -214,6 +276,12 @@ err_alloc:
}
EXPORT_SYMBOL_GPL(vdpasim_create);
+void vdpasim_schedule_work(struct vdpasim *vdpasim)
+{
+ kthread_queue_work(vdpasim->worker, &vdpasim->work);
+}
+EXPORT_SYMBOL_GPL(vdpasim_schedule_work);
+
static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 desc_area, u64 driver_area,
u64 device_area)
@@ -248,7 +316,7 @@ static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx)
}
if (vq->ready)
- schedule_work(&vdpasim->work);
+ vdpasim_schedule_work(vdpasim);
}
static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
@@ -267,13 +335,13 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
bool old_ready;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
old_ready = vq->ready;
vq->ready = ready;
if (vq->ready && !old_ready) {
vdpasim_queue_ready(vdpasim, idx);
}
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
}
static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
@@ -291,9 +359,9 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
struct vringh *vrh = &vq->vring;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vrh->last_avail_idx = state->split.avail_index;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -390,9 +458,9 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
u8 status;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
status = vdpasim->status;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return status;
}
@@ -401,19 +469,19 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->status = status;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
}
static int vdpasim_reset(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->status = 0;
vdpasim_do_reset(vdpasim);
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -422,9 +490,9 @@ static int vdpasim_suspend(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->running = false;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -434,7 +502,7 @@ static int vdpasim_resume(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int i;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->running = true;
if (vdpasim->pending_kick) {
@@ -445,7 +513,7 @@ static int vdpasim_resume(struct vdpa_device *vdpa)
vdpasim->pending_kick = false;
}
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -517,14 +585,14 @@ static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
iommu = &vdpasim->iommu[asid];
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
if (vdpasim_get_vq_group(vdpa, i) == group)
vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu,
&vdpasim->iommu_lock);
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -563,6 +631,30 @@ err:
return ret;
}
+static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_mm_work mm_work;
+
+ mm_work.vdpasim = vdpasim;
+ mm_work.mm_to_bind = mm;
+
+ vdpasim_worker_change_mm_sync(vdpasim, &mm_work);
+
+ return mm_work.ret;
+}
+
+static void vdpasim_unbind_mm(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_mm_work mm_work;
+
+ mm_work.vdpasim = vdpasim;
+ mm_work.mm_to_bind = NULL;
+
+ vdpasim_worker_change_mm_sync(vdpasim, &mm_work);
+}
+
static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
u64 iova, u64 size,
u64 pa, u32 perm, void *opaque)
@@ -610,14 +702,16 @@ static void vdpasim_free(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int i;
- cancel_work_sync(&vdpasim->work);
+ kthread_cancel_work_sync(&vdpasim->work);
+ kthread_destroy_worker(vdpasim->worker);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
}
- kvfree(vdpasim->buffer);
+ vdpasim->dev_attr.free(vdpasim);
+
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(&vdpasim->iommu[i]);
kfree(vdpasim->iommu);
@@ -658,6 +752,8 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_group_asid = vdpasim_set_group_asid,
.dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap,
+ .bind_mm = vdpasim_bind_mm,
+ .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free,
};
@@ -692,6 +788,8 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_iova_range = vdpasim_get_iova_range,
.set_group_asid = vdpasim_set_group_asid,
.set_map = vdpasim_set_map,
+ .bind_mm = vdpasim_bind_mm,
+ .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free,
};
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 144858636c10a..bb137e4797633 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -39,33 +39,34 @@ struct vdpasim_dev_attr {
u64 supported_features;
size_t alloc_size;
size_t config_size;
- size_t buffer_size;
int nvqs;
u32 id;
u32 ngroups;
u32 nas;
- work_func_t work_fn;
+ void (*work_fn)(struct vdpasim *vdpasim);
void (*get_config)(struct vdpasim *vdpasim, void *config);
void (*set_config)(struct vdpasim *vdpasim, const void *config);
int (*get_stats)(struct vdpasim *vdpasim, u16 idx,
struct sk_buff *msg,
struct netlink_ext_ack *extack);
+ void (*free)(struct vdpasim *vdpasim);
};
/* State of each vdpasim device */
struct vdpasim {
struct vdpa_device vdpa;
struct vdpasim_virtqueue *vqs;
- struct work_struct work;
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct mm_struct *mm_bound;
struct vdpasim_dev_attr dev_attr;
- /* spinlock to synchronize virtqueue state */
- spinlock_t lock;
+ /* mutex to synchronize virtqueue state */
+ struct mutex mutex;
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
bool *iommu_pt;
- void *buffer;
u32 status;
u32 generation;
u64 features;
@@ -78,6 +79,7 @@ struct vdpasim {
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
const struct vdpa_dev_set_config *config);
+void vdpasim_schedule_work(struct vdpasim *vdpasim);
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 5117959bed8a2..00d7d72713beb 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
@@ -44,8 +43,39 @@
#define VDPASIM_BLK_AS_NUM 1
#define VDPASIM_BLK_GROUP_NUM 1
+struct vdpasim_blk {
+ struct vdpasim vdpasim;
+ void *buffer;
+ bool shared_backend;
+};
+
+static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
+{
+ return container_of(vdpasim, struct vdpasim_blk, vdpasim);
+}
+
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
+static bool shared_backend;
+module_param(shared_backend, bool, 0444);
+MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");
+
+static void *shared_buffer;
+/* mutex to synchronize shared_buffer access */
+static DEFINE_MUTEX(shared_buffer_mutex);
+
+static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
+{
+ if (blk->shared_backend)
+ mutex_lock(&shared_buffer_mutex);
+}
+
+static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
+{
+ if (blk->shared_backend)
+ mutex_unlock(&shared_buffer_mutex);
+}
+
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
u64 num_sectors, u64 max_sectors)
{
@@ -79,6 +109,7 @@ static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
struct vdpasim_virtqueue *vq)
{
+ struct vdpasim_blk *blk = sim_to_blk(vdpasim);
size_t pushed = 0, to_pull, to_push;
struct virtio_blk_outhdr hdr;
bool handled = false;
@@ -144,9 +175,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break;
}
+ vdpasim_blk_buffer_lock(blk);
bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
- vdpasim->buffer + offset,
- to_push);
+ blk->buffer + offset, to_push);
+ vdpasim_blk_buffer_unlock(blk);
if (bytes < 0) {
dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
@@ -166,9 +198,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break;
}
+ vdpasim_blk_buffer_lock(blk);
bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
- vdpasim->buffer + offset,
- to_pull);
+ blk->buffer + offset, to_pull);
+ vdpasim_blk_buffer_unlock(blk);
if (bytes < 0) {
dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
@@ -248,8 +281,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
}
if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
- memset(vdpasim->buffer + offset, 0,
+ vdpasim_blk_buffer_lock(blk);
+ memset(blk->buffer + offset, 0,
num_sectors << SECTOR_SHIFT);
+ vdpasim_blk_buffer_unlock(blk);
}
break;
@@ -286,13 +321,12 @@ err:
return handled;
}
-static void vdpasim_blk_work(struct work_struct *work)
+static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
- struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
bool reschedule = false;
int i;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
@@ -323,10 +357,10 @@ static void vdpasim_blk_work(struct work_struct *work)
}
}
out:
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
if (reschedule)
- schedule_work(&vdpasim->work);
+ vdpasim_schedule_work(vdpasim);
}
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
@@ -355,6 +389,14 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
}
+static void vdpasim_blk_free(struct vdpasim *vdpasim)
+{
+ struct vdpasim_blk *blk = sim_to_blk(vdpasim);
+
+ if (!blk->shared_backend)
+ kvfree(blk->buffer);
+}
+
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}
@@ -368,6 +410,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
+ struct vdpasim_blk *blk;
struct vdpasim *simdev;
int ret;
@@ -378,16 +421,30 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
dev_attr.nas = VDPASIM_BLK_AS_NUM;
- dev_attr.alloc_size = sizeof(struct vdpasim);
+ dev_attr.alloc_size = sizeof(struct vdpasim_blk);
dev_attr.config_size = sizeof(struct virtio_blk_config);
dev_attr.get_config = vdpasim_blk_get_config;
dev_attr.work_fn = vdpasim_blk_work;
- dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;
+ dev_attr.free = vdpasim_blk_free;
simdev = vdpasim_create(&dev_attr, config);
if (IS_ERR(simdev))
return PTR_ERR(simdev);
+ blk = sim_to_blk(simdev);
+ blk->shared_backend = shared_backend;
+
+ if (blk->shared_backend) {
+ blk->buffer = shared_buffer;
+ } else {
+ blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ GFP_KERNEL);
+ if (!blk->buffer) {
+ ret = -ENOMEM;
+ goto put_dev;
+ }
+ }
+
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
if (ret)
goto put_dev;
@@ -437,6 +494,15 @@ static int __init vdpasim_blk_init(void)
if (ret)
goto parent_err;
+ if (shared_backend) {
+ shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ GFP_KERNEL);
+ if (!shared_buffer) {
+ ret = -ENOMEM;
+ goto parent_err;
+ }
+ }
+
return 0;
parent_err:
@@ -446,6 +512,7 @@ parent_err:
static void __exit vdpasim_blk_exit(void)
{
+ kvfree(shared_buffer);
vdpa_mgmtdev_unregister(&mgmt_dev);
device_unregister(&vdpasim_blk_mgmtdev);
}
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index dfe2ce3418035..cfe9629118045 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
@@ -59,6 +58,7 @@ struct vdpasim_net{
struct vdpasim_dataq_stats tx_stats;
struct vdpasim_dataq_stats rx_stats;
struct vdpasim_cq_stats cq_stats;
+ void *buffer;
};
static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
@@ -88,14 +88,15 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len)
size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
sizeof(struct virtio_net_hdr);
struct virtio_net_config *vio_config = vdpasim->config;
+ struct vdpasim_net *net = sim_to_net(vdpasim);
if (len < ETH_ALEN + hdr_len)
return false;
- if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) ||
- is_multicast_ether_addr(vdpasim->buffer + hdr_len))
+ if (is_broadcast_ether_addr(net->buffer + hdr_len) ||
+ is_multicast_ether_addr(net->buffer + hdr_len))
return true;
- if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN))
+ if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN))
return true;
return false;
@@ -192,9 +193,8 @@ static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
u64_stats_update_end(&net->cq_stats.syncp);
}
-static void vdpasim_net_work(struct work_struct *work)
+static void vdpasim_net_work(struct vdpasim *vdpasim)
{
- struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
struct vdpasim_net *net = sim_to_net(vdpasim);
@@ -203,7 +203,7 @@ static void vdpasim_net_work(struct work_struct *work)
u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
int err;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
if (!vdpasim->running)
goto out;
@@ -227,8 +227,7 @@ static void vdpasim_net_work(struct work_struct *work)
++tx_pkts;
read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
- vdpasim->buffer,
- PAGE_SIZE);
+ net->buffer, PAGE_SIZE);
tx_bytes += read;
@@ -247,7 +246,7 @@ static void vdpasim_net_work(struct work_struct *work)
}
write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
- vdpasim->buffer, read);
+ net->buffer, read);
if (write <= 0) {
++rx_errors;
break;
@@ -260,13 +259,13 @@ static void vdpasim_net_work(struct work_struct *work)
vdpasim_net_complete(rxq, write);
if (tx_pkts > 4) {
- schedule_work(&vdpasim->work);
+ vdpasim_schedule_work(vdpasim);
goto out;
}
}
out:
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
u64_stats_update_begin(&net->tx_stats.syncp);
net->tx_stats.pkts += tx_pkts;
@@ -429,6 +428,13 @@ static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}
+static void vdpasim_net_free(struct vdpasim *vdpasim)
+{
+ struct vdpasim_net *net = sim_to_net(vdpasim);
+
+ kvfree(net->buffer);
+}
+
static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}
@@ -458,7 +464,7 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.get_config = vdpasim_net_get_config;
dev_attr.work_fn = vdpasim_net_work;
dev_attr.get_stats = vdpasim_net_get_stats;
- dev_attr.buffer_size = PAGE_SIZE;
+ dev_attr.free = vdpasim_net_free;
simdev = vdpasim_create(&dev_attr, config);
if (IS_ERR(simdev))
@@ -472,6 +478,12 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
u64_stats_init(&net->rx_stats.syncp);
u64_stats_init(&net->cq_stats.syncp);
+ net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!net->buffer) {
+ ret = -ENOMEM;
+ goto reg_err;
+ }
+
/*
* Initialization must be completed before this call, since it can
* connect the device to the vDPA bus, so requests can arrive after
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index c421b83f6fa2f..de97e38c3b82f 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -37,10 +37,15 @@
#define DRV_LICENSE "GPL v2"
#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
+#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
-#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+/* 128 MB reserved for virtqueue creation */
+#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+#define IRQ_UNBOUND -1
+
struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +62,9 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
+ struct cpumask irq_affinity;
+ struct kobject kobj;
};
struct vduse_dev;
@@ -76,7 +84,7 @@ struct vduse_umem {
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
- struct vduse_virtqueue *vqs;
+ struct vduse_virtqueue **vqs;
struct vduse_iova_domain *domain;
char *name;
struct mutex lock;
@@ -106,6 +114,8 @@ struct vduse_dev {
u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
+ unsigned int bounce_size;
+ struct mutex domain_lock;
};
struct vduse_dev_msg {
@@ -128,6 +138,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -419,7 +430,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
struct vduse_iova_domain *domain = dev->domain;
/* The coherent mappings are handled in vduse_dev_free_coherent() */
- if (domain->bounce_map)
+ if (domain && domain->bounce_map)
vduse_domain_reset_bounce_map(domain);
down_write(&dev->rwsem);
@@ -434,7 +445,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
flush_work(&dev->inject);
for (i = 0; i < dev->vq_num; i++) {
- struct vduse_virtqueue *vq = &dev->vqs[i];
+ struct vduse_virtqueue *vq = dev->vqs[i];
vq->ready = false;
vq->desc_addr = 0;
@@ -453,6 +464,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
spin_lock(&vq->irq_lock);
vq->cb.callback = NULL;
vq->cb.private = NULL;
+ vq->cb.trigger = NULL;
spin_unlock(&vq->irq_lock);
flush_work(&vq->inject);
flush_work(&vq->kick);
@@ -466,7 +478,7 @@ static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 device_area)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->desc_addr = desc_area;
vq->driver_addr = driver_area;
@@ -500,7 +512,7 @@ static void vduse_vq_kick_work(struct work_struct *work)
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (!eventfd_signal_allowed()) {
schedule_work(&vq->kick);
@@ -513,18 +525,19 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
struct vdpa_callback *cb)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
spin_lock(&vq->irq_lock);
vq->cb.callback = cb->callback;
vq->cb.private = cb->private;
+ vq->cb.trigger = cb->trigger;
spin_unlock(&vq->irq_lock);
}
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->num = num;
}
@@ -533,7 +546,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
u16 idx, bool ready)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->ready = ready;
}
@@ -541,7 +554,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
return vq->ready;
}
@@ -550,7 +563,7 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
const struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
vq->state.packed.last_avail_counter =
@@ -569,7 +582,7 @@ static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
@@ -624,8 +637,8 @@ static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (num_max < dev->vqs[i].num_max)
- num_max = dev->vqs[i].num_max;
+ if (num_max < dev->vqs[i]->num_max)
+ num_max = dev->vqs[i]->num_max;
return num_max;
}
@@ -708,6 +721,23 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
return dev->generation;
}
+static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
+ const struct cpumask *cpu_mask)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+ return 0;
+}
+
+static const struct cpumask *
+vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return &dev->vqs[idx]->irq_affinity;
+}
+
static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
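
The new affinity ops only record the requested mask in the virtqueue; nothing is rescheduled here, and the effective CPU is resolved lazily at interrupt-injection time. A minimal sketch of how a caller reaches these ops through the usual vdpa config-ops indirection (the wrapper name is hypothetical; the real entry point is whichever bus code dereferences vdpa->config):

static int vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				const struct cpumask *cpu_mask)
{
	const struct vdpa_config_ops *ops = vdpa->config;

	/* Devices that do not implement the op simply decline. */
	return ops->set_vq_affinity ?
	       ops->set_vq_affinity(vdpa, idx, cpu_mask) : -EOPNOTSUPP;
}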
@@ -758,6 +788,8 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_vq_affinity = vduse_vdpa_set_vq_affinity,
+ .get_vq_affinity = vduse_vdpa_get_vq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
.free = vduse_vdpa_free,
@@ -863,7 +895,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
return -EINVAL;
index = array_index_nospec(eventfd->index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
if (eventfd->fd >= 0) {
ctx = eventfd_ctx_fdget(eventfd->fd);
if (IS_ERR(ctx))
@@ -889,7 +921,7 @@ static bool vduse_dev_is_ready(struct vduse_dev *dev)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (!dev->vqs[i].num_max)
+ if (!dev->vqs[i]->num_max)
return false;
return true;
@@ -916,8 +948,26 @@ static void vduse_vq_irq_inject(struct work_struct *work)
spin_unlock_irq(&vq->irq_lock);
}
+static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
+{
+ bool signal = false;
+
+ if (!vq->cb.trigger)
+ return false;
+
+ spin_lock_irq(&vq->irq_lock);
+ if (vq->ready && vq->cb.trigger) {
+ eventfd_signal(vq->cb.trigger, 1);
+ signal = true;
+ }
+ spin_unlock_irq(&vq->irq_lock);
+
+ return signal;
+}
+
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +976,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == IRQ_UNBOUND)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
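
Interrupt injection now has two paths: if userspace registered an eventfd trigger, vduse_vq_signal_irqfd() signals it directly under irq_lock; otherwise the inject work is queued, either on the unbound workqueue or pinned to a CPU via queue_work_on(). A condensed sketch of the dispatch order as exercised by VDUSE_VQ_INJECT_IRQ further down (the helper name is hypothetical; the ioctl handler inlines these steps):

static void vduse_vq_inject_irq(struct vduse_dev *dev,
				struct vduse_virtqueue *vq)
{
	if (vduse_vq_signal_irqfd(vq))	/* fast path: eventfd trigger */
		return;

	/* Slow path: pick an online CPU from irq_affinity (or fall back
	 * to IRQ_UNBOUND) and queue the injection work accordingly.
	 */
	vduse_vq_update_effective_cpu(vq);
	vduse_dev_queue_irq_work(dev, &vq->inject, vq->irq_effective_cpu);
}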
@@ -944,6 +998,9 @@ static int vduse_dev_dereg_umem(struct vduse_dev *dev,
goto unlock;
ret = -EINVAL;
+ if (!dev->domain)
+ goto unlock;
+
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
@@ -970,7 +1027,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
unsigned long npages, lock_limit;
int ret;
- if (!dev->domain->bounce_map ||
+ if (!dev->domain || !dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
@@ -1029,6 +1086,22 @@ unlock:
return ret;
}
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+ int curr_cpu = vq->irq_effective_cpu;
+
+ while (true) {
+ curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+ if (cpu_online(curr_cpu))
+ break;
+
+ if (curr_cpu >= nr_cpu_ids)
+ curr_cpu = IRQ_UNBOUND;
+ }
+
+ vq->irq_effective_cpu = curr_cpu;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
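
vduse_vq_update_effective_cpu() round-robins through the affinity mask starting just past the previously used CPU; cpumask_next() returns nr_cpu_ids once the mask is exhausted, at which point the cursor resets to IRQ_UNBOUND (-1) and the scan wraps back to the lowest set bit. In practice the loop is bounded because irq_cb_affinity_store() below rejects any mask that does not intersect cpu_online_mask at write time. A worked trace, assuming mask = {1,3}, nr_cpu_ids = 4, CPU 3 offline, and a previous effective CPU of 1:

/*
 *   curr_cpu = 1
 *   cpumask_next(1, mask)  -> 3    CPU 3 is offline, keep scanning
 *   cpumask_next(3, mask)  -> 4    mask exhausted (== nr_cpu_ids),
 *                                  curr_cpu reset to IRQ_UNBOUND (-1)
 *   cpumask_next(-1, mask) -> 1    CPU 1 is online -> break
 *
 *   vq->irq_effective_cpu = 1
 */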
@@ -1044,7 +1117,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
struct vduse_iotlb_entry entry;
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
- struct vduse_iova_domain *domain = dev->domain;
struct file *f = NULL;
ret = -EFAULT;
@@ -1055,8 +1127,13 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (entry.start > entry.last)
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
entry.start, entry.last);
if (map) {
map_file = (struct vdpa_map_file *)map->opaque;
@@ -1066,7 +1143,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
entry.last = map->last;
entry.perm = map->perm;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
ret = -EINVAL;
if (!f)
break;
@@ -1111,7 +1189,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1130,7 +1208,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(config.index, dev->vq_num);
- dev->vqs[index].num_max = config.max_size;
+ dev->vqs[index]->num_max = config.max_size;
ret = 0;
break;
}
@@ -1148,7 +1226,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(vq_info.index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
vq_info.desc_addr = vq->desc_addr;
vq_info.driver_addr = vq->driver_addr;
vq_info.device_addr = vq->device_addr;
@@ -1197,8 +1275,14 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (index >= dev->vq_num)
break;
+ ret = 0;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
+ if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev,
+ &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
+ }
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1213,8 +1297,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(umem.reserved)))
break;
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
@@ -1228,15 +1314,15 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
-
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
- struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
@@ -1250,18 +1336,24 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(info.reserved)))
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
- if (domain->bounce_map && map->start == 0 &&
- map->last == domain->bounce_size - 1)
+ if (dev->domain->bounce_map && map->start == 0 &&
+ map->last == dev->domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
if (!map)
break;
@@ -1284,7 +1376,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;
- vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_unlock(&dev->domain_lock);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can be processed after reconnection */

list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1339,6 +1434,151 @@ static const struct file_operations vduse_dev_fops = {
.llseek = noop_llseek,
};
+static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
+{
+ return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
+}
+
+static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
+ const char *buf, size_t count)
+{
+ cpumask_var_t new_value;
+ int ret;
+
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, new_value);
+ if (ret)
+ goto free_mask;
+
+ ret = -EINVAL;
+ if (!cpumask_intersects(new_value, cpu_online_mask))
+ goto free_mask;
+
+ cpumask_copy(&vq->irq_affinity, new_value);
+ ret = count;
+free_mask:
+ free_cpumask_var(new_value);
+ return ret;
+}
+
+struct vq_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
+ ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
+ size_t count);
+};
+
+static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
+
+static struct attribute *vq_attrs[] = {
+ &irq_cb_affinity_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vq);
+
+static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->show)
+ return -EIO;
+
+ return entry->show(vq, buf);
+}
+
+static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->store)
+ return -EIO;
+
+ return entry->store(vq, buf, count);
+}
+
+static const struct sysfs_ops vq_sysfs_ops = {
+ .show = vq_attr_show,
+ .store = vq_attr_store,
+};
+
+static void vq_release(struct kobject *kobj)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ kfree(vq);
+}
+
+static const struct kobj_type vq_type = {
+ .release = vq_release,
+ .sysfs_ops = &vq_sysfs_ops,
+ .default_groups = vq_groups,
+};
+
+static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
+{
+ int i;
+
+ if (!dev->vqs)
+ return;
+
+ for (i = 0; i < dev->vq_num; i++)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+}
+
+static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
+{
+ int ret, i;
+
+ dev->vq_align = vq_align;
+ dev->vq_num = vq_num;
+ dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+ if (!dev->vqs)
+ return -ENOMEM;
+
+ for (i = 0; i < vq_num; i++) {
+ dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
+ if (!dev->vqs[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
+ INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
+ INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
+ spin_lock_init(&dev->vqs[i]->kick_lock);
+ spin_lock_init(&dev->vqs[i]->irq_lock);
+ cpumask_setall(&dev->vqs[i]->irq_affinity);
+
+ kobject_init(&dev->vqs[i]->kobj, &vq_type);
+ ret = kobject_add(&dev->vqs[i]->kobj,
+ &dev->dev->kobj, "vq%d", i);
+ if (ret) {
+ kfree(dev->vqs[i]);
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ while (i--)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+ dev->vqs = NULL;
+ return ret;
+}
+
static struct vduse_dev *vduse_dev_create(void)
{
struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
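
Each virtqueue is now allocated separately and exposed as a kobject named vq%d under the VDUSE device, so ownership passes to the kobject once kobject_add() succeeds: from then on teardown goes through kobject_put(), which removes the sysfs entry and ultimately frees the vq in vq_release(). A hedged userspace sketch of tuning the new attribute; the sysfs path is an assumption derived from the class and naming here and must be adjusted to the real device node:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path: /sys/class/vduse/<name>/vq0/irq_cb_affinity.
	 * cpumask_parse() takes a hex mask, so "3" means CPUs 0-1.
	 */
	int fd = open("/sys/class/vduse/my-dev/vq0/irq_cb_affinity",
		      O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "3", 1) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}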
@@ -1348,6 +1588,7 @@ static struct vduse_dev *vduse_dev_create(void)
mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
+ mutex_init(&dev->domain_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
@@ -1396,8 +1637,9 @@ static int vduse_destroy_dev(char *name)
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config);
- kfree(dev->vqs);
- vduse_domain_destroy(dev->domain);
+ vduse_dev_deinit_vqs(dev);
+ if (dev->domain)
+ vduse_domain_destroy(dev->domain);
kfree(dev->name);
vduse_dev_destroy(dev);
module_put(THIS_MODULE);
@@ -1476,8 +1718,48 @@ static ssize_t msg_timeout_store(struct device *device,
static DEVICE_ATTR_RW(msg_timeout);
+static ssize_t bounce_size_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+
+ return sysfs_emit(buf, "%u\n", dev->bounce_size);
+}
+
+static ssize_t bounce_size_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+ unsigned int bounce_size;
+ int ret;
+
+ ret = -EPERM;
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ goto unlock;
+
+ ret = kstrtouint(buf, 10, &bounce_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = -EINVAL;
+ if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
+ bounce_size < VDUSE_MIN_BOUNCE_SIZE)
+ goto unlock;
+
+ dev->bounce_size = bounce_size & PAGE_MASK;
+ ret = count;
+unlock:
+ mutex_unlock(&dev->domain_lock);
+ return ret;
+}
+
+static DEVICE_ATTR_RW(bounce_size);
+
static struct attribute *vduse_dev_attrs[] = {
&dev_attr_msg_timeout.attr,
+ &dev_attr_bounce_size.attr,
NULL
};
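
bounce_size is only writable while the device has no IOVA domain, i.e. after VDUSE_CREATE_DEV but before the first attach to the vDPA bus creates the domain; values outside [VDUSE_MIN_BOUNCE_SIZE, VDUSE_MAX_BOUNCE_SIZE] are rejected, and accepted values are rounded down to a page boundary. A worked example of the store handler's arithmetic, assuming 4 KiB pages:

/*
 *   written value              stored bounce_size
 *   1048576    (1 MiB, min)    1048576
 *   67108864   (64 MiB)        67108864  (the compiled-in default)
 *   10000000                   9998336   (& PAGE_MASK rounds down)
 *   1073741825 (> 1 GiB max)   rejected with -EINVAL
 */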
@@ -1486,7 +1768,7 @@ ATTRIBUTE_GROUPS(vduse_dev);
static int vduse_create_dev(struct vduse_dev_config *config,
void *config_buf, u64 api_version)
{
- int i, ret;
+ int ret;
struct vduse_dev *dev;
ret = -EEXIST;
@@ -1506,26 +1788,9 @@ static int vduse_create_dev(struct vduse_dev_config *config,
if (!dev->name)
goto err_str;
- dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
- VDUSE_BOUNCE_SIZE);
- if (!dev->domain)
- goto err_domain;
-
+ dev->bounce_size = VDUSE_BOUNCE_SIZE;
dev->config = config_buf;
dev->config_size = config->config_size;
- dev->vq_align = config->vq_align;
- dev->vq_num = config->vq_num;
- dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
- if (!dev->vqs)
- goto err_vqs;
-
- for (i = 0; i < dev->vq_num; i++) {
- dev->vqs[i].index = i;
- INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
- INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
- spin_lock_init(&dev->vqs[i].kick_lock);
- spin_lock_init(&dev->vqs[i].irq_lock);
- }
ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
if (ret < 0)
@@ -1540,16 +1805,19 @@ static int vduse_create_dev(struct vduse_dev_config *config,
ret = PTR_ERR(dev->dev);
goto err_dev;
}
+
+ ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
+ if (ret)
+ goto err_vqs;
+
__module_get(THIS_MODULE);
return 0;
+err_vqs:
+ device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- kfree(dev->vqs);
-err_vqs:
- vduse_domain_destroy(dev->domain);
-err_domain:
kfree(dev->name);
err_str:
vduse_dev_destroy(dev);
@@ -1716,9 +1984,23 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (ret)
return ret;
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain)
+ dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+ dev->bounce_size);
+ mutex_unlock(&dev->domain_lock);
+ if (!dev->domain) {
+ put_device(&dev->vdev->vdpa.dev);
+ return -ENOMEM;
+ }
+
ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
if (ret) {
put_device(&dev->vdev->vdpa.dev);
+ mutex_lock(&dev->domain_lock);
+ vduse_domain_destroy(dev->domain);
+ dev->domain = NULL;
+ mutex_unlock(&dev->domain_lock);
return ret;
}
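
The IOVA domain is now created lazily on the first attach to the vDPA bus, sized from the (possibly tuned) bounce_size, and torn down again if registration fails; vduse_destroy_dev() above likewise only destroys a domain that was actually created. A condensed view of the resulting lifecycle, under the assumption that a device may be destroyed without ever having been attached:

/*
 *   VDUSE_CREATE_DEV          dev->bounce_size = VDUSE_BOUNCE_SIZE,
 *                             dev->domain stays NULL
 *   bounce_size store         permitted only while dev->domain == NULL
 *   vdpa_dev_add()            vduse_domain_create(VDUSE_IOVA_SIZE - 1,
 *                                                 dev->bounce_size)
 *   _vdpa_register_device()   on failure the domain is destroyed and
 *                             dev->domain reset to NULL
 *   vduse_destroy_dev()       vduse_domain_destroy() only if dev->domain
 */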
@@ -1824,12 +2106,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1843,6 +2128,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1862,6 +2149,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);