path: root/drivers/accel
author    Carl Vanderlip <quic_carlv@quicinc.com>	2023-10-16 11:00:36 -0600
committer Jeffrey Hugo <quic_jhugo@quicinc.com>	2023-10-27 09:39:39 -0600
commit    bb8e97e26ce6437d2f57f37e8ba767a2b9cf0d65 (patch)
tree      07adcb149ad7ecad7f61e218d17ac4b79dccfa75 /drivers/accel
parent    88b02ebca8b6ea7457bed6809b1dd575420b7544 (diff)
accel/qaic: Enable 1 MSI fallback mode
Several virtualization use-cases either don't support 32 MultiMSIs (Xen/VMware) or have significant drawbacks to their use (KVM's vIOMMU, which is required to support 32 MSIs, needs to allocate an alternate system memory space for each device using vIOMMU, e.g. 8GB VM mem and 2 cards => 8 + 2 * 8 = 24GB host memory required). Support these cases by enabling a 1 MSI fallback mode.

Whenever all 32 requested MSIs are not available, a second request for a single MSI is made. Its success initiates single MSI mode. This mode causes all interrupts generated by the device to be directed to the 0th MSI (firmware >= v1.10 will do this in response to the PCIe MSI capability configuration). Likewise, all interrupt handlers for the device are registered to the 0th MSI.

Since the DBC interrupt handler checks whether the DBC is in use and whether there are any pending changes, 'spurious' interrupts are disregarded. If there is work to be done, the standard threaded IRQ handler is dispatched. On every interrupt, the MHI handler wakes up its threaded interrupt handler and attempts to wake any waiters for MHI state events.

Performance is within +/-0.6% for test cases that typify real-world use. Larger differences ([-4, +132]%, avg +47%) exist for very simple tasks (e.g. addition) compiled for single NSPs. It is assumed that the small amount of work combined with many interrupts typically causes contention (e.g. 16 NSPs vs 4 CPUs), as evidenced by the standard deviation between runs also decreasing (r = -0.48 between delta(Performance_test) and delta(StdDev_test/Avg_test)).

Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016170036.5409-1-quic_jhugo@quicinc.com
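The fallback described above amounts to two pci_alloc_irq_vectors() calls plus registering every data-path handler on vector 0 with IRQF_SHARED. The following is a minimal, self-contained sketch of that pattern, not the driver's actual probe code; sketch_init_msi(), NUM_DBC_IRQS and the stub handlers are invented names for illustration only.

#include <linux/interrupt.h>
#include <linux/pci.h>

#define NUM_DBC_IRQS 16	/* placeholder count for the example */

static irqreturn_t stub_hard_handler(int irq, void *data)
{
	/* A real handler would first check whether this consumer has work. */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t stub_thread_fn(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int sketch_init_msi(struct pci_dev *pdev, void *dbc_ctx[NUM_DBC_IRQS])
{
	bool single_msi = false;
	int ret, i, vec;

	/* Ask for exactly 32 MSIs; fall back to a single vector on -ENOSPC. */
	ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
	if (ret == -ENOSPC) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
		single_msi = true;
	}
	if (ret < 0)
		return ret;

	/* In single MSI mode every handler is registered on vector 0. */
	for (i = 0; i < NUM_DBC_IRQS; i++) {
		vec = pci_irq_vector(pdev, single_msi ? 0 : i + 1);
		if (vec < 0)
			return vec;
		ret = devm_request_threaded_irq(&pdev->dev, vec,
						stub_hard_handler, stub_thread_fn,
						IRQF_SHARED, "sketch_dbc",
						dbc_ctx[i]);
		if (ret)
			return ret;
	}
	return 0;
}

Because the vector may be shared, each registration passes a unique, non-NULL per-DBC cookie so the hard handler can tell which consumer it was invoked for.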
Diffstat (limited to 'drivers/accel')
 drivers/accel/qaic/mhi_controller.c |  6
 drivers/accel/qaic/mhi_controller.h |  2
 drivers/accel/qaic/qaic.h           |  2
 drivers/accel/qaic/qaic_data.c      | 25
 drivers/accel/qaic/qaic_drv.c       | 35
 5 files changed, 50 insertions(+), 20 deletions(-)
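With every consumer sharing MSI 0, each hard handler must cheaply decide whether the interrupt carries work for it and disregard it otherwise, which is what the qaic_data.c changes below implement. A hedged sketch of such a check follows; struct sketch_dbc, the register offsets and the field names are illustrative placeholders, not the driver's real definitions.

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/types.h>

/* Illustrative register offsets and layout, not the driver's real ones. */
#define SKETCH_RSPHP_OFF 0x100
#define SKETCH_RSPTP_OFF 0x104

struct sketch_dbc {
	void __iomem *base;	/* mapped DBC register space */
	bool in_use;		/* is this DBC attached to a user? */
};

static irqreturn_t sketch_dbc_irq_handler(int irq, void *data)
{
	struct sketch_dbc *dbc = data;
	u32 head, tail;

	/* On a shared vector, another consumer's interrupt is not an error. */
	if (!dbc->in_use)
		return IRQ_HANDLED;

	head = readl(dbc->base + SKETCH_RSPHP_OFF);
	tail = readl(dbc->base + SKETCH_RSPTP_OFF);
	if (head == tail)
		return IRQ_NONE;	/* no pending responses for this DBC */

	return IRQ_WAKE_THREAD;		/* defer the real work to the thread */
}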
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 5036e58e7235b..41be3626587dc 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -468,7 +468,7 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
}
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq)
+ int mhi_irq, bool shared_msi)
{
struct mhi_controller *mhi_cntrl;
int ret;
@@ -500,6 +500,10 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
return ERR_PTR(-ENOMEM);
mhi_cntrl->irq[0] = mhi_irq;
+
+ if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */
+ mhi_cntrl->irq_flags = IRQF_SHARED;
+
mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
/* use latest configured timeout */
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
index 2ae45d768e247..500e7f4af2afe 100644
--- a/drivers/accel/qaic/mhi_controller.h
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -8,7 +8,7 @@
#define MHICONTROLLERQAIC_H_
struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
- int mhi_irq);
+ int mhi_irq, bool shared_msi);
void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index e3f4c30f3ffd2..01e9dda0cc378 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -123,6 +123,8 @@ struct qaic_device {
struct srcu_struct dev_lock;
/* true: Device under reset; false: Device not under reset */
bool in_reset;
+ /* true: single MSI is used to operate device */
+ bool single_msi;
/*
* true: A tx MHI transaction has failed and a rx buffer is still queued
* in control device. Such a buffer is considered lost rx buffer
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 4a8e43a7a6a40..ebc3cca1b0947 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -1466,6 +1466,16 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (datapath_polling) {
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ /*
+ * Normally datapath_polling will not have irqs enabled, but
+ * when running with only one MSI the interrupt is shared with
+ * MHI so it cannot be disabled. Return ASAP instead.
+ */
+ return IRQ_HANDLED;
+ }
+
if (!dbc->usr) {
srcu_read_unlock(&dbc->ch_lock, rcu_id);
return IRQ_HANDLED;
@@ -1488,7 +1498,8 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
return IRQ_NONE;
}
- disable_irq_nosync(irq);
+ if (!dbc->qdev->single_msi)
+ disable_irq_nosync(irq);
srcu_read_unlock(&dbc->ch_lock, rcu_id);
return IRQ_WAKE_THREAD;
}
@@ -1559,12 +1570,12 @@ irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
u32 tail;
rcu_id = srcu_read_lock(&dbc->ch_lock);
+ qdev = dbc->qdev;
head = readl(dbc->dbc_base + RSPHP_OFF);
if (head == U32_MAX) /* PCI link error */
goto error_out;
- qdev = dbc->qdev;
read_fifo:
if (!event_count) {
@@ -1645,14 +1656,14 @@ read_fifo:
goto read_fifo;
normal_out:
- if (likely(!datapath_polling))
+ if (!qdev->single_msi && likely(!datapath_polling))
enable_irq(irq);
- else
+ else if (unlikely(datapath_polling))
schedule_work(&dbc->poll_work);
/* checking the fifo and enabling irqs is a race, missed event check */
tail = readl(dbc->dbc_base + RSPTP_OFF);
if (tail != U32_MAX && head != tail) {
- if (likely(!datapath_polling))
+ if (!qdev->single_msi && likely(!datapath_polling))
disable_irq_nosync(irq);
goto read_fifo;
}
@@ -1661,9 +1672,9 @@ normal_out:
error_out:
srcu_read_unlock(&dbc->ch_lock, rcu_id);
- if (likely(!datapath_polling))
+ if (!qdev->single_msi && likely(!datapath_polling))
enable_irq(irq);
- else
+ else if (unlikely(datapath_polling))
schedule_work(&dbc->poll_work);
return IRQ_HANDLED;
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 6f58095767df6..5eba5f03d3bf7 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -424,14 +424,24 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
int i;
/* Managed release since we use pcim_enable_device */
- ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
- if (ret < 0)
- return ret;
+ ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
+ if (ret == -ENOSPC) {
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ if (ret < 0)
+ return ret;
- if (ret < 32) {
- pci_err(pdev, "%s: Requested 32 MSIs. Obtained %d MSIs which is less than the 32 required.\n",
- __func__, ret);
- return -ENODEV;
+ /*
+ * Operate in one MSI mode. All interrupts will be directed to
+ * MSI0; every interrupt will wake up all the interrupt handlers
+ * (MHI and DBC[0-15]). Since the interrupt is now shared, it is
+ * not disabled during DBC threaded handler, but only one thread
+ * will be allowed to run per DBC, so while it can be
+ * interrupted, it shouldn't race with itself.
+ */
+ qdev->single_msi = true;
+ pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
+ } else if (ret < 0) {
+ return ret;
}
mhi_irq = pci_irq_vector(pdev, 0);
@@ -439,15 +449,17 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
return mhi_irq;
for (i = 0; i < qdev->num_dbc; ++i) {
- ret = devm_request_threaded_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
+ ret = devm_request_threaded_irq(&pdev->dev,
+ pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1),
dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED,
"qaic_dbc", &qdev->dbc[i]);
if (ret)
return ret;
if (datapath_polling) {
- qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1);
- disable_irq_nosync(qdev->dbc[i].irq);
+ qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
+ if (!qdev->single_msi)
+ disable_irq_nosync(qdev->dbc[i].irq);
INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
}
}
@@ -479,7 +491,8 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto cleanup_qdev;
}
- qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq);
+ qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
+ qdev->single_msi);
if (IS_ERR(qdev->mhi_cntrl)) {
ret = PTR_ERR(qdev->mhi_cntrl);
goto cleanup_qdev;