aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThéo Lebrun <theo.lebrun@bootlin.com>2024-04-23 12:01:41 +0200
committerMark Brown <broonie@kernel.org>2024-04-26 11:09:23 +0900
commit1f257b92e6330d576cc826fb8f0b74fe0e8209de (patch)
tree5285bf6ec2549d61ccefdca5f8ce491d45ce2ce9
parent3bf64a2b66edffd28614b004648ccd60e3139c9e (diff)
downloadspi-1f257b92e6330d576cc826fb8f0b74fe0e8209de.tar.gz
spi: cadence-qspi: add no-IRQ mode to indirect reads
Support reads through polling, without any IRQ. The main reason is performance; profiling shows that the first IRQ comes quickly on our specific hardware. Once this IRQ arrives, we poll until all data is retrieved. Avoid initial sleep to reduce IRQ count. Hide this behavior behind a quirk flag. This is confirmed through micro-benchmarks, but also end-to-end performance tests. Mobileye EyeQ5, octal flash, reading 235M on a UBIFS filesystem: - No optimizations, ~10.34s, ~22.7 MB/s, 199230 IRQs - CQSPI_SLOW_SRAM, ~10.34s, ~22.7 MB/s, 70284 IRQs - CQSPI_RD_NO_IRQ, ~9.37s, ~25.1 MB/s, 521 IRQs Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com> Link: https://lore.kernel.org/r/20240423-cdns-qspi-mbly-v4-2-3d2a7b535ad0@bootlin.com Signed-off-by: Mark Brown <broonie@kernel.org>
-rw-r--r--drivers/spi/spi-cadence-quadspi.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 5a83940220a91..a82e23526a6f0 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -42,6 +42,7 @@ static_assert(CQSPI_MAX_CHIPSELECT <= SPI_CS_CNT_MAX);
#define CQSPI_NO_SUPPORT_WR_COMPLETION BIT(3)
#define CQSPI_SLOW_SRAM BIT(4)
#define CQSPI_NEEDS_APB_AHB_HAZARD_WAR BIT(5)
+#define CQSPI_RD_NO_IRQ BIT(6)
/* Capabilities */
#define CQSPI_SUPPORTS_OCTAL BIT(0)
@@ -702,6 +703,7 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
const size_t n_rx)
{
struct cqspi_st *cqspi = f_pdata->cqspi;
+ bool use_irq = !(cqspi->ddata && cqspi->ddata->quirks & CQSPI_RD_NO_IRQ);
struct device *dev = &cqspi->pdev->dev;
void __iomem *reg_base = cqspi->iobase;
void __iomem *ahb_base = cqspi->ahb_base;
@@ -725,17 +727,20 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
* all the read interrupts disabled for max performance.
*/
- if (!cqspi->slow_sram)
+ if (use_irq && cqspi->slow_sram)
+ writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
+ else if (use_irq)
writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK);
else
- writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
+ writel(0, reg_base + CQSPI_REG_IRQMASK);
reinit_completion(&cqspi->transfer_complete);
writel(CQSPI_REG_INDIRECTRD_START_MASK,
reg_base + CQSPI_REG_INDIRECTRD);
while (remaining > 0) {
- if (!wait_for_completion_timeout(&cqspi->transfer_complete,
+ if (use_irq &&
+ !wait_for_completion_timeout(&cqspi->transfer_complete,
msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS)))
ret = -ETIMEDOUT;
@@ -777,7 +782,7 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
}
- if (remaining > 0) {
+ if (use_irq && remaining > 0) {
reinit_completion(&cqspi->transfer_complete);
if (cqspi->slow_sram)
writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);