ChangeSet 1.924.3.6, 2002/11/30 00:08:43-08:00, david-b@pacbell.net [PATCH] ehci-hcd, handle async_next register correctly This patch should improve behavior of the EHCI driver, particularly on VIA hardware. - A more careful reading of the EHCI spec turns up requirements not to change this register's value while the async schedule is enabled. That means in effect that it must never point to a QH that'd get unlinked ... driver now uses a dedicated QH. - Disables async schedule a bit faster: after 50msec idle, not 330msec idle. - Streamline the "can't init memory" failure path. - Start to use the dev_dbg()/dev_info()/... macros in more places. This version acts a bunch happier than the previous version, removing some failure modes I could never quite convince myself were hardware (they weren't!) I suspect it'll remove a lot of the "it hangs" failures that some folk have reported (mostly on 2.4 though). diff -Nru a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c --- a/drivers/usb/host/ehci-dbg.c Sun Dec 1 23:06:10 2002 +++ b/drivers/usb/host/ehci-dbg.c Sun Dec 1 23:06:10 2002 @@ -18,6 +18,17 @@ /* this file is part of ehci-hcd.c */ +#define ehci_dbg(ehci, fmt, args...) \ + dev_dbg (*(ehci)->hcd.controller, fmt, ## args ) + +#ifdef EHCI_VERBOSE_DEBUG +#define ehci_vdbg(ehci, fmt, args...) \ + dev_dbg (*(ehci)->hcd.controller, fmt, ## args ) +#else +#define ehci_vdbg(ehci, fmt, args...) do { } while (0) +#endif + + #ifdef EHCI_VERBOSE_DEBUG # define vdbg dbg #else @@ -338,12 +349,8 @@ * one QH per line, and TDs we know about */ spin_lock_irqsave (&ehci->lock, flags); - if (ehci->async) { - qh = ehci->async; - do { - qh_lines (qh, &next, &size); - } while ((qh = qh->qh_next.qh) != ehci->async); - } + for (qh = ehci->async->qh_next.qh; qh; qh = qh->qh_next.qh) + qh_lines (qh, &next, &size); if (ehci->reclaim) { temp = snprintf (next, size, "\nreclaim =\n"); size -= temp; diff -Nru a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c --- a/drivers/usb/host/ehci-hcd.c Sun Dec 1 23:06:10 2002 +++ b/drivers/usb/host/ehci-hcd.c Sun Dec 1 23:06:10 2002 @@ -17,6 +17,13 @@ */ #include + +#ifdef CONFIG_USB_DEBUG + #define DEBUG +#else + #undef DEBUG +#endif + #include #include #include @@ -31,12 +38,6 @@ #include #include -#ifdef CONFIG_USB_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include @@ -70,6 +71,7 @@ * * HISTORY: * + * 2002-11-29 Correct handling for hw async_next register. * 2002-08-06 Handling for bulk and interrupt transfers is mostly shared; * only scheduling is different, no arbitrary limitations. * 2002-07-25 Sanity check PCI reads, mostly for better cardbus support, @@ -92,7 +94,7 @@ * 2001-June Works with usb-storage and NEC EHCI on 2.4 */ -#define DRIVER_VERSION "2002-Sep-23" +#define DRIVER_VERSION "2002-Nov-29" #define DRIVER_AUTHOR "David Brownell" #define DRIVER_DESC "USB 2.0 'Enhanced' Host Controller (EHCI) Driver" @@ -114,7 +116,7 @@ #define EHCI_TUNE_MULT_TT 1 #define EHCI_WATCHDOG_JIFFIES (HZ/100) /* arbitrary; ~10 msec */ -#define EHCI_ASYNC_JIFFIES (HZ/3) /* async idle timeout */ +#define EHCI_ASYNC_JIFFIES (HZ/20) /* async idle timeout */ /* Initial IRQ latency: lower than default */ static int log2_irq_thresh = 0; // 0 to 6 @@ -215,7 +217,7 @@ /* wait for any schedule enables/disables to take effect */ temp = 0; - if (ehci->async) + if (ehci->async->qh_next.qh) temp = STS_ASS; if (ehci->next_uframe != -1) temp |= STS_PSS; @@ -360,7 +362,6 @@ else // N microframes cached ehci->i_thresh = 2 + HCC_ISOC_THRES (hcc_params); - ehci->async = 0; ehci->reclaim = 0; ehci->next_uframe = -1; @@ -373,6 +374,21 @@ } writel (INTR_MASK, &ehci->regs->intr_enable); writel (ehci->periodic_dma, &ehci->regs->frame_list); + + /* + * dedicate a qh for the async ring head, since we couldn't unlink + * a 'real' qh without stopping the async schedule [4.8]. use it + * as the 'reclamation list head' too. + */ + ehci->async->qh_next.qh = 0; + ehci->async->hw_next = QH_NEXT (ehci->async->qh_dma); + ehci->async->hw_info1 = cpu_to_le32 (QH_HEAD); + ehci->async->hw_token = cpu_to_le32 (QTD_STS_HALT); + ehci->async->hw_qtd_next = EHCI_LIST_END; + ehci->async->qh_state = QH_STATE_LINKED; + ehci_qtd_free (ehci, ehci->async->dummy); + ehci->async->dummy = 0; + writel ((u32)ehci->async->qh_dma, &ehci->regs->async_next); /* * hcc_params controls whether ehci->regs->segment must (!!!) diff -Nru a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c --- a/drivers/usb/host/ehci-mem.c Sun Dec 1 23:06:10 2002 +++ b/drivers/usb/host/ehci-mem.c Sun Dec 1 23:06:10 2002 @@ -142,6 +142,10 @@ static void ehci_mem_cleanup (struct ehci_hcd *ehci) { + if (ehci->async) + qh_put (ehci, ehci->async); + ehci->async = 0; + /* PCI consistent memory and pools */ if (ehci->qtd_pool) pci_pool_destroy (ehci->qtd_pool); @@ -183,20 +187,20 @@ 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); if (!ehci->qtd_pool) { - dbg ("no qtd pool"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; } - /* QH for control/bulk/intr transfers */ + /* QHs for control/bulk/intr transfers */ ehci->qh_pool = pci_pool_create ("ehci_qh", ehci->hcd.pdev, sizeof (struct ehci_qh), 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); if (!ehci->qh_pool) { - dbg ("no qh pool"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; + } + ehci->async = ehci_qh_alloc (ehci, flags); + if (!ehci->async) { + goto fail; } /* ITD for high speed ISO transfers */ @@ -205,9 +209,7 @@ 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); if (!ehci->itd_pool) { - dbg ("no itd pool"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; } /* SITD for full/low speed split ISO transfers */ @@ -216,9 +218,7 @@ 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); if (!ehci->sitd_pool) { - dbg ("no sitd pool"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; } /* Hardware periodic table */ @@ -227,9 +227,7 @@ ehci->periodic_size * sizeof (u32), &ehci->periodic_dma); if (ehci->periodic == 0) { - dbg ("no hw periodic table"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; } for (i = 0; i < ehci->periodic_size; i++) ehci->periodic [i] = EHCI_LIST_END; @@ -237,11 +235,14 @@ /* software shadow of hardware table */ ehci->pshadow = kmalloc (ehci->periodic_size * sizeof (void *), flags); if (ehci->pshadow == 0) { - dbg ("no shadow periodic table"); - ehci_mem_cleanup (ehci); - return -ENOMEM; + goto fail; } memset (ehci->pshadow, 0, ehci->periodic_size * sizeof (void *)); return 0; + +fail: + ehci_dbg (ehci, "couldn't init memory\n"); + ehci_mem_cleanup (ehci); + return -ENOMEM; } diff -Nru a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c --- a/drivers/usb/host/ehci-q.c Sun Dec 1 23:06:10 2002 +++ b/drivers/usb/host/ehci-q.c Sun Dec 1 23:06:10 2002 @@ -678,33 +678,33 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh) { u32 dma = QH_NEXT (qh->qh_dma); - struct ehci_qh *q; + struct ehci_qh *head; - if (unlikely (!(q = ehci->async))) { + /* (re)start the async schedule? */ + head = ehci->async; + if (ehci->async_idle) + del_timer (&ehci->watchdog); + else if (!head->qh_next.qh) { u32 cmd = readl (&ehci->regs->command); - /* in case a clear of CMD_ASE didn't take yet */ - (void) handshake (&ehci->regs->status, STS_ASS, 0, 150); - - qh->hw_info1 |= __constant_cpu_to_le32 (QH_HEAD); /* [4.8] */ - qh->qh_next.qh = qh; - qh->hw_next = dma; - wmb (); - ehci->async = qh; - writel ((u32)qh->qh_dma, &ehci->regs->async_next); - cmd |= CMD_ASE | CMD_RUN; - writel (cmd, &ehci->regs->command); - ehci->hcd.state = USB_STATE_RUNNING; - /* posted write need not be known to HC yet ... */ - } else { - /* splice right after "start" of ring */ - qh->hw_info1 &= ~__constant_cpu_to_le32 (QH_HEAD); /* [4.8] */ - qh->qh_next = q->qh_next; - qh->hw_next = q->hw_next; - wmb (); - q->qh_next.qh = qh; - q->hw_next = dma; + if (!(cmd & CMD_ASE)) { + /* in case a clear of CMD_ASE didn't take yet */ + (void) handshake (&ehci->regs->status, STS_ASS, 0, 150); + cmd |= CMD_ASE | CMD_RUN; + writel (cmd, &ehci->regs->command); + ehci->hcd.state = USB_STATE_RUNNING; + /* posted write need not be known to HC yet ... */ + } } + + /* splice right after start */ + qh->qh_next = head->qh_next; + qh->hw_next = head->hw_next; + wmb (); + + head->qh_next.qh = qh; + head->hw_next = dma; + qh->qh_state = QH_STATE_LINKED; /* qtd completions reported later by interrupt */ @@ -897,6 +897,14 @@ qh_link_async (ehci, qh); else qh_put (ehci, qh); // refcount from async list + + /* it's not free to turn the async schedule on/off, so we leave it + * active but idle for a while once it empties. + */ + if (!ehci->async->qh_next.qh && !timer_pending (&ehci->watchdog)) { + ehci->async_idle = 1; + mod_timer (&ehci->watchdog, jiffies + EHCI_ASYNC_JIFFIES); + } } /* makes sure the async qh will become idle */ @@ -909,7 +917,6 @@ #ifdef DEBUG if (ehci->reclaim - || !ehci->async || qh->qh_state != QH_STATE_LINKED #ifdef CONFIG_SMP // this macro lies except on SMP compiles @@ -919,31 +926,20 @@ BUG (); #endif - qh->qh_state = QH_STATE_UNLINK; - ehci->reclaim = qh = qh_get (qh); - - // dbg_qh ("start unlink", ehci, qh); - - /* Remove the last QH (qhead)? Stop async schedule first. */ - if (unlikely (qh == ehci->async && qh->qh_next.qh == qh)) { + /* stop async schedule right now? */ + if (unlikely (qh == ehci->async)) { /* can't get here without STS_ASS set */ if (ehci->hcd.state != USB_STATE_HALT) { writel (cmd & ~CMD_ASE, &ehci->regs->command); - (void) handshake (&ehci->regs->status, STS_ASS, 0, 150); -#if 0 - // one VT8235 system wants to die with STS_FATAL - // unless this qh is leaked here. others seem ok... - qh = qh_get (qh); - dbg_qh ("async/off", ehci, qh); -#endif + wmb (); + // handshake later, if we need to } - qh->qh_next.qh = ehci->async = 0; - - ehci->reclaim_ready = 1; - tasklet_schedule (&ehci->tasklet); return; } + qh->qh_state = QH_STATE_UNLINK; + ehci->reclaim = qh = qh_get (qh); + if (unlikely (ehci->hcd.state == USB_STATE_HALT)) { ehci->reclaim_ready = 1; tasklet_schedule (&ehci->tasklet); @@ -951,13 +947,9 @@ } prev = ehci->async; - while (prev->qh_next.qh != qh && prev->qh_next.qh != ehci->async) + while (prev->qh_next.qh != qh) prev = prev->qh_next.qh; - if (qh->hw_info1 & __constant_cpu_to_le32 (QH_HEAD)) { - ehci->async = prev; - prev->hw_info1 |= __constant_cpu_to_le32 (QH_HEAD); - } prev->hw_next = qh->hw_next; prev->qh_next = qh->qh_next; wmb (); @@ -979,7 +971,7 @@ unsigned count; rescan: - qh = ehci->async; + qh = ehci->async->qh_next.qh; count = 0; if (likely (qh != 0)) { do { @@ -991,25 +983,17 @@ /* concurrent unlink could happen here */ count += qh_completions (ehci, qh); qh_put (ehci, qh); + goto rescan; } /* unlink idle entries, reducing HC PCI usage as - * well as HCD schedule-scanning costs. removing - * the last qh is deferred, since it's costly. + * well as HCD schedule-scanning costs. * * FIXME don't unlink idle entries so quickly; it * can penalize (common) half duplex protocols. */ if (list_empty (&qh->qtd_list) && !ehci->reclaim) { - if (qh->qh_next.qh != qh) { - // dbg ("irq/empty"); - start_unlink_async (ehci, qh); - } else if (!timer_pending (&ehci->watchdog)) { - /* can't use IAA for last entry */ - ehci->async_idle = 1; - mod_timer (&ehci->watchdog, - jiffies + EHCI_ASYNC_JIFFIES); - } + start_unlink_async (ehci, qh); } /* keep latencies down: let any irqs in */ @@ -1021,8 +1005,6 @@ } qh = qh->qh_next.qh; - if (!qh) /* unlinked? */ - goto rescan; - } while (qh != ehci->async); + } while (qh); } }