ChangeSet 1.823.3.10, 2002/11/11 17:21:19-08:00, david-b@pacbell.net [PATCH] ehci-hcd, use dummy td when queueing What it does is give up on catching all the "race with HC" cases when appending to a live QH, by switching to using a disabled "dummy" TD at the end of all hardware queues. The HC won't cache disabled TDs, so it just sees "always good" pointers: no races. As a side benefit this also makes it safe to not irq on completion of most TDs that are queued using the scatterlist calls, so it'll be typical for one 64 KByte usb-storage request to mean just one irq (or less!) even without tuning ehci irq latency (for the DATA stage, that is). diff -Nru a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c --- a/drivers/usb/host/ehci-dbg.c Thu Nov 14 14:12:39 2002 +++ b/drivers/usb/host/ehci-dbg.c Thu Nov 14 14:12:39 2002 @@ -272,6 +272,7 @@ static void qh_lines (struct ehci_qh *qh, char **nextp, unsigned *sizep) { u32 scratch; + u32 hw_curr; struct list_head *entry; struct ehci_qtd *td; unsigned temp; @@ -279,20 +280,22 @@ char *next = *nextp; scratch = cpu_to_le32p (&qh->hw_info1); - temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x", + hw_curr = cpu_to_le32p (&qh->hw_current); + temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x (%08x %08x)", qh, scratch & 0x007f, speed_char (scratch), (scratch >> 8) & 0x000f, - scratch, cpu_to_le32p (&qh->hw_info2)); + scratch, cpu_to_le32p (&qh->hw_info2), + hw_curr, cpu_to_le32p (&qh->hw_token)); size -= temp; next += temp; list_for_each (entry, &qh->qtd_list) { - td = list_entry (entry, struct ehci_qtd, - qtd_list); + td = list_entry (entry, struct ehci_qtd, qtd_list); scratch = cpu_to_le32p (&td->hw_token); temp = snprintf (next, size, - "\n\ttd/%p %s len=%d %08x urb %p", + "\n\t%std/%p %s len=%d %08x urb %p", + (hw_curr == td->qtd_dma) ? "*" : "", td, ({ char *tmp; switch ((scratch>>8)&0x03) { case 0: tmp = "out"; break; @@ -552,8 +555,8 @@ size -= temp; next += temp; - temp = snprintf (next, size, "complete %ld unlink %ld qpatch %ld\n", - ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch); + temp = snprintf (next, size, "complete %ld unlink %ld\n", + ehci->stats.complete, ehci->stats.unlink); size -= temp; next += temp; #endif diff -Nru a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c --- a/drivers/usb/host/ehci-hcd.c Thu Nov 14 14:12:39 2002 +++ b/drivers/usb/host/ehci-hcd.c Thu Nov 14 14:12:39 2002 @@ -494,8 +494,8 @@ #ifdef EHCI_STATS dbg ("irq normal %ld err %ld reclaim %ld", ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim); - dbg ("complete %ld unlink %ld qpatch %ld", - ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch); + dbg ("complete %ld unlink %ld", + ehci->stats.complete, ehci->stats.unlink); #endif dbg_status (ehci, "ehci_stop completed", readl (&ehci->regs->status)); diff -Nru a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c --- a/drivers/usb/host/ehci-mem.c Thu Nov 14 14:12:39 2002 +++ b/drivers/usb/host/ehci-mem.c Thu Nov 14 14:12:39 2002 @@ -58,19 +58,23 @@ /* Allocate the key transfer structures from the previously allocated pool */ +static void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma) +{ + memset (qtd, 0, sizeof *qtd); + qtd->qtd_dma = dma; + qtd->hw_next = EHCI_LIST_END; + qtd->hw_alt_next = EHCI_LIST_END; + INIT_LIST_HEAD (&qtd->qtd_list); +} + static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags) { struct ehci_qtd *qtd; dma_addr_t dma; qtd = pci_pool_alloc (ehci->qtd_pool, flags, &dma); - if (qtd != 0) { - memset (qtd, 0, sizeof *qtd); - qtd->qtd_dma = dma; - qtd->hw_next = EHCI_LIST_END; - qtd->hw_alt_next = EHCI_LIST_END; - INIT_LIST_HEAD (&qtd->qtd_list); - } + if (qtd != 0) + ehci_qtd_init (qtd, dma); return qtd; } @@ -87,12 +91,21 @@ qh = (struct ehci_qh *) pci_pool_alloc (ehci->qh_pool, flags, &dma); - if (qh) { - memset (qh, 0, sizeof *qh); - atomic_set (&qh->refcount, 1); - qh->qh_dma = dma; - // INIT_LIST_HEAD (&qh->qh_list); - INIT_LIST_HEAD (&qh->qtd_list); + if (!qh) + return qh; + + memset (qh, 0, sizeof *qh); + atomic_set (&qh->refcount, 1); + qh->qh_dma = dma; + // INIT_LIST_HEAD (&qh->qh_list); + INIT_LIST_HEAD (&qh->qtd_list); + + /* dummy td enables safe urb queuing */ + qh->dummy = ehci_qtd_alloc (ehci, flags); + if (qh->dummy == 0) { + dbg ("no dummy td"); + pci_pool_free (ehci->qh_pool, qh, qh->qh_dma); + qh = 0; } return qh; } @@ -115,6 +128,8 @@ dbg ("unused qh not empty!"); BUG (); } + if (qh->dummy) + ehci_qtd_free (ehci, qh->dummy); pci_pool_free (ehci->qh_pool, qh, qh->qh_dma); } diff -Nru a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c --- a/drivers/usb/host/ehci-q.c Thu Nov 14 14:12:39 2002 +++ b/drivers/usb/host/ehci-q.c Thu Nov 14 14:12:39 2002 @@ -85,7 +85,7 @@ /* update halted (but potentially linked) qh */ -static inline void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd) +static void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd) { qh->hw_current = 0; qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma); @@ -221,17 +221,6 @@ struct urb *urb = qtd->urb; u32 token = 0; - /* hc's on-chip qh overlay cache can overwrite our idea of - * next qtd ptrs, if we appended a qtd while the queue was - * advancing. (because we don't use dummy qtds.) - */ - if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current - && qtd->hw_next != qh->hw_qtd_next) { - qh->hw_alt_next = qtd->hw_alt_next; - qh->hw_qtd_next = qtd->hw_next; - COUNT (ehci->stats.qpatch); - } - /* clean up any state from previous QTD ...*/ if (last) { if (likely (last->urb != urb)) { @@ -495,8 +484,7 @@ } /* by default, enable interrupt on urb completion */ -// ... do it always, unless we switch over to dummy qtds -// if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT))) + if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT))) qtd->hw_token |= __constant_cpu_to_le32 (QTD_IOC); return head; @@ -661,8 +649,15 @@ /* initialize sw and hw queues with these qtds */ if (!list_empty (qtd_list)) { + struct ehci_qtd *qtd; + + /* hc's list view ends with dummy td; we might update it */ + qtd = list_entry (qtd_list->prev, struct ehci_qtd, qtd_list); + qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma); + list_splice (qtd_list, &qh->qtd_list); - qh_update (qh, list_entry (qtd_list->next, struct ehci_qtd, qtd_list)); + qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list); + qh_update (qh, qtd); } else { qh->hw_qtd_next = qh->hw_alt_next = EHCI_LIST_END; } @@ -767,39 +762,48 @@ /* append to tds already queued to this qh? */ if (unlikely (!list_empty (&qh->qtd_list) && qtd)) { - struct ehci_qtd *last_qtd; - u32 hw_next; + struct ehci_qtd *dummy; + dma_addr_t dma; + u32 token; + + /* to avoid racing the HC, use the dummy td instead of + * the first td of our list (becomes new dummy). both + * tds stay deactivated until we're done, when the + * HC is allowed to fetch the old dummy (4.10.2). + */ + token = qtd->hw_token; + qtd->hw_token = 0; + dummy = qh->dummy; + // dbg ("swap td %p with dummy %p", qtd, dummy); + + dma = dummy->qtd_dma; + *dummy = *qtd; + dummy->qtd_dma = dma; + list_del (&qtd->qtd_list); + list_add (&dummy->qtd_list, qtd_list); - /* update the last qtd's "next" pointer */ - // dbg_qh ("non-empty qh", ehci, qh); - last_qtd = list_entry (qh->qtd_list.prev, - struct ehci_qtd, qtd_list); - hw_next = QTD_NEXT (qtd->qtd_dma); - last_qtd->hw_next = hw_next; + ehci_qtd_init (qtd, qtd->qtd_dma); + qh->dummy = qtd; - /* previous urb allows short rx? maybe optimize. */ - if (!(last_qtd->urb->transfer_flags & URB_SHORT_NOT_OK) - && (epnum & 0x10)) { - // only the last QTD for now - last_qtd->hw_alt_next = hw_next; - } + /* hc must see the new dummy at list end */ + qtd = list_entry (qh->qtd_list.prev, + struct ehci_qtd, qtd_list); + qtd->hw_next = QTD_NEXT (dma); - /* qh_completions() may need to patch the qh overlay if - * the hc was advancing this queue while we appended. - * we know it can: last_qtd->hw_token has IOC set. - * - * or: use a dummy td (so the overlay gets the next td - * only when we set its active bit); fewer irqs. - */ + /* let the hc process these next qtds */ wmb (); + dummy->hw_token = token; /* no URB queued */ } else { - // dbg_qh ("empty qh", ehci, qh); + struct ehci_qtd *last_qtd; - /* NOTE: we already canceled any queued URBs - * when the endpoint halted. - */ + /* make sure hc sees current dummy at the end */ + last_qtd = list_entry (qtd_list->prev, + struct ehci_qtd, qtd_list); + last_qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma); + + // dbg_qh ("empty qh", ehci, qh); /* usb_clear_halt() means qh data toggle gets reset */ if (unlikely (!usb_gettoggle (urb->dev, diff -Nru a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h --- a/drivers/usb/host/ehci.h Thu Nov 14 14:12:39 2002 +++ b/drivers/usb/host/ehci.h Thu Nov 14 14:12:39 2002 @@ -31,11 +31,6 @@ /* termination of urbs from core */ unsigned long complete; unsigned long unlink; - - /* qhs patched to recover from td queueing race - * (can avoid by using 'dummy td', allowing fewer irqs) - */ - unsigned long qpatch; }; /* ehci_hcd->lock guards shared data against other CPUs: @@ -311,6 +306,7 @@ dma_addr_t qh_dma; /* address of qh */ union ehci_shadow qh_next; /* ptr to qh; or periodic */ struct list_head qtd_list; /* sw qtd list */ + struct ehci_qtd *dummy; atomic_t refcount;