fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / drivers / usb / host / ehci-q.c
index fd8634b..62e46dc 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2001-2002 by David Brownell
- * 
+ * Copyright (C) 2001-2004 by David Brownell
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2 of the License, or (at your
@@ -31,7 +31,7 @@
  * ISO traffic uses "ISO TD" (itd, and sitd) records, and (along with
  * interrupts) needs careful scheduling.  Performance improvements can be
  * an ongoing challenge.  That's in "ehci-sched.c".
- * 
+ *
  * USB 1.1 devices are handled (a) by "companion" OHCI or UHCI root hubs,
  * or otherwise through transaction translators (TTs) in USB 2.0 hubs using
  * (b) special fields in qh entries or (c) split iso entries.  TTs will
@@ -83,19 +83,59 @@ qtd_fill (struct ehci_qtd *qtd, dma_addr_t buf, size_t len,
 
 /*-------------------------------------------------------------------------*/
 
-/* update halted (but potentially linked) qh */
-
 static inline void
 qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd)
 {
+       /* writes to an active overlay are unsafe */
+       BUG_ON(qh->qh_state != QH_STATE_IDLE);
+
        qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
        qh->hw_alt_next = EHCI_LIST_END;
 
+       /* Unless hw_info1 bit 14 is set (presumably: toggle comes from the
+        * qtds, as for control endpoints), hardware maintains the data
+        * toggle (like OHCI).  Here we (re)initialize the toggle in the QH
+        * and set the pseudo-toggle in udev; only usb_clear_halt() clears it.
+        */
+       if (!(qh->hw_info1 & cpu_to_le32(1 << 14))) {
+               unsigned        is_out, epnum;
+
+               is_out = !(qtd->hw_token & cpu_to_le32(1 << 8));
+               epnum = (le32_to_cpup(&qh->hw_info1) >> 8) & 0x0f;
+               if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
+                       qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE);
+                       usb_settoggle (qh->dev, epnum, is_out, 1);
+               }
+       }
+
        /* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
        wmb ();
        qh->hw_token &= __constant_cpu_to_le32 (QTD_TOGGLE | QTD_STS_PING);
 }
 
+/* re-point an idle qh's overlay at its first queued qtd (or the dummy, if
+ * none); needed after faults (including urb dequeue) and after a common
+ * silicon quirk wrongly overlays the dummy, leaving hw_token inactive/halted
+ */
+static void
+qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh)
+{
+       struct ehci_qtd *qtd;
+
+       if (list_empty (&qh->qtd_list))
+               qtd = qh->dummy;
+       else {
+               qtd = list_entry (qh->qtd_list.next,
+                               struct ehci_qtd, qtd_list);
+               /* first qtd is hw_current: maybe partially processed, skip */
+               if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current)
+                       qtd = NULL;
+       }
+
+       if (qtd)
+               qh_update (ehci, qh, qtd);
+}
+
 /*-------------------------------------------------------------------------*/
 
 static void qtd_copy_status (
@@ -153,22 +193,14 @@ static void qtd_copy_status (
                        usb_pipein (urb->pipe) ? "in" : "out",
                        token, urb->status);
 
-               /* stall indicates some recovery action is needed */
-               if (urb->status == -EPIPE) {
-                       int     pipe = urb->pipe;
-
-                       if (!usb_pipecontrol (pipe))
-                               usb_endpoint_halt (urb->dev,
-                                       usb_pipeendpoint (pipe),
-                                       usb_pipeout (pipe));
-
                /* if async CSPLIT failed, try cleaning out the TT buffer */
-               } else if (urb->dev->tt && !usb_pipeint (urb->pipe)
+               if (urb->status != -EPIPE
+                               && urb->dev->tt && !usb_pipeint (urb->pipe)
                                && ((token & QTD_STS_MMF) != 0
                                        || QTD_CERR(token) == 0)
-                               && (!ehci_is_ARC(ehci)
-                                       || urb->dev->tt->hub !=
-                                               ehci->hcd.self.root_hub)) {
+                               && (!ehci_is_TDI(ehci)
+                                       || urb->dev->tt->hub !=
+                                          ehci_to_hcd(ehci)->self.root_hub)) {
 #ifdef DEBUG
                        struct usb_device *tt = urb->dev->tt->hub;
                        dev_dbg (&tt->dev,
@@ -182,22 +214,24 @@ static void qtd_copy_status (
 }
 
 static void
-ehci_urb_done (struct ehci_hcd *ehci, struct urb *urb, struct pt_regs *regs)
+ehci_urb_done (struct ehci_hcd *ehci, struct urb *urb)
+__releases(ehci->lock)
+__acquires(ehci->lock)
 {
-       if (likely (urb->hcpriv != 0)) {
+       if (likely (urb->hcpriv != NULL)) {
                struct ehci_qh  *qh = (struct ehci_qh *) urb->hcpriv;
 
                /* S-mask in a QH means it's an interrupt urb */
-               if ((qh->hw_info2 & __constant_cpu_to_le32 (0x00ff)) != 0) {
+               if ((qh->hw_info2 & __constant_cpu_to_le32 (QH_SMASK)) != 0) {
 
                        /* ... update hc-wide periodic stats (for usbfs) */
-                       hcd_to_bus (&ehci->hcd)->bandwidth_int_reqs--;
+                       ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
                }
-               qh_put (ehci, qh);
+               qh_put (qh);
        }
 
        spin_lock (&urb->lock);
-       urb->hcpriv = 0;
+       urb->hcpriv = NULL;
        switch (urb->status) {
        case -EINPROGRESS:              /* success */
                urb->status = 0;
@@ -228,10 +262,15 @@ ehci_urb_done (struct ehci_hcd *ehci, struct urb *urb, struct pt_regs *regs)
 
        /* complete() can reenter this HCD */
        spin_unlock (&ehci->lock);
-       usb_hcd_giveback_urb (&ehci->hcd, urb, regs);
+       usb_hcd_giveback_urb (ehci_to_hcd(ehci), urb);
        spin_lock (&ehci->lock);
 }
 
+static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
+static void unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
+
+static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
+static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh);
 
 /*
  * Process and free completed qtds for a qh, returning URBs to drivers.
@@ -240,9 +279,9 @@ ehci_urb_done (struct ehci_hcd *ehci, struct urb *urb, struct pt_regs *regs)
  */
 #define HALT_BIT __constant_cpu_to_le32(QTD_STS_HALT)
 static unsigned
-qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh, struct pt_regs *regs)
+qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-       struct ehci_qtd         *last = 0, *end = qh->dummy;
+       struct ehci_qtd         *last = NULL, *end = qh->dummy;
        struct list_head        *entry, *tmp;
        int                     stopped;
        unsigned                count = 0;
@@ -278,11 +317,11 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh, struct pt_regs *regs)
                /* clean up any state from previous QTD ...*/
                if (last) {
                        if (likely (last->urb != urb)) {
-                               ehci_urb_done (ehci, last->urb, regs);
+                               ehci_urb_done (ehci, last->urb);
                                count++;
                        }
                        ehci_qtd_free (ehci, last);
-                       last = 0;
+                       last = NULL;
                }
 
                /* ignore urbs submitted during completions we reported */
@@ -299,29 +338,33 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh, struct pt_regs *regs)
                        if ((token & QTD_STS_HALT) != 0) {
                                stopped = 1;
 
-                       /* magic dummy for some short reads; qh won't advance */
+                       /* magic dummy for some short reads; qh won't advance.
+                        * that silicon quirk can kick in with this dummy too.
+                        */
                        } else if (IS_SHORT_READ (token)
-                                       && (qh->hw_alt_next & QTD_MASK)
-                                               == ehci->async->hw_alt_next) {
+                                       && !(qtd->hw_alt_next & EHCI_LIST_END)) {
                                stopped = 1;
                                goto halt;
                        }
 
                /* stop scanning when we reach qtds the hc is using */
                } else if (likely (!stopped
-                               && HCD_IS_RUNNING (ehci->hcd.state))) {
+                               && HC_IS_RUNNING (ehci_to_hcd(ehci)->state))) {
                        break;
 
                } else {
                        stopped = 1;
 
+                       if (unlikely (!HC_IS_RUNNING (ehci_to_hcd(ehci)->state)))
+                               urb->status = -ESHUTDOWN;
+
                        /* ignore active urbs unless some previous qtd
                         * for the urb faulted (including short read) or
                         * its urb was canceled.  we may patch qh or qtds.
                         */
                        if (likely (urb->status == -EINPROGRESS))
                                continue;
-                       
+
                        /* issue status after short control reads */
                        if (unlikely (do_status != 0)
                                        && QTD_PID (token) == 0 /* OUT */) {
@@ -345,7 +388,7 @@ halt:
                                wmb ();
                        }
                }
+
                /* remove it from the queue */
                spin_lock (&urb->lock);
                qtd_copy_status (ehci, urb, qtd->length, token);
@@ -363,8 +406,8 @@ halt:
        }
 
        /* last urb's completion might still need calling */
-       if (likely (last != 0)) {
-               ehci_urb_done (ehci, last->urb, regs);
+       if (likely (last != NULL)) {
+               ehci_urb_done (ehci, last->urb);
                count++;
                ehci_qtd_free (ehci, last);
        }
@@ -372,21 +415,28 @@ halt:
        /* restore original state; caller must unlink or relink */
        qh->qh_state = state;
 
-       /* update qh after fault cleanup */
-       if (unlikely (stopped != 0)
-                       /* some EHCI 0.95 impls will overlay dummy qtds */ 
-                       || qh->hw_qtd_next == EHCI_LIST_END) {
-               if (list_empty (&qh->qtd_list))
-                       end = qh->dummy;
-               else {
-                       end = list_entry (qh->qtd_list.next,
-                                       struct ehci_qtd, qtd_list);
-                       /* first qtd may already be partially processed */
-                       if (cpu_to_le32 (end->qtd_dma) == qh->hw_current)
-                               end = 0;
+       /* be sure the hardware's done with the qh before refreshing
+        * it after fault cleanup, or recovering from silicon wrongly
+        * overlaying the dummy qtd (which reduces DMA chatter).
+        */
+       if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END) {
+               switch (state) {
+               case QH_STATE_IDLE:
+                       qh_refresh(ehci, qh);
+                       break;
+               case QH_STATE_LINKED:
+                       /* should be rare for periodic transfers,
+                        * except maybe high bandwidth ...
+                        */
+                       if ((__constant_cpu_to_le32 (QH_SMASK)
+                                       & qh->hw_info2) != 0) {
+                               intr_deschedule (ehci, qh);
+                               (void) qh_schedule (ehci, qh);
+                       } else
+                               unlink_async (ehci, qh);
+                       break;
+               /* otherwise, unlink already started */
                }
-               if (end)
-                       qh_update (ehci, qh, end);
        }
 
        return count;
@@ -427,7 +477,7 @@ qh_urb_transaction (
        struct ehci_hcd         *ehci,
        struct urb              *urb,
        struct list_head        *head,
-       int                     flags
+       gfp_t                   flags
 ) {
        struct ehci_qtd         *qtd, *qtd_prev;
        dma_addr_t              buf;
@@ -440,7 +490,7 @@ qh_urb_transaction (
         */
        qtd = ehci_qtd_alloc (ehci, flags);
        if (unlikely (!qtd))
-               return 0;
+               return NULL;
        list_add_tail (&qtd->qtd_list, head);
        qtd->urb = urb;
 
@@ -464,18 +514,18 @@ qh_urb_transaction (
                qtd->urb = urb;
                qtd_prev->hw_next = QTD_NEXT (qtd->qtd_dma);
                list_add_tail (&qtd->qtd_list, head);
-       } 
+
+               /* for zero length DATA stages, STATUS is always IN */
+               if (len == 0)
+                       token |= (1 /* "in" */ << 8);
+       }
 
        /*
         * data transfer stage:  buffer setup
         */
-       if (likely (len > 0))
-               buf = urb->transfer_dma;
-       else
-               buf = 0;
+       buf = urb->transfer_dma;
 
-       // FIXME this 'buf' check break some zlps...
-       if (!buf || is_input)
+       if (is_input)
                token |= (1 /* "in" */ << 8);
        /* else it's already initted to "out" pid (0 << 8) */
 
@@ -522,7 +572,7 @@ qh_urb_transaction (
         * control requests may need a terminating data "status" ack;
         * bulk ones may need a terminating short packet (zero length).
         */
-       if (likely (buf != 0)) {
+       if (likely (urb->transfer_buffer_length != 0)) {
                int     one_more = 0;
 
                if (usb_pipecontrol (urb->pipe)) {
@@ -555,26 +605,11 @@ qh_urb_transaction (
 
 cleanup:
        qtd_list_free (ehci, urb, head);
-       return 0;
+       return NULL;
 }
 
 /*-------------------------------------------------------------------------*/
 
-/*
- * Hardware maintains data toggle (like OHCI) ... here we (re)initialize
- * the hardware data toggle in the QH, and set the pseudo-toggle in udev
- * so we can see if usb_clear_halt() was called.  NOP for control, since
- * we set up qh->hw_info1 to always use the QTD toggle bits. 
- */
-static inline void
-clear_toggle (struct usb_device *udev, int ep, int is_out, struct ehci_qh *qh)
-{
-       vdbg ("clear toggle, dev %d ep 0x%x-%s",
-               udev->devnum, ep, is_out ? "out" : "in");
-       qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE);
-       usb_settoggle (udev, ep, is_out, 1);
-}
-
 // Would be best to create all qh's from config descriptors,
 // when each interface/altsetting is established.  Unlink
 // any previous qh and cancel its urbs first; endpoints are
@@ -594,7 +629,7 @@ static struct ehci_qh *
 qh_make (
        struct ehci_hcd         *ehci,
        struct urb              *urb,
-       int                     flags
+       gfp_t                   flags
 ) {
        struct ehci_qh          *qh = ehci_qh_alloc (ehci, flags);
        u32                     info1 = 0, info2 = 0;
@@ -623,22 +658,28 @@ qh_make (
         * For control/bulk requests, the HC or TT handles these.
         */
        if (type == PIPE_INTERRUPT) {
-               qh->usecs = usb_calc_bus_time (USB_SPEED_HIGH, is_input, 0,
-                               hb_mult (maxp) * max_packet (maxp));
+               qh->usecs = NS_TO_US (usb_calc_bus_time (USB_SPEED_HIGH, is_input, 0,
+                               hb_mult (maxp) * max_packet (maxp)));
                qh->start = NO_FRAME;
 
                if (urb->dev->speed == USB_SPEED_HIGH) {
                        qh->c_usecs = 0;
                        qh->gap_uf = 0;
 
-                       /* FIXME handle HS periods of less than 1 frame. */
                        qh->period = urb->interval >> 3;
-                       if (qh->period < 1) {
+                       if (qh->period == 0 && urb->interval != 1) {
+                               /* NOTE interval 2 or 4 uframes could work.
+                                * But interval 1 scheduling is simpler, and
+                                * includes high bandwidth.
+                                */
                                dbg ("intr period %d uframes, NYET!",
                                                urb->interval);
                                goto done;
                        }
                } else {
+                       struct usb_tt   *tt = urb->dev->tt;
+                       int             think_time;
+
                        /* gap is f(FS/LS transfer times) */
                        qh->gap_uf = 1 + usb_calc_bus_time (urb->dev->speed,
                                        is_input, 0, maxp) / (125 * 1000);
@@ -652,13 +693,17 @@ qh_make (
                                qh->c_usecs = HS_USECS (0);
                        }
 
+                       think_time = tt ? tt->think_time : 0;
+                       qh->tt_usecs = NS_TO_US (think_time +
+                                       usb_calc_bus_time (urb->dev->speed,
+                                       is_input, 0, max_packet (maxp)));
                        qh->period = urb->interval;
                }
-
-               /* support for tt scheduling */
-               qh->dev = usb_get_dev (urb->dev);
        }
 
+       /* support for tt scheduling, and access to toggles */
+       qh->dev = urb->dev;
+
        /* using TT? */
        switch (urb->dev->speed) {
        case USB_SPEED_LOW:
@@ -676,13 +721,21 @@ qh_make (
                info1 |= maxp << 16;
 
                info2 |= (EHCI_TUNE_MULT_TT << 30);
-               info2 |= urb->dev->ttport << 23;
 
-               /* set the address of the TT; for ARC's integrated
+               /* Some Freescale processors have an erratum in which the
+                * port number in the queue head was 0..N-1 instead of 1..N.
+                */
+               if (ehci_has_fsl_portno_bug(ehci))
+                       info2 |= (urb->dev->ttport-1) << 23;
+               else
+                       info2 |= urb->dev->ttport << 23;
+
+               /* set the address of the TT; for TDI's integrated
                 * root hub tt, leave it zeroed.
                 */
-               if (!ehci_is_ARC(ehci)
-                               || urb->dev->tt->hub != ehci->hcd.self.root_hub)
+               if (!ehci_is_TDI(ehci)
+                               || urb->dev->tt->hub !=
+                                       ehci_to_hcd(ehci)->self.root_hub)
                        info2 |= urb->dev->tt->hub->devnum << 16;
 
                /* NOTE:  if (PIPE_INTERRUPT) { scheduler sets c-mask } */
@@ -706,10 +759,10 @@ qh_make (
                }
                break;
        default:
-               dbg ("bogus dev %p speed %d", urb->dev, urb->dev->speed);
+               dbg ("bogus dev %p speed %d", urb->dev, urb->dev->speed);
 done:
-               qh_put (ehci, qh);
-               return 0;
+               qh_put (qh);
+               return NULL;
        }
 
        /* NOTE:  if (PIPE_INTERRUPT) { scheduler sets s-mask } */
@@ -718,8 +771,8 @@ done:
        qh->qh_state = QH_STATE_IDLE;
        qh->hw_info1 = cpu_to_le32 (info1);
        qh->hw_info2 = cpu_to_le32 (info2);
-       qh_update (ehci, qh, qh->dummy);
        usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
+       qh_refresh (ehci, qh);
        return qh;
 }
 
@@ -729,7 +782,7 @@ done:
 
 static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-       u32             dma = QH_NEXT (qh->qh_dma);
+       __le32          dma = QH_NEXT (qh->qh_dma);
        struct ehci_qh  *head;
 
        /* (re)start the async schedule? */
@@ -743,12 +796,14 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
                        (void) handshake (&ehci->regs->status, STS_ASS, 0, 150);
                        cmd |= CMD_ASE | CMD_RUN;
                        writel (cmd, &ehci->regs->command);
-                       ehci->hcd.state = USB_STATE_RUNNING;
+                       ehci_to_hcd(ehci)->state = HC_STATE_RUNNING;
                        /* posted write need not be known to HC yet ... */
                }
        }
 
-       qh->hw_token &= ~HALT_BIT;
+       /* clear halt and/or toggle; and maybe recover from silicon quirk */
+       if (qh->qh_state == QH_STATE_IDLE)
+               qh_refresh (ehci, qh);
 
        /* splice right after start */
        qh->qh_next = head->qh_next;
@@ -764,7 +819,7 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 /*-------------------------------------------------------------------------*/
 
-#define        QH_ADDR_MASK    __constant_le32_to_cpu(0x7f)
+#define        QH_ADDR_MASK    __constant_cpu_to_le32(0x7f)
 
 /*
  * For control/bulk/interrupt, return QH with these TDs appended.
@@ -780,75 +835,38 @@ static struct ehci_qh *qh_append_tds (
        void                    **ptr
 )
 {
-       struct ehci_qh          *qh = 0;
+       struct ehci_qh          *qh = NULL;
 
        qh = (struct ehci_qh *) *ptr;
-       if (unlikely (qh == 0)) {
+       if (unlikely (qh == NULL)) {
                /* can't sleep here, we have ehci->lock... */
                qh = qh_make (ehci, urb, GFP_ATOMIC);
                *ptr = qh;
        }
-       if (likely (qh != 0)) {
+       if (likely (qh != NULL)) {
                struct ehci_qtd *qtd;
 
                if (unlikely (list_empty (qtd_list)))
-                       qtd = 0;
+                       qtd = NULL;
                else
                        qtd = list_entry (qtd_list->next, struct ehci_qtd,
                                        qtd_list);
 
-               /* control qh may need patching after enumeration */
+               /* control qh may need patching ... */
                if (unlikely (epnum == 0)) {
-                       /* set_address changes the address */
-                       if ((qh->hw_info1 & QH_ADDR_MASK) == 0)
-                               qh->hw_info1 |= cpu_to_le32 (
-                                               usb_pipedevice (urb->pipe));
-
-                       /* for full speed, ep0 maxpacket can grow */
-                       else if (!(qh->hw_info1
-                                       & __constant_cpu_to_le32 (0x3 << 12))) {
-                               u32     info, max;
-
-                               info = le32_to_cpu (qh->hw_info1);
-                               max = urb->dev->descriptor.bMaxPacketSize0;
-                               if (max > (0x07ff & (info >> 16))) {
-                                       info &= ~(0x07ff << 16);
-                                       info |= max << 16;
-                                       qh->hw_info1 = cpu_to_le32 (info);
-                               }
-                       }
 
                         /* usb_reset_device() briefly reverts to address 0 */
                         if (usb_pipedevice (urb->pipe) == 0)
                                 qh->hw_info1 &= ~QH_ADDR_MASK;
                }
 
-               /* usb_clear_halt() means qh data toggle gets reset */
-               if (unlikely (!usb_gettoggle (urb->dev,
-                                       (epnum & 0x0f), !(epnum & 0x10)))
-                               && !usb_pipecontrol (urb->pipe)) {
-                       /* "never happens": drivers do stall cleanup right */
-                       if (qh->qh_state != QH_STATE_IDLE
-                                       && !list_empty (&qh->qtd_list)
-                                       && qh->qh_state != QH_STATE_COMPLETING)
-                               ehci_warn (ehci, "clear toggle dev%d "
-                                               "ep%d%s: not idle\n",
-                                               usb_pipedevice (urb->pipe),
-                                               epnum & 0x0f,
-                                               usb_pipein (urb->pipe)
-                                                       ? "in" : "out");
-                       /* else we know this overlay write is safe */
-                       clear_toggle (urb->dev,
-                               epnum & 0x0f, !(epnum & 0x10), qh);
-               }
-
                /* just one way to queue requests: swap with the dummy qtd.
-                * only hc or qh_completions() usually modify the overlay.
+                * only hc or qh_refresh() ever modify the overlay.
                 */
-               if (likely (qtd != 0)) {
+               if (likely (qtd != NULL)) {
                        struct ehci_qtd         *dummy;
                        dma_addr_t              dma;
-                       u32                     token;
+                       __le32                  token;
 
                        /* to avoid racing the HC, use the dummy td instead of
                         * the first td of our list (becomes new dummy).  both
@@ -892,56 +910,59 @@ static struct ehci_qh *qh_append_tds (
 static int
 submit_async (
        struct ehci_hcd         *ehci,
+       struct usb_host_endpoint *ep,
        struct urb              *urb,
        struct list_head        *qtd_list,
-       int                     mem_flags
+       gfp_t                   mem_flags
 ) {
        struct ehci_qtd         *qtd;
-       struct hcd_dev          *dev;
        int                     epnum;
        unsigned long           flags;
-       struct ehci_qh          *qh = 0;
+       struct ehci_qh          *qh = NULL;
+       int                     rc = 0;
 
        qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list);
-       dev = (struct hcd_dev *)urb->dev->hcpriv;
-       epnum = usb_pipeendpoint (urb->pipe);
-       if (usb_pipein (urb->pipe) && !usb_pipecontrol (urb->pipe))
-               epnum |= 0x10;
+       epnum = ep->desc.bEndpointAddress;
 
 #ifdef EHCI_URB_TRACE
        ehci_dbg (ehci,
                "%s %s urb %p ep%d%s len %d, qtd %p [qh %p]\n",
                __FUNCTION__, urb->dev->devpath, urb,
-               epnum & 0x0f, usb_pipein (urb->pipe) ? "in" : "out",
+               epnum & 0x0f, (epnum & USB_DIR_IN) ? "in" : "out",
                urb->transfer_buffer_length,
-               qtd, dev ? dev->ep [epnum] : (void *)~0);
+               qtd, ep->hcpriv);
 #endif
 
        spin_lock_irqsave (&ehci->lock, flags);
-       qh = qh_append_tds (ehci, urb, qtd_list, epnum, &dev->ep [epnum]);
+       if (unlikely(!test_bit(HCD_FLAG_HW_ACCESSIBLE,
+                              &ehci_to_hcd(ehci)->flags))) {
+               rc = -ESHUTDOWN;
+               goto done;
+       }
+
+       qh = qh_append_tds (ehci, urb, qtd_list, epnum, &ep->hcpriv);
+       if (unlikely(qh == NULL)) {
+               rc = -ENOMEM;
+               goto done;
+       }
 
        /* Control/bulk operations through TTs don't need scheduling,
         * the HC and TT handle it when the TT has a buffer ready.
         */
-       if (likely (qh != 0)) {
-               if (likely (qh->qh_state == QH_STATE_IDLE))
-                       qh_link_async (ehci, qh_get (qh));
-       }
+       if (likely (qh->qh_state == QH_STATE_IDLE))
+               qh_link_async (ehci, qh_get (qh));
+ done:
        spin_unlock_irqrestore (&ehci->lock, flags);
-       if (unlikely (qh == 0)) {
+       if (unlikely (qh == NULL))
                qtd_list_free (ehci, urb, qtd_list);
-               return -ENOMEM;
-       }
-       return 0;
+       return rc;
 }
 
 /*-------------------------------------------------------------------------*/
 
 /* the async qh for the qtds being reclaimed are now unlinked from the HC */
 
-static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh);
-
-static void end_unlink_async (struct ehci_hcd *ehci, struct pt_regs *regs)
+static void end_unlink_async (struct ehci_hcd *ehci)
 {
        struct ehci_qh          *qh = ehci->reclaim;
        struct ehci_qh          *next;
@@ -950,33 +971,33 @@ static void end_unlink_async (struct ehci_hcd *ehci, struct pt_regs *regs)
 
        // qh->hw_next = cpu_to_le32 (qh->qh_dma);
        qh->qh_state = QH_STATE_IDLE;
-       qh->qh_next.qh = 0;
-       qh_put (ehci, qh);                      // refcount from reclaim 
+       qh->qh_next.qh = NULL;
+       qh_put (qh);                    // refcount from reclaim
 
        /* other unlink(s) may be pending (in QH_STATE_UNLINK_WAIT) */
        next = qh->reclaim;
        ehci->reclaim = next;
        ehci->reclaim_ready = 0;
-       qh->reclaim = 0;
+       qh->reclaim = NULL;
 
-       qh_completions (ehci, qh, regs);
+       qh_completions (ehci, qh);
 
        if (!list_empty (&qh->qtd_list)
-                       && HCD_IS_RUNNING (ehci->hcd.state))
+                       && HC_IS_RUNNING (ehci_to_hcd(ehci)->state))
                qh_link_async (ehci, qh);
        else {
-               qh_put (ehci, qh);              // refcount from async list
+               qh_put (qh);            // refcount from async list
 
                /* it's not free to turn the async schedule on/off; leave it
                 * active but idle for a while once it empties.
                 */
-               if (HCD_IS_RUNNING (ehci->hcd.state)
-                               && ehci->async->qh_next.qh == 0)
+               if (HC_IS_RUNNING (ehci_to_hcd(ehci)->state)
+                               && ehci->async->qh_next.qh == NULL)
                        timer_action (ehci, TIMER_ASYNC_OFF);
        }
 
        if (next) {
-               ehci->reclaim = 0;
+               ehci->reclaim = NULL;
                start_unlink_async (ehci, next);
        }
 }
@@ -990,13 +1011,10 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        struct ehci_qh  *prev;
 
 #ifdef DEBUG
+       assert_spin_locked(&ehci->lock);
        if (ehci->reclaim
                        || (qh->qh_state != QH_STATE_LINKED
                                && qh->qh_state != QH_STATE_UNLINK_WAIT)
-#ifdef CONFIG_SMP
-// this macro lies except on SMP compiles
-                       || !spin_is_locked (&ehci->lock)
-#endif
                        )
                BUG ();
 #endif
@@ -1004,14 +1022,16 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        /* stop async schedule right now? */
        if (unlikely (qh == ehci->async)) {
                /* can't get here without STS_ASS set */
-               if (ehci->hcd.state != USB_STATE_HALT) {
+               if (ehci_to_hcd(ehci)->state != HC_STATE_HALT
+                               && !ehci->reclaim) {
+                       /* ... and CMD_IAAD clear */
                        writel (cmd & ~CMD_ASE, &ehci->regs->command);
                        wmb ();
                        // handshake later, if we need to
+                       timer_action_done (ehci, TIMER_ASYNC_OFF);
                }
-               timer_action_done (ehci, TIMER_ASYNC_OFF);
                return;
-       } 
+       }
 
        qh->qh_state = QH_STATE_UNLINK;
        ehci->reclaim = qh = qh_get (qh);
@@ -1024,11 +1044,11 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
        prev->qh_next = qh->qh_next;
        wmb ();
 
-       if (unlikely (ehci->hcd.state == USB_STATE_HALT)) {
+       if (unlikely (ehci_to_hcd(ehci)->state == HC_STATE_HALT)) {
                /* if (unlikely (qh->reclaim != 0))
-                *      this will recurse, probably not much
+                *      this will recurse, probably not much
                 */
-               end_unlink_async (ehci, NULL);
+               end_unlink_async (ehci);
                return;
        }
 
@@ -1041,8 +1061,7 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 /*-------------------------------------------------------------------------*/
 
-static void
-scan_async (struct ehci_hcd *ehci, struct pt_regs *regs)
+static void scan_async (struct ehci_hcd *ehci)
 {
        struct ehci_qh          *qh;
        enum ehci_timer_action  action = TIMER_IO_WATCHDOG;
@@ -1052,7 +1071,7 @@ scan_async (struct ehci_hcd *ehci, struct pt_regs *regs)
        timer_action_done (ehci, TIMER_ASYNC_SHRINK);
 rescan:
        qh = ehci->async->qh_next.qh;
-       if (likely (qh != 0)) {
+       if (likely (qh != NULL)) {
                do {
                        /* clean any finished work for this qh */
                        if (!list_empty (&qh->qtd_list)
@@ -1066,8 +1085,8 @@ rescan:
                                 */
                                qh = qh_get (qh);
                                qh->stamp = ehci->stamp;
-                               temp = qh_completions (ehci, qh, regs);
-                               qh_put (ehci, qh);
+                               temp = qh_completions (ehci, qh);
+                               qh_put (qh);
                                if (temp != 0) {
                                        goto rescan;
                                }