Subject: kern/35379: ohci_abort_xfer is broken
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: None <toshii@w.email.ne.jp>
List: netbsd-bugs
Date: 01/08/2007 09:15:01
>Number:         35379
>Category:       kern
>Synopsis:       ohci_abort_xfer is broken and causes infinite loops
>Confidential:   no
>Severity:       critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Mon Jan 08 09:15:00 +0000 2007
>Originator:     toshii@w.email.ne.jp
>Release:        NetBSD 4.99.5
>Organization:
	
>Environment:
	
	
System: NetBSD pepper.my.domain 4.99.5 NetBSD 4.99.5 (PEPPER) #46: Mon Jan  8 01:53:35 JST 2007  toshii@primula.my.domain:/usr/src/current/sys/arch/i386/compile/PEPPER i386
Architecture: i386
Machine: i386
>Description:
	I've found two bugs.
	ohci_abort_xfer assumes xfer->hcpriv STD list is intact, but
	ohci_softintr could run earlier and some of STDs may have been
	connected to the free list.
	Secondly, done STDs shouldn't be reclaimed until they are seen
	on the done list, otherwise a corrupt done list results.

>How-To-Repeat:
	Prepare an nForce 5 motherboard and an Edirol UA-3 USB audio device.
	If the uaudio is plugged in at bootup, usbd_new_device fails with a
	usbd_set_address timeout and a corruption of a done list follows.

	A corruption sometimes can be seen with the following debug code.

Index: ohci.c
===================================================================
RCS file: /usr/local/NetBSD/NetBSD-CVS/src/sys/dev/usb/ohci.c,v
retrieving revision 1.179
diff -u -p -r1.179 ohci.c
--- ohci.c	16 Nov 2006 01:33:26 -0000	1.179
+++ ohci.c	8 Jan 2007 07:11:05 -0000
@@ -1276,6 +1291,9 @@ ohci_softintr(void *v)
 	int len, cc, s;
 	int i, j, actlen, iframes, uedir;
 	ohci_physaddr_t done;
+#ifdef OHCI_DEBUG
+	int listcnt = 0;
+#endif
 
 	DPRINTFN(10,("ohci_softintr: enter\n"));
 
@@ -1283,14 +1301,30 @@ ohci_softintr(void *v)
 
 	s = splhardusb();
 	done = O32TOH(sc->sc_hcca->hcca_done_head) & ~OHCI_DONE_INTRS;
-	sc->sc_hcca->hcca_done_head = 0;
-	OWRITE4(sc, OHCI_INTERRUPT_STATUS, OHCI_WDH);
+	if (done) {
+		sc->sc_hcca->hcca_done_head = 0;
+		OWRITE4(sc, OHCI_INTERRUPT_STATUS, OHCI_WDH);
+	} else {
+		ohci_physaddr_t reg;
+
+		reg = OREAD4(sc, OHCI_DONE_HEAD);
+		if (reg) {
+			DPRINTF(("ohci_softintr: done head %x\n", reg));
+		}
+	}
 	sc->sc_eintrs |= OHCI_WDH;
 	OWRITE4(sc, OHCI_INTERRUPT_ENABLE, OHCI_WDH);
 	splx(s);
 
 	/* Reverse the done list. */
 	for (sdone = NULL, sidone = NULL; done != 0; ) {
+#ifdef OHCI_DEBUG
+		if (listcnt++ > 300) {
+			ohci_dump_tds(sc, sdone);
+			ohci_dump_itds(sc, sidone);
+			panic("ohci_softintr: loop in done list");
+		}
+#endif
 		std = ohci_hash_find_td(sc, done);
 		if (std != NULL) {
 			std->dnext = sdone;
>Fix:
	The following patch should fix the problem.  It isn't well tested
	but I'll commit in a week unless someone speaks up.  Also, it
	would be better if xfer->hcpriv doesn't get rewritten that often.

Index: ohci.c
===================================================================
RCS file: /usr/local/NetBSD/NetBSD-CVS/src/sys/dev/usb/ohci.c,v
retrieving revision 1.179
diff -u -p -r1.179 ohci.c
--- ohci.c	16 Nov 2006 01:33:26 -0000	1.179
+++ ohci.c	8 Jan 2007 07:11:05 -0000
@@ -1323,7 +1357,9 @@ ohci_softintr(void *v)
 		xfer = std->xfer;
 		stdnext = std->dnext;
 		DPRINTFN(10, ("ohci_process_done: std=%p xfer=%p hcpriv=%p\n",
-				std, xfer, xfer ? xfer->hcpriv : 0));
+				std, xfer,
+				(xfer && xfer != (void *)OHCI_TD_DEFERRED) ?
+				xfer->hcpriv : 0));
 		if (xfer == NULL) {
 			/*
 			 * xfer == NULL: There seems to be no xfer associated
@@ -1333,11 +1369,17 @@ ohci_softintr(void *v)
 			 */
 			continue;
 		}
+		if (xfer == (void *)OHCI_TD_DEFERRED) {
+			ohci_free_std(sc, std);
+			continue;
+		}
 		if (xfer->status == USBD_CANCELLED ||
 		    xfer->status == USBD_TIMEOUT) {
 			DPRINTF(("ohci_process_done: cancel/timeout %p\n",
 				 xfer));
 			/* Handled by abort routine. */
+			xfer->hcpriv = std->nexttd;
+			ohci_free_std(sc, std);
 			continue;
 		}
 		usb_uncallout(xfer->timeout_handle, ohci_timeout, xfer);
@@ -1358,7 +1400,8 @@ ohci_softintr(void *v)
 				s = splusb();
 				usb_transfer_complete(xfer);
 				splx(s);
-			}
+			} else
+				xfer->hcpriv = std->nexttd;
 			ohci_free_std(sc, std);
 		} else {
 			/*
@@ -2270,7 +2315,17 @@ ohci_abort_xfer(usbd_xfer_handle xfer, u
 	for (; p->xfer == xfer; p = n) {
 		hit |= headp == p->physaddr;
 		n = p->nexttd;
-		ohci_free_std(sc, p);
+		if (OHCI_TD_GET_CC(O32TOH(p->td.td_flags)) !=
+		    OHCI_TD_GET_CC(OHCI_TD_NOCC)) {
+			/*
+			 * If a TD is retired, let the softintr handler
+			 * handle this.
+			 */
+			p->xfer = (void *)OHCI_TD_DEFERRED;
+		} else {
+			DPRINTFN(1,("ohci_abort_xfer: free %p\n", p));
+			ohci_free_std(sc, p);
+		}
 	}
 	/* Zap headp register if hardware pointed inside the xfer. */
 	if (hit) {
Index: ohcivar.h
===================================================================
RCS file: /usr/local/NetBSD/NetBSD-CVS/src/sys/dev/usb/ohcivar.h,v
retrieving revision 1.39
diff -u -p -r1.39 ohcivar.h
--- ohcivar.h	27 Dec 2005 04:06:45 -0000	1.39
+++ ohcivar.h	1 Jan 2007 23:14:27 -0000
@@ -54,6 +54,8 @@ typedef struct ohci_soft_td {
 	ohci_physaddr_t physaddr;
 	LIST_ENTRY(ohci_soft_td) hnext;
 	usbd_xfer_handle xfer;
+/* An invalid pointer value for a magic number */
+#define	OHCI_TD_DEFERRED	0x1
 	u_int16_t len;
 	u_int16_t flags;
 #define OHCI_CALL_DONE	0x0001

>Unformatted: