Subject: port-next68k/16798: race condition in the ethernet code
To: None <gnats-bugs@gnats.netbsd.org>
From: None <chris@Pin.LU>
List: netbsd-bugs
Date: 05/14/2002 04:05:27
>Number:         16798
>Category:       port-next68k
>Synopsis:       race condition in the ethernet code
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    port-next68k-maintainer
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Mon May 13 19:46:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     Christian Limpach
>Release:        current (as of May 14 2002)
>Organization:
	
>Environment:
	
System: NetBSD clapper 1.5ZC NetBSD 1.5ZC (CLAPPER) #46: Mon May 13 20:51:50 CEST 2002     root@marble:/devel/netbsd/src-current/sys/arch/next68k/compile/CLAPPER next68k


>Description:
	
	There is a race condition in the ethernet code.  Most of the network
	code runs at IPL3 (splnet).  mb8795_txdma_shutdown, which runs at IPL6
	(spldma, since it's a handler for DMA interrupts), calls mb8795_start
	which does an IF_DEQUEUE.  If there's a packet being queued at the
	same moment, the ifp->if_snd queue can get corrupted:  ifq_head is NULL
	while ifq_tail is not, this blocks sending forever and once ifq_maxlen
	packets are in the queue the upper layers will get errors but nothing
	but a reboot will clear the error condition.

>How-To-Repeat:
	
	I've been running build.sh on a next to build the toolchain and this
	would usually trigger the race condition within 1 hour.  Once this
	happens, "xe0: No packet to start" error messages are printed and once
	the ifp->if_snd is full, "nfs-timer: ignoring error 55" messages are
	printed with a debug kernel.

>Fix:
	
	I have tried 2 ways to fix this, both seem to fix the problem
	equally well.

	- increase splnet's level to IPL6

	- mb8795_start dequeues the packets from the ifp->if_snd queue and
	queues them in the new sc->sc_tx_snd queue.  The sc->sc_tx_snd queue
	is only accessed at IPL6 and thus safe from the mb8795_txdma_shutdown
	handler.  A new mb8795_start_dma is called from mb8795_start and
	mb8795_txdma_shutdown to dequeue the packets from the sc->sc_tx_snd
	queue and start the dma.

	I've implemented the 2nd solution since I'm not too sure if raising
	splnet's level is a good idea.  The following patch implements the 2nd
	solution.

Index: arch/next68k/dev/mb8795.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/next68k/dev/mb8795.c,v
retrieving revision 1.24
diff -u -r1.24 mb8795.c
--- mb8795.c	2001/06/16 09:18:46	1.24
+++ mb8795.c	2002/05/14 01:12:45
@@ -124,6 +124,7 @@
 void mb8795_rxdma_shutdown __P((void *));
 void mb8795_txdma_shutdown __P((void *));
 bus_dmamap_t mb8795_txdma_restart __P((bus_dmamap_t,void *));
+void mb8795_start_dma __P((struct ifnet *));
 
 void
 mb8795_config(sc)
@@ -551,8 +552,8 @@
 
 	nextdma_start(sc->sc_rx_nd, DMACSR_SETREAD);
 
-	if (ifp->if_snd.ifq_head != NULL) {
-		mb8795_start(ifp);
+	if (! IF_IS_EMPTY(&sc->sc_tx_snd)) {
+		mb8795_start_dma(ifp);
 	}
 }
 
@@ -710,44 +711,89 @@
  */
 void
 mb8795_start(ifp)
-     struct ifnet *ifp;
+	struct ifnet *ifp;
 {
-  int error;
-  struct mb8795_softc *sc = ifp->if_softc;
+	struct mb8795_softc *sc = ifp->if_softc;
+	struct mbuf *m;
+	int s;
 
-	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
-		return;
+	DPRINTF(("%s: mb8795_start()\n",sc->sc_dev.dv_xname));
+
+	while (1) {
+		if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
+			return;
+
+#if 0
+		return;	/* @@@ Turn off xmit for debugging */
+#endif
+
+		ifp->if_flags |= IFF_OACTIVE;
+
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m == 0) {
+			DPRINTF(("%s: No packet to start\n",
+				 sc->sc_dev.dv_xname));
+			ifp->if_flags &= ~IFF_OACTIVE;
+			return;
+		}
+
+#if NBPFILTER > 0
+		/*
+		 * Pass packet to bpf if there is a listener.
+		 */
+		if (ifp->if_bpf)
+			bpf_mtap(ifp->if_bpf, m);
+#endif
+
+		s = spldma();
+		IF_ENQUEUE(&sc->sc_tx_snd, m);
+		if (sc->sc_tx_loaded == 0)
+			mb8795_start_dma(ifp);
+		splx(s);
+
+		ifp->if_flags &= ~IFF_OACTIVE;
+	}
+
+}
+
+void
+mb8795_start_dma(ifp)
+	struct ifnet *ifp;
+{
+	int error;
+	struct mb8795_softc *sc = ifp->if_softc;
 
-  DPRINTF(("%s: mb8795_start()\n",sc->sc_dev.dv_xname));
+	DPRINTF(("%s: mb8795_start_dma()\n",sc->sc_dev.dv_xname));
 
 #if (defined(DIAGNOSTIC))
-  {
-    u_char txstat;
-    txstat = bus_space_read_1(sc->sc_bst,sc->sc_bsh, XE_TXSTAT);
-    if (!(txstat & XE_TXSTAT_READY)) {
+	{
+		u_char txstat;
+		txstat = bus_space_read_1(sc->sc_bst,sc->sc_bsh, XE_TXSTAT);
+		if (!(txstat & XE_TXSTAT_READY)) {
 			/* @@@ I used to panic here, but then it paniced once.
 			 * Let's see if I can just reset instead. [ dbj 980706.1900 ]
 			 */
-      printf("%s: transmitter not ready\n", sc->sc_dev.dv_xname);
+			printf("%s: transmitter not ready\n",
+			       sc->sc_dev.dv_xname);
 			mb8795_reset(sc);
 			return;
-    }
-  }
+		}
+	}
 #endif
 
 #if 0
 	return;	/* @@@ Turn off xmit for debugging */
 #endif
 
-  bus_space_write_1(sc->sc_bst,sc->sc_bsh, XE_TXSTAT, XE_TXSTAT_CLEAR);
-
-  IF_DEQUEUE(&ifp->if_snd, sc->sc_tx_mb_head);
-  if (sc->sc_tx_mb_head == 0) {
-    printf("%s: No packet to start\n",
-				sc->sc_dev.dv_xname);
-    return;
-  }
-
+	bus_space_write_1(sc->sc_bst,sc->sc_bsh, XE_TXSTAT, XE_TXSTAT_CLEAR);
+	
+	IF_DEQUEUE(&sc->sc_tx_snd, sc->sc_tx_mb_head);
+	if (sc->sc_tx_mb_head == 0) {
+		DPRINTF(("%s: No packet to start\n",
+			 sc->sc_dev.dv_xname));
+		return;
+	}
+	
 	ifp->if_timer = 5;
 
 /* The following is a next specific hack that should
@@ -762,10 +808,10 @@
 				&~(DMA_ENDALIGNMENT-1)))-(s);}
 
 #if 0
-  error = bus_dmamap_load_mbuf(sc->sc_tx_dmat,
-			sc->sc_tx_dmamap,
-			sc->sc_tx_mb_head,
-			BUS_DMA_NOWAIT);
+	error = bus_dmamap_load_mbuf(sc->sc_tx_dmat,
+				     sc->sc_tx_dmamap,
+				     sc->sc_tx_mb_head,
+				     BUS_DMA_NOWAIT);
 #else
 	{
 		u_char *buf = sc->sc_txbuf;
@@ -788,38 +834,30 @@
 		}
 
 		error = bus_dmamap_load(sc->sc_tx_dmat, sc->sc_tx_dmamap,
-				buf,buflen,NULL,BUS_DMA_NOWAIT);
+					buf,buflen,NULL,BUS_DMA_NOWAIT);
 	}
 #endif
-  if (error) {
-    printf("%s: can't load mbuf chain, error = %d\n",
-				sc->sc_dev.dv_xname, error);
-    m_freem(sc->sc_tx_mb_head);
-    sc->sc_tx_mb_head = NULL;
-    return;
-  }
+	if (error) {
+		printf("%s: can't load mbuf chain, error = %d\n",
+		       sc->sc_dev.dv_xname, error);
+		m_freem(sc->sc_tx_mb_head);
+		sc->sc_tx_mb_head = NULL;
+		return;
+	}
 
 #ifdef DIAGNOSTIC
 	if (sc->sc_tx_loaded != 0) {
-			panic("%s: sc->sc_tx_loaded is %d",sc->sc_dev.dv_xname,
-					sc->sc_tx_loaded);
+		panic("%s: sc->sc_tx_loaded is %d",sc->sc_dev.dv_xname,
+		      sc->sc_tx_loaded);
 	}
 #endif
 
-	ifp->if_flags |= IFF_OACTIVE;
-
-  bus_dmamap_sync(sc->sc_tx_dmat, sc->sc_tx_dmamap, 0,
+	bus_dmamap_sync(sc->sc_tx_dmat, sc->sc_tx_dmamap, 0,
 			sc->sc_tx_dmamap->dm_mapsize, BUS_DMASYNC_PREWRITE);
 
 	nextdma_start(sc->sc_tx_nd, DMACSR_SETWRITE);
-
-#if NBPFILTER > 0
-  /*
-   * Pass packet to bpf if there is a listener.
-   */
-  if (ifp->if_bpf)
-    bpf_mtap(ifp->if_bpf, sc->sc_tx_mb_head);
-#endif
+	
+	ifp->if_opackets++;
 
 }
 
@@ -880,12 +918,10 @@
 		}
 #endif
 
-		ifp->if_flags &= ~IFF_OACTIVE;
-
 		ifp->if_timer = 0;
 
-		if (ifp->if_snd.ifq_head != NULL) {
-			mb8795_start(ifp);
+		if (! IF_IS_EMPTY(&sc->sc_tx_snd)) {
+			mb8795_start_dma(ifp);
 		}
 
 	}
Index: arch/next68k/dev/mb8795var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/next68k/dev/mb8795var.h,v
retrieving revision 1.3
diff -u -r1.3 mb8795var.h
--- mb8795var.h	2001/04/02 05:29:43	1.3
+++ mb8795var.h	2002/05/14 01:12:45
@@ -55,9 +55,11 @@
 	struct mbuf *sc_tx_mb_head;   /* pointer to data for this command */
 	int sc_tx_loaded;
 
-	u_char *sc_txbuf;							/* to solve alignment problems, we
-																 * copy the mbuf into this buffer before
-																 * trying to dma it */
+	u_char *sc_txbuf; 	/* to solve alignment problems, we
+				 * copy the mbuf into this buffer before
+				 * trying to dma it */
+
+	struct ifaltq sc_tx_snd;
 
 	bus_dma_tag_t sc_rx_dmat;
 	bus_dmamap_t sc_rx_dmamap[MB8795_NRXBUFS];

>Release-Note:
>Audit-Trail:
>Unformatted: