Subject: wi performance hack
To: None <current-users@netbsd.org, tech-net@netbsd.org>
From: Charles M. Hannum <abuse@spamalicious.com>
List: tech-net
Date: 07/19/2004 00:28:31
--Boundary-00=_vWx+Ay+kb3DsF7L
Content-Type: text/plain;
  charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

Many people have observed that wi(4) uses a lot of CPU time.  I have 
identified this as being primarily due to busy-waiting for TX commands to 
complete.  The attached hack is a first step at eliminating that problem, and 
improves wi(4) performance substantially on my test machines.

If other people could test this and let me know the results, that would be 
helpful -- especially with Orinoco cards, since I don't have any(*).

Note that this hack is not finished.  It currently causes a race condition 
between wi_cmd() (primarily ifconfig(8) and wiconfig(8)) and the transmit 
path that can cause spurious errors.  It's intended as a test to see if the 
methodology works well enough to bother finishing.


(*) If anyone has a stockpile of old PCMCIA cards of any type, feel free to 
send them my way so I can do ongoing testing, rather than just letting them 
collect dust.

--Boundary-00=_vWx+Ay+kb3DsF7L
Content-Type: text/x-diff;
  charset="us-ascii";
  name="wi.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
	filename="wi.diff"

Index: wi.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/wi.c,v
retrieving revision 1.164
diff -u -r1.164 wi.c
--- wi.c	2 Jul 2004 23:41:34 -0000	1.164
+++ wi.c	19 Jul 2004 00:26:24 -0000
@@ -72,8 +72,9 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: wi.c,v 1.164 2004/07/02 23:41:34 dyoung Exp $");
 
-#define WI_HERMES_AUTOINC_WAR	/* Work around data write autoinc bug. */
+#undef WI_HERMES_AUTOINC_WAR	/* Work around data write autoinc bug. */
 #define WI_HERMES_STATS_WAR	/* Work around stats counter bug. */
+#undef HISTOGRAM
 
 #include "bpfilter.h"
 
@@ -134,6 +135,7 @@
 
 static void wi_rx_intr(struct wi_softc *);
 static void wi_txalloc_intr(struct wi_softc *);
+static void wi_cmd_intr(struct wi_softc *);
 static void wi_tx_intr(struct wi_softc *);
 static void wi_tx_ex_intr(struct wi_softc *);
 static void wi_info_intr(struct wi_softc *);
@@ -148,6 +150,7 @@
 static void wi_read_nicid(struct wi_softc *);
 static int  wi_write_ssid(struct wi_softc *, int, u_int8_t *, int);
 
+static int  wi_sendcmd(struct wi_softc *, int, int);
 static int  wi_cmd(struct wi_softc *, int, int, int, int);
 static int  wi_seek_bap(struct wi_softc *, int, int);
 static int  wi_read_bap(struct wi_softc *, int, int, void *, int);
@@ -190,7 +193,7 @@
 #endif
 
 #define WI_INTRS	(WI_EV_RX | WI_EV_ALLOC | WI_EV_INFO | \
-			 WI_EV_TX | WI_EV_TX_EXC)
+			 WI_EV_TX | WI_EV_TX_EXC | WI_EV_CMD)
 
 struct wi_card_ident
 wi_card_ident[] = {
@@ -561,6 +564,11 @@
 		if (status & WI_EV_INFO)
 			wi_info_intr(sc);
 
+		CSR_WRITE_2(sc, WI_EVENT_ACK, status);
+
+		if (status & WI_EV_CMD)
+			wi_cmd_intr(sc);
+
 		if ((ifp->if_flags & IFF_OACTIVE) == 0 &&
 		    (sc->sc_flags & WI_FLAGS_OUTRANGE) == 0 &&
 		    !IFQ_IS_EMPTY(&ifp->if_snd))
@@ -737,7 +745,10 @@
 			sc->sc_txd[i].d_len = 0;
 		}
 	}
-	sc->sc_txcur = sc->sc_txnext = 0;
+	sc->sc_txqueue = 0;
+	sc->sc_txstart = 0;
+	sc->sc_txalloc = 0;
+	sc->sc_txqueued = 0;
 
 	wi_rssdescs_init(&sc->sc_rssd, &sc->sc_rssdfree);
 
@@ -917,7 +928,7 @@
 		return;
 
 	memset(&frmhdr, 0, sizeof(frmhdr));
-	cur = sc->sc_txnext;
+	cur = sc->sc_txqueue;
 	for (;;) {
 		ni = ic->ic_bss;
 		if (sc->sc_txd[cur].d_len != 0 ||
@@ -1061,8 +1072,8 @@
 		}
 		m_freem(m0);
 		sc->sc_txd[cur].d_len = off;
-		if (sc->sc_txcur == cur) {
-			if (wi_cmd(sc, WI_CMD_TX | WI_RECLAIM, fid, 0, 0)) {
+		if (sc->sc_txqueued++ == 0) {
+			if (wi_sendcmd(sc, WI_CMD_TX | WI_RECLAIM, fid)) {
 				printf("%s: xmit failed\n",
 				    sc->sc_dev.dv_xname);
 				sc->sc_txd[cur].d_len = 0;
@@ -1072,7 +1083,7 @@
 			sc->sc_tx_timer = 5;
 			ifp->if_timer = 1;
 		}
-		sc->sc_txnext = cur = (cur + 1) % WI_NTXBUF;
+		sc->sc_txqueue = cur = (cur + 1) % WI_NTXBUF;
 		SLIST_REMOVE_HEAD(&sc->sc_rssdfree, rd_next);
 		id->id_node = ni;
 		continue;
@@ -1399,7 +1410,6 @@
 
 	/* First read in the frame header */
 	if (wi_read_bap(sc, fid, 0, &frmhdr, sizeof(frmhdr))) {
-		CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
 		ifp->if_ierrors++;
 		DPRINTF(("wi_rx_intr: read fid %x failed\n", fid));
 		return;
@@ -1414,7 +1424,6 @@
 	status = le16toh(frmhdr.wi_status);
 	if ((status & WI_STAT_ERRSTAT) != 0 &&
 	    ic->ic_opmode != IEEE80211_M_MONITOR) {
-		CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
 		ifp->if_ierrors++;
 		DPRINTF(("wi_rx_intr: fid %x error status %x\n", fid, status));
 		return;
@@ -1431,7 +1440,6 @@
 	 */
 	if (off + len > MCLBYTES) {
 		if (ic->ic_opmode != IEEE80211_M_MONITOR) {
-			CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
 			ifp->if_ierrors++;
 			DPRINTF(("wi_rx_intr: oversized packet\n"));
 			return;
@@ -1441,7 +1449,6 @@
 
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL) {
-		CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
 		ifp->if_ierrors++;
 		DPRINTF(("wi_rx_intr: MGET failed\n"));
 		return;
@@ -1449,7 +1456,6 @@
 	if (off + len > MHLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
-			CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
 			m_freem(m);
 			ifp->if_ierrors++;
 			DPRINTF(("wi_rx_intr: MCLGET failed\n"));
@@ -1464,8 +1470,6 @@
 	m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame) + len;
 	m->m_pkthdr.rcvif = ifp;
 
-	CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_RX);
-
 #if NBPFILTER > 0
 	if (sc->sc_drvbpf) {
 		struct mbuf mb;
@@ -1597,34 +1601,40 @@
 	SLIST_INSERT_HEAD(&sc->sc_rssdfree, rssd, rd_next);
 out:
 	ifp->if_flags &= ~IFF_OACTIVE;
-	CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_TX_EXC);
 }
 
 static void
 wi_txalloc_intr(struct wi_softc *sc)
 {
-	struct ieee80211com *ic = &sc->sc_ic;
-	struct ifnet *ifp = &ic->ic_if;
 	int fid, cur;
 
 	fid = CSR_READ_2(sc, WI_ALLOC_FID);
-	CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_ALLOC);
 
-	cur = sc->sc_txcur;
+	cur = sc->sc_txalloc;
 	if (sc->sc_txd[cur].d_fid != fid) {
 		printf("%s: bad alloc %x != %x, cur %d nxt %d\n",
 		    sc->sc_dev.dv_xname, fid, sc->sc_txd[cur].d_fid, cur,
-		    sc->sc_txnext);
+		    sc->sc_txqueue);
 		return;
 	}
-	sc->sc_tx_timer = 0;
 	sc->sc_txd[cur].d_len = 0;
-	sc->sc_txcur = cur = (cur + 1) % WI_NTXBUF;
-	if (sc->sc_txd[cur].d_len == 0)
+	sc->sc_txalloc = (cur + 1) % WI_NTXBUF;
+}
+
+static void
+wi_cmd_intr(struct wi_softc *sc)
+{
+	struct ieee80211com *ic = &sc->sc_ic;
+	struct ifnet *ifp = &ic->ic_if;
+	int cur;
+
+	cur = sc->sc_txstart;
+	sc->sc_txstart = cur = (cur + 1) % WI_NTXBUF;
+	if (--sc->sc_txqueued == 0) {
+		sc->sc_tx_timer = 0;
 		ifp->if_flags &= ~IFF_OACTIVE;
-	else {
-		if (wi_cmd(sc, WI_CMD_TX | WI_RECLAIM, sc->sc_txd[cur].d_fid,
-		    0, 0)) {
+	} else {
+		if (wi_sendcmd(sc, WI_CMD_TX | WI_RECLAIM, sc->sc_txd[cur].d_fid)) {
 			printf("%s: xmit failed\n", sc->sc_dev.dv_xname);
 			sc->sc_txd[cur].d_len = 0;
 		} else {
@@ -1687,7 +1697,6 @@
 	SLIST_INSERT_HEAD(&sc->sc_rssdfree, rssd, rd_next);
 out:
 	ifp->if_flags &= ~IFF_OACTIVE;
-	CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_TX);
 }
 
 static void
@@ -1769,7 +1778,6 @@
 		    le16toh(ltbuf[1]), le16toh(ltbuf[0])));
 		break;
 	}
-	CSR_WRITE_2(sc, WI_EVENT_ACK, WI_EV_INFO);
 }
 
 static int
@@ -2360,21 +2368,83 @@
 
 /* Must be called at proper protection level! */
 static int
+wi_sendcmd(struct wi_softc *sc, int cmd, int val0)
+{
+#ifdef HISTOGRAM
+	static int hist3[11];
+	static int hist3count;
+#endif
+	int i;
+
+	/* wait for the busy bit to clear */
+	for (i = 500; i > 0; i--) {	/* 5s */
+		if ((CSR_READ_2(sc, WI_COMMAND) & WI_CMD_BUSY) == 0)
+			break;
+		DELAY(1000);	/* 1 m sec */
+	}
+	if (i == 0) {
+		printf("%s: wi_sendcmd: busy bit won't clear.\n",
+		    sc->sc_dev.dv_xname);
+		return(ETIMEDOUT);
+  	}
+#ifdef HISTOGRAM
+	if (i > 490)
+		hist3[500 - i]++;
+	else
+		hist3[10]++;
+	if (++hist3count == 1000) {
+		hist3count = 0;
+		printf("%s: hist3: %d %d %d %d %d %d %d %d %d %d %d\n",
+		    sc->sc_dev.dv_xname,
+		    hist3[0], hist3[1], hist3[2], hist3[3], hist3[4],
+		    hist3[5], hist3[6], hist3[7], hist3[8], hist3[9],
+		    hist3[10]);
+	}
+#endif
+	CSR_WRITE_2(sc, WI_PARAM0, val0);
+	CSR_WRITE_2(sc, WI_PARAM1, 0);
+	CSR_WRITE_2(sc, WI_PARAM2, 0);
+	CSR_WRITE_2(sc, WI_COMMAND, cmd);
+
+	return 0;
+}
+
+static int
 wi_cmd(struct wi_softc *sc, int cmd, int val0, int val1, int val2)
 {
+#ifdef HISTOGRAM
+	static int hist1[11];
+	static int hist1count;
+	static int hist2[11];
+	static int hist2count;
+#endif
 	int i, status;
 
 	/* wait for the busy bit to clear */
 	for (i = 500; i > 0; i--) {	/* 5s */
 		if ((CSR_READ_2(sc, WI_COMMAND) & WI_CMD_BUSY) == 0)
 			break;
-		DELAY(10*1000);	/* 10 m sec */
+		DELAY(1000);	/* 1 m sec */
 	}
 	if (i == 0) {
 		printf("%s: wi_cmd: busy bit won't clear.\n",
 		    sc->sc_dev.dv_xname);
 		return(ETIMEDOUT);
   	}
+#ifdef HISTOGRAM
+	if (i > 490)
+		hist1[500 - i]++;
+	else
+		hist1[10]++;
+	if (++hist1count == 1000) {
+		hist1count = 0;
+		printf("%s: hist1: %d %d %d %d %d %d %d %d %d %d %d\n",
+		    sc->sc_dev.dv_xname,
+		    hist1[0], hist1[1], hist1[2], hist1[3], hist1[4],
+		    hist1[5], hist1[6], hist1[7], hist1[8], hist1[9],
+		    hist1[10]);
+	}
+#endif
 	CSR_WRITE_2(sc, WI_PARAM0, val0);
 	CSR_WRITE_2(sc, WI_PARAM1, val1);
 	CSR_WRITE_2(sc, WI_PARAM2, val2);
@@ -2390,6 +2460,20 @@
 			break;
 		DELAY(WI_DELAY);
 	}
+#ifdef HISTOGRAM
+	if (i < 100)
+		hist2[i/10]++;
+	else
+		hist2[10]++;
+	if (++hist2count == 1000) {
+		hist2count = 0;
+		printf("%s: hist2: %d %d %d %d %d %d %d %d %d %d %d\n",
+		    sc->sc_dev.dv_xname,
+		    hist2[0], hist2[1], hist2[2], hist2[3], hist2[4],
+		    hist2[5], hist2[6], hist2[7], hist2[8], hist2[9],
+		    hist2[10]);
+	}
+#endif
 
 	status = CSR_READ_2(sc, WI_STATUS);
 
@@ -2413,6 +2497,10 @@
 static int
 wi_seek_bap(struct wi_softc *sc, int id, int off)
 {
+#ifdef HISTOGRAM
+	static int hist4[11];
+	static int hist4count;
+#endif
 	int i, status;
 
 	CSR_WRITE_2(sc, WI_SEL0, id);
@@ -2430,6 +2518,20 @@
 		}
 		DELAY(1);
 	}
+#ifdef HISTOGRAM
+	if (i < 10)
+		hist4[i]++;
+	else
+		hist4[10]++;
+	if (++hist4count == 1000) {
+		hist4count = 0;
+		printf("%s: hist4: %d %d %d %d %d %d %d %d %d %d %d\n",
+		    sc->sc_dev.dv_xname,
+		    hist4[0], hist4[1], hist4[2], hist4[3], hist4[4],
+		    hist4[5], hist4[6], hist4[7], hist4[8], hist4[9],
+		    hist4[10]);
+	}
+#endif
 	if (status & WI_OFF_ERR) {
 		printf("%s: failed in wi_seek to %x/%x\n",
 		    sc->sc_dev.dv_xname, id, off);
Index: wivar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/wivar.h,v
retrieving revision 1.43
diff -u -r1.43 wivar.h
--- wivar.h	10 Feb 2004 00:59:38 -0000	1.43
+++ wivar.h	19 Jul 2004 00:26:25 -0000
@@ -128,8 +128,10 @@
 		int				d_len;
 		int				d_rate;
 	}			sc_txd[WI_NTXBUF];
-	int			sc_txnext;
-	int			sc_txcur;
+	int			sc_txqueue;	/* where to queue next packet */
+	int			sc_txstart;	/* next packet to start */
+	int			sc_txalloc;	/* next packet to allocate */
+	int			sc_txqueued;	/* packets currently queued */
 	struct wi_rssdesc 	sc_rssd[WI_NTXRSS];
 	wi_rssdescq_t		sc_rssdfree;
 	int			sc_tx_timer;

--Boundary-00=_vWx+Ay+kb3DsF7L--