Subject: Re: MI SONIC Ethernet driver for mac68k
To: None <port-mac68k@NetBSD.org>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: port-m68k
Date: 06/01/2007 08:32:30
In article <070531205841.M0110699@mirage.ceres.dti.ne.jp> on port-mac68k
I wrote:

> The MI SONIC driver (sys/dev/ic/dp83932.c) should have better
> performance than -current mac68k homegrown version because
> the former does direct DMA from/to mbufs and less memory copies.

...so it seems more proper cache sync ops are required on DMA xfers.

I've put a new kernel with attached patch (mostle based on mvme68k) here:
http://www.ceres.dti.ne.jp/~tsutsui/netbsd/netbsd-mac68k-MI-sonic-20070601.gz

and it actually works even on NFS root.
---
Izumi Tsutsui


Index: arch/m68k/m68k/bus_dma.c
===================================================================
RCS file: /cvsroot/src/sys/arch/m68k/m68k/bus_dma.c,v
retrieving revision 1.22
diff -u -r1.22 bus_dma.c
--- arch/m68k/m68k/bus_dma.c	5 Mar 2007 14:31:08 -0000	1.22
+++ arch/m68k/m68k/bus_dma.c	31 May 2007 23:21:38 -0000
@@ -419,113 +419,108 @@
     bus_size_t len, int ops)
 {
 #if defined(M68040) || defined(M68060)
+	bus_addr_t p, e, ps, pe;
+	bus_size_t seglen;
 	int i;
 #endif
 
+#if defined(M68020) || defined(M68030)
+	if (cputype == CPU_68020 || cputype == CPU_68030)
+		return;
+#endif
+
+	/* Short-circuit for unsupported `ops' */
+	if ((ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) == 0)
+		return;
+
+#if defined(M68040) || defined(M68060)
 	/*
 	 * flush/purge the cache.
-	 * @@@ should probably be fixed to use offset and len args.
 	 */
+	for (i = 0; i < map->dm_nsegs && len != 0; i++) {
+		if (map->dm_segs[i].ds_len <= offset) {
+			/* Segment irrelevant - before requested offset */
+			offset -= map->dm_segs[i].ds_len;
+			continue;
+		}
 
-#if defined(M68040) || defined(M68060)
-	if (ops & BUS_DMASYNC_PREWRITE) {
-		for (i = 0; i < map->dm_nsegs; i++) {
-			bus_addr_t p = map->dm_segs[i].ds_addr;
-			bus_addr_t e = p+map->dm_segs[i].ds_len;
-			/* If the pointers are unaligned, it's ok to flush surrounding cache line */
-			p -= p % 16;
-			if (e % 16) e += 16 - (e % 16);
-#ifdef DIAGNOSTIC
-			if ((p % 16) || (e % 16)) {
-				panic("unaligned address in _bus_dmamap_sync "
-				    "while flushing. address=0x%08lx, "
-				    "end=0x%08lx, ops=0x%x", p, e, ops);
-			}
-#endif
+		/*
+		 * Now at the first segment to sync; nail
+		 * each segment until we have exhausted the
+		 * length.
+		 */
+		seglen = map->dm_segs[i].ds_len - offset;
+		if (seglen > len)
+			seglen = len;
+
+		ps = map->dm_segs[i].ds_addr + offset;
+		pe = ps + seglen;
+
+		if (ops & BUS_DMASYNC_PREWRITE) {
+			p = ps & ~0xf;
+			e = (pe + 0xf) & ~0xf;
+
+			/* flush cache line */
 			while ((p < e) && (p % PAGE_SIZE)) {
-				DCFL(p);		/* flush cache line */
+				DCFL(p);
 				p += 16;
 			}
+
+			/* flush page */
 			while (p + PAGE_SIZE <= e) {
-				DCFP(p);		/* flush page */
+				DCFP(p);
 				p += PAGE_SIZE;
 			}
+
+			/* flush cache line */
 			while (p < e) {
-				DCFL(p);		/* flush cache line */
+				DCFL(p);
 				p += 16;
 			}
-#ifdef DIAGNOSTIC
-			if (p != e) {
-				panic("overrun in _bus_dmamap_sync "
-				    "while flushing. address=0x%08lx, "
-				    "end=0x%08lx, ops=0x%x", p, e, ops);
-			}
-#endif
 		}
-	}
-#endif /* M68040 || M68060 */
 
-	if (ops & BUS_DMASYNC_PREREAD) {
-		switch (cputype) {
-		default:
-#ifdef M68020
-		case CPU_68020:
-			break;
-#endif
-#ifdef M68030
-		case CPU_68030:
-			break;
-#endif
-#if defined(M68040) || defined(M68060)
-#ifdef M68040
-		case CPU_68040:
-#endif
-#ifdef M68060
-		case CPU_68060:
-#endif
-			for (i = 0; i < map->dm_nsegs; i++) {
-				bus_addr_t p = map->dm_segs[i].ds_addr;
-				bus_addr_t e = p+map->dm_segs[i].ds_len;
-				if (p % 16) {
-					p -= p % 16;
-					DCFL(p);
-				}
-				if (e % 16) {
-					e += 16 - (e % 16);
-					DCFL(e - 16);
-				}
-#ifdef DIAGNOSTIC
-				if ((p % 16) || (e % 16)) {
-					panic("unaligned address in "
-					    "_bus_dmamap_sync while purging."
-					    "address=0x%08lx, end=0x%08lx, "
-					    "ops=0x%x", p, e, ops);
-				}
-#endif
-				while ((p < e) && (p % PAGE_SIZE)) {
-					DCPL(p);	/* purge cache line */
-					p += 16;
-				}
-				while (p + PAGE_SIZE <= e) {
-					DCPP(p);	/* purge page */
-					p += PAGE_SIZE;
-				}
-				while (p < e) {
-					DCPL(p);	/* purge cache line */
-					p += 16;
-				}
-#ifdef DIAGNOSTIC
-				if (p != e) {
-					panic("overrun in _bus_dmamap_sync "
-					    "while purging. address=0x%08lx, "
-					    "end=0x%08lx, ops=0x%x", p, e, ops);
-				}
-#endif
+		/*
+		 * Normally, the `PREREAD' flag instructs us to purge the
+		 * cache for the specified offset and length. However, if
+		 * the offset/length is not aligned to a cacheline boundary,
+		 * we may end up purging some legitimate data from the
+		 * start/end of the cache. In such a case, *flush* the
+		 * cachelines at the start and end of the required region.
+		 */
+		else if (ops & BUS_DMASYNC_PREREAD) {
+			/* flush cache lines on boundaries */
+			if (ps & 0xf) {
+				DCFL(ps & ~0xf);
+			}
+			if (pe & 0xf) {
+				DCFL(pe & ~0xf);
+			}
+
+			p = (ps + 15) & ~0xf;
+			e = pe & ~0xf;
+
+			/* purge cache line */
+			while ((p < e) && (p % PAGE_SIZE)) {
+				DCPL(p);
+				p += 16;
+			}
+
+			/* purge page */
+			while (p + PAGE_SIZE <= e) {
+				DCPP(p);
+				p += PAGE_SIZE;
+			}
+
+			/* purge cache line */
+			while (p < e) {
+				DCPL(p);
+				p += 16;
 			}
-			break;
-#endif /* M68040 || M68060 */
 		}
+		offset = 0;
+		len -= seglen;
 	}
+#endif
 }
 
 /*
Index: dev/ic/dp83932.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/dp83932.c,v
retrieving revision 1.14
diff -u -r1.14 dp83932.c
--- dev/ic/dp83932.c	4 Mar 2007 06:01:54 -0000	1.14
+++ dev/ic/dp83932.c	31 May 2007 23:21:38 -0000
@@ -649,11 +649,15 @@
 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 			tda32 = &sc->sc_tda32[i];
 			status = sonic32toh(sc, tda32->tda_status);
+			SONIC_CDTXSYNC32(sc, i,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 		} else {
 			SONIC_CDTXSYNC16(sc, i,
 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 			tda16 = &sc->sc_tda16[i];
 			status = sonic16toh(sc, tda16->tda_status);
+			SONIC_CDTXSYNC16(sc, i,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 		}
 
 		if ((status & ~(TCR_EXDIS|TCR_CRCI|TCR_POWC|TCR_PINT)) == 0)
@@ -713,6 +717,8 @@
 			SONIC_CDRXSYNC32(sc, i,
 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 			rda32 = &sc->sc_rda32[i];
+			SONIC_CDRXSYNC32(sc, i,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 			if (rda32->rda_inuse != 0)
 				break;
 			status = sonic32toh(sc, rda32->rda_status);
@@ -724,6 +730,8 @@
 			SONIC_CDRXSYNC16(sc, i,
 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 			rda16 = &sc->sc_rda16[i];
+			SONIC_CDRXSYNC16(sc, i,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 			if (rda16->rda_inuse != 0)
 				break;
 			status = sonic16toh(sc, rda16->rda_status);