Subject: Re: port-i386/21665: bus_dmamap_sync needs a memory barrier
To: None <port-i386@netbsd.org, port-amd64@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-amd64
Date: 01/08/2004 19:54:43
Hi,

I'll commit the following diff if no one objects.
(It has not been tested on amd64.)

YAMAMOTO Takashi


Index: arch/i386/include/cpufunc.h
===================================================================
--- arch/i386/include/cpufunc.h	(revision 490)
+++ arch/i386/include/cpufunc.h	(revision 492)
@@ -54,6 +54,16 @@ x86_pause(void)
 	__asm __volatile("pause");
 }
 
+static __inline void
+x86_lfence(void)
+{
+
+	/* Stand-in for lfence: a locked add of 0 to the word at 0(%esp),
+	 * with a "memory" clobber so the compiler won't reorder around it.
+	 * XXX it's better to use real lfence insn if available. */
+	__asm __volatile("lock; addl $0, 0(%%esp)" : : : "memory");
+}
+
 #ifdef _KERNEL
 
 extern unsigned int cpu_feature;
Index: arch/amd64/include/cpufunc.h
===================================================================
--- arch/amd64/include/cpufunc.h	(revision 490)
+++ arch/amd64/include/cpufunc.h	(revision 492)
@@ -54,6 +54,18 @@ x86_pause(void)
 	/* nothing */
 }
 
+static __inline void
+x86_lfence(void)
+{
+
+	/*
+	 * Issue lfence.  XXX there is no fallback if lfence isn't available.
+	 * NOTE(review): presumably every amd64 CPU has lfence -- confirm.
+	 * The "memory" clobber is there to avoid compiler reordering.
+	 */
+	__asm __volatile("lfence" : : : "memory");
+}
+
 #ifdef _KERNEL
 
 extern int cpu_feature;
Index: arch/x86/include/bus.h
===================================================================
--- arch/x86/include/bus.h	(revision 490)
+++ arch/x86/include/bus.h	(revision 492)
@@ -72,6 +72,7 @@
 #define _X86_BUS_H_
 
 #include <machine/pio.h>
+#include <machine/cpufunc.h>	/* for x86_lfence */
 
 #ifdef BUS_SPACE_DEBUG
 #include <sys/systm.h> /* for printf() prototype */
@@ -1109,6 +1110,9 @@ struct x86_bus_dma_tag {
 		    int, off_t, int, int);
 };
 
+static __inline void bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t,
+    bus_addr_t, bus_size_t, int) __attribute__((__unused__));
+
 #define	bus_dmamap_create(t, s, n, m, b, f, p)			\
 	(*(t)->_dmamap_create)((t), (s), (n), (m), (b), (f), (p))
 #define	bus_dmamap_destroy(t, p)				\
@@ -1123,9 +1127,15 @@ struct x86_bus_dma_tag {
 	(*(t)->_dmamap_load_raw)((t), (m), (sg), (n), (s), (f))
 #define	bus_dmamap_unload(t, p)					\
 	(*(t)->_dmamap_unload)((t), (p))
-#define	bus_dmamap_sync(t, p, o, l, ops)			\
-	(void)((t)->_dmamap_sync ?				\
-	    (*(t)->_dmamap_sync)((t), (p), (o), (l), (ops)) : (void)0)
+static __inline void
+bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t p, bus_addr_t o, bus_size_t l,
+    int ops)
+{
+	if (ops & BUS_DMASYNC_POSTREAD)	/* POSTREAD: fence before the hook */
+		x86_lfence();
+	if (t->_dmamap_sync)		/* the tag's sync hook is optional */
+		(*t->_dmamap_sync)(t, p, o, l, ops);
+}
 
 #define	bus_dmamem_alloc(t, s, a, b, sg, n, r, f)		\
 	(*(t)->_dmamem_alloc)((t), (s), (a), (b), (sg), (n), (r), (f))