Subject: Re: bce(4) and memory > 1GB problem
To: Manuel Bouyer <bouyer@antioche.eu.org>
From: Yorick Hardy <yhardy@uj.ac.za>
List: tech-kern
Date: 01/12/2007 12:42:15
This is a multi-part message in MIME format.
--------------000300090809080100030207
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

My apologies for replying to multiple e-mails in one.

Manuel Bouyer wrote:
> On Mon, Jan 08, 2007 at 04:46:14PM -0800, Jason Thorpe wrote:
>   
>> On Jan 8, 2007, at 1:05 PM, matthew green wrote:
>>
>>     
>>> either way, this will require a bunch of work for each platform to do
>>> the right thing.  the patch proposed seems to be most of what we need
>>> for it on x86 so i see little harm in commiting it.
>>>       
>> ...except to note that the API will almost certainly change.
>>     
>
> Yes. Maybe it should be better to add this as a parameter to
> bus_dmamap_create(), and compute the proper _dm_bounce_thresh at this
> time. But this is a big change in the bus_dma API, and require
> changing a lot of files, unless we keep bus_dmamap_create() as is and
> intruduce a new function with this extra parameter.
>
>   
Is there an advantage to passing by parameter rather than by flag (as in 
my patch) ?
Obviously my patch limits the address to a power of 2, will this be a 
problem?

I think the patch I proposed may address the above paragraph without the 
need to
change many files. I improved the patch (attached) so that the 
additional flags no longer
need to be passed on bus_dmamap_load, only bus_dmamap_create and 
bus_dmamem_alloc.

Izumi Tsutsui wrote:

> bus_dmamem_alloc(9) should also be changed to allocate
> DMA safe memory for such devices.
>
> On some bus controller, it could map any physical memory into
> <1GB range on DMA address space in bus_dmamap_load(9) and
> no need to handle it by bounce buffer in bus_dmamap_sync(9).
The additional flags for bus_dmamem_alloc in the patch I propose
may address this on x86. If I understood the code correctly,
bus_dmamap_load already determines whether a bounce buffer is
necessary case for case, or did you mean even this could be avoided?

Is there any chance a fix will go in for NetBSD 4?

Thanks to everyone for all the input.

-- 
Kind regards,

Yorick Hardy

Tel: +2711 489 3640  | Fax: +2711 489 2616        | Office: C-Ring 517b
Auckland Park Campus | University of Johannesburg | South Africa
Department of Applied Mathematics                 : http://general.uj.ac.za/apm


--------------000300090809080100030207
Content-Type: text/plain;
 name="bce2.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="bce2.patch"

--- sys/arch/x86/include/bus.h.orig	2007-01-10 16:49:27.000000000 +0200
+++ sys/arch/x86/include/bus.h	2007-01-10 16:25:37.000000000 +0200
@@ -1029,6 +1029,20 @@
 #define	BUS_DMA_WRITE		0x200	/* mapping is memory -> device only */
 #define	BUS_DMA_NOCACHE		0x400	/* hint: map non-cached memory */
 
+/*
+ * BUS_DMA_USEWIDTH indicates that the number of bits used
+ * for addressing is specified in the high 16 bits of flags.
+ * For example, for a device which can only address 24 bits
+ * (16MB) we use (BUS_DMA_USEWIDTH | BUS_DMA_WIDTH(24)).
+ * Only necessary if the device cannot use the full width of
+ * the host bus.
+ */
+
+#define BUS_DMA_USEWIDTH	BUS_DMA_BUS1
+#define BUS_DMA_WIDTH(x)	((x) << 16)
+#define BUS_DMA_WIDTHFROM(x)	(((x) >> 16) << 16)
+#define BUS_DMA_HIGHADDR(x)	(1 << ((x) >> 16))
+
 /* Forwards needed by prototypes below. */
 struct mbuf;
 struct uio;
@@ -1163,6 +1177,7 @@
 	bus_size_t	_dm_boundary;	/* don't cross this */
 	bus_addr_t	_dm_bounce_thresh; /* bounce threshold; see tag */
 	int		_dm_flags;	/* misc. flags */
+	int		_dm_alloc_flags; /* flags to pass to bus_dmamem_alloc */
 
 	void		*_dm_cookie;	/* cookie for bus-specific functions */
 
--- sys/arch/x86/pci/pci_machdep.c.orig	2007-01-06 13:42:22.000000000 +0200
+++ sys/arch/x86/pci/pci_machdep.c	2007-01-07 18:05:18.000000000 +0200
@@ -198,11 +198,7 @@
 	_bus_dmamap_load_uio,
 	_bus_dmamap_load_raw,
 	_bus_dmamap_unload,
-#if defined(_LP64) || defined(PAE)
 	_bus_dmamap_sync,
-#else
-	NULL,
-#endif
 	_bus_dmamem_alloc,
 	_bus_dmamem_free,
 	_bus_dmamem_map,
--- sys/arch/x86/x86/bus_dma.c.orig	2007-01-06 12:04:01.000000000 +0200
+++ sys/arch/x86/x86/bus_dma.c	2007-01-10 16:53:41.000000000 +0200
@@ -252,6 +252,7 @@
 	map->_dm_boundary = boundary;
 	map->_dm_bounce_thresh = t->_bounce_thresh;
 	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
+	map->_dm_alloc_flags = 0;
 	map->dm_maxsegsz = maxsegsz;
 	map->dm_mapsize = 0;		/* no valid mappings */
 	map->dm_nsegs = 0;
@@ -268,6 +269,18 @@
 			goto out;
 	}
 
+	/* address width limit was requested */
+	if(flags & BUS_DMA_USEWIDTH)
+	{
+		if(map->_dm_bounce_thresh == 0)
+			map->_dm_bounce_thresh = BUS_DMA_HIGHADDR(flags);
+		else
+			map->_dm_bounce_thresh = MIN(map->_dm_bounce_thresh,
+			                             BUS_DMA_HIGHADDR(flags));
+		map->_dm_alloc_flags |= BUS_DMA_USEWIDTH |
+		                        BUS_DMA_WIDTHFROM(flags);
+	}
+
 	if (map->_dm_bounce_thresh != 0)
 		cookieflags |= X86_DMA_MIGHT_NEED_BOUNCE;
 
@@ -864,10 +877,18 @@
     bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
     int flags)
 {
-	bus_addr_t high;
+	bus_addr_t high = t->_bounce_alloc_hi;
+
+	if(flags & BUS_DMA_USEWIDTH)
+	{
+		if(high == 0)
+			high = BUS_DMA_HIGHADDR(flags);
+		else
+			high = MIN(high, BUS_DMA_HIGHADDR(flags));
+	}
 
-	if (t->_bounce_alloc_hi != 0 && _BUS_AVAIL_END > t->_bounce_alloc_hi)
-		high = trunc_page(t->_bounce_alloc_hi);
+	if (high != 0 && _BUS_AVAIL_END > high)
+		high = trunc_page(high);
 	else
 		high = trunc_page(_BUS_AVAIL_END);
 
@@ -890,7 +911,8 @@
 	cookie->id_bouncebuflen = round_page(size);
 	error = _bus_dmamem_alloc(t, cookie->id_bouncebuflen,
 	    PAGE_SIZE, map->_dm_boundary, cookie->id_bouncesegs,
-	    map->_dm_segcnt, &cookie->id_nbouncesegs, flags);
+	    map->_dm_segcnt, &cookie->id_nbouncesegs,
+	    flags | map->_dm_alloc_flags);
 	if (error)
 		goto out;
 	error = _bus_dmamem_map(t, cookie->id_bouncesegs,
--- sys/dev/pci/if_bce.c.orig	2007-01-06 11:49:53.000000000 +0200
+++ sys/dev/pci/if_bce.c	2007-01-10 16:29:52.000000000 +0200
@@ -158,6 +158,13 @@
 	    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);			\
 } while (/* CONSTCOND */ 0)
 
+#ifdef BUS_DMA_USEWIDTH
+/* BCM440x can only address up to 1GB, i.e. 30 bits */
+#define BCE_DMA	(BUS_DMA_USEWIDTH | BUS_DMA_WIDTH(30))
+#else 
+#define BCE_DMA	0
+#endif
+
 static	int	bce_probe(struct device *, struct cfdata *, void *);
 static	void	bce_attach(struct device *, struct device *, void *);
 static	int	bce_ioctl(struct ifnet *, u_long, caddr_t);
@@ -377,8 +384,8 @@
 	 * due to the limition above. ??
 	 */
 	if ((error = bus_dmamem_alloc(sc->bce_dmatag,
-	    2 * PAGE_SIZE, PAGE_SIZE, 2 * PAGE_SIZE,
-				      &seg, 1, &rseg, BUS_DMA_NOWAIT))) {
+	    2 * PAGE_SIZE, PAGE_SIZE, 2 * PAGE_SIZE, &seg, 1, &rseg,
+	    BUS_DMA_NOWAIT | BCE_DMA))) {
 		printf("%s: unable to alloc space for ring descriptors, "
 		       "error = %d\n", sc->bce_dev.dv_xname, error);
 		return;
@@ -393,7 +400,7 @@
 	}
 	/* create a dma map for the ring */
 	if ((error = bus_dmamap_create(sc->bce_dmatag,
-	    2 * PAGE_SIZE, 1, 2 * PAGE_SIZE, 0, BUS_DMA_NOWAIT,
+	    2 * PAGE_SIZE, 1, 2 * PAGE_SIZE, 0, BUS_DMA_NOWAIT | BCE_DMA,
 				       &sc->bce_ring_map))) {
 		printf("%s: unable to create ring DMA map, error = %d\n",
 		    sc->bce_dev.dv_xname, error);
@@ -416,7 +423,7 @@
 	/* Create the transmit buffer DMA maps. */
 	for (i = 0; i < BCE_NTXDESC; i++) {
 		if ((error = bus_dmamap_create(sc->bce_dmatag, MCLBYTES,
-		    BCE_NTXFRAGS, MCLBYTES, 0, 0, &sc->bce_cdata.bce_tx_map[i])) != 0) {
+		    BCE_NTXFRAGS, MCLBYTES, 0, BCE_DMA, &sc->bce_cdata.bce_tx_map[i])) != 0) {
 			printf("%s: unable to create tx DMA map, error = %d\n",
 			    sc->bce_dev.dv_xname, error);
 		}
@@ -426,7 +433,7 @@
 	/* Create the receive buffer DMA maps. */
 	for (i = 0; i < BCE_NRXDESC; i++) {
 		if ((error = bus_dmamap_create(sc->bce_dmatag, MCLBYTES, 1,
-		    MCLBYTES, 0, 0, &sc->bce_cdata.bce_rx_map[i])) != 0) {
+		    MCLBYTES, 0, BCE_DMA, &sc->bce_cdata.bce_rx_map[i])) != 0) {
 			printf("%s: unable to create rx DMA map, error = %d\n",
 			    sc->bce_dev.dv_xname, error);
 		}

--------------000300090809080100030207--