Subject: Re: bge on big endian anyone?
To: None <garrett_damore@tadpole.com>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: current-users
Date: 11/23/2005 07:11:30
In article <43838C4A.1020402@tadpole.com>
garrett_damore@tadpole.com wrote:

> That didn't fix it -- probably there is another endian error somewhere. 
> But the knowledge that bge is known to be busted on big-endian machines
> is quite helpful.

Hmm, more bswap ops are needed (for all members in struct bge_ring_data)?

XXX1: It seems ugly to use values stored in DMA descriptors
      (which might be uncached) to write registers via bus_space ops.
XXX2: bge_set_hostaddr() is used for both DMA descriptors and register writes.
XXX3: RCB_WRITE_4 macro seems awful.
---
Izumi Tsutsui


Index: if_bge.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.93
diff -u -r1.93 if_bge.c
--- if_bge.c	6 Sep 2005 15:42:21 -0000	1.93
+++ if_bge.c	22 Nov 2005 22:08:24 -0000
@@ -909,8 +909,8 @@
 	bge_set_hostaddr(&r->bge_addr,
 	    dmamap->dm_segs[0].ds_addr);
 	r->bge_flags = BGE_RXBDFLAG_END;
-	r->bge_len = m_new->m_len;
-	r->bge_idx = i;
+	r->bge_len = htole16(m_new->m_len);
+	r->bge_idx = htole16(i);
 
 	bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
 	    offsetof(struct bge_ring_data, bge_rx_std_ring) +
@@ -969,9 +969,9 @@
 	r = &sc->bge_rdata->bge_rx_jumbo_ring[i];
 	sc->bge_cdata.bge_rx_jumbo_chain[i] = m_new;
 	bge_set_hostaddr(&r->bge_addr, BGE_JUMBO_DMA_ADDR(sc, m_new));
-	r->bge_flags = BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING;
-	r->bge_len = m_new->m_len;
-	r->bge_idx = i;
+	r->bge_flags = htole16(BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING);
+	r->bge_len = htole16(m_new->m_len);
+	r->bge_idx = htole16(i);
 
 	bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
 	    offsetof(struct bge_ring_data, bge_rx_jumbo_ring) +
@@ -1053,7 +1053,7 @@
 
 	rcb = &sc->bge_rdata->bge_info.bge_jumbo_rx_rcb;
 	rcb->bge_maxlen_flags = 0;
-	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
+	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, le32toh(rcb->bge_maxlen_flags));
 
 	CSR_WRITE_4(sc, BGE_MBX_RX_JUMBO_PROD_LO, sc->bge_jumbo);
 
@@ -1530,18 +1530,18 @@
 	    BGE_RING_DMA_ADDR(sc, bge_rx_std_ring));
 	if ((sc->bge_quirks & BGE_QUIRK_5705_CORE) == 0) {
 		rcb->bge_maxlen_flags =
-		    BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0);
+		    htole32(BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN, 0));
 	} else {
-		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0);
+		rcb->bge_maxlen_flags = htole32(BGE_RCB_MAXLEN_FLAGS(512, 0));
 	}
 	if (sc->bge_extram)
-		rcb->bge_nicaddr = BGE_EXT_STD_RX_RINGS;
+		rcb->bge_nicaddr = htole32(BGE_EXT_STD_RX_RINGS);
 	else
-		rcb->bge_nicaddr = BGE_STD_RX_RINGS;
-	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi);
-	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo);
-	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
-	CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr);
+		rcb->bge_nicaddr = htole32(BGE_STD_RX_RINGS);
+	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, le32toh(rcb->bge_hostaddr.bge_addr_hi));
+	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, le32toh(rcb->bge_hostaddr.bge_addr_lo));
+	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, le32toh(rcb->bge_maxlen_flags));
+	CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, le32toh(rcb->bge_nicaddr));
 
 	if ((sc->bge_quirks & BGE_QUIRK_5705_CORE) == 0) {
 		sc->bge_return_ring_cnt = BGE_RETURN_RING_CNT;
@@ -1561,27 +1561,27 @@
 		bge_set_hostaddr(&rcb->bge_hostaddr,
 		    BGE_RING_DMA_ADDR(sc, bge_rx_jumbo_ring));
 		rcb->bge_maxlen_flags =
-		    BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN,
-			BGE_RCB_FLAG_RING_DISABLED);
+		    htole32(BGE_RCB_MAXLEN_FLAGS(BGE_MAX_FRAMELEN,
+			BGE_RCB_FLAG_RING_DISABLED));
 		if (sc->bge_extram)
-			rcb->bge_nicaddr = BGE_EXT_JUMBO_RX_RINGS;
+			rcb->bge_nicaddr = htole32(BGE_EXT_JUMBO_RX_RINGS);
 		else
-			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS;
+			rcb->bge_nicaddr = htole32(BGE_JUMBO_RX_RINGS);
 
 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI,
-		    rcb->bge_hostaddr.bge_addr_hi);
+		    le32toh(rcb->bge_hostaddr.bge_addr_hi));
 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO,
-		    rcb->bge_hostaddr.bge_addr_lo);
+		    le32toh(rcb->bge_hostaddr.bge_addr_lo));
 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS,
-		    rcb->bge_maxlen_flags);
+		    le32toh(rcb->bge_maxlen_flags));
 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr);
 
 		/* Set up dummy disabled mini ring RCB */
 		rcb = &sc->bge_rdata->bge_info.bge_mini_rx_rcb;
-		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0,
-		    BGE_RCB_FLAG_RING_DISABLED);
+		rcb->bge_maxlen_flags = htole32(BGE_RCB_MAXLEN_FLAGS(0,
+		    BGE_RCB_FLAG_RING_DISABLED));
 		CSR_WRITE_4(sc, BGE_RX_MINI_RCB_MAXLEN_FLAGS,
-		    rcb->bge_maxlen_flags);
+		    le32toh(rcb->bge_maxlen_flags));
 
 		bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
 		    offsetof(struct bge_ring_data, bge_info),
@@ -1612,9 +1612,9 @@
 
 	/* Configure TX RCB 0 (we use only the first ring) */
 	rcb_addr = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
-	bge_set_hostaddr(&taddr, BGE_RING_DMA_ADDR(sc, bge_tx_ring));
-	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi);
-	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo);
+	bge_set_hostaddr(&taddr, BGE_RING_DMA_ADDR(sc, bge_tx_ring)); /* XXX */
+	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_hi, le32toh(taddr.bge_addr_hi)); /* XXX */
+	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_lo, le32toh(taddr.bge_addr_lo)); /* XXX */
 	RCB_WRITE_4(sc, rcb_addr, bge_nicaddr,
 		    BGE_NIC_TXRING_ADDR(0, BGE_TX_RING_CNT));
 	if ((sc->bge_quirks & BGE_QUIRK_5705_CORE) == 0) {
@@ -1648,9 +1648,9 @@
 	 * nicaddr field in the RCB isn't used.
 	 */
 	rcb_addr = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
-	bge_set_hostaddr(&taddr, BGE_RING_DMA_ADDR(sc, bge_rx_return_ring));
-	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_hi, taddr.bge_addr_hi);
-	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_lo, taddr.bge_addr_lo);
+	bge_set_hostaddr(&taddr, BGE_RING_DMA_ADDR(sc, bge_rx_return_ring)); /* XXX */
+	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_hi, le32toh(taddr.bge_addr_hi));
+	RCB_WRITE_4(sc, rcb_addr, bge_hostaddr.bge_addr_lo, le32toh(taddr.bge_addr_lo));
 	RCB_WRITE_4(sc, rcb_addr, bge_nicaddr, 0x00000000);
 	RCB_WRITE_4(sc, rcb_addr, bge_maxlen_flags,
 	    BGE_RCB_MAXLEN_FLAGS(sc->bge_return_ring_cnt, 0));
@@ -1715,15 +1715,15 @@
 		    BGE_RING_DMA_ADDR(sc, bge_info.bge_stats));
 		CSR_WRITE_4(sc, BGE_HCC_STATS_TICKS, sc->bge_stat_ticks);
 		CSR_WRITE_4(sc, BGE_HCC_STATS_BASEADDR, BGE_STATS_BLOCK);
-		CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_HI, taddr.bge_addr_hi);
-		CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_LO, taddr.bge_addr_lo);
+		CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_HI, le32toh(taddr.bge_addr_hi));
+		CSR_WRITE_4(sc, BGE_HCC_STATS_ADDR_LO, le32toh(taddr.bge_addr_lo));
 	}
 
 	/* Set up address of status block */
 	bge_set_hostaddr(&taddr, BGE_RING_DMA_ADDR(sc, bge_status_block));
 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_BASEADDR, BGE_STATUS_BLOCK);
-	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI, taddr.bge_addr_hi);
-	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO, taddr.bge_addr_lo);
+	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI, le32toh(taddr.bge_addr_hi));
+	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO, le32toh(taddr.bge_addr_lo));
 	sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx = 0;
 	sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx = 0;
 
@@ -2767,7 +2767,7 @@
 	    BUS_DMASYNC_POSTREAD);
 
 	offset = offsetof(struct bge_ring_data, bge_rx_return_ring);
-	tosync = sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx -
+	tosync = le16toh(sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx) -
 	    sc->bge_rx_saved_considx;
 
 	toff = offset + (sc->bge_rx_saved_considx * sizeof (struct bge_rx_bd));
@@ -2785,7 +2785,7 @@
 	    BUS_DMASYNC_POSTREAD);
 
 	while(sc->bge_rx_saved_considx !=
-	    sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx) {
+	    le16toh(sc->bge_rdata->bge_status_block.bge_idx[0].bge_rx_prod_idx)) {
 		struct bge_rx_bd	*cur_rx;
 		u_int32_t		rxidx;
 		struct mbuf		*m = NULL;
@@ -2793,15 +2793,15 @@
 		cur_rx = &sc->bge_rdata->
 			bge_rx_return_ring[sc->bge_rx_saved_considx];
 
-		rxidx = cur_rx->bge_idx;
+		rxidx = le16toh(cur_rx->bge_idx);
 		BGE_INC(sc->bge_rx_saved_considx, sc->bge_return_ring_cnt);
 
-		if (cur_rx->bge_flags & BGE_RXBDFLAG_JUMBO_RING) {
+		if (le16toh(cur_rx->bge_flags) & BGE_RXBDFLAG_JUMBO_RING) {
 			BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT);
 			m = sc->bge_cdata.bge_rx_jumbo_chain[rxidx];
 			sc->bge_cdata.bge_rx_jumbo_chain[rxidx] = NULL;
 			jumbocnt++;
-			if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
+			if (le16toh(cur_rx->bge_flags) & BGE_RXBDFLAG_ERROR) {
 				ifp->if_ierrors++;
 				bge_newbuf_jumbo(sc, sc->bge_jumbo, m);
 				continue;
@@ -2819,7 +2819,7 @@
 			stdcnt++;
 			dmamap = sc->bge_cdata.bge_rx_std_map[rxidx];
 			sc->bge_cdata.bge_rx_std_map[rxidx] = 0;
-			if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
+			if (le16toh(cur_rx->bge_flags) & BGE_RXBDFLAG_ERROR) {
 				ifp->if_ierrors++;
 				bge_newbuf_std(sc, sc->bge_std, m, dmamap);
 				continue;
@@ -2841,12 +2841,12 @@
                  */
 		if (sc->bge_rx_alignment_bug) {
 			memmove(mtod(m, caddr_t) + ETHER_ALIGN, m->m_data,
-                                cur_rx->bge_len);
+                                le16toh(cur_rx->bge_len));
 			m->m_data += ETHER_ALIGN;
 		}
 #endif
 
-		m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
+		m->m_pkthdr.len = m->m_len = le16toh(cur_rx->bge_len) - ETHER_CRC_LEN;
 		m->m_pkthdr.rcvif = ifp;
 
 #if NBPFILTER > 0
@@ -2859,18 +2859,18 @@
 
 		m->m_pkthdr.csum_flags = M_CSUM_IPv4;
 
-		if ((cur_rx->bge_ip_csum ^ 0xffff) != 0)
+		if ((le16toh(cur_rx->bge_ip_csum) ^ 0xffff) != 0)
 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
 		/*
 		 * Rx transport checksum-offload may also
 		 * have bugs with packets which, when transmitted,
 		 * were `runts' requiring padding.
 		 */
-		if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM &&
+		if (le16toh(cur_rx->bge_flags) & BGE_RXBDFLAG_TCP_UDP_CSUM &&
 		    (/* (sc->_bge_quirks & BGE_QUIRK_SHORT_CKSUM_BUG) == 0 ||*/
 		     m->m_pkthdr.len >= ETHER_MIN_NOPAD)) {
 			m->m_pkthdr.csum_data =
-			    cur_rx->bge_tcp_udp_csum;
+			    le16toh(cur_rx->bge_tcp_udp_csum);
 			m->m_pkthdr.csum_flags |=
 			    (M_CSUM_TCPv4|M_CSUM_UDPv4|
 			     M_CSUM_DATA|M_CSUM_NO_PSEUDOHDR);
@@ -2880,8 +2880,8 @@
 		 * If we received a packet with a vlan tag, pass it
 		 * to vlan_input() instead of ether_input().
 		 */
-		if (cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG)
-			VLAN_INPUT_TAG(ifp, m, cur_rx->bge_vlan_tag, continue);
+		if (le16toh(cur_rx->bge_flags) & BGE_RXBDFLAG_VLAN_TAG)
+			VLAN_INPUT_TAG(ifp, m, le16toh(cur_rx->bge_vlan_tag), continue);
 
 		(*ifp->if_input)(ifp, m);
 	}
@@ -2913,7 +2913,7 @@
 	    BUS_DMASYNC_POSTREAD);
 
 	offset = offsetof(struct bge_ring_data, bge_tx_ring);
-	tosync = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx -
+	tosync = le16toh(sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx) -
 	    sc->bge_tx_saved_considx;
 
 	toff = offset + (sc->bge_tx_saved_considx * sizeof (struct bge_tx_bd));
@@ -2935,12 +2935,12 @@
 	 * frames that have been sent.
 	 */
 	while (sc->bge_tx_saved_considx !=
-	    sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx) {
+	    le16toh(sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx)) {
 		u_int32_t		idx = 0;
 
 		idx = sc->bge_tx_saved_considx;
 		cur_tx = &sc->bge_rdata->bge_tx_ring[idx];
-		if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
+		if (le16toh(cur_tx->bge_flags) & BGE_TXBDFLAG_END)
 			ifp->if_opackets++;
 		m = sc->bge_cdata.bge_tx_chain[idx];
 		if (m != NULL) {
@@ -3012,7 +3012,7 @@
 			    BRGPHY_INTRS);
 		}
 	} else {
-		if (sc->bge_rdata->bge_status_block.bge_status &
+		if (le32toh(sc->bge_rdata->bge_status_block.bge_status) &
 		    BGE_STATFLAG_LINKSTATE_CHANGED) {
 			sc->bge_link = 0;
 			callout_stop(&sc->bge_timeout);
@@ -3421,14 +3421,14 @@
 		if (sc->bge_cdata.bge_tx_chain[frag] != NULL)
 			break;
 		bge_set_hostaddr(&f->bge_addr, dmamap->dm_segs[i].ds_addr);
-		f->bge_len = dmamap->dm_segs[i].ds_len;
-		f->bge_flags = csum_flags;
+		f->bge_len = htole16(dmamap->dm_segs[i].ds_len);
+		f->bge_flags = htole16(csum_flags);
 
 		if (mtag != NULL) {
-			f->bge_flags |= BGE_TXBDFLAG_VLAN_TAG;
-			f->bge_vlan_tag = VLAN_TAG_VALUE(mtag);
+			f->bge_flags |= htole16(BGE_TXBDFLAG_VLAN_TAG);
+			f->bge_vlan_tag = htole16(VLAN_TAG_VALUE(mtag));
 		} else {
-			f->bge_vlan_tag = 0;
+			f->bge_vlan_tag = 0; /* no need bswap */
 		}
 		/*
 		 * Sanity check: avoid coming within 16 descriptors
@@ -3450,7 +3450,7 @@
 	if (frag == sc->bge_tx_saved_considx)
 		return(ENOBUFS);
 
-	sc->bge_rdata->bge_tx_ring[cur].bge_flags |= BGE_TXBDFLAG_END;
+	sc->bge_rdata->bge_tx_ring[cur].bge_flags |= htole16(BGE_TXBDFLAG_END);
 	sc->bge_cdata.bge_tx_chain[cur] = m_head;
 	SLIST_REMOVE_HEAD(&sc->txdma_list, link);
 	sc->txdma[cur] = dma;
Index: if_bgereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_bgereg.h,v
retrieving revision 1.26
diff -u -r1.26 if_bgereg.h
--- if_bgereg.h	28 Aug 2005 19:24:57 -0000	1.26
+++ if_bgereg.h	22 Nov 2005 22:08:25 -0000
@@ -1755,9 +1755,9 @@
 static __inline void
 bge_set_hostaddr(volatile bge_hostaddr *x, bus_addr_t y)
 {
-	x->bge_addr_lo = y & 0xffffffff;
+	x->bge_addr_lo = htole32(y & 0xffffffff);
 	if (sizeof (bus_addr_t) == 8)
-		x->bge_addr_hi = (u_int64_t)y >> 32;
+		x->bge_addr_hi = htole32((u_int64_t)y >> 32);
 	else
 		x->bge_addr_hi = 0;
 }
@@ -1769,11 +1769,7 @@
 	u_int32_t		bge_nicaddr;
 };
 
-#if BYTE_ORDER == BIG_ENDIAN
-#define	BGE_RCB_MAXLEN_FLAGS(maxlen, flags)	((flags) << 16 | (maxlen))
-#else
 #define	BGE_RCB_MAXLEN_FLAGS(maxlen, flags)	((maxlen) << 16 | (flags))
-#endif
 
 #define RCB_WRITE_4(sc, rcb, offset, val) \
 	bus_space_write_4(sc->bge_btag, sc->bge_bhandle, \