Subject: Re: NetBSD/xen network problems (need help)
To: Mike M. Volokhov <mishka@intostroy.com>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: port-xen
Date: 01/25/2006 20:14:22
--Dxnq1zWXvFF0Q93v
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jan 25, 2006 at 05:44:44PM +0200, Mike M. Volokhov wrote:
> > Hum, nothing looks wrong here. Still no messages in the consoles ?
> 
> Any :-( After three sequental kernel panics in domUs (twice for dom2
> and one for dom1) I've reverted back my kernels. Right now is testing
> your new improvements.
> 
> On the other hand, there are no more hangups and no errors on
> interfaces. Altough, I've small number of output errors (no drops) on
> bridge0, but all they are apperared after kernel panics (see below).

This makes sense. When a domU is in ddb network interrupts are blocked,
so the receive ring isn't drained. Once it's full, the xvif in dom0 will
stop transmitting, and once its send queue is full you'll see errors
at the bridge level.

> > > panic: kernel diagnostic assertion "((pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) == 0" failed: file "../../../../arch/xen/xen/if_xennet.c", line 1036
> > 
> > I can't see how my patch could be related to this, nor how this can
> > happen. This looks like memory corruption, 
> 
> Are you mean physical memory corruptions? Well, I haven't ran any
> memtest on it, but server works very well with all other kernels. The
> memory installed is dual Kingston KVR533D2E4/512 (DDR2, ECC) on
> Supermicro P8SCT mobo (E7221 chipset).

I was more thinking of a software bug, writing a place in memory it shouldn't
leading to bogus values here.

> 
> > or m_pkthdr.len being way
> > too large. If this happens again we'll have to put more printfs to see what
> > really happens.
> > 
> 
> Again, got it already twice for about one hour (it's the same, but here
> is an output for reference):
> 
> panic: kernel diagnostic assertion "((pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) == 0" failed: file "../../../../arch/xen/xen/if_xennet.c", line 1037
> Stopped at      netbsd:cpu_Debugger+0x4:        leave
> cpu_Debugger(c03f8aa8,ffffffff,c06bd600,c06a0f38,c06a0f00) at netbsd:cpu_Debugger+0x4
> panic(c033c780,c03097e7,c0338e80,c0338c60,40d) at netbsd:panic+0x121
> __main(c03097e7,c0338c60,40d,c0338e80,1) at netbsd:__main
> xennet_start(c072f038,c03f89cc,c072f038,2,c03f8a18) at netbsd:xennet_start+0x55a
> ether_output(c072f038,c06bd600,c068c710,c06a8294,c06bd600) at netbsd:ether_output+0x38b
> ip_output(c06bd600,0,c036e2f4,1,0) at netbsd:ip_output+0x547
> ip_forward(c06bd600,0,c072d038,1,c072d038) at netbsd:ip_forward+0x176
> ip_input(c06bd600,c02c2f26,c072d038,c06a0500,0) at netbsd:ip_input+0x29a
> ipintr(fffffffe,20,4,1,c03f8e10) at netbsd:ipintr+0xad
> DDB lost frame for netbsd:Xsoftnet+0x4f, trying 0xc03f8dd0
> Xsoftnet() at netbsd:Xsoftnet+0x4f

Always ip_forward. OK, can you use the attached patch instead ?
I hope this will give us more details about why we get this strange packet.

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 26 ans d'experience feront toujours la difference
--

--Dxnq1zWXvFF0Q93v
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff

Index: if_xennet.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/if_xennet.c,v
retrieving revision 1.13.2.19
diff -u -r1.13.2.19 if_xennet.c
--- if_xennet.c	22 Jan 2006 10:41:59 -0000	1.13.2.19
+++ if_xennet.c	25 Jan 2006 19:13:13 -0000
@@ -638,6 +638,7 @@
 	mmu_update_t *mmu = rx_mmu;
 	multicall_entry_t *mcl = rx_mcl;
 	struct mbuf *m;
+	void *pktp;
 
 	network_tx_buf_gc(sc);
 	ifp->if_flags &= ~IFF_OACTIVE;
@@ -660,7 +661,7 @@
 		/* XXXcl check rx->status for error */
 
                 MGETHDR(m, M_DONTWAIT, MT_DATA);
-                if (m == NULL) {
+                if (__predict_false(m == NULL)) {
 			printf("xennet: rx no mbuf\n");
 			break;
 		}
@@ -687,14 +688,12 @@
 
 		/* Do all the remapping work, and M->P updates, in one
 		 * big hypercall. */
-		if ((mcl - rx_mcl) != 0) {
-			mcl->op = __HYPERVISOR_mmu_update;
-			mcl->args[0] = (unsigned long)rx_mmu;
-			mcl->args[1] = mmu - rx_mmu;
-			mcl->args[2] = 0;
-			mcl++;
-			(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-		}
+		mcl->op = __HYPERVISOR_mmu_update;
+		mcl->args[0] = (unsigned long)rx_mmu;
+		mcl->args[1] = mmu - rx_mmu;
+		mcl->args[2] = 0;
+		mcl++;
+		(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
 		if (0)
 		printf("page mapped at va %08lx -> %08x/%08lx\n",
 		    sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va,
@@ -711,11 +710,23 @@
 		    (void *)(PTE_BASE[x86_btop
 			(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME)));
 
+		pktp = (void *)(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va +
+		    (rx->addr & PAGE_MASK));
+		if ((ifp->if_flags & IFF_PROMISC) == 0) {
+			struct ether_header *eh = pktp;
+			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
+			    memcmp(LLADDR(ifp->if_sadl), eh->ether_dhost,
+			    ETHER_ADDR_LEN) != 0) {
+				m_freem(m);
+				xennet_rx_push_buffer(sc, rx->id);
+				continue; /* packet not for us */
+			}
+		}
 		m->m_pkthdr.rcvif = ifp;
-		if (sc->sc_rx->req_prod != sc->sc_rx->resp_prod) {
+		if (__predict_true(
+		    sc->sc_rx->req_prod != sc->sc_rx->resp_prod)) {
 			m->m_len = m->m_pkthdr.len = rx->status;
-			MEXTADD(m, (void *)(sc->sc_rx_bufa[rx->id].xb_rx.
-			    xbrx_va + (rx->addr & PAGE_MASK)), rx->status,
+			MEXTADD(m, pktp, rx->status,
 			    M_DEVBUF, xennet_rx_mbuf_free,
 			    &sc->sc_rx_bufa[rx->id]);
 		} else {
@@ -726,14 +737,13 @@
 			 */
 			m->m_len = MHLEN;
 			m->m_pkthdr.len = 0;
-			m_copyback(m, 0, rx->status, 
-			    (caddr_t)(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va +
-			    (rx->addr & PAGE_MASK)));
+			m_copyback(m, 0, rx->status, pktp);
 			xennet_rx_push_buffer(sc, rx->id);
 			if (m->m_pkthdr.len < rx->status) {
+				/* out of memory, just drop packets */
 				ifp->if_ierrors++;
 				m_freem(m);
-				break;
+				continue;
 			}
 		}
 
@@ -1009,22 +1019,31 @@
 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, caddr_t));
 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
 
-			m_freem(m);
-			m = new_m;
-			if ((m->m_flags & M_EXT) != 0) {
-				pa = m->m_ext.ext_paddr;
-				KASSERT(m->m_data == m->m_ext.ext_buf);
+			if ((new_m->m_flags & M_EXT) != 0) {
+				pa = new_m->m_ext.ext_paddr;
+				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
 				KASSERT(pa != M_PADDR_INVALID);
 			} else {
-				pa = m->m_paddr;
+				pa = new_m->m_paddr;
 				KASSERT(pa != M_PADDR_INVALID);
-				KASSERT(m->m_data == M_BUFADDR(m));
-				pa += M_BUFOFFSET(m);
+				KASSERT(new_m->m_data == M_BUFADDR(new_m));
+				pa += M_BUFOFFSET(new_m);
+			}
+			if (((pa ^ (pa + new_m->m_pkthdr.len)) & PG_FRAME) != 0) {
+				printf("pa %p len %d m_flags 0x%x\n",
+				    (void *)pa, new_m->m_pkthdr.len, new_m->m_flags);
+				panic("xennet_start1: buffer crosses page");
 			}
+			m_freem(m);
+			m = new_m;
 		} else
 			IFQ_DEQUEUE(&ifp->if_snd, m);
 
-		KASSERT(((pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) == 0);
+		if (((pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) != 0) {
+			printf("pa %p len %d m_flags 0x%x\n",
+			    (void *)pa, m->m_pkthdr.len, m->m_flags);
+			panic("xennet_start2: buffer crosses page");
+		}
 
 		bufid = get_bufarray_entry(sc->sc_tx_bufa);
 		KASSERT(bufid < NETIF_TX_RING_SIZE);

--Dxnq1zWXvFF0Q93v--