Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: NetBSD/xen network problems (need help)



On Tue, Jan 24, 2006 at 04:38:48PM +0200, Mike M. Volokhov wrote:
> 
> Well, behaviour has been changed. I've rebuilt all kernels and
> restarted the system. After that all worked fine for some time, and
> after a hour or so the whole system (including dom0) has been frozen
> again. Network stats:
> 
> Name     Address              Ipkts Ierrs    Opkts Oerrs Colls
> bge0     00:30:48:84:cf:98    31867  3714    24943     0     0
> bge1     00:30:48:84:cf:99    23117   989    28967     0     0
> lo0                             206     0      206     0     0
> bridge0                       56841     0    58398  1312     0
> bridge1                       52077     0    52074     0     0
> xvif1.0  aa:00:00:17:e9:7f      532    28     1444     0     0
> xvif2.0  aa:00:00:51:08:e4    24353     0    29746     0     0
> xvif2.1  aa:00:00:51:08:e5    28960     0    22879     0     0
> xvif3.0  aa:00:00:08:e9:d2       90     0     1055     0     0
> xvif4.0  aa:00:00:49:06:01        4     0      954     0     0

Hum, nothing looks wrong here. Still no messages in the consoles ?

> 
> 
> Heh, and with this patch I've another panic for dom2 (that one with two
> xennet interfaces):
> 
> panic: kernel diagnostic assertion "((pa ^ (pa + m->m_pkthdr.len)) & 
> PG_FRAME) == 0" failed: file "../../../../arch/xen/xen/if_xennet.c", line 1036

I can't see how my patch could be related to this, nor how this can
happen. This looks like memory corruption, or m_pkthdr.len being way
too large. If this happens again we'll have to put more printfs to see what
really happens.


On Tue, Jan 24, 2006 at 05:00:30PM +0200, Mike M. Volokhov wrote:
> 
> And yet another obscure with this patch. Right now I've lost connection
> with all my single-attached domains (i.e. with only one xennet, dom[134]),
> and dom0 showing the output errors gradually increased on bridge0.

OK, this I found why. when dropping packets with the wrong ether address
in if_xennet.c, the receive buffer was not recycled. After some time,
there were no RX buffer at xennet, and the transmit on the xvif stalls.

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: if_xennet.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/if_xennet.c,v
retrieving revision 1.13.2.19
diff -u -r1.13.2.19 if_xennet.c
--- if_xennet.c 22 Jan 2006 10:41:59 -0000      1.13.2.19
+++ if_xennet.c 24 Jan 2006 19:29:10 -0000
@@ -638,6 +638,7 @@
        mmu_update_t *mmu = rx_mmu;
        multicall_entry_t *mcl = rx_mcl;
        struct mbuf *m;
+       void *pktp;
 
        network_tx_buf_gc(sc);
        ifp->if_flags &= ~IFF_OACTIVE;
@@ -660,7 +661,7 @@
                /* XXXcl check rx->status for error */
 
                 MGETHDR(m, M_DONTWAIT, MT_DATA);
-                if (m == NULL) {
+                if (__predict_false(m == NULL)) {
                        printf("xennet: rx no mbuf\n");
                        break;
                }
@@ -687,14 +688,12 @@
 
                /* Do all the remapping work, and M->P updates, in one
                 * big hypercall. */
-               if ((mcl - rx_mcl) != 0) {
-                       mcl->op = __HYPERVISOR_mmu_update;
-                       mcl->args[0] = (unsigned long)rx_mmu;
-                       mcl->args[1] = mmu - rx_mmu;
-                       mcl->args[2] = 0;
-                       mcl++;
-                       (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-               }
+               mcl->op = __HYPERVISOR_mmu_update;
+               mcl->args[0] = (unsigned long)rx_mmu;
+               mcl->args[1] = mmu - rx_mmu;
+               mcl->args[2] = 0;
+               mcl++;
+               (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
                if (0)
                printf("page mapped at va %08lx -> %08x/%08lx\n",
                    sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va,
@@ -711,11 +710,23 @@
                    (void *)(PTE_BASE[x86_btop
                        (sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME)));
 
+               pktp = (void *)(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va +
+                   (rx->addr & PAGE_MASK));
+               if ((ifp->if_flags & IFF_PROMISC) == 0) {
+                       struct ether_header *eh = pktp;
+                       if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
+                           memcmp(LLADDR(ifp->if_sadl), eh->ether_dhost,
+                           ETHER_ADDR_LEN) != 0) {
+                               m_freem(m);
+                               xennet_rx_push_buffer(sc, rx->id);
+                               continue; /* packet not for us */
+                       }
+               }
                m->m_pkthdr.rcvif = ifp;
-               if (sc->sc_rx->req_prod != sc->sc_rx->resp_prod) {
+               if (__predict_true(
+                   sc->sc_rx->req_prod != sc->sc_rx->resp_prod)) {
                        m->m_len = m->m_pkthdr.len = rx->status;
-                       MEXTADD(m, (void *)(sc->sc_rx_bufa[rx->id].xb_rx.
-                           xbrx_va + (rx->addr & PAGE_MASK)), rx->status,
+                       MEXTADD(m, pktp, rx->status,
                            M_DEVBUF, xennet_rx_mbuf_free,
                            &sc->sc_rx_bufa[rx->id]);
                } else {
@@ -726,14 +737,13 @@
                         */
                        m->m_len = MHLEN;
                        m->m_pkthdr.len = 0;
-                       m_copyback(m, 0, rx->status, 
-                           (caddr_t)(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va +
-                           (rx->addr & PAGE_MASK)));
+                       m_copyback(m, 0, rx->status, pktp);
                        xennet_rx_push_buffer(sc, rx->id);
                        if (m->m_pkthdr.len < rx->status) {
+                               /* out of memory, just drop packets */
                                ifp->if_ierrors++;
                                m_freem(m);
-                               break;
+                               continue;
                        }
                }
 
Index: xennetback.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xennetback.c,v
retrieving revision 1.4.2.7
diff -u -r1.4.2.7 xennetback.c
--- xennetback.c        20 Jan 2006 21:14:47 -0000      1.4.2.7
+++ xennetback.c        24 Jan 2006 19:29:10 -0000
@@ -496,8 +496,8 @@
                        do_event = 1;
 
                XENPRINTF(("%s pkg size %d\n", xneti->xni_if.if_xname, 
txreq->size));
-               if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
-                   (IFF_UP | IFF_RUNNING)) {
+               if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
+                   (IFF_UP | IFF_RUNNING))) {
                        /* interface not up, drop */
                        txresp->id = txreq->id;
                        txresp->status = NETIF_RSP_DROPPED;
@@ -506,8 +506,8 @@
                /*
                 * Do some sanity checks, and map the packet's page.
                 */
-               if (txreq->size < ETHER_HDR_LEN ||
-                  txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
+               if (__predict_false(txreq->size < ETHER_HDR_LEN ||
+                  txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN))) {
                        printf("%s: packet size %d too big\n",
                            ifp->if_xname, txreq->size);
                        txresp->id = txreq->id;
@@ -516,7 +516,8 @@
                        continue;
                }
                /* don't cross page boundaries */
-               if ((txreq->addr & PAGE_MASK) + txreq->size > PAGE_SIZE) {
+               if (__predict_false(
+                   (txreq->addr & PAGE_MASK) + txreq->size > PAGE_SIZE)) {
                        printf("%s: packet cross page boundary\n",
                            ifp->if_xname);
                        txresp->id = txreq->id;
@@ -526,7 +527,8 @@
                }
 
                ma = txreq->addr  & ~PAGE_MASK;
-               if (xen_shm_map(&ma, 1, xneti->domid, &pkt, 0) != 0) {
+               if (__predict_false(
+                   xen_shm_map(&ma, 1, xneti->domid, &pkt, 0) != 0)) {
                        printf("%s: can't map packet page\n", ifp->if_xname);
                        txresp->id = txreq->id;
                        txresp->status = NETIF_RSP_ERROR;
@@ -534,6 +536,18 @@
                        continue;
                }
                pkt |= (txreq->addr & PAGE_MASK);
+               if ((ifp->if_flags & IFF_PROMISC) == 0) {
+                       struct ether_header *eh = (void *)pkt;
+                       if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
+                           memcmp(LLADDR(ifp->if_sadl), eh->ether_dhost,
+                           ETHER_ADDR_LEN) != 0) {
+                               /* packet not for us */
+                               xen_shm_unmap(pkt, &ma, 1, xneti->domid);
+                               txresp->id = txreq->id;
+                               txresp->status = NETIF_RSP_OKAY;
+                               continue;
+                       }
+               }
                /* get a mbuf for this packet */
                MGETHDR(m, M_DONTWAIT, MT_DATA);
                if (m != NULL) {


Home | Main Index | Thread Index | Old Index