Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow

To: port-sparc64-maintainer%netbsd.org@localhost,gnats-admin%netbsd.org@localhost,netbsd-bugs%netbsd.org@localhost,he%NetBSD.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
From: Havard Eidnes <he%NetBSD.org@localhost>
Date: Wed, 11 Apr 2012 09:20:04 +0000 (UTC)

The following reply was made to PR port-sparc64/46260; it has been noted by 
GNATS.

From: Havard Eidnes <he%NetBSD.org@localhost>
To: gnats-bugs%NetBSD.org@localhost, jdc%coris.org.uk@localhost
Cc: port-sparc64-maintainer%netbsd.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX
 overflow
Date: Wed, 11 Apr 2012 11:15:53 +0200 (CEST)

 ----Next_Part(Wed_Apr_11_11_15_53_2012_993)--
 Content-Type: Text/Plain; charset=iso-8859-1
 Content-Transfer-Encoding: quoted-printable
 
 Hi,
 
 I've taken a look at the OpenBSD driver, and copied their method
 of detection & reset.  I'm currently testing this, but so far it
 has not yet triggered.  Diff attached below.
 
 - Håvard
 
 ----Next_Part(Wed_Apr_11_11_15_53_2012_993)--
 Content-Type: Text/Plain; charset=us-ascii
 Content-Transfer-Encoding: 7bit
 Content-Disposition: inline; filename=diff
 
 Index: gem.c
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/ic/gem.c,v
 retrieving revision 1.98
 diff -u -r1.98 gem.c
 --- gem.c      2 Feb 2012 19:43:03 -0000       1.98
 +++ gem.c      11 Apr 2012 09:12:46 -0000
 @@ -89,6 +89,7 @@
  int           gem_ioctl(struct ifnet *, u_long, void *);
  void          gem_tick(void *);
  void          gem_watchdog(struct ifnet *);
 +void          gem_rx_watchdog(void *);
  void          gem_pcs_start(struct gem_softc *sc);
  void          gem_pcs_stop(struct gem_softc *sc, int);
  int           gem_init(struct ifnet *);
 @@ -177,6 +178,7 @@
                ifmedia_delete_instance(&sc->sc_mii.mii_media, IFM_INST_ANY);
  
                callout_destroy(&sc->sc_tick_ch);
 +              callout_destroy(&sc->sc_rx_watchdog);
  
                /*FALLTHROUGH*/
        case GEM_ATT_MII:
 @@ -613,6 +615,8 @@
  #endif
  
        callout_init(&sc->sc_tick_ch, 0);
 +      callout_init(&sc->sc_rx_watchdog, 0);
 +      callout_setfunc(&sc->sc_rx_watchdog, gem_rx_watchdog, sc);
  
        sc->sc_att_stage = GEM_ATT_FINISHED;
  
 @@ -1824,6 +1828,8 @@
                if (gem_add_rxbuf(sc, i) != 0) {
                        GEM_COUNTER_INCR(sc, sc_ev_rxnobuf);
                        ifp->if_ierrors++;
 +                      aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX no buffer space\n");
                        GEM_INIT_RXDESC(sc, i);
                        bus_dmamap_sync(sc->sc_dmatag, rxs->rxs_dmamap, 0,
                            rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_PREREAD);
 @@ -1965,12 +1971,34 @@
        DPRINTF(sc, ("gem_rint: done sc->rxptr %d, complete %d\n",
                sc->sc_rxptr, bus_space_read_4(t, h, GEM_RX_COMPLETION)));
  
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX length error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX alignment error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX CRC error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX code violation\n");
 +              ifp->if_ierrors += i;
 +      }
 +#if 0
        /* Read error counters ... */
        ifp->if_ierrors +=
            bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT) +
            bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR) +
            bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT) +
            bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL);
 +#endif
  
        /* ... then clear the hardware counters. */
        bus_space_write_4(t, h, GEM_MAC_RX_LEN_ERR_CNT, 0);
 @@ -2209,7 +2237,21 @@
                 */
                if (rxstat & GEM_MAC_RX_OVERFLOW) {
                        ifp->if_ierrors++;
 +                      aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX overflow\n");
                        gem_reset_rxdma(sc);
 +                      /*
 +                       * Apparently a silicon bug causes ERI to hang from 
 +                       * time to time.  So if we detect an RX FIFO overflow,
 +                       * we fire off a timer, and check whether we're still
 +                       * making progress by looking at the RX FIFO write
 +                       * and read pointers.
 +                       */
 +                      sc->sc_rx_fifo_wr_ptr =
 +                              bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
 +                      sc->sc_rx_fifo_rd_ptr =
 +                              bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
 +                      callout_schedule(&sc->sc_rx_watchdog, 400);
                } else if (rxstat & ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT))
                        printf("%s: MAC rx fault, status 0x%02x\n",
                            device_xname(sc->sc_dev), rxstat);
 @@ -2236,6 +2278,40 @@
        return (r);
  }
  
 +void
 +gem_rx_watchdog(void *arg)
 +{
 +      struct gem_softc *sc = arg;
 +      struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 +      bus_space_tag_t t = sc->sc_bustag;
 +      bus_space_handle_t h = sc->sc_h1;
 +      u_int32_t rx_fifo_wr_ptr;
 +      u_int32_t rx_fifo_rd_ptr;
 +      u_int32_t state;
 +
 +      if ((ifp->if_flags & IFF_RUNNING) == 0)
 +              return;
 +
 +      rx_fifo_wr_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
 +      rx_fifo_rd_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
 +      state = bus_space_read_4(t, h, GEM_MAC_MAC_STATE);
 +      if ((state & GEM_MAC_RX_OVERFLOW) == GEM_MAC_RX_OVERFLOW &&
 +          ((rx_fifo_wr_ptr == rx_fifo_rd_ptr) ||
 +           ((sc->sc_rx_fifo_wr_ptr == rx_fifo_wr_ptr) &&
 +            (sc->sc_rx_fifo_rd_ptr == rx_fifo_rd_ptr))))
 +      {
 +              /*
 +               * The RX state machine is still in overflow state and
 +               * the RX FIFO write and read pointers seem to be
 +               * stuck.  Whack the chip over the head to get things
 +               * going again.
 +               */
 +              aprint_error_dev(sc->sc_dev,
 +                  "receiver stuck in overflow, resetting\n");
 +              gem_init(ifp);
 +      }
 +      
 +}
  
  void
  gem_watchdog(struct ifnet *ifp)
 Index: gemvar.h
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/ic/gemvar.h,v
 retrieving revision 1.23
 diff -u -r1.23 gemvar.h
 --- gemvar.h   2 Feb 2012 19:43:03 -0000       1.23
 +++ gemvar.h   11 Apr 2012 09:12:46 -0000
 @@ -130,6 +130,7 @@
        struct ethercom sc_ethercom;    /* ethernet common data */
        struct mii_data sc_mii;         /* MII media control */
        struct callout  sc_tick_ch;     /* tick callout */
 +      struct callout  sc_rx_watchdog; /* RX watchdog callout */
  
        /* The following bus handles are to be provided by the bus front-end */
        bus_space_tag_t sc_bustag;      /* bus tag */
 @@ -223,6 +224,10 @@
        struct evcnt sc_ev_rxhist[9];
  #endif
  
 +      /* For use by the RX watchdog */
 +      u_int32_t       sc_rx_fifo_wr_ptr;
 +      u_int32_t       sc_rx_fifo_rd_ptr;
 +
        enum gem_attach_stage   sc_att_stage;
  };
  
 
 ----Next_Part(Wed_Apr_11_11_15_53_2012_993)----

Prev by Date: Re: port-i386/45704 (Floppy install fails to load //ffs/ffs.kmod and reboots)
Next by Date: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Previous by Thread: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Next by Thread: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Indexes:

Home | Main Index | Thread Index | Old Index