Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow

To: port-sparc64-maintainer%netbsd.org@localhost,gnats-admin%netbsd.org@localhost,netbsd-bugs%netbsd.org@localhost,he%NetBSD.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
From: Havard Eidnes <he%NetBSD.org@localhost>
Date: Wed, 11 Apr 2012 14:30:04 +0000 (UTC)
The following reply was made to PR port-sparc64/46260; it has been noted by 
GNATS.

From: Havard Eidnes <he%NetBSD.org@localhost>
To: gnats-bugs%NetBSD.org@localhost, jdc%coris.org.uk@localhost
Cc: port-sparc64-maintainer%netbsd.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX
 overflow
Date: Wed, 11 Apr 2012 16:25:57 +0200 (CEST)

 ----Next_Part(Wed_Apr_11_16_25_57_2012_393)--
 Content-Type: Text/Plain; charset=iso-8859-1
 Content-Transfer-Encoding: quoted-printable
 
 > I've taken a look at the OpenBSD driver, and copied their method
 > of detection & reset.  I'm currently testing this, but so far it
 > has not yet triggered.  Diff attached below.
 
 Scratch that diff, here is one which works, but which ends up
 resetting the interface Quite Often, despite the state register
 indicating it's not in overflow mode -- it prints
 
   gem0: rx_watchdog: not in overflow state: 0x10400
 
 Only once in my testing did I see
 
   gem0: rx_watchdog: rd pointer != saved
 
 occur, but it *did* occur.
 
 Regards,
 
 - Håvard
 
 ----Next_Part(Wed_Apr_11_16_25_57_2012_393)--
 Content-Type: Text/Plain; charset=us-ascii
 Content-Transfer-Encoding: 7bit
 Content-Disposition: inline; filename=diff
 
 Index: gem.c
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/ic/gem.c,v
 retrieving revision 1.98
 diff -u -r1.98 gem.c
 --- gem.c      2 Feb 2012 19:43:03 -0000       1.98
 +++ gem.c      11 Apr 2012 14:19:52 -0000
 @@ -89,6 +89,7 @@
  int           gem_ioctl(struct ifnet *, u_long, void *);
  void          gem_tick(void *);
  void          gem_watchdog(struct ifnet *);
 +void          gem_rx_watchdog(void *);
  void          gem_pcs_start(struct gem_softc *sc);
  void          gem_pcs_stop(struct gem_softc *sc, int);
  int           gem_init(struct ifnet *);
 @@ -177,6 +178,7 @@
                ifmedia_delete_instance(&sc->sc_mii.mii_media, IFM_INST_ANY);
  
                callout_destroy(&sc->sc_tick_ch);
 +              callout_destroy(&sc->sc_rx_watchdog);
  
                /*FALLTHROUGH*/
        case GEM_ATT_MII:
 @@ -613,6 +615,8 @@
  #endif
  
        callout_init(&sc->sc_tick_ch, 0);
 +      callout_init(&sc->sc_rx_watchdog, 0);
 +      callout_setfunc(&sc->sc_rx_watchdog, gem_rx_watchdog, sc);
  
        sc->sc_att_stage = GEM_ATT_FINISHED;
  
 @@ -764,6 +768,8 @@
        /* Wait till it finishes */
        if (!gem_bitwait(sc, h, GEM_RX_CONFIG, 1, 0))
                aprint_error_dev(sc->sc_dev, "cannot disable read dma\n");
 +      /* Wait 5ms extra. */
 +      delay(5000);
  
        /* Finally, reset the ERX */
        bus_space_write_4(t, h2, GEM_RESET, GEM_RESET_RX);
 @@ -1824,6 +1830,8 @@
                if (gem_add_rxbuf(sc, i) != 0) {
                        GEM_COUNTER_INCR(sc, sc_ev_rxnobuf);
                        ifp->if_ierrors++;
 +                      aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX no buffer space\n");
                        GEM_INIT_RXDESC(sc, i);
                        bus_dmamap_sync(sc->sc_dmatag, rxs->rxs_dmamap, 0,
                            rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_PREREAD);
 @@ -1965,12 +1973,34 @@
        DPRINTF(sc, ("gem_rint: done sc->rxptr %d, complete %d\n",
                sc->sc_rxptr, bus_space_read_4(t, h, GEM_RX_COMPLETION)));
  
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX length error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX alignment error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX CRC error\n");
 +              ifp->if_ierrors += i;
 +      }
 +      if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL)) != 0) {
 +              aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX code violation\n");
 +              ifp->if_ierrors += i;
 +      }
 +#if 0
        /* Read error counters ... */
        ifp->if_ierrors +=
            bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT) +
            bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR) +
            bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT) +
            bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL);
 +#endif
  
        /* ... then clear the hardware counters. */
        bus_space_write_4(t, h, GEM_MAC_RX_LEN_ERR_CNT, 0);
 @@ -2209,7 +2239,21 @@
                 */
                if (rxstat & GEM_MAC_RX_OVERFLOW) {
                        ifp->if_ierrors++;
 +                      aprint_error_dev(sc->sc_dev,
 +                          "receive error: RX overflow\n");
                        gem_reset_rxdma(sc);
 +                      /*
 +                       * Apparently a silicon bug causes ERI to hang from 
 +                       * time to time.  So if we detect an RX FIFO overflow,
 +                       * we fire off a timer, and check whether we're still
 +                       * making progress by looking at the RX FIFO write
 +                       * and read pointers.
 +                       */
 +                      sc->sc_rx_fifo_wr_ptr =
 +                              bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
 +                      sc->sc_rx_fifo_rd_ptr =
 +                              bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
 +                      callout_schedule(&sc->sc_rx_watchdog, 400);
                } else if (rxstat & ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT))
                        printf("%s: MAC rx fault, status 0x%02x\n",
                            device_xname(sc->sc_dev), rxstat);
 @@ -2236,6 +2280,61 @@
        return (r);
  }
  
 +void
 +gem_rx_watchdog(void *arg)
 +{
 +      struct gem_softc *sc = arg;
 +      struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 +      bus_space_tag_t t = sc->sc_bustag;
 +      bus_space_handle_t h = sc->sc_h1;
 +      u_int32_t rx_fifo_wr_ptr;
 +      u_int32_t rx_fifo_rd_ptr;
 +      u_int32_t state;
 +
 +      if ((ifp->if_flags & IFF_RUNNING) == 0) {
 +              aprint_error_dev(sc->sc_dev, "receiver not running\n");
 +              return;
 +      }
 +
 +      rx_fifo_wr_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
 +      rx_fifo_rd_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
 +      state = bus_space_read_4(t, h, GEM_MAC_MAC_STATE);
 +      if ((state & GEM_MAC_STATE_OVERFLOW) == GEM_MAC_STATE_OVERFLOW &&
 +          ((rx_fifo_wr_ptr == rx_fifo_rd_ptr) ||
 +           ((sc->sc_rx_fifo_wr_ptr == rx_fifo_wr_ptr) &&
 +            (sc->sc_rx_fifo_rd_ptr == rx_fifo_rd_ptr))))
 +      {
 +              /*
 +               * The RX state machine is still in overflow state and
 +               * the RX FIFO write and read pointers seem to be
 +               * stuck.  Whack the chip over the head to get things
 +               * going again.
 +               */
 +              aprint_error_dev(sc->sc_dev,
 +                  "receiver stuck in overflow, resetting\n");
 +              gem_init(ifp);
 +      } else {
 +              if ((state & GEM_MAC_STATE_OVERFLOW) != GEM_MAC_STATE_OVERFLOW) {
 +                      aprint_error_dev(sc->sc_dev,
 +                              "rx_watchdog: not in overflow state: 0x%x\n",
 +                              state);
 +              }
 +              if (rx_fifo_wr_ptr != rx_fifo_rd_ptr) {
 +                      aprint_error_dev(sc->sc_dev,
 +                              "rx_watchdog: wr & rd ptr different\n");
 +              }
 +              if (sc->sc_rx_fifo_wr_ptr != rx_fifo_wr_ptr) {
 +                      aprint_error_dev(sc->sc_dev,
 +                              "rx_watchdog: wr pointer != saved\n");
 +              }
 +              if (sc->sc_rx_fifo_rd_ptr != rx_fifo_rd_ptr) {
 +                      aprint_error_dev(sc->sc_dev,
 +                              "rx_watchdog: rd pointer != saved\n");
 +              }
 +              aprint_error_dev(sc->sc_dev, "resetting anyway\n");
 +              gem_init(ifp);
 +      }
 +}
  
  void
  gem_watchdog(struct ifnet *ifp)
 Index: gemreg.h
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/ic/gemreg.h,v
 retrieving revision 1.14
 diff -u -r1.14 gemreg.h
 --- gemreg.h   15 Sep 2008 19:43:24 -0000      1.14
 +++ gemreg.h   11 Apr 2012 14:19:52 -0000
 @@ -516,6 +516,8 @@
  #define       GEM_MAC_CC_PASS_PAUSE   0x00000004      /* pass pause up */
  #define       GEM_MAC_CC_BITS         "\177\020b\0TXPAUSE\0b\1RXPAUSE\0b\2NOPAUSE\0\0"
  
 +/* GEM_MAC_MAC_STATE register bits */
 +#define GEM_MAC_STATE_OVERFLOW        0x03800000
  
  /* 
   * Bits in GEM_MAC_SLOT_TIME register
 Index: gemvar.h
 ===================================================================
 RCS file: /cvsroot/src/sys/dev/ic/gemvar.h,v
 retrieving revision 1.23
 diff -u -r1.23 gemvar.h
 --- gemvar.h   2 Feb 2012 19:43:03 -0000       1.23
 +++ gemvar.h   11 Apr 2012 14:19:52 -0000
 @@ -130,6 +130,7 @@
        struct ethercom sc_ethercom;    /* ethernet common data */
        struct mii_data sc_mii;         /* MII media control */
        struct callout  sc_tick_ch;     /* tick callout */
 +      struct callout  sc_rx_watchdog; /* RX watchdog callout */
  
        /* The following bus handles are to be provided by the bus front-end */
        bus_space_tag_t sc_bustag;      /* bus tag */
 @@ -223,6 +224,10 @@
        struct evcnt sc_ev_rxhist[9];
  #endif
  
 +      /* For use by the RX watchdog */
 +      u_int32_t       sc_rx_fifo_wr_ptr;
 +      u_int32_t       sc_rx_fifo_rd_ptr;
 +
        enum gem_attach_stage   sc_att_stage;
  };
  
 
 ----Next_Part(Wed_Apr_11_16_25_57_2012_393)----
Prev by Date: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Next by Date: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Previous by Thread: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Next by Thread: Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
Indexes:
Home | Main Index | Thread Index | Old Index