Subject: Re: sip*: reset in interrupt logic
To: Jun-ichiro itojun Hagino <itojun@iijlab.net>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 08/10/2002 15:54:06
--A9z/3b/E4MkkD+7G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Mon, Aug 05, 2002 at 10:09:42AM +0900, Jun-ichiro itojun Hagino wrote:

 > 	with my Net4501, it hangs up before the above message is printed.
 > 	here's the printf trace - looks to me some interrupt source is not
 > 	cleared (not sure why NOT calling sip_reset() solves the problem).

I finally got a chance to debug this today.  The problem is that we were
ignoring the "reset complete" interrupts, but in doing so mistakenly thought
that the chip encountered a fatal error, and then reset it, which caused
another "reset complete" interrupt, etc.

I'm not sure WHY we get these now (likely something to do with the
interrupt mitigation code I added a while ago), nor do I understand why
they are not seen on the DP83820, but in any case, the following patch
fixes the problem.  I'll check this into the trunk.  Can you please tell
me if a 1.6 branch kernel has the same problem?

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--A9z/3b/E4MkkD+7G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=sip-patch

Index: if_sip.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/pci/if_sip.c,v
retrieving revision 1.61
diff -c -r1.61 if_sip.c
*** if_sip.c	2002/07/11 18:07:56	1.61
--- if_sip.c	2002/08/10 22:52:29
***************
*** 258,263 ****
--- 256,262 ----
  	struct evcnt sc_ev_txdintr;	/* Tx descriptor interrupts */
  	struct evcnt sc_ev_txiintr;	/* Tx idle interrupts */
  	struct evcnt sc_ev_rxintr;	/* Rx interrupts */
+ 	struct evcnt sc_ev_hiberr;	/* HIBERR interrupts */
  #ifdef DP83820
  	struct evcnt sc_ev_rxipsum;	/* IP checksums checked in-bound */
  	struct evcnt sc_ev_rxtcpsum;	/* TCP checksums checked in-bound */
***************
*** 1029,1034 ****
--- 1013,1020 ----
  	    NULL, sc->sc_dev.dv_xname, "txiintr");
  	evcnt_attach_dynamic(&sc->sc_ev_rxintr, EVCNT_TYPE_INTR,
  	    NULL, sc->sc_dev.dv_xname, "rxintr");
+ 	evcnt_attach_dynamic(&sc->sc_ev_hiberr, EVCNT_TYPE_INTR,
+ 	    NULL, sc->sc_dev.dv_xname, "hiberr");
  #ifdef DP83820
  	evcnt_attach_dynamic(&sc->sc_ev_rxipsum, EVCNT_TYPE_MISC,
  	    NULL, sc->sc_dev.dv_xname, "rxipsum");
***************
*** 1573,1587 ****
  #endif /* ! DP83820 */
  
  		if (isr & ISR_HIBERR) {
  #define	PRINTERR(bit, str)						\
! 			if (isr & (bit))				\
! 				printf("%s: %s\n", sc->sc_dev.dv_xname, str)
  			PRINTERR(ISR_DPERR, "parity error");
  			PRINTERR(ISR_SSERR, "system error");
  			PRINTERR(ISR_RMABT, "master abort");
  			PRINTERR(ISR_RTABT, "target abort");
  			PRINTERR(ISR_RXSOVR, "receive status FIFO overrun");
! 			(void) SIP_DECL(init)(ifp);
  #undef PRINTERR
  		}
  	}
--- 1559,1589 ----
  #endif /* ! DP83820 */
  
  		if (isr & ISR_HIBERR) {
+ 			int want_init = 0;
+ 
+ 			SIP_EVCNT_INCR(&sc->sc_ev_hiberr);
+ 
  #define	PRINTERR(bit, str)						\
! 			do {						\
! 				if (isr & (bit)) {			\
! 					printf("%s: %s\n",		\
! 					    sc->sc_dev.dv_xname, str);	\
! 					want_init = 1;			\
! 				}					\
! 			} while (/*CONSTCOND*/0)
! 
  			PRINTERR(ISR_DPERR, "parity error");
  			PRINTERR(ISR_SSERR, "system error");
  			PRINTERR(ISR_RMABT, "master abort");
  			PRINTERR(ISR_RTABT, "target abort");
  			PRINTERR(ISR_RXSOVR, "receive status FIFO overrun");
! 			/*
! 			 * Ignore:
! 			 *	Tx reset complete
! 			 *	Rx reset complete
! 			 */
! 			if (want_init)
! 				(void) SIP_DECL(init)(ifp);
  #undef PRINTERR
  		}
  	}

--A9z/3b/E4MkkD+7G--