Subject: kern/9571: fxp0 hangs across apm suspend on sony vaio z505he
To: None <gnats-bugs@gnats.netbsd.org>
From: John Hawkinson <jhawk@mit.edu>
List: netbsd-bugs
Date: 03/08/2000 01:45:42
>Number:         9571
>Category:       kern
>Synopsis:       fxp0 hangs across apm suspend on sony vaio z505he
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people (Kernel Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Mar  8 01:45:00 2000
>Last-Modified:
>Originator:     John Hawkinson
>Organization:
	MIT
>Release:        NetBSD 1.4.1
>Environment:
	
System: NetBSD zorkmid.mit.edu 1.4.1 NetBSD 1.4.1 (ZORKMID) #62: Wed Mar 8 03:59:10 EST 2000 jhawk@zorkmid.mit.edu:/usr/src/sys/arch/i386/compile/ZORKMID i386


>Description:
	This problem is also present in -current, however the fix
here is against 1.4.1.

The fxp on-board ethernet on the Sony VAIO Z505-series laptops
(specifically my Z505HE) goes out to lunch when the laptop
is APM suspended and then APM resumed. The machine then ends
up hanging hard waiting for a DMA transfer (see separate PR
to be submitted shortly).

This is quite similar to the problem seen in kern/9370 (card goes
out to lunch after win98 warm boot) and its dupe kern/9548. I spent
quite a while with the Intel data sheets on this one, whereas I
didn't need to do that for fixing kern/9370; on the other hand,
probably if I had just dumped the PCI registers initially
I wouldn't have needed to do that.

The problem is unrelated to ACPI, but basically critical parts of
PCI configuration space get blown away, and it is necessary to
restore them before the card functions again. These are basically
the same registers that need to be restored in kern/9370 across the
ACPI wakeup. Perhaps the fix in kern/9370 might be implemented
differently if this is applied.

Do note that this calls fxp_pci_confreg_restore() to recover from
this state inside fxp_init(), which is called from fxp_watchdog. This
means it may take ~20 seconds for the card to come back. A better
fix would be to use an APM hook, which I understand exists in -current
but not in 1.4.x.

You'll note that the registers restored here are a little different
from those restored in kern/9370; in this case I restore the ones
that were lost. In kern/9370, I restored the ones that Linux
restored, but only if the card needed them restored to function.
I think the choice here is a bit cleaner.

>How-To-Repeat:
	Buy new SONY laptop, fire up the ethernet interface,
	suspend the laptop, resume it, and watch the ethernet
	not come back, and potentially worse, the laptop hang.
>Fix:
	

*** sys/dev/pci/if_fxpvar.h	2000/03/07 03:18:08	1.1
--- sys/dev/pci/if_fxpvar.h	2000/03/08 09:14:18	1.2
***************
*** 135,140 ****
--- 135,143 ----
  struct fxp_softc {
  	struct device sc_dev;		/* generic device structures */
  	void *sc_ih;			/* interrupt handler cookie */
+ 	pci_chipset_tag_t sc_pc;
+ 	pcitag_t sc_tag;
+ 	pcireg_t sc_regs[0x20>>2];      /* saved PCI config registers */
  	bus_space_tag_t sc_st;		/* bus space tag */
  	bus_space_handle_t sc_sh;	/* bus space handle */
  	bus_dma_tag_t sc_dmat;		/* bus dma tag */

*** sys/dev/pci/if_fxp.c	2000/03/07 06:28:59	1.6
--- sys/dev/pci/if_fxp.c	2000/03/08 09:13:41	1.7
***************
*** 113,125 ****
  
  #include <dev/mii/miivar.h>
  
- #include <dev/pci/if_fxpreg.h>
- #include <dev/pci/if_fxpvar.h>
- 
  #include <dev/pci/pcivar.h>
  #include <dev/pci/pcireg.h>
  #include <dev/pci/pcidevs.h>
  
  /*
   * NOTE!  On the Alpha, we have an alignment constraint.  The
   * card DMAs the packet immediately following the RFA.  However,
--- 113,125 ----
  
  #include <dev/mii/miivar.h>
  
  #include <dev/pci/pcivar.h>
  #include <dev/pci/pcireg.h>
  #include <dev/pci/pcidevs.h>
  
+ #include <dev/pci/if_fxpreg.h>
+ #include <dev/pci/if_fxpvar.h>
+ 
  /*
   * NOTE!  On the Alpha, we have an alignment constraint.  The
   * card DMAs the packet immediately following the RFA.  However,
***************
*** 171,176 ****
--- 171,178 ----
  static int fxp_80c24_mediachange __P((struct ifnet *));
  static void fxp_80c24_mediastatus __P((struct ifnet *, struct ifmediareq *));
  
+ static inline void fxp_pci_confreg_restore __P((struct fxp_softc *));
+ 
  static inline void fxp_scb_wait	__P((struct fxp_softc *));
  static int fxp_intr		__P((void *));
  static void fxp_start		__P((struct ifnet *));
***************
*** 216,222 ****
  	while (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) && --i)
  		DELAY(1);
  	if (i == 0)
! 		printf("%s: WARNING: SCB timed out!\n", sc->sc_dev.dv_xname);
  }
  
  static int fxp_match __P((struct device *, struct cfdata *, void *));
--- 218,260 ----
  	while (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) && --i)
  		DELAY(1);
  	if (i == 0)
! 	  printf("%s: WARNING: SCB timed out!\n", sc->sc_dev.dv_xname);
! }
! 
! /*
!  * Restore PCI configuration registers that may have been clobbered.
!  * This is necessary due to bugs on the Sony VAIO Z505-series on-board
!  * ethernet, after an APM suspend/resume.  Ideally this function would
!  * be called from a power-hook after APM resume, but no such hook
!  * exists at this time, so instead we call it when the driver detects
!  * something awry.
!  */
! 
! static inline void
! fxp_pci_confreg_restore(sc)
!         struct fxp_softc *sc;
! {
!     pcireg_t reg;
! 
!     /*
!      * Check to see if the command register is blank -- if so, then
!      * we'll assume that all the clobberable registers have been
!      * clobbered.
!      */
!     if (((reg = pci_conf_read(sc->sc_pc, sc->sc_tag, PCI_COMMAND_STATUS_REG))
! 	 & 0xffff) == 0) {
! 	     pci_conf_write(sc->sc_pc, sc->sc_tag, PCI_COMMAND_STATUS_REG,
! 	         (reg & 0xffff0000) |
! 		     (sc->sc_regs[PCI_COMMAND_STATUS_REG>>2] & 0xffff));
! 	     pci_conf_write(sc->sc_pc, sc->sc_tag, PCI_BHLC_REG,
! 	         sc->sc_regs[PCI_BHLC_REG>>2]);
! 	     pci_conf_write(sc->sc_pc, sc->sc_tag, PCI_MAPREG_START+0x0,
! 		 sc->sc_regs[(PCI_MAPREG_START+0x0)>>2]);
! 	     pci_conf_write(sc->sc_pc, sc->sc_tag, PCI_MAPREG_START+0x4,
! 		 sc->sc_regs[(PCI_MAPREG_START+0x4)>>2]);
! 	     pci_conf_write(sc->sc_pc, sc->sc_tag, PCI_MAPREG_START+0x8,
! 		 sc->sc_regs[(PCI_MAPREG_START+0x8)>>2]);
!     }
  }
  
  static int fxp_match __P((struct device *, struct cfdata *, void *));
***************
*** 384,389 ****
--- 422,447 ----
  	    PCI_COMMAND_MASTER_ENABLE);
  
  	/*
+ 	 * Under some circumstances (such as APM suspend/resume
+ 	 * cycles), the i82557-family can lose the contents of
+ 	 * critical PCI configuration registers, causing the card to
+ 	 * be non-responsive and useless.  Preserve them here so they
+ 	 * can be later restored.
+ 	 */
+ 	sc->sc_pc = pc;
+ 	sc->sc_tag = pa->pa_tag;
+ 	sc->sc_regs[PCI_COMMAND_STATUS_REG>>2] =
+ 	  pci_conf_read(pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
+ 	sc->sc_regs[PCI_BHLC_REG>>2] =
+ 	  pci_conf_read(pc, pa->pa_tag, PCI_BHLC_REG);
+ 	sc->sc_regs[(PCI_MAPREG_START+0x0)>>2] =
+ 	  pci_conf_read(pc, pa->pa_tag, PCI_MAPREG_START+0x0);
+ 	sc->sc_regs[(PCI_MAPREG_START+0x4)>>2] =
+ 	  pci_conf_read(pc, pa->pa_tag, PCI_MAPREG_START+0x4);
+ 	sc->sc_regs[(PCI_MAPREG_START+0x8)>>2] =
+ 	  pci_conf_read(pc, pa->pa_tag, PCI_MAPREG_START+0x8);
+ 
+ 	/*
  	 * Allocate our interrupt.
  	 */
  	if (pci_intr_map(pc, pa->pa_intrtag, pa->pa_intrpin,
***************
*** 1267,1272 ****
--- 1325,1332 ----
  	struct fxp_cb_ias *cb_ias;
  	struct fxp_cb_tx *txp;
  	int i, s, prm, error;
+ 
+ 	fxp_pci_confreg_restore(sc);
  
  	s = splnet();
  	/*
>Audit-Trail:
>Unformatted: