Subject: Re: wd woes
To: Marc Wandschneider <marcwan@microsoft.com>
From: Michael L. VanLoon -- Iowa State University <michaelv@iastate.edu>
List: current-users
Date: 11/27/1993 23:25:55
>	In case anybody is working on this stuff, and is interested, I've
>	got come up with some, um... interesting data points on problems
>	with the wd driver (specifically the hanging disk light problem).

>	Three times now, I have been struck by the hanging disk light
>	problem.
[blah blah blah]
>						Marc 'em.

Have you applied the patch that's been posted here several times?
Please apply this and see if it fixes your problem.

Why hasn't this shown up in current yet?  This patch *needs* to be in
current.  I can testify to its worthiness.



To: jkreska@hpmail2.fwrdc.rtsg.mot.com
cc: current-users@sun-lamp.cs.berkeley.edu
Subject: Re: frequent kernel panicing 
In-reply-to: Your message of Fri, 19 Nov 93 14:58:32 -0600.
             <9311192058.AA00290@fwhnm12.fwrdc.rtsg.mot.com> 
Date: Fri, 19 Nov 93 16:18:58 CST
From: Michael L. VanLoon -- Iowa State University <michaelv>


> > Incidentally, I run with 2 IDE hard drives, both completely NetBSD.  I
> > used to get the occasional hard drive hang that has been plaguing
> > people for centuries here (it seems).  I also compiled in that wd.c
> > patch that someone posted here a few days ago, and it must be the fix,
> > cause I haven't had *any* hard drive problems since.  Has this been
> > incorporated into current?  If not, please do so.
> > 
> > 				--Michael

>Michael,
>	I missed that patch for wd.c.  If you still have it could you send it
>to me.
>
>Thanks
>Jeff

Here it is (this is just the NetBSD-current version; I omitted the
FreeBSD version of this patch):

  ----- Begin Included File -----

Due to "popular" demand I'm posting these patches to NetBSD/FreeBSD
instead of mailing them around the world :-)

As many have found out, there is a problem when using IDE disks on
both FreeBSD/NetBSD. Following are two patches (one for NetBSD-current
and one for FreeBSD) that fix the problem with lost interrupts.
Both fixes are based on a patch posted here some months ago by 
Stefan Behrens?? (sorry, I've lost the original article). But anyway it
works (for me :-).
Basically it does a timeout on lost interrupts, starting the operation
again and logging an error message on the console.

The NetBSD version additionally fixes some of the (never-ending) while loops
(that made it work with two IDE disks !)

The FreeBSD version additionally makes the already present while loop timeouts 
independent of CPU speed, and adds minor numbers for easy access to
dos partitions. (This was actually sent to freebsd-wd@freefall in early
August)

		Enjoy !!

                                           Soeren Schmidt
                                           sos@login.dkuug.dk

NetBSD-current version:
===8<======8<======8<======8<======8<======8<======8<======8<===
*** wd.c.orig	Wed Aug 25 12:52:08 1993
--- wd.c	Wed Nov  3 19:35:35 1993
***************
*** 141,155 ****
  	short dkc_port;
  };
  
- void bad144intern(struct disk *);
- void wddisksort();
- 
  struct	board	wdcontroller[NWDC];
  struct	disk	*wddrives[NWD];		/* table of units */
  struct	buf	wdtab[NWDC];		/* various per-controller info */
  struct	buf	wdutab[NWD];		/* head of queue per drive */
  struct	buf	rwdbuf[NWD];		/* buffers for raw IO */
  long	wdxfer[NWD];			/* count of transfers */
  
  int wdprobe(), wdattach();
  
--- 141,153 ----
  	short dkc_port;
  };
  
  struct	board	wdcontroller[NWDC];
  struct	disk	*wddrives[NWD];		/* table of units */
  struct	buf	wdtab[NWDC];		/* various per-controller info */
  struct	buf	wdutab[NWD];		/* head of queue per drive */
  struct	buf	rwdbuf[NWD];		/* buffers for raw IO */
  long	wdxfer[NWD];			/* count of transfers */
+ int	wdtimeoutstatus[NWD];		/* timeout counters */
  
  int wdprobe(), wdattach();
  
***************
*** 157,168 ****
  	wdprobe, wdattach, "wdc",
  };
  
! void wdustart(struct disk *);
! void wdstart(int);
! int wdcommand(struct disk *, int);
! int wdcontrol(struct buf *);
! int wdsetctlr(dev_t, struct disk *);
! int wdgetctlr(int, struct disk *);
  
  /*
   * Probe for controller.
--- 155,170 ----
  	wdprobe, wdattach, "wdc",
  };
  
! static void wdustart(struct disk *);
! static void wdstart(int);
! static int wdcommand(struct disk *, int);
! static int wdcontrol(struct buf *);
! static int wdsetctlr(dev_t, struct disk *);
! static int wdgetctlr(int, struct disk *);
! static void bad144intern(struct disk *);
! static void wddisksort();
! static int wdreset(int, int, int);
! static int wdtimeout(caddr_t);
  
  /*
   * Probe for controller.
***************
*** 238,244 ****
  	bzero(&wdutab[lunit], sizeof(struct buf));
  	bzero(&rwdbuf[lunit], sizeof(struct buf));
  	wdxfer[lunit] = 0;
! 
  	du->dk_ctrlr = dvp->id_masunit;
  	du->dk_unit = unit;
  	du->dk_lunit = lunit;
--- 240,247 ----
  	bzero(&wdutab[lunit], sizeof(struct buf));
  	bzero(&rwdbuf[lunit], sizeof(struct buf));
  	wdxfer[lunit] = 0;
! 	wdtimeoutstatus[lunit] = 0;
! 	wdtimeout(lunit);
  	du->dk_ctrlr = dvp->id_masunit;
  	du->dk_unit = unit;
  	du->dk_lunit = lunit;
***************
*** 595,602 ****
  	}
      
  	/* if this is a read operation, just go away until it's done.	*/
! 	if (bp->b_flags & B_READ)
  		return;
      
  	/* ready to send data?	*/
  	for (timeout=0; (inb(wdc+wd_altsts) & WDCS_DRQ) == 0; ) {
--- 598,607 ----
  	}
      
  	/* if this is a read operation, just go away until it's done.	*/
! 	if (bp->b_flags & B_READ) {
! 		wdtimeoutstatus[lunit] = 2;
  		return;
+ 	}
      
  	/* ready to send data?	*/
  	for (timeout=0; (inb(wdc+wd_altsts) & WDCS_DRQ) == 0; ) {
***************
*** 617,622 ****
--- 622,628 ----
  		DEV_BSIZE/sizeof(short));
  	du->dk_bc -= DEV_BSIZE;
  	du->dk_bct -= DEV_BSIZE;
+ 	wdtimeoutstatus[lunit] = 2;
  }
  
  /* Interrupt routine for the controller.  Acknowledge the interrupt, check for
***************
*** 629,635 ****
  {
  	register struct	disk *du;
  	register struct buf *bp, *dp;
! 	int status, wdc, ctrlr;
      
  	ctrlr = wdif.if_vec;
  
--- 635,641 ----
  {
  	register struct	disk *du;
  	register struct buf *bp, *dp;
! 	int status, wdc, ctrlr, timeout;
      
  	ctrlr = wdif.if_vec;
  
***************
*** 642,654 ****
  	bp = dp->b_actf;
  	du = wddrives[wdunit(bp->b_dev)];
  	wdc = du->dk_port;
      
  #ifdef	WDDEBUG
  	printf("I%d ", ctrlr);
  #endif
  
! 	while ((status = inb(wdc+wd_status)) & WDCS_BUSY)
! 		;
      
  	/* is it not a transfer, but a control operation? */
  	if (du->dk_state < OPEN) {
--- 648,668 ----
  	bp = dp->b_actf;
  	du = wddrives[wdunit(bp->b_dev)];
  	wdc = du->dk_port;
+ 	wdtimeoutstatus[wdunit(bp->b_dev)] = 0;
      
  #ifdef	WDDEBUG
  	printf("I%d ", ctrlr);
  #endif
  
! 	for (timeout=0; ((status=inb(wdc+wd_status)) & WDCS_BUSY); ) {
! 		DELAY(WDCDELAY);
! 		if (++timeout < WDCNDELAY/20)
! 			continue;
! 		wdstart(ctrlr);
! /* #ifdef WDDEBUG */
! 		printf("wdc%d: timeout in wdintr WDCS_BUSY\n", ctrlr);
! /* #endif */
! 	}
      
  	/* is it not a transfer, but a control operation? */
  	if (du->dk_state < OPEN) {
***************
*** 708,716 ****
  		chk = min(DEV_BSIZE / sizeof(short), du->dk_bc / sizeof(short));
  	
  		/* ready to receive data? */
! 		while ((inb(wdc+wd_status) & WDCS_DRQ) == 0)
! 			;
! 	
  		/* suck in data */
  		insw (wdc+wd_data,
  			(int)bp->b_un.b_addr + du->dk_skip * DEV_BSIZE, chk);
--- 722,738 ----
  		chk = min(DEV_BSIZE / sizeof(short), du->dk_bc / sizeof(short));
  	
  		/* ready to receive data? */
! 		for (timeout=0; (inb(wdc+wd_status) & WDCS_DRQ) == 0; ) {
! 			DELAY(WDCDELAY);
! 			if (++timeout < WDCNDELAY/20)
! 				continue;
! 			wdstart(ctrlr);
! /* #ifdef WDDEBUG */
! 			printf("wdc%d: timeout in wdintr WDCS_DRQ\n", ctrlr);
! /* #endif */
! 			break;
! 		}
! 
  		/* suck in data */
  		insw (wdc+wd_data,
  			(int)bp->b_un.b_addr + du->dk_skip * DEV_BSIZE, chk);
***************
*** 1690,1697 ****
  		dp->b_actl = bp;
  }
  
! wdreset(ctrlr, wdc, err)
! int ctrlr;
  {
  	int stat, timeout;
  
--- 1712,1719 ----
  		dp->b_actl = bp;
  }
  
! static int
! wdreset(int ctrlr, int wdc, int err)
  {
  	int stat, timeout;
  
***************
*** 1714,1717 ****
--- 1736,1770 ----
  	if(timeout>WDCNDELAY_DEBUG)
  		printf("wdc%d: timeout took %dus\n", ctrlr, WDCDELAY * timeout);
  #endif
+ }
+ 
+ 
+ static int
+ wdtimeout(caddr_t arg)
+ {
+ 	int x = splbio();
+ 	register int unit = (int) arg;
+ 
+ 	if (wdtimeoutstatus[unit]) {
+ 		if (--wdtimeoutstatus[unit] == 0) {
+ 			struct disk *du = wddrives[unit];
+ 			int wdc = du->dk_port;
+ /* #ifdef WDDEBUG */
+ 			printf("wd%d: lost interrupt - status %x, error %x\n",
+ 			       unit, inb(wdc+wd_status), inb(wdc+wd_error));
+ /* #endif */
+ 			outb(wdc+wd_ctlr, (WDCTL_RST|WDCTL_IDS));
+ 			DELAY(1000);
+ 			outb(wdc+wd_ctlr, WDCTL_IDS);
+ 			DELAY(1000);
+ 			(void) inb(wdc+wd_error);
+ 			outb(wdc+wd_ctlr, WDCTL_4BIT);
+ 			du->dk_skip = 0;
+ 			du->dk_flags |= DKFL_SINGLE;
+ 			wdstart(du->dk_ctrlr);		/* start controller */
+ 		}
+ 	}
+ 	timeout((timeout_t)wdtimeout, (caddr_t)unit, 50);
+ 	splx(x);
+ 	return (0);
  }
===8<======8<======8<======8<======8<======8<======8<======8<===

  ----- End Included File -----

				--Michael

------------------------------------------------------------------------------
    Michael L. VanLoon  --  michaelv@iastate.edu  --  gg.mlv@isumvs.bitnet
 Iowa State University of Science and Technology -- The way cool place to be!
   Project Vincent Systems Staff, Iowa State University Computation Center
------------------------------------------------------------------------------

------------------------------------------------------------------------------