Subject: kern/2841: NCR 53c810 driver is slow. Here's a faster one
To: None <gnats-bugs@gnats.netbsd.org>
From: Dave Huang <khym@bga.com>
List: netbsd-bugs
Date: 10/13/1996 16:13:37
>Number:         2841
>Category:       kern
>Synopsis:       NCR 53c810 driver is slow. Here's a faster one
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people (Kernel Bug People)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Sun Oct 13 14:20:05 1996
>Last-Modified:
>Originator:     Dave Huang
>Organization:
Name: Dave Huang     |   Mammal, mammal / their names are called /
INet: khym@bga.com   |   they raise a paw / the bat, the cat /
FurryMUCK: Dahan     |   dolphin and dog / koala bear and hog -- TMBG
Dahan: Hani G Y+C 20 Y++ L+++ W- C++ T++ A+ E+ S++ V++ F- Q+++ P+ B+ PA+ PL++
>Release:        1.2
>Environment:
System: NetBSD apm2-105.realtime.net 1.2 NetBSD 1.2 (SPIFF) #39: Sun Oct 13 12:58:31 CDT 1996 khym@dahan.metonymy.com:/usr/src/sys/arch/i386/compile/SPIFF i386


>Description:
People have been telling me to send-pr this, so here it is :)


I recently got a Quantum Atlas drive, and was disappointed that using
dd to read the first 128 megs of the disk, I could only get about
2.8MB/s out of it. I've also got a Quantum Capella, which is a slower
drive, but I was getting 5.8MB/s out of it, so I figured something was
wrong with the driver... Also, if I used dd to read from both disks
simultaneously, I only got about 2.2MB/s out of each drive, for
4.4MB/s total... much less than the theoretical max of 10MB/s.

Since the FreeBSD ncr driver is supposed to work a lot better, I
downloaded a FreeBSD boot floppy and fixit floppy and tried the dd
test again... got 7.1MB/s out of the Atlas this time (still 5.8MB/s
>How-To-Repeat:

Connect a Quantum Atlas drive to an NCR53c810 card in a P100, and do
something like:

time dd if=/dev/rsd1d of=/dev/null bs=1m count=128
--------------------------------------------------
NetBSD 1.2 driver
134217728 bytes transferred in 47 secs (2855696 bytes/sec)
       46.94 real         0.01 user         0.82 sys

and be disappointed. Then try it again with a new ncr driver and see:

New driver
134217728 bytes transferred in 19 secs (7064090 bytes/sec)
       18.53 real         0.01 user         0.89 sys


>Fix:

--- /usr/src/sys/dev/pci/ncr.c	Tue Jun  4 06:36:20 1996
+++ ncr.c	Sun Oct 13 12:58:03 1996
@@ -51,6 +51,12 @@
 
 #define NCR_GETCC_WITHMSG
 
+#ifdef	FAILSAFE
+#define	SCSI_NCR_DFLT_TAGS (0)
+#define	MAX_LUN		(1)
+#define	CDROM_ASYNC
+#endif	/* FAILSAFE */
+
 /*==========================================================
 **
 **	Configuration and Debugging
@@ -93,9 +99,9 @@
 **    Used only for disk devices that support tags.
 */
 
-#ifndef SCSI_NCR_MAX_TAGS
-#define SCSI_NCR_MAX_TAGS    (4)
-#endif /* SCSI_NCR_MAX_TAGS */
+#ifndef SCSI_NCR_DFLT_TAGS
+#define SCSI_NCR_DFLT_TAGS    (4)
+#endif /* SCSI_NCR_DFLT_TAGS */
 
 /*==========================================================
 **
@@ -120,7 +126,9 @@
 **    one lun, so take 1 as the default.
 */
 
-#define MAX_LUN     (1)
+#ifndef	MAX_LUN
+#define MAX_LUN     (8)
+#endif	/* MAX_LUN */
 
 /*
 **    The maximum number of jobs scheduled for starting.
@@ -129,7 +137,7 @@
 **    The calculation below is actually quite silly ...
 */
 
-#define MAX_START   (MAX_TARGET + 7 * SCSI_NCR_MAX_TAGS)
+#define MAX_START   (MAX_TARGET + 7 * SCSI_NCR_DFLT_TAGS)
 
 /*
 **    The maximum number of segments a transfer is split into.
@@ -174,23 +182,24 @@
 #include <sys/malloc.h>
 #include <sys/buf.h>
 #include <sys/kernel.h>
+#ifdef __NetBSD__
+#define bootverbose	1
+#endif
 #ifndef __NetBSD__
+#include <sys/sysctl.h>
 #include <machine/clock.h>
-#include <machine/cpu.h> /* bootverbose */
-#else
-#define bootverbose	1
 #endif
 #include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #endif /* KERNEL */
 
 
 #ifndef __NetBSD__
-#include <sys/devconf.h>
 #include <pci/pcivar.h>
 #include <pci/pcireg.h>
 #include <pci/ncrreg.h>
-extern PRINT_ADDR();
 #else
 #include <sys/device.h>
 #include <machine/bus.h>
@@ -205,9 +214,6 @@
 
 #include <scsi/scsi_all.h>
 #include <scsi/scsiconf.h>
-#ifndef __NetBSD__
-#include <machine/clock.h>
-#endif /* __NetBSD__ */
 
 #if defined(__NetBSD__) && defined(__alpha__)
 /* XXX XXX NEED REAL DMA MAPPING SUPPORT XXX XXX */
@@ -1198,7 +1204,7 @@
 	ncrcmd	skip		[  8];
 	ncrcmd	skip2		[  3];
 	ncrcmd  idle		[  2];
-	ncrcmd	select		[ 24];
+	ncrcmd	select		[ 22];
 	ncrcmd	prepare		[  4];
 	ncrcmd	loadpos		[ 14];
 	ncrcmd	prepare2	[ 24];
@@ -1263,7 +1269,7 @@
 */
 
 #ifdef KERNEL
-static	void	ncr_alloc_ccb	(ncb_p np, struct scsi_xfer * xp);
+static	void	ncr_alloc_ccb	(ncb_p np, u_long target, u_long lun);
 static	void	ncr_complete	(ncb_p np, ccb_p cp);
 static	int	ncr_delta	(struct timeval * from, struct timeval * to);
 static	void	ncr_exception	(ncb_p np);
@@ -1272,11 +1278,11 @@
 static	ccb_p	ncr_get_ccb	(ncb_p np, u_long flags, u_long t,u_long l);
 static	void	ncr_init	(ncb_p np, char * msg, u_long code);
 #ifdef __NetBSD__
-static	int     ncr_intr        (void *);
-#else	/* !__NetBSD__ */
-static	int	ncr_intr	(ncb_p np);
+static	int	ncr_intr	(void *vnp);
+#else
+static	void	ncr_intr	(void *vnp);
 static  U_INT32 ncr_info	(int unit);
-#endif	/* __NetBSD__ */	
+#endif	/* !__NetBSD__ */	
 static	void	ncr_int_ma	(ncb_p np);
 static	void	ncr_int_sir	(ncb_p np);
 static  void    ncr_int_sto     (ncb_p np);
@@ -1294,8 +1300,8 @@
 static	void	ncr_script_copy_and_bind
 				(struct script * script, ncb_p np);
 static  void    ncr_script_fill (struct script * scr);
-static	int	ncr_scatter	(ncb_p np, struct dsb* phys,u_long vaddr,
-				    u_long datalen);
+static	int	ncr_scatter	(struct dsb* phys, vm_offset_t vaddr,
+				 vm_size_t datalen);
 static	void	ncr_setmaxtags	(tcb_p tp, u_long usrtags);
 static	void	ncr_setsync	(ncb_p np, ccb_p cp, u_char sxfer);
 static	void	ncr_settags     (tcb_p tp, lcb_p lp);
@@ -1310,7 +1316,7 @@
 #ifdef __NetBSD__
 static	int	ncr_probe	(struct device *, void *, void *);
 static	void	ncr_attach	(struct device *, struct device *, void *);
-#else /* !__NetBSD */
+#else /* !__NetBSD__ */
 static  char*	ncr_probe       (pcici_t tag, pcidi_t type);
 static	void	ncr_attach	(pcici_t tag, int unit);
 #endif /* __NetBSD__ */
@@ -1332,7 +1338,7 @@
 	"\n$NetBSD: ncr.c,v 1.35.4.1 1996/06/03 20:32:17 cgd Exp $\n";
 #endif
 
-u_long	ncr_version = NCR_VERSION	* 11
+static const u_long	ncr_version = NCR_VERSION	* 11
 	+ (u_long) sizeof (struct ncb)	*  7
 	+ (u_long) sizeof (struct ccb)	*  5
 	+ (u_long) sizeof (struct lcb)	*  3
@@ -1341,13 +1347,16 @@
 #ifdef KERNEL
 
 #ifndef __NetBSD__
-u_long		nncr=MAX_UNITS;
-ncb_p		ncrp [MAX_UNITS];
+static const int nncr=MAX_UNITS;	/* XXX to be replaced by SYSCTL */
+ncb_p         ncrp [MAX_UNITS];		/* XXX to be replaced by SYSCTL */
 #endif /* !__NetBSD__ */
 
 static int ncr_debug = SCSI_DEBUG_FLAGS;
+#ifndef __NetBSD__
+SYSCTL_INT(_debug, OID_AUTO, ncr_debug, CTLFLAG_RW, &ncr_debug, 0, "");
+#endif /* !__NetBSD__ */
 
-int ncr_cache; /* to be aligned _NOT_ static */
+static int ncr_cache; /* to be aligned _NOT_ static */
 
 /*==========================================================
 **
@@ -1379,7 +1388,7 @@
 
 static u_long ncr_count;
 
-struct	pci_device ncr_device = {
+static struct	pci_device ncr_device = {
 	"ncr",
 	ncr_probe,
 	ncr_attach,
@@ -1391,7 +1400,7 @@
 
 #endif /* !__NetBSD__ */
 
-struct scsi_adapter ncr_switch =
+static struct scsi_adapter ncr_switch =
 {
 	ncr_start,
 #ifndef __NetBSD__
@@ -1407,7 +1416,7 @@
 #endif /* !__NetBSD__ */
 };
 
-struct scsi_device ncr_dev =
+static struct scsi_device ncr_dev =
 {
 	NULL,			/* Use default error handler */
 	NULL,			/* have a queue, served by this */
@@ -1677,8 +1686,10 @@
 	*/
 	SCR_MOVE_TBL ^ SCR_MSG_OUT,
 		offsetof (struct dsb, smsg),
+#ifdef undef /* XXX better fail than try to deal with this ... */
 	SCR_JUMPR ^ IFTRUE (WHEN (SCR_MSG_OUT)),
 		-16,
+#endif
 	SCR_CLR (SCR_ATN),
 		0,
 	SCR_COPY (1),
@@ -2747,7 +2758,7 @@
 	**	- struct ccb
 	**	to understand what's going on.
 	*/
-	SCR_REG_SFBR (ssid, SCR_AND, 0x87),
+	SCR_REG_SFBR (ssid, SCR_AND, 0x8F),
 		0,
 	SCR_TO_REG (ctest0),
 		0,
@@ -3081,7 +3092,7 @@
 	int relocs;
 
 #ifndef __NetBSD__
-	np->script = (struct script*) vm_page_alloc_contig
+	np->script = (struct script*) vm_page_alloc_contig 
 	(round_page(sizeof (struct script)), 0x100000, 0xffffffff, PAGE_SIZE);
 #else  /* !__NetBSD___ */
 	np->script = (struct script *)
@@ -3473,6 +3484,14 @@
 	if (!pci_map_mem (config_id, 0x14, &np->vaddr, &np->paddr))
 		return;
 
+	/*
+	**	Make the controller's registers available.
+	**	Now the INB INW INL OUTB OUTW OUTL macros
+	**	can be used safely.
+	*/
+
+	np->reg = (struct ncr_reg*) np->vaddr;
+
 #ifdef NCR_IOMAPPED
 	/*
 	**	Try to map the controller chip into iospace.
@@ -3488,17 +3507,30 @@
 	**	Do chip dependent initialization.
 	*/
 
+	np->maxwide = 0;
+	np->rv_scntl3 = 0x13;	/* default: 40MHz clock */
+	np->ns_sync = 25;
+	np->ns_async = 50;
+
+	/*
+	**	Get the frequency of the chip's clock.
+	**	Find the right value for scntl3.
+	*/
+
 #ifdef __NetBSD__
 	switch (pa->pa_id) {
 #else /* !__NetBSD__ */
 	switch (pci_conf_read (config_id, PCI_ID_REG)) {
 #endif /* __NetBSD__ */
 	case NCR_825_ID:
-	case NCR_875_ID:
 		np->maxwide = 1;
 		break;
-	default:
-		np->maxwide = 0;
+	case NCR_860_ID:
+		np->rv_scntl3 = 0x35;	/* always assume 80MHz clock for 860 */
+		break;
+	case NCR_875_ID:
+		np->maxwide = 1;
+		ncr_getclock(np);
 		break;
 	}
 
@@ -3517,16 +3549,6 @@
 	np->jump_tcb.l_cmd	= SCR_JUMP;
 	np->jump_tcb.l_paddr	= NCB_SCRIPT_PHYS (np, abort);
 
-#ifndef __NetBSD__
-	/*
-	**	Make the controller's registers available.
-	**	Now the INB INW INL OUTB OUTW OUTL macros
-	**	can be used safely.
-	*/
-
-	np->reg = (struct ncr_reg*) np->vaddr;
-#endif
-
 	/*
 	**  Get SCSI addr of host adapter (set by bios?).
 	*/
@@ -3535,13 +3557,6 @@
 	if (!np->myaddr) np->myaddr = SCSI_NCR_MYADDR;
 
 	/*
-	**	Get the value of the chip's clock.
-	**	Find the right value for scntl3.
-	*/
-
-	ncr_getclock (np);
-
-	/*
 	**	Reset chip.
 	*/
 
@@ -3621,6 +3636,7 @@
 	np->sc_link.openings = 1;
 #else /* !__NetBSD__ */
 	np->sc_link.adapter_unit = unit;
+	np->sc_link.adapter_softc = np;
 	np->sc_link.adapter_targ = np->myaddr;
 	np->sc_link.fordriver	 = 0;
 #endif /* !__NetBSD__ */
@@ -3670,6 +3686,11 @@
 	np->lasttime=0;
 
 	/*
+	**  use SIMPLE TAG messages by default
+	*/
+
+	np->order = M_SIMPLE_TAG;
+	/*
 	**  Done.
 	*/
 
@@ -3686,18 +3707,17 @@
 */
 
 #ifdef __NetBSD__
-int
-ncr_intr(arg)
-        void *arg;
-{               
-        ncb_p np = arg;
+static int
 #else /* !__NetBSD__ */
-int
-ncr_intr(np)
-	ncb_p np;
-{
+static void
 #endif /* __NetBSD__ */
+ncr_intr(vnp)
+	void *vnp;
+{
+#ifdef __NetBSD__
 	int n = 0;
+#endif
+	ncb_p np = vnp;
 	int oldspl = splbio();
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf ("[");
@@ -3710,14 +3730,18 @@
 			ncr_exception (np);
 		} while (INB(nc_istat) & (INTF|SIP|DIP));
 
+#ifdef __NetBSD__
 		n=1;
+#endif
 		np->ticks = 100;
 	};
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf ("]\n");
 
 	splx (oldspl);
+#ifdef __NetBSD__
 	return (n);
+#endif
 }
 
 /*==========================================================
@@ -3732,11 +3756,7 @@
 
 static INT32 ncr_start (struct scsi_xfer * xp)
 {
-#ifdef __NetBSD__
-	ncb_p np  = xp->sc_link->adapter_softc;
-#else /*__NetBSD__*/
-	ncb_p np  = ncrp[xp->sc_link->adapter_unit];
-#endif/*__NetBSD__*/
+	ncb_p np  = (ncb_p) xp->sc_link->adapter_softc;
 
 	struct scsi_generic * cmd = xp->cmd;
 	ccb_p cp;
@@ -3744,11 +3764,9 @@
 	tcb_p tp = &np->target[xp->sc_link->target];
 
 	int	i, oldspl, segments, flags = xp->flags;
-	u_char	ptr, nego, idmsg;
+	u_char	qidx, nego, idmsg, *msgptr;
 	u_long  msglen, msglen2;
 
-
-
 	/*---------------------------------------------
 	**
 	**   Reset SCSI bus
@@ -3799,10 +3817,25 @@
 		};
 	};
 
+	if ((unsigned)xp->datalen > 128*1024*1024) {
+		PRINT_ADDR(xp);
+		printf ("trying to transfer %8x bytes, mem addr = %p\n", 
+			xp->datalen, xp->data);
+		{
+			int i;
+			PRINT_ADDR(xp);
+			printf ("command: %2x (", cmd->opcode);
+			for (i = 0; i<11; i++)
+				printf (" %2x", cmd->bytes[i]);
+			printf (")\n");
+		}
+	}
+
 	if (DEBUG_FLAGS & DEBUG_TINY) {
 		PRINT_ADDR(xp);
-		printf ("CMD=%x F=%x L=%x ", cmd->opcode,
-			(unsigned)xp->flags, (unsigned) xp->datalen);
+		printf ("CMD=%x F=%x A=%x L=%x ", 
+			cmd->opcode, (unsigned)xp->flags, 
+			(unsigned) xp->data, (unsigned) xp->datalen);
 	}
 
 	/*--------------------------------------------
@@ -3890,10 +3923,21 @@
 
 	if (tp->inqdata[7]) {
 		/*
+		**	negotiate wide transfers ?
+		*/
+
+		if (!tp->widedone) {
+			if (tp->inqdata[7] & INQ7_WIDE16) {
+				nego = NS_WIDE;
+			} else
+				tp->widedone=1;
+		};
+
+		/*
 		**	negotiate synchronous transfers?
 		*/
 
-		if (!tp->period) {
+		if (!nego && !tp->period) {
 			if (SCSI_NCR_MAX_SYNC 
 #if defined (CDROM_ASYNC) || defined (GENERIC)
 			    && ((tp->inqdata[0] & 0x1f) != 5)
@@ -3907,17 +3951,6 @@
 				printf ("asynchronous.\n");
 			};
 		};
-
-		/*
-		**	negotiate wide transfers ?
-		*/
-
-		if (!tp->widedone) {
-			if (tp->inqdata[7] & INQ7_WIDE16) {
-				if (!nego) nego = NS_WIDE;
-			} else
-				tp->widedone=1;
-		};
 	};
 
 	/*---------------------------------------------------
@@ -3955,25 +3988,18 @@
 	*/
 
 	idmsg = M_IDENTIFY | xp->sc_link->lun;
-#ifndef NCR_NO_DISCONNECT
-	/*---------------------------------------------------------------------
-	** Some users have problems with this driver.
-	** I assume that the current problems relate to a conflict between
-	** a disconnect and an immediately following reconnect operation.
-	** With this option one can prevent the driver from using disconnects.
-	** Without disconnects the performance will be severely degraded.
-	** But it may help to trace down the core problem.
-	**---------------------------------------------------------------------
-	*/
 	if ((cp!=&np->ccb) && (np->disc))
 		idmsg |= 0x40;
-#endif
 
-	cp -> scsi_smsg [0] = idmsg;
-	msglen=1;
+	msgptr = cp->scsi_smsg;
+	msglen = 0;
+	msgptr[msglen++] = idmsg;
 
 	if (cp->tag) {
+	    char tag;
 
+	    tag = np->order;
+	    if (tag == 0) {
 		/*
 		**	Ordered write ops, unordered read ops.
 		*/
@@ -3981,31 +4007,23 @@
 		case 0x08:  /* READ_SMALL (6) */
 		case 0x28:  /* READ_BIG  (10) */
 		case 0xa8:  /* READ_HUGE (12) */
-			cp -> scsi_smsg [msglen] = M_SIMPLE_TAG;
-			break;
+		    tag = M_SIMPLE_TAG;
+		    break;
 		default:
-			cp -> scsi_smsg [msglen] = M_ORDERED_TAG;
+		    tag = M_ORDERED_TAG;
 		}
-
-		/*
-		**	can be overwritten by ncrcontrol
-		*/
-		switch (np->order) {
-		case M_SIMPLE_TAG:
-		case M_ORDERED_TAG:
-			cp -> scsi_smsg [msglen] = np->order;
-		};
-		msglen++;
-		cp -> scsi_smsg [msglen++] = cp -> tag;
+	    }
+	    msgptr[msglen++] = tag;
+	    msgptr[msglen++] = cp -> tag;
 	}
 
 	switch (nego) {
 	case NS_SYNC:
-		cp -> scsi_smsg [msglen++] = M_EXTENDED;
-		cp -> scsi_smsg [msglen++] = 3;
-		cp -> scsi_smsg [msglen++] = M_X_SYNC_REQ;
-		cp -> scsi_smsg [msglen++] = tp->minsync;
-		cp -> scsi_smsg [msglen++] = tp->maxoffs;
+		msgptr[msglen++] = M_EXTENDED;
+		msgptr[msglen++] = 3;
+		msgptr[msglen++] = M_X_SYNC_REQ;
+		msgptr[msglen++] = tp->minsync;
+		msgptr[msglen++] = tp->maxoffs;
 		if (DEBUG_FLAGS & DEBUG_NEGO) {
 			PRINT_ADDR(cp->xfer);
 			printf ("sync msgout: ");
@@ -4014,10 +4032,10 @@
 		};
 		break;
 	case NS_WIDE:
-		cp -> scsi_smsg [msglen++] = M_EXTENDED;
-		cp -> scsi_smsg [msglen++] = 2;
-		cp -> scsi_smsg [msglen++] = M_X_WIDE_REQ;
-		cp -> scsi_smsg [msglen++] = tp->usrwide;
+		msgptr[msglen++] = M_EXTENDED;
+		msgptr[msglen++] = 2;
+		msgptr[msglen++] = M_X_WIDE_REQ;
+		msgptr[msglen++] = tp->usrwide;
 		if (DEBUG_FLAGS & DEBUG_NEGO) {
 			PRINT_ADDR(cp->xfer);
 			printf ("wide msgout: ");
@@ -4044,7 +4062,7 @@
 	**----------------------------------------------------
 	*/
 
-	segments = ncr_scatter (np, &cp->phys, (vm_offset_t) xp->data,
+	segments = ncr_scatter (&cp->phys, (vm_offset_t) xp->data,
 					(vm_size_t) xp->datalen);
 
 	if (segments < 0) {
@@ -4099,11 +4117,9 @@
 	/*
 	**	message
 	*/
-/*	cp->phys.smsg.addr		= cp->p_scsi_smsg;*/
 	cp->phys.smsg.addr		= CCB_PHYS (cp, scsi_smsg);
 	cp->phys.smsg.size		= msglen;
 
-/*	cp->phys.smsg2.addr		= cp->p_scsi_smsg2;*/
 	cp->phys.smsg2.addr		= CCB_PHYS (cp, scsi_smsg2);
 	cp->phys.smsg2.size		= msglen2;
 	/*
@@ -4114,7 +4130,6 @@
 	/*
 	**	sense command
 	*/
-/*	cp->phys.scmd.addr		= cp->p_sensecmd;*/
 	cp->phys.scmd.addr		= CCB_PHYS (cp, sensecmd);
 	cp->phys.scmd.size		= 6;
 	/*
@@ -4162,11 +4177,11 @@
 	**	insert into start queue.
 	*/
 
-	ptr = np->squeueput + 1;
-	if (ptr >= MAX_START) ptr=0;
-	np->squeue [ptr	  ] = NCB_SCRIPT_PHYS (np, idle);
+	qidx = np->squeueput + 1;
+	if (qidx >= MAX_START) qidx=0;
+	np->squeue [qidx	 ] = NCB_SCRIPT_PHYS (np, idle);
 	np->squeue [np->squeueput] = CCB_PHYS (cp, phys);
-	np->squeueput = ptr;
+	np->squeueput = qidx;
 
 	if(DEBUG_FLAGS & DEBUG_QUEUE)
 		printf ("%s: queuepos=%d tryoffset=%d.\n", ncr_name (np),
@@ -4296,7 +4311,7 @@
 	ncb_profile (np, cp);
 
 	if (DEBUG_FLAGS & DEBUG_TINY)
-		printf ("CCB=%lx STAT=%x/%x\n", (unsigned long)cp & 0xfff,
+		printf ("CCB=%x STAT=%x/%x\n", (unsigned)cp & 0xfff,
 			cp->host_status,cp->scsi_status);
 
 	xp = cp->xfer;
@@ -4372,7 +4387,7 @@
 		/*
 		**	Try to assign a ccb to this nexus
 		*/
-		ncr_alloc_ccb (np, xp);
+		ncr_alloc_ccb (np, xp->sc_link->target, xp->sc_link->lun);
 
 		/*
 		**	On inquire cmd (0x12) save some data.
@@ -4441,8 +4456,13 @@
 		*/
 		xp->error = XS_BUSY;
 
-	} else if ((cp->host_status == HS_SEL_TIMEOUT)
-		|| (cp->host_status == HS_TIMEOUT)) {
+	} else if (cp->host_status == HS_SEL_TIMEOUT) {
+
+		/*
+		**   Device failed selection
+		*/
+		xp->error = XS_SELTIMEOUT;
+	} else if(cp->host_status == HS_TIMEOUT) {
 
 		/*
 		**   No response
@@ -4626,7 +4646,7 @@
 	OUTB (nc_ctest4, 0x08	);	/*  enable master parity checking    */
 	OUTB (nc_stest2, EXT    );	/*  Extended Sreq/Sack filtering     */
 	OUTB (nc_stest3, TE     );	/*  TolerANT enable		     */
-	OUTB (nc_stime0, 0xfb	);	/*  HTH = 1.6sec  STO = 0.1 sec.     */
+	OUTB (nc_stime0, 0x0b	);	/*  HTH = disabled, STO = 0.1 sec.   */
 
 	/*
 	**	Reinitialize usrsync.
@@ -4755,7 +4775,7 @@
 {
 	struct scsi_xfer *xp;
 	tcb_p tp;
-	u_char target = INB (nc_ctest0)&7;
+	u_char target = INB (nc_ctest0) & 0x0f;
 
 	assert (cp);
 	if (!cp) return;
@@ -4763,7 +4783,7 @@
 	xp = cp->xfer;
 	assert (xp);
 	if (!xp) return;
-	assert (target == (xp->sc_link->target & 7));
+	assert (target == (xp->sc_link->target & 0x0f));
 
 	tp = &np->target[target];
 	tp->period= sxfer&0xf ? ((sxfer>>5)+4) * np->ns_sync : 0xffff;
@@ -4812,7 +4832,7 @@
 static void ncr_setwide (ncb_p np, ccb_p cp, u_char wide)
 {
 	struct scsi_xfer *xp;
-	u_short target = INB (nc_ctest0)&7;
+	u_short target = INB (nc_ctest0) & 0x0f;
 	tcb_p tp;
 	u_char	scntl3 = np->rv_scntl3 | (wide ? EWS : 0);
 
@@ -4822,7 +4842,7 @@
 	xp = cp->xfer;
 	assert (xp);
 	if (!xp) return;
-	assert (target == (xp->sc_link->target & 7));
+	assert (target == (xp->sc_link->target & 0x0f));
 
 	tp = &np->target[target];
 	tp->widedone  =  wide+1;
@@ -4885,8 +4905,10 @@
 	**	only disk devices
 	**	only if enabled by user ..
 	*/
-	if ((  tp->inqdata[7] & INQ7_QUEUE) && ((tp->inqdata[0] & 0x1f)==0x00)
-		&& tp->usrtags) {
+	if ((tp->inqdata[7] & INQ7_QUEUE) == 0) {
+	    tp->usrtags=0;
+	}
+	if (tp->usrtags && ((tp->inqdata[0] & 0x1f) == 0x00)) {
 		reqtags = tp->usrtags;
 		if (lp->actlink <= 1)
 			lp->usetags=reqtags;
@@ -5035,6 +5057,7 @@
 			OUTB (nc_istat, SIGP);
 		};
 
+#ifdef undef
 		if (np->latetime>4) {
 			/*
 			**	Although we tried to wake it up,
@@ -5052,7 +5075,7 @@
 			ncr_init (np, "ncr dead ?", HS_TIMEOUT);
 			np->heartbeat = thistime;
 		};
-
+#endif
 		/*----------------------------------------------------
 		**
 		**	handle ccb timeouts
@@ -5142,13 +5165,14 @@
 	**	interrupt on the fly ?
 	*/
 	while ((istat = INB (nc_istat)) & INTF) {
-		if (DEBUG_FLAGS & DEBUG_TINY) printf ("F");
+		if (DEBUG_FLAGS & DEBUG_TINY) printf ("F ");
 		OUTB (nc_istat, INTF);
 		np->profile.num_fly++;
 		ncr_wakeup (np, 0);
 	};
-
-	if (!(istat & (SIP|DIP))) return;
+	if (!(istat & (SIP|DIP))) {
+		return;
+	}
 
 	/*
 	**	Steinbach's Guideline for Systems Programming:
@@ -5580,9 +5604,9 @@
 	    return;
 	}
 	if (cp != np->header.cp) {
-	    printf ("%s: SCSI phase error fixup: CCB address mismatch (0x%08lx != 0x%08lx)\n", 
-		    ncr_name (np), (u_long) cp, (u_long) np->header.cp);
-	    return;
+	    printf ("%s: SCSI phase error fixup: CCB address mismatch (0x%08lx != 0x%08lx) np.ccb = %p\n", 
+		    ncr_name (np), (u_long) cp, (u_long) np->header.cp, &np->ccb);
+/*	    return;*/
 	}
 
 	/*
@@ -5726,7 +5750,7 @@
 	u_char num = INB (nc_dsps);
 	ccb_p	cp=0;
 	u_long	dsa;
-	u_char	target = INB (nc_ctest0) & 7;
+	u_char	target = INB (nc_ctest0) & 0x0f;
 	tcb_p	tp     = &np->target[target];
 	int     i;
 	if (DEBUG_FLAGS & DEBUG_TINY) printf ("I#%d", num);
@@ -5978,9 +6002,18 @@
 		/*
 		**	Check against controller limits.
 		*/
-		fak = (4ul * per - 1) / np->ns_sync - 3;
-		if (ofs && (fak>7))   {chg = 1; ofs = 0;}
-		if (!ofs) fak=7;
+		if (ofs != 0) {
+			fak = (4ul * per - 1) / np->ns_sync - 3;
+			if (fak>7) {
+				chg = 1;
+				ofs = 0;
+			}
+		}
+		if (ofs == 0) {
+			fak = 7;
+			per = 0;
+			tp->minsync = 0;
+		}
 
 		if (DEBUG_FLAGS & DEBUG_NEGO) {
 			PRINT_ADDR(cp->xfer);
@@ -6035,7 +6068,7 @@
 		if (DEBUG_FLAGS & DEBUG_NEGO) {
 			PRINT_ADDR(cp->xfer);
 			printf ("sync msgout: ");
-			(void) ncr_show_msg (np->msgin);
+			(void) ncr_show_msg (np->msgout);
 			printf (".\n");
 		}
 
@@ -6133,7 +6166,7 @@
 		if (DEBUG_FLAGS & DEBUG_NEGO) {
 			PRINT_ADDR(cp->xfer);
 			printf ("wide msgout: ");
-			(void) ncr_show_msg (np->msgin);
+			(void) ncr_show_msg (np->msgout);
 			printf (".\n");
 		}
 		break;
@@ -6377,20 +6410,13 @@
 **==========================================================
 */
 
-static	void ncr_alloc_ccb (ncb_p np, struct scsi_xfer * xp)
+static	void ncr_alloc_ccb (ncb_p np, u_long target, u_long lun)
 {
 	tcb_p tp;
 	lcb_p lp;
 	ccb_p cp;
 
-	u_long	target;
-	u_long	lun;
-
 	assert (np != NULL);
-	assert (xp != NULL);
-
-	target = xp->sc_link->target;
-	lun    = xp->sc_link->lun;
 
 	if (target>=MAX_TARGET) return;
 	if (lun   >=MAX_LUN   ) return;
@@ -6424,7 +6450,7 @@
 		tp->jump_lcb.l_paddr = NCB_SCRIPT_PHYS (np, abort);
 		np->jump_tcb.l_paddr = vtophys (&tp->jump_tcb);
 
-		ncr_setmaxtags (tp, SCSI_NCR_MAX_TAGS);
+		ncr_setmaxtags (tp, SCSI_NCR_DFLT_TAGS);
 	}
 
 	/*
@@ -6481,7 +6507,6 @@
 		return;
 
 	if (DEBUG_FLAGS & DEBUG_ALLOC) {
-		PRINT_ADDR(xp);
 		printf ("new ccb @%p.\n", cp);
 	}
 
@@ -6602,7 +6627,7 @@
 */
 
 static	int	ncr_scatter
-	(ncb_p np, struct dsb* phys, vm_offset_t vaddr, vm_size_t datalen)
+	(struct dsb* phys, vm_offset_t vaddr, vm_size_t datalen)
 {
 	u_long	paddr, pnext;
 
@@ -6961,34 +6986,94 @@
 #	define NCR_CLOCK 40
 #endif /* NCR_CLOCK */
 
+/*
+ *	calculate NCR SCSI clock frequency (in KHz)
+ */
+static unsigned
+ncrgetfreq (ncb_p np, int gen)
+{
+	int ms = 0;
+	/*
+	 * Measure GEN timer delay in order 
+	 * to calculate SCSI clock frequency
+	 *
+	 * This code will never execute too
+	 * many loop iterations (if DELAY is 
+	 * reasonably correct). It could get
+	 * too low a delay (too high a freq.)
+	 * if the CPU is slow executing the 
+	 * loop for some reason (an NMI, for
+	 * example). For this reason we will
+	 * if multiple measurements are to be 
+	 * performed trust the higher delay 
+	 * (lower frequency returned).
+	 */
+	OUTB (nc_stest1, 0);	/* make sure clock doubler is OFF	    */
+	OUTW (nc_sien , 0);	/* mask all scsi interrupts		    */
+	(void) INW (nc_sist);	/* clear pending scsi interrupt		    */
+	OUTB (nc_dien , 0);	/* mask all dma interrupts		    */
+	(void) INW (nc_sist);	/* another one, just to be sure :)	    */
+	OUTB (nc_scntl3, 4);	/* set pre-scaler to divide by 3	    */
+	OUTB (nc_stime1, 0);	/* disable general purpose timer	    */
+	OUTB (nc_stime1, gen);	/* set to nominal delay of (1<<gen) * 125us */
+	while (!(INW(nc_sist) & GEN) && ms++ < 1000)
+		DELAY(1000);	/* count ms				    */
+	OUTB (nc_stime1, 0);	/* disable general purpose timer	    */
+	OUTB (nc_scntl3, 0);
+	/*
+	 * Set prescaler to divide by whatever "0" means.
+	 * "0" ought to choose divide by 2, but appears
+	 * to set divide by 3.5 mode in my 53c810 ...
+	 */
+	OUTB (nc_scntl3, 0);
+
+	if (bootverbose)
+	  	printf ("\tDelay (GEN=%d): %u msec\n", gen, ms);
+	/*
+	 * adjust for prescaler, and convert into KHz 
+	 */
+	return ms ? ((1 << gen) * 4440) / ms : 0;
+}
 
 static void ncr_getclock (ncb_p np)
 {
-	u_char	tbl[5] = {6,2,3,4,6};
-	u_char	f;
-	u_char	ns_clock = (1000/NCR_CLOCK);
-
-	/*
-	**	Compute the best value for scntl3.
-	*/
-
-	f = (2 * MIN_SYNC_PD - 1) / ns_clock;
-	if (!f ) f=1;
-	if (f>4) f=4;
-	np -> ns_sync = (ns_clock * tbl[f]) / 2;
-	np -> rv_scntl3 = f<<4;
-
-	f = (2 * MIN_ASYNC_PD - 1) / ns_clock;
-	if (!f ) f=1;
-	if (f>4) f=4;
-	np -> ns_async = (ns_clock * tbl[f]) / 2;
-	np -> rv_scntl3 |= f;
-	if (DEBUG_FLAGS & DEBUG_TIMING)
-		printf ("%s: sclk=%d async=%d sync=%d (ns) scntl3=0x%x\n",
-		ncr_name (np), ns_clock, np->ns_async, np->ns_sync, np->rv_scntl3);
+	unsigned char scntl3;
+	unsigned char stest1;
+	scntl3 = INB(nc_scntl3);
+	stest1 = INB(nc_stest1);
+	  
+	/* always false, except for 875 with clock doubler selected */
+	if ((stest1 & (DBLEN+DBLSEL)) == DBLEN+DBLSEL) {
+		OUTB(nc_stest1, 0);
+		scntl3 = 3;
+	} else {
+		if ((scntl3 & 7) == 0) {
+			unsigned f1, f2;
+			/* throw away first result */
+			(void) ncrgetfreq (np, 11);
+			f1 = ncrgetfreq (np, 11);
+			f2 = ncrgetfreq (np, 11);
+
+			if (bootverbose)
+			  printf ("\tNCR clock is %uKHz, %uKHz\n", f1, f2);
+			if (f1 > f2) f1 = f2;	/* trust lower result	*/
+			if (f1 > 45000) {
+				scntl3 = 5;	/* >45Mhz: assume 80MHz	*/
+			} else {
+				scntl3 = 3;	/* <45Mhz: assume 40MHz	*/
+			}
+		}
+	}
+
+	np->ns_sync   = 25;
+	np->ns_async  = 50;
+	np->rv_scntl3 = ((scntl3 & 0x7) << 4) -0x20 + (scntl3 & 0x7);
+
+	if (bootverbose) {
+		printf ("\tinitial value of SCNTL3 = %02x, final = %02x\n",
+			scntl3, np->rv_scntl3);
+	}
 }
 
 /*=========================================================================*/
 #endif /* KERNEL */
-
-
--- /usr/src/sys/dev/pci/ncr_reg.h	Tue Mar 12 06:24:12 1996
+++ ncr_reg.h	Sat Oct 12 23:00:08 1996
@@ -223,6 +223,8 @@
 /*4c*/  U_INT8    nc_stest0;
 
 /*4d*/  U_INT8    nc_stest1;
+	#define   DBLEN   0x08	/* clock doubler running		*/
+	#define   DBLSEL  0x04	/* clock doubler selected		*/
 
 /*4e*/  U_INT8    nc_stest2;
 	#define   ROF     0x40	/* reset scsi offset (after gross error!) */

>Audit-Trail:
>Unformatted:
from the Capella; that's about the max rate of the drive, reading the
outer rings of the disk). Reading from both drives simultaneously gets
me 4.6MB/s per drive, 9.2MB/s total... much better :)

I also decided to try a recent Linux kernel, using the ncr driver that
was ported from FreeBSD, but for some reason, the performance was
pitiful... I waited about 3 minutes for the 128MB dd to finish, and
finally ^C'd it since it was taking so long. The older Linux ncr
driver seemed pretty decent though.

So anyways, I decided to port the latest FreeBSD driver to NetBSD, and
here it is! :) It's based on:

**  $Id: ncr.c,v 1.81 1996/10/12 17:33:48 se Exp $

which perhaps is a bit too new... I had to make some minor
modifications for it to do synchronous mode... but after doing that,
it seems to work great. I get the same transfer rates as under FreeBSD
now, and I've turned tagged command queueing back on and haven't had
any of those "assert failed" errors yet (although it might take
another day or two for that to show up).

Someone who knows what they're doing might want to take a look at it
and make sure I didn't mess anything up :) ncr_intr() is probably a
good place to start... I'm pretty sure it's okay, but it looks like
FreeBSD changed the way its interrupt handlers worked. They used to
return int, but now they return void. NetBSD's still return int, so I
used the old ncr.c driver to see what I should be returning...

It'd be cool if the {Free,Net,Open}BSD ncr drivers could be merged
back together so we could all use the same source :)