Subject: Re: tape driver behavior
To: Charles Shannon Hendrix <shannon@widomaker.com>
From: Matthew Jacob <mjacob@feral.com>
List: current-users
Date: 09/06/2001 15:33:01
>
> I've a couple of problems with NetBSD 1.5 and tape drives.
>
> "mt status" report the file number and drive status.

See patches below for a first pass at addressing this.


> The driver doesn't return end-of-media to applications.
>
> I use the tape driver all the time, and it works great. But I really
> miss being about to use mt to determine which file number I'm on or get
> other information.
>
> All I ever get is this:
>
> [203] % mt status
> SCSI tape drive, residual=0
> ds=3<Mounted>
> er=0
> blocksize: 0 (0, 0, 0, 0)
> density: 36 (0, 0, 0, 0)
> current file number: 0
> current block number: 0
>
> When running dump, it simply hangs on end-of-media, instead of
> asking for the next tape.

I tried doing some tape testing, and as reported by Thor, the system just
totally wedged up while I was doing this- but it *did* still eventually
complete.

I've begun the process to try and sort this out a bit.

The architecture for NetBSD's tape driver is that unless you enable early
warning, a hard I/O error is your indication that you've hit EOT. Without
early warning set, you'll get literally hundreds of EOM check conditions,
and you'll then get a complete flood of 'errors' as you approach physical EOT.

Actual early EOM handling *should* work, but I wouldn't be surprised if it
didn't.

Below are some patches which start toward maintaining logical tape position-
plus a couple of 'fixes' for hard eot detection and tape hardware && SCSI
logical block positioning. I'll be gone all of next week, but this is
definitely on my list of things to be working on (finally).

The tape tester is part of my tool kit package which is clonable via BitKeeper
at bk://blade.feral.com:9003.

-matt

Index: st.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/scsipi/st.c,v
retrieving revision 1.143
diff -u -r1.143 st.c
--- st.c	2001/07/18 18:21:06	1.143
+++ st.c	2001/09/06 22:31:38
@@ -617,6 +617,10 @@
 			goto bad;
 		st->last_dsty = dsty;
 	}
+	if (!(st->quirks & ST_Q_NOPREVENT)) {
+		scsipi_prevent(periph, PR_PREVENT,
+		    XS_CTL_IGNORE_ILLEGAL_REQUEST | XS_CTL_IGNORE_NOT_READY);
+	}

 	SC_DEBUG(periph, SCSIPI_DB2, ("open complete\n"));
 	return (0);
@@ -668,6 +672,12 @@
 		error = st_check_eod(st, FALSE, &nm, 0);
 	}

+	/*
+	 * Allow robots to eject tape if needed.
+	 */
+	scsipi_prevent(periph, PR_ALLOW,
+	    XS_CTL_IGNORE_ILLEGAL_REQUEST | XS_CTL_IGNORE_NOT_READY);
+
 	switch (STMODE(dev)) {
 	case NORMAL_MODE:
 		st_unmount(st, NOEJECT);
@@ -814,14 +824,10 @@
 			return (error);
 		}
 	}
-	if (!(st->quirks & ST_Q_NOPREVENT)) {
-		scsipi_prevent(periph, PR_PREVENT,
-		    XS_CTL_IGNORE_ILLEGAL_REQUEST | XS_CTL_IGNORE_NOT_READY);
-	}
 	st->flags &= ~ST_NEW_MOUNT;
 	st->flags |= ST_MOUNTED;
 	periph->periph_flags |= PERIPH_MEDIA_LOADED;	/* move earlier? */
-
+	st->blkno = st->fileno = (daddr_t) 0;
 	return (0);
 }

@@ -861,10 +867,12 @@
 			st->sc_dev.dv_xname);
 	}

-	scsipi_prevent(periph, PR_ALLOW,
-	    XS_CTL_IGNORE_ILLEGAL_REQUEST | XS_CTL_IGNORE_NOT_READY);
-	if (eject)
+	if (eject) {
 		st_load(st, LD_UNLOAD, XS_CTL_IGNORE_NOT_READY);
+		st->blkno = st->fileno = (daddr_t) -1;
+	} else {
+		st->blkno = st->fileno = (daddr_t) 0;
+	}
 	st->flags &= ~(ST_MOUNTED | ST_NEW_MOUNT);
 	periph->periph_flags &= ~PERIPH_MEDIA_LOADED;
 }
@@ -1181,6 +1189,11 @@
 			_lto3b(bp->b_bcount, cmd.len);

 		/*
+		 * Clear 'position updated' indicator
+		 */
+		st->flags &= ~ST_POSUPDATED;
+
+		/*
 		 * go ask the adapter to do all this for us
 		 */
 		error = scsipi_command(periph,
@@ -1209,6 +1222,19 @@
 #if NRND > 0
 		rnd_add_uint32(&st->rnd_source, xs->bp->b_blkno);
 #endif
+
+		if ((st->flags & ST_POSUPDATED) == 0) {
+			if (xs->bp->b_flags & B_ERROR) {
+				st->fileno = st->blkno = -1;
+			} else if (st->blkno != -1) {
+				if (st->flags & ST_FIXEDBLOCKS) {
+					st->blkno +=
+					    (xs->bp->b_bcount / st->blksize);
+				} else {
+					st->blkno++;
+				}
+			}
+		}
 	}
 }

@@ -1297,6 +1323,8 @@
 		g->mt_mdensity[1] = st->modes[1].density;
 		g->mt_mdensity[2] = st->modes[2].density;
 		g->mt_mdensity[3] = st->modes[3].density;
+		g->mt_fileno = st->fileno;
+		g->mt_blkno = st->blkno;
 		if (st->flags & ST_READONLY)
 			g->mt_dsreg |= MT_DS_RDONLY;
 		if (st->flags & ST_MOUNTED)
@@ -1641,9 +1669,35 @@
 	cmd.byte2 = what;
 	_lto3b(number, cmd.number);

-	return (scsipi_command(st->sc_periph,
+	st->flags &= ~ST_POSUPDATED;
+	st->last_ctl_resid = 0;
+	error = scsipi_command(st->sc_periph,
 	    (struct scsipi_generic *)&cmd, sizeof(cmd),
-	    0, 0, 0, ST_SPC_TIME, NULL, flags));
+	    0, 0, 0, ST_SPC_TIME, NULL, flags);
+
+	if (error == 0 && (st->flags & ST_POSUPDATED) == 0) {
+		number = number - st->last_ctl_resid;
+		if (what == SP_BLKS) {
+			if (st->blkno != -1) {
+				st->blkno += number;
+			}
+		} else if (what == SP_FILEMARKS) {
+			if (st->fileno != -1) {
+				st->fileno += number;
+				if (number > 0) {
+					st->blkno = 0;
+				} else if (number < 0) {
+					st->blkno = -1;
+				}
+			}
+		} else if (what == SP_EOM) {
+			/*
+			 * This loses us relative position.
+			 */
+			st->fileno = st->blkno = -1;
+		}
+	}
+	return (error);
 }

 /*
@@ -1655,6 +1709,7 @@
 	int flags;
 	int number;
 {
+	int error;
 	struct scsi_write_filemarks cmd;

 	/*
@@ -1681,9 +1736,14 @@
 	cmd.opcode = WRITE_FILEMARKS;
 	_lto3b(number, cmd.number);

-	return (scsipi_command(st->sc_periph,
+	/* XXX WE NEED TO BE ABLE TO GET A RESIDIUAL XXX */
+	error = scsipi_command(st->sc_periph,
 	    (struct scsipi_generic *)&cmd, sizeof(cmd),
-	    0, 0, 0, ST_IO_TIME * 4, NULL, flags));
+	    0, 0, 0, ST_IO_TIME * 4, NULL, flags);
+	if (error == 0 && st->fileno != -1) {
+		st->fileno += number;
+	}
+	return (error);
 }

 /*
@@ -1803,6 +1863,10 @@
 	if (error) {
 		printf("%s: error %d trying to rewind\n",
 		    st->sc_dev.dv_xname, error);
+		/* lost position */
+		st->fileno = st->blkno = -1;
+	} else {
+		st->fileno = st->blkno = 0;
 	}
 	return (error);
 }
@@ -1818,17 +1882,28 @@
 	struct scsi_tape_read_position cmd;

 	/*
-	 * First flush any pending writes...
+	 * We try and flush any buffered writes here if we were writing
+	 * and we're trying to get hardware block position. It eats
+	 * up performance substantially, but I'm wary of drive firmware.
+	 *
+	 * I think that *logical* block position is probably okay-
+	 * but hardware block position might have to wait for data
+	 * to hit media to be valid. Caveat Emptor.
 	 */
-	error = st_write_filemarks(st, 0, XS_CTL_SILENT);

-	/*
-	 * The latter case is for 'write protected' tapes
-	 * which are too stupid to recognize a zero count
-	 * for writing filemarks as a no-op.
-	 */
-	if (error != 0 && error != EACCES && error != EROFS)
-		return (error);
+	if (hard && (st->flags & ST_WRITTEN)) {
+		/*
+		 * First flush any pending writes...
+		 */
+		error = st_write_filemarks(st, 0, XS_CTL_SILENT);
+		/*
+		 * The latter case is for 'write protected' tapes
+		 * which are too stupid to recognize a zero count
+		 * for writing filemarks as a no-op.
+		 */
+		if (error != 0 && error != EACCES && error != EROFS)
+			return (error);
+	}

 	memset(&cmd, 0, sizeof(cmd));
 	memset(&posdata, 0, sizeof(posdata));
@@ -1865,20 +1940,15 @@
 	int error;
 	struct scsi_tape_locate cmd;

-	/*
-	 * First flush any pending writes. Strictly speaking,
-	 * we're not supposed to have to worry about this,
-	 * but let's be untrusting.
-	 */
-	error = st_write_filemarks(st, 0, XS_CTL_SILENT);
-
 	/*
-	 * The latter case is for 'write protected' tapes
-	 * which are too stupid to recognize a zero count
-	 * for writing filemarks as a no-op.
+	 * We used to try and flush any buffered writes here.
+	 * Now we push this onto user applications to either
+	 * flush the pending writes themselves (via a zero count
+	 * WRITE FILEMARKS command) or they can trust their tape
+	 * drive to do this correctly for them.
+	 *
+	 * There are very ugly performance limitations otherwise.
 	 */
-	if (error != 0 && error != EACCES && error != EROFS)
-		return (error);

 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode = LOCATE;
@@ -1889,9 +1959,10 @@
 		(struct scsipi_generic *)&cmd, sizeof(cmd),
 		NULL, 0, ST_RETRIES, ST_SPC_TIME, NULL, 0);
 	/*
-	 * XXX: Note file && block number position now unknown (if
-	 * XXX: these things ever start being maintained in this driver)
+	 * Note file && block number position now unknown (if
+	 * these things ever start being maintained in this driver)
 	 */
+	st->fileno = st->blkno = -1;
 	return (error);
 }

@@ -1940,20 +2011,45 @@
 		return (retval);
 	}

-
+	xs->resid = info;
 	if (st->flags & ST_FIXEDBLOCKS) {
-		xs->resid = info * st->blksize;
-		if (sense->flags & SSD_EOM) {
+		if (bp) {
+			xs->resid *= st->blksize;
+			st->last_io_resid = xs->resid;
+		} else {
+			st->last_ctl_resid = xs->resid;
+		}
+		if (key == SKEY_VOLUME_OVERFLOW) {
+			st->flags |= ST_EIO_PENDING;
+			if (bp)
+				bp->b_resid = xs->resid;
+		} else if (sense->flags & SSD_EOM) {
 			if ((st->flags & ST_EARLYWARN) == 0)
 				st->flags |= ST_EIO_PENDING;
 			st->flags |= ST_EOM_PENDING;
-			if (bp)
+			if (bp) {
+#if	0
 				bp->b_resid = xs->resid;
+#else
+				/*
+				 * Grotesque as it seems, the few times
+				 * I've actually seen a non-zero resid,
+				 * the tape drive actually lied and had
+				 * written all the data!
+				 */
+				bp->b_resid = 0;
+#endif
+			}
 		}
 		if (sense->flags & SSD_FILEMARK) {
 			st->flags |= ST_AT_FILEMARK;
 			if (bp)
 				bp->b_resid = xs->resid;
+			if (st->fileno != (daddr_t) -1) {
+				st->fileno++;
+				st->blkno = 0;
+				st->flags |= ST_POSUPDATED;
+			}
 		}
 		if (sense->flags & SSD_ILI) {
 			st->flags |= ST_EIO_PENDING;
@@ -1974,6 +2070,13 @@
 			if ((st->quirks & ST_Q_SENSE_HELP) &&
 			    (periph->periph_flags & PERIPH_MEDIA_LOADED) == 0)
 				st->blksize -= 512;
+			else if ((st->flags & ST_POSUPDATED) == 0) {
+				if (st->blkno != (daddr_t) -1) {
+					st->blkno +=
+					    (xs->datalen / st->blksize);
+					st->flags |= ST_POSUPDATED;
+				}
+			}
 		}
 		/*
 		 * If data wanted and no data was transferred, do it immediately
@@ -1988,6 +2091,11 @@
 			}
 		}
 	} else {		/* must be variable mode */
+		if (bp) {
+			st->last_io_resid = xs->resid;
+		} else {
+			st->last_ctl_resid = xs->resid;
+		}
 		if (sense->flags & SSD_EOM) {
 			/*
 			 * The current semantics of this
@@ -2013,6 +2121,11 @@
 			}
 		} else if (sense->flags & SSD_FILEMARK) {
 			retval = 0;
+			if (st->fileno != (daddr_t) -1) {
+				st->fileno++;
+				st->blkno = 0;
+				st->flags |= ST_POSUPDATED;
+			}
 		} else if (sense->flags & SSD_ILI) {
 			if (info < 0) {
 				/*
@@ -2028,9 +2141,12 @@
 				retval = EIO;
 			} else {
 				retval = 0;
+				if (st->blkno != (daddr_t) -1) {
+					st->blkno++;
+					st->flags |= ST_POSUPDATED;
+				}
 			}
 		}
-		xs->resid = info;
 		if (bp)
 			bp->b_resid = info;
 	}
@@ -2058,6 +2174,8 @@
 				/* return an EOF */
 			}
 			retval = 0;
+			/* lost position */
+			st->fileno = st->blkno = -1;
 		}
 	}

@@ -2081,6 +2199,7 @@
 			case SKEY_UNIT_ATTENTION:
 			case SKEY_WRITE_PROTECT:
 				break;
+			case SKEY_VOLUME_OVERFLOW:
 			case SKEY_BLANK_CHECK:
 				printf(", requested size: %d (decimal)", info);
 				break;
Index: stvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/scsipi/stvar.h,v
retrieving revision 1.2
diff -u -r1.2 stvar.h
--- stvar.h	2001/06/18 09:05:05	1.2
+++ stvar.h	2001/09/06 22:31:38
@@ -113,6 +113,11 @@
 	u_int last_dsty;	/* last density opened               */
 	short mt_resid;		/* last (short) resid                */
 	short mt_erreg;		/* last error (sense key) seen       */
+	/* relative to BOT location */
+	daddr_t fileno;
+	daddr_t blkno;
+	int32_t last_io_resid;
+	int32_t last_ctl_resid;
 #define	mt_key	mt_erreg
 	u_int8_t asc;		/* last asc code seen		     */
 	u_int8_t ascq;		/* last asc code seen		     */
@@ -166,12 +171,13 @@
 #define	ST_DONTBUFFER	0x1000	/* Disable buffering/caching */
 #define	ST_EARLYWARN	0x2000	/* Do (deferred) EOM for variable mode */
 #define	ST_EOM_PENDING	0x4000	/* EOM reporting deferred until next op */
+#define	ST_POSUPDATED	0x8000	/* tape position already updated */

 #define	ST_PER_ACTION	(ST_AT_FILEMARK | ST_EIO_PENDING | ST_EOM_PENDING | \
 			 ST_BLANK_READ)
 #define	ST_PER_MOUNT	(ST_INFO_VALID | ST_BLOCK_SET | ST_WRITTEN |	\
 			 ST_FIXEDBLOCKS | ST_READONLY | ST_FM_WRITTEN |	\
-			 ST_2FM_AT_EOD | ST_PER_ACTION)
+			 ST_2FM_AT_EOD | ST_PER_ACTION | ST_POSUPDATED)

 void	stattach __P((struct device *, struct st_softc *, void *));