Subject: cd(4) bounce buffer patch (was: Filesystems vs. device sector
To: None <tech-kern@netbsd.org>
From: Stephen M. Rumble <stephen.rumble@utoronto.ca>
List: tech-kern
Date: 07/28/2007 16:05:59
This message is in MIME format.

--=_6z7pzjjj27i8
Content-Type: text/plain;
	charset=ISO-8859-1;
	DelSp="Yes";
	format="flowed"
Content-Disposition: inline
Content-Transfer-Encoding: 7bit

Hi all,

I've gone ahead and fixed cd(4)'s bounce buffering to handle rounding  
past MAXPHYS. While there I also nuked some dead code in the bounce  
path and plugged a buffer leak.

EFS now works and 'mtree -c -K md5' reports the same thing using cd(4)  
as it does with wd(4).

Any feedback on the attached patch would be appreciated.

Steve

--=_6z7pzjjj27i8
Content-Type: text/x-patch;
	charset=UTF-8;
	name="cd.c.diff"
Content-Disposition: attachment;
	filename="cd.c.diff"
Content-Transfer-Encoding: quoted-printable

Index: cd.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/dev/scsipi/cd.c,v
retrieving revision 1.266
diff -u -r1.266 cd.c
--- cd.c=0921 Jul 2007 19:51:48 -0000=091.266
+++ cd.c=0928 Jul 2007 20:06:07 -0000
@@ -119,6 +119,14 @@
 =09=09=09=09=09=09 /* leadout */
 };
=20
+struct cdbounce {
+=09struct buf     *obp;=09=09=09/* original buf */
+=09int=09=09doff;=09=09=09/* byte offset in orig. buf */
+=09int=09=09soff;=09=09=09/* byte offset in bounce buf */
+=09int=09=09resid;=09=09=09/* residual i/o in orig. buf */
+=09int=09=09bcount;=09=09=09/* actual obp bytes in bounce */
+};
+
 static void=09cdstart(struct scsipi_periph *);
 static void=09cdrestart(void *);
 static void=09cdminphys(struct buf *);
@@ -636,8 +644,8 @@
 =09=09 */
 =09=09if ((bp->b_bcount % cd->params.blksize) !=3D 0 ||
 =09=09=09((blkno * lp->d_secsize) % cd->params.blksize) !=3D 0) {
+=09=09=09struct cdbounce *bounce;
 =09=09=09struct buf *nbp;
-=09=09=09void *bounce =3D NULL;
 =09=09=09long count;
=20
 =09=09=09if ((bp->b_flags & B_READ) =3D=3D 0) {
@@ -646,23 +654,39 @@
 =09=09=09=09bp->b_error =3D EACCES;
 =09=09=09=09goto bad;
 =09=09=09}
-=09=09=09count =3D ((blkno * lp->d_secsize) % cd->params.blksize);
-=09=09=09/* XXX Store starting offset in bp->b_rawblkno */
-=09=09=09bp->b_rawblkno =3D count;
=20
+=09=09=09bounce =3D malloc(sizeof(*bounce), M_DEVBUF, M_NOWAIT);
+=09=09=09if (!bounce) {
+=09=09=09=09/* No memory -- fail the iop. */
+=09=09=09=09bp->b_error =3D ENOMEM;
+=09=09=09=09goto bad;
+=09=09=09}
+
+=09=09=09bounce->obp =3D bp;
+=09=09=09bounce->resid =3D bp->b_bcount;
+=09=09=09bounce->doff =3D 0;
+=09=09=09count =3D ((blkno * lp->d_secsize) % cd->params.blksize);
+=09=09=09bounce->soff =3D count;
 =09=09=09count +=3D bp->b_bcount;
 =09=09=09count =3D roundup(count, cd->params.blksize);
+=09=09=09bounce->bcount =3D bounce->resid;
+=09=09=09if (count > MAXPHYS) {
+=09=09=09=09bounce->bcount =3D MAXPHYS - bounce->soff;
+=09=09=09=09count =3D MAXPHYS;
+=09=09=09}
=20
 =09=09=09blkno =3D ((blkno * lp->d_secsize) / cd->params.blksize);
 =09=09=09nbp =3D getiobuf_nowait();
 =09=09=09if (!nbp) {
 =09=09=09=09/* No memory -- fail the iop. */
+=09=09=09=09free(bounce, M_DEVBUF);
 =09=09=09=09bp->b_error =3D ENOMEM;
 =09=09=09=09goto bad;
 =09=09=09}
-=09=09=09bounce =3D malloc(count, M_DEVBUF, M_NOWAIT);
-=09=09=09if (!bounce) {
+=09=09=09nbp->b_data =3D malloc(count, M_DEVBUF, M_NOWAIT);
+=09=09=09if (!nbp->b_data) {
 =09=09=09=09/* No memory -- fail the iop. */
+=09=09=09=09free(bounce, M_DEVBUF);
 =09=09=09=09putiobuf(nbp);
 =09=09=09=09bp->b_error =3D ENOMEM;
 =09=09=09=09goto bad;
@@ -675,16 +699,14 @@
=20
 =09=09=09nbp->b_bcount =3D count;
 =09=09=09nbp->b_bufsize =3D count;
-=09=09=09nbp->b_data =3D bounce;
=20
 =09=09=09nbp->b_rawblkno =3D blkno;
=20
-=09=09=09/* We need to do a read-modify-write operation */
 =09=09=09nbp->b_flags =3D bp->b_flags | B_READ | B_CALL;
 =09=09=09nbp->b_iodone =3D cdbounce;
=20
-=09=09=09/* Put ptr to orig buf in b_private and use new buf */
-=09=09=09nbp->b_private =3D bp;
+=09=09=09/* store bounce state in b_private and use new buf */
+=09=09=09nbp->b_private =3D bounce;
=20
 =09=09=09BIO_COPYPRIO(nbp, bp);
=20
@@ -910,85 +932,91 @@
 static void
 cdbounce(struct buf *bp)
 {
-=09struct buf *obp =3D (struct buf *)bp->b_private;
+=09struct cdbounce *bounce =3D (struct cdbounce *)bp->b_private;
+=09struct buf *obp =3D bounce->obp;
+=09struct cd_softc *cd =3D cd_cd.cd_devs[CDUNIT(obp->b_dev)];
+=09struct disklabel *lp =3D cd->sc_dk.dk_label;
=20
 =09if (bp->b_flags & B_ERROR) {
 =09=09/* EEK propagate the error and free the memory */
 =09=09goto done;
 =09}
-=09if (obp->b_flags & B_READ) {
-=09=09/* Copy data to the final destination and free the buf. */
-=09=09memcpy(obp->b_data, (char *)bp->b_data+obp->b_rawblkno,
-=09=09=09obp->b_bcount);
-=09} else {
-=09=09/*
-=09=09 * XXXX This is a CD-ROM -- READ ONLY -- why do we bother with
-=09=09 * XXXX any of this write stuff?
-=09=09 */
-=09=09if (bp->b_flags & B_READ) {
-=09=09=09struct cd_softc *cd =3D cd_cd.cd_devs[CDUNIT(bp->b_dev)];
-=09=09=09struct buf *nbp;
-=09=09=09int s;
=20
-=09=09=09/* Read part of RMW complete. */
-=09=09=09memcpy((char *)bp->b_data+obp->b_rawblkno, obp->b_data,
-=09=09=09=09obp->b_bcount);
+=09KASSERT(obp->b_flags & B_READ);
=20
-=09=09=09/* We need to alloc a new buf. */
-=09=09=09nbp =3D getiobuf_nowait();
-=09=09=09if (!nbp) {
-=09=09=09=09/* No buf available. */
-=09=09=09=09bp->b_flags |=3D B_ERROR;
-=09=09=09=09bp->b_error =3D ENOMEM;
-=09=09=09=09bp->b_resid =3D bp->b_bcount;
-=09=09=09=09goto done;
-=09=09=09}
+=09/* copy bounce buffer to final destination */
+=09memcpy((char *)obp->b_data + bounce->doff,
+=09    (char *)bp->b_data + bounce->soff, bounce->bcount);
+
+=09/* check if we need more I/O, i.e. bounce put us over MAXPHYS */
+=09KASSERT(bounce->resid >=3D bounce->bcount);
+=09bounce->resid -=3D bounce->bcount;
+=09if (bounce->resid > 0) {
+=09=09struct buf *nbp;
+=09=09daddr_t blkno;
+=09=09long count;
+=09=09int s;
+
+=09=09blkno =3D obp->b_rawblkno +
+=09=09    ((obp->b_bcount - bounce->resid) / lp->d_secsize);
+=09=09count =3D ((blkno * lp->d_secsize) % cd->params.blksize);
+=09=09blkno =3D (blkno * lp->d_secsize) / cd->params.blksize;
+=09=09bounce->soff =3D count;
+=09=09bounce->doff +=3D bounce->bcount;
+=09=09count +=3D bounce->resid;
+=09=09count =3D roundup(count, cd->params.blksize);
+=09=09bounce->bcount =3D bounce->resid;
+=09=09if (count > MAXPHYS) {
+=09=09=09bounce->bcount =3D MAXPHYS - bounce->soff;
+=09=09=09count =3D MAXPHYS;
+=09=09}
+
+=09=09nbp =3D getiobuf_nowait();
+=09=09if (!nbp) {
+=09=09=09/* No memory -- fail the iop. */
+=09=09=09bp->b_error =3D ENOMEM;
+=09=09=09goto done;
+=09=09}
=20
-=09=09=09/* Set up the IOP to the bounce buffer. */
-=09=09=09nbp->b_error =3D 0;
-=09=09=09nbp->b_proc =3D bp->b_proc;
-=09=09=09nbp->b_vp =3D NULLVP;
+=09=09/* Set up the IOP to the bounce buffer. */
+=09=09nbp->b_error =3D 0;
+=09=09nbp->b_proc =3D obp->b_proc;
+=09=09nbp->b_vp =3D NULLVP;
=20
-=09=09=09nbp->b_bcount =3D bp->b_bcount;
-=09=09=09nbp->b_bufsize =3D bp->b_bufsize;
-=09=09=09nbp->b_data =3D bp->b_data;
+=09=09nbp->b_bcount =3D count;
+=09=09nbp->b_bufsize =3D count;
+=09=09nbp->b_data =3D bp->b_data;
=20
-=09=09=09nbp->b_rawblkno =3D bp->b_rawblkno;
+=09=09nbp->b_rawblkno =3D blkno;
=20
-=09=09=09/* We need to do a read-modify-write operation */
-=09=09=09nbp->b_flags =3D obp->b_flags | B_CALL;
-=09=09=09nbp->b_iodone =3D cdbounce;
+=09=09nbp->b_flags =3D obp->b_flags | B_READ | B_CALL;
+=09=09nbp->b_iodone =3D cdbounce;
=20
-=09=09=09/* Put ptr to orig buf in b_private and use new buf */
-=09=09=09nbp->b_private =3D obp;
+=09=09/* store bounce state in b_private and use new buf */
+=09=09nbp->b_private =3D bounce;
=20
-=09=09=09s =3D splbio();
-=09=09=09/*
-=09=09=09 * Place it in the queue of disk activities for this
-=09=09=09 * disk.
-=09=09=09 *
-=09=09=09 * XXX Only do disksort() if the current operating mode
-=09=09=09 * XXX does not include tagged queueing.
-=09=09=09 */
-=09=09=09BUFQ_PUT(cd->buf_queue, nbp);
+=09=09BIO_COPYPRIO(nbp, obp);
=20
-=09=09=09/*
-=09=09=09 * Tell the device to get going on the transfer if it's
-=09=09=09 * not doing anything, otherwise just wait for
-=09=09=09 * completion
-=09=09=09 */
-=09=09=09cdstart(cd->sc_periph);
+=09=09bp->b_data =3D NULL;
+=09=09putiobuf(bp);
=20
-=09=09=09splx(s);
-=09=09=09return;
+=09=09/* enqueue the request and return */
+=09=09s =3D splbio();
+=09=09BUFQ_PUT(cd->buf_queue, nbp);
+=09=09cdstart(cd->sc_periph);
+=09=09splx(s);
=20
-=09=09}
+=09=09return;
 =09}
+
 done:
 =09obp->b_flags |=3D bp->b_flags & B_ERROR;
 =09obp->b_error =3D bp->b_error;
 =09obp->b_resid =3D bp->b_resid;
 =09free(bp->b_data, M_DEVBUF);
+=09free(bounce, M_DEVBUF);
+=09bp->b_data =3D NULL;
+=09putiobuf(bp);
 =09biodone(obp);
 }
=20

--=_6z7pzjjj27i8--