tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kill dk_start()/dk_iodone() [Re: sys/dev/dksubr.c may reorder requests]



Hello,
after comments from different peoples and thinking about this issue,
I came to the conclusion that the dk_start/dk_iodone part of the
dksubr interface cause more troubles than anything else.
dk_iodone() only call dk_start(), and dk_start() is nothing
else than a while() loop getting struct buf* out of the bufq and calling
the underlying driver with a bp,  but with hacks to hacks to deal
with the underlying driver causing recursions, or resource shortage.

killing dk_start() and dk_iodone() and moving this code to the underlying
driver only marginally increase complexity: it has to do the while()
loop itself, but it can now do it properly with bufq_peek()/bufq_get(),
removing the buffer from the queue at the right time. This handle
both the recursion and reordering issues.

The attached patch does that, and adapts the 2 consumers of dksubr.c:
xbd(4) and cgd(4). I also attach the result of a diff -b, where it's
clear that the changes to the underlying drivers are minimal, and may even
make them simpler.

With this change in place, hopefully dksubr.c will be more usefull, and
can be used by other disk drivers.

Any objection ?

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: sys/dev/cgd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/cgd.c,v
retrieving revision 1.85
diff -u -p -u -r1.85 cgd.c
--- sys/dev/cgd.c       18 Mar 2014 15:44:37 -0000      1.85
+++ sys/dev/cgd.c       21 May 2014 14:00:55 -0000
@@ -100,7 +100,7 @@ static int cgd_destroy(device_t);
 
 /* Internal Functions */
 
-static int     cgdstart(struct dk_softc *, struct buf *);
+static void    cgdstart(struct dk_softc *);
 static void    cgdiodone(struct buf *);
 
 static int     cgd_ioctl_set(struct cgd_softc *, void *, struct lwp *);
@@ -381,69 +381,70 @@ cgd_putdata(struct dk_softc *dksc, void 
        }
 }
 
-static int
-cgdstart(struct dk_softc *dksc, struct buf *bp)
+static void
+cgdstart(struct dk_softc *dksc)
 {
        struct  cgd_softc *cs = (struct cgd_softc *)dksc;
-       struct  buf *nbp;
+       struct  buf *bp, *nbp;
        void *  addr;
        void *  newaddr;
        daddr_t bn;
        struct  vnode *vp;
 
-       DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp));
-       disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */
+       while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
 
-       bn = bp->b_rawblkno;
+               DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp));
+               disk_busy(&dksc->sc_dkdev);
 
-       /*
-        * We attempt to allocate all of our resources up front, so that
-        * we can fail quickly if they are unavailable.
-        */
+               bn = bp->b_rawblkno;
 
-       nbp = getiobuf(cs->sc_tvn, false);
-       if (nbp == NULL) {
-               disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-               return -1;
-       }
-
-       /*
-        * If we are writing, then we need to encrypt the outgoing
-        * block into a new block of memory.  If we fail, then we
-        * return an error and let the dksubr framework deal with it.
-        */
-       newaddr = addr = bp->b_data;
-       if ((bp->b_flags & B_READ) == 0) {
-               newaddr = cgd_getdata(dksc, bp->b_bcount);
-               if (!newaddr) {
-                       putiobuf(nbp);
+               /*
+                * We attempt to allocate all of our resources up front, so that
+                * we can fail quickly if they are unavailable.
+                */
+               nbp = getiobuf(cs->sc_tvn, false);
+               if (nbp == NULL) {
                        disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-                       return -1;
+                       break;
                }
-               cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn,
-                   DEV_BSIZE, CGD_CIPHER_ENCRYPT);
-       }
 
-       nbp->b_data = newaddr;
-       nbp->b_flags = bp->b_flags;
-       nbp->b_oflags = bp->b_oflags;
-       nbp->b_cflags = bp->b_cflags;
-       nbp->b_iodone = cgdiodone;
-       nbp->b_proc = bp->b_proc;
-       nbp->b_blkno = bn;
-       nbp->b_bcount = bp->b_bcount;
-       nbp->b_private = bp;
-
-       BIO_COPYPRIO(nbp, bp);
-
-       if ((nbp->b_flags & B_READ) == 0) {
-               vp = nbp->b_vp;
-               mutex_enter(vp->v_interlock);
-               vp->v_numoutput++;
-               mutex_exit(vp->v_interlock);
+               /*
+                * If we are writing, then we need to encrypt the outgoing
+                * block into a new block of memory.
+                */
+               newaddr = addr = bp->b_data;
+               if ((bp->b_flags & B_READ) == 0) {
+                       newaddr = cgd_getdata(dksc, bp->b_bcount);
+                       if (!newaddr) {
+                               putiobuf(nbp);
+                               disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & 
B_READ));
+                               break;
+                       }
+                       cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn,
+                           DEV_BSIZE, CGD_CIPHER_ENCRYPT);
+               }
+               /* we now have all needed resources to process this buf */
+               (void)bufq_get(dksc->sc_bufq);
+               nbp->b_data = newaddr;
+               nbp->b_flags = bp->b_flags;
+               nbp->b_oflags = bp->b_oflags;
+               nbp->b_cflags = bp->b_cflags;
+               nbp->b_iodone = cgdiodone;
+               nbp->b_proc = bp->b_proc;
+               nbp->b_blkno = bn;
+               nbp->b_bcount = bp->b_bcount;
+               nbp->b_private = bp;
+
+               BIO_COPYPRIO(nbp, bp);
+
+               if ((nbp->b_flags & B_READ) == 0) {
+                       vp = nbp->b_vp;
+                       mutex_enter(vp->v_interlock);
+                       vp->v_numoutput++;
+                       mutex_exit(vp->v_interlock);
+               }
+               VOP_STRATEGY(cs->sc_tvn, nbp);
        }
-       VOP_STRATEGY(cs->sc_tvn, nbp);
-       return 0;
 }
 
 static void
@@ -492,7 +493,7 @@ cgdiodone(struct buf *nbp)
        disk_unbusy(&dksc->sc_dkdev, obp->b_bcount - obp->b_resid,
            (obp->b_flags & B_READ));
        biodone(obp);
-       dk_iodone(di, dksc);
+       cgdstart(dksc);
        splx(s);
 }
 
Index: sys/dev/dksubr.c
===================================================================
RCS file: /cvsroot/src/sys/dev/dksubr.c,v
retrieving revision 1.49
diff -u -p -u -r1.49 dksubr.c
--- sys/dev/dksubr.c    28 Dec 2013 19:25:07 -0000      1.49
+++ sys/dev/dksubr.c    21 May 2014 14:00:55 -0000
@@ -224,37 +224,11 @@ dk_strategy(struct dk_intf *di, struct d
         */
        s = splbio();
        bufq_put(dksc->sc_bufq, bp);
-       dk_start(di, dksc);
+       di->di_diskstart(dksc);
        splx(s);
        return;
 }
 
-void
-dk_start(struct dk_intf *di, struct dk_softc *dksc)
-{
-       struct  buf *bp;
-
-       DPRINTF_FOLLOW(("dk_start(%s, %p)\n", di->di_dkname, dksc));
-
-       /* Process the work queue */
-       while ((bp = bufq_get(dksc->sc_bufq)) != NULL) {
-               if (di->di_diskstart(dksc, bp) != 0) {
-                       bufq_put(dksc->sc_bufq, bp);
-                       break;
-               }
-       }
-}
-
-void
-dk_iodone(struct dk_intf *di, struct dk_softc *dksc)
-{
-
-       DPRINTF_FOLLOW(("dk_iodone(%s, %p)\n", di->di_dkname, dksc));
-
-       /* We kick the queue in case we are able to get more work done */
-       dk_start(di, dksc);
-}
-
 int
 dk_size(struct dk_intf *di, struct dk_softc *dksc, dev_t dev)
 {
Index: sys/dev/dkvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/dkvar.h,v
retrieving revision 1.18
diff -u -p -u -r1.18 dkvar.h
--- sys/dev/dkvar.h     29 May 2013 23:25:55 -0000      1.18
+++ sys/dev/dkvar.h     21 May 2014 14:00:55 -0000
@@ -74,7 +74,7 @@ struct dk_intf {
        int     (*di_open)(dev_t, int, int, struct lwp *);
        int     (*di_close)(dev_t, int, int, struct lwp *);
        void    (*di_strategy)(struct buf *);
-       int     (*di_diskstart)(struct dk_softc *, struct buf *);
+       void    (*di_diskstart)(struct dk_softc *);
 };
 
 #define DK_BUSY(_dksc, _pmask)                         \
@@ -93,8 +93,6 @@ int   dk_open(struct dk_intf *, struct dk_
 int    dk_close(struct dk_intf *, struct dk_softc *, dev_t,
                 int, int, struct lwp *);
 void   dk_strategy(struct dk_intf *, struct dk_softc *, struct buf *);
-void   dk_start(struct dk_intf *, struct dk_softc *);
-void   dk_iodone(struct dk_intf *, struct dk_softc *);
 int    dk_size(struct dk_intf *, struct dk_softc *, dev_t);
 int    dk_ioctl(struct dk_intf *, struct dk_softc *, dev_t,
                 u_long, void *, int, struct lwp *);
Index: sys/arch/xen/xen/xbd_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xbd_xenbus.c,v
retrieving revision 1.62
diff -u -p -u -r1.62 xbd_xenbus.c
--- sys/arch/xen/xen/xbd_xenbus.c       16 Mar 2014 05:20:26 -0000      1.62
+++ sys/arch/xen/xen/xbd_xenbus.c       21 May 2014 14:00:55 -0000
@@ -168,7 +168,7 @@ static bool xbd_xenbus_suspend(device_t,
 static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
 
 static int  xbd_handler(void *);
-static int  xbdstart(struct dk_softc *, struct buf *);
+static void xbdstart(struct dk_softc *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
 
@@ -722,9 +722,10 @@ done:
        if (more_to_do)
                goto again;
 
-       dk_iodone(di, &sc->sc_dksc);
        if (sc->sc_xbdreq_wait)
                wakeup(&sc->sc_xbdreq_wait);
+       else
+               xbdstart(&sc->sc_dksc);
        return 1;
 }
 
@@ -926,133 +927,137 @@ xbddump(dev_t dev, daddr_t blkno, void *
        return dk_dump(di, &sc->sc_dksc, dev, blkno, va, size);
 }
 
-static int
-xbdstart(struct dk_softc *dksc, struct buf *bp)
+static void
+xbdstart(struct dk_softc *dksc)
 {
        struct xbd_xenbus_softc *sc = (struct xbd_xenbus_softc *)dksc;
+       struct buf *bp;
        struct xbd_req *xbdreq;
        blkif_request_t *req;
-       int ret = 0, runqueue = 1;
        size_t bcount, off;
        paddr_t ma;
        vaddr_t va;
        int nsects, nbytes, seg;
        int notify;
 
-       DPRINTF(("xbdstart(%p): b_bcount = %ld\n", bp, (long)bp->b_bcount));
+       while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
 
-       if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
-               bp->b_error = EIO;
-               goto err;
-       }
+               DPRINTF(("xbdstart(%p): b_bcount = %ld\n",
+                   bp, (long)bp->b_bcount));
 
-       if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
-               /* invalid block number */
-               bp->b_error = EINVAL;
-               goto err;
-       }
+               if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
+                       bp->b_error = EIO;
+                       goto err;
+               }
 
-       if (bp->b_rawblkno == sc->sc_xbdsize) {
-               /* at end of disk; return short read */
-               bp->b_resid = bp->b_bcount;
-               biodone(bp);
-               return 0;
-       }
+               if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
+                       /* invalid block number */
+                       bp->b_error = EINVAL;
+                       goto err;
+               }
+
+               if (bp->b_rawblkno == sc->sc_xbdsize) {
+                       /* at end of disk; return short read */
+                       bp->b_resid = bp->b_bcount;
+                       (void)bufq_get(dksc->sc_bufq);
+                       biodone(bp);
+                       continue;
+               }
 
-       if (__predict_false(sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
-               /* device is suspended, do not consume buffer */
-               DPRINTF(("%s: (xbdstart) device suspended\n",
-                   device_xname(sc->sc_dksc.sc_dev)));
-               ret = -1;
-               goto out;
-       }
-
-       if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
-               DPRINTF(("xbdstart: ring_full\n"));
-               ret = -1;
-               goto out;
-       }
-
-       xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
-       if (__predict_false(xbdreq == NULL)) {
-               DPRINTF(("xbdstart: no req\n"));
-               ret = -1; /* dk_start should not remove bp from queue */
-               goto out;
-       }
-
-       xbdreq->req_bp = bp;
-       xbdreq->req_data = bp->b_data;
-       if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
-               if (__predict_false(xbd_map_align(xbdreq) != 0)) {
-                       ret = -1;
+               if (__predict_false(
+                   sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
+                       /* device is suspended, do not consume buffer */
+                       DPRINTF(("%s: (xbdstart) device suspended\n",
+                           device_xname(sc->sc_dksc.sc_dev)));
                        goto out;
                }
-       }
-       /* now we're sure we'll send this buf */
-       disk_busy(&dksc->sc_dkdev);
-       SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
-       req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
-       req->id = xbdreq->req_id;
-       req->operation = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
-       req->sector_number = bp->b_rawblkno;
-       req->handle = sc->sc_handle;
-
-       va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
-       off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
-       if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >= sc->sc_xbdsize) {
-               bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE;
-               bp->b_resid = bp->b_bcount - bcount;
-       } else {
-               bcount = bp->b_bcount;
-               bp->b_resid = 0;
-       }
-       if (bcount > XBD_MAX_XFER) {
-               bp->b_resid += bcount - XBD_MAX_XFER;
-               bcount = XBD_MAX_XFER;
-       }
-       for (seg = 0; bcount > 0;) {
-               pmap_extract_ma(pmap_kernel(), va, &ma);
-               KASSERT((ma & (XEN_BSIZE - 1)) == 0);
-               if (bcount > PAGE_SIZE - off)
-                       nbytes = PAGE_SIZE - off;
-               else
-                       nbytes = bcount;
-               nsects = nbytes >> XEN_BSHIFT;
-               req->seg[seg].first_sect = off >> XEN_BSHIFT;
-               req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
-               KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
-               KASSERT(req->seg[seg].last_sect < 8);
-               if (__predict_false(xengnt_grant_access(
-                   sc->sc_xbusd->xbusd_otherend_id, ma,
-                   (bp->b_flags & B_READ) == 0, &xbdreq->req_gntref[seg])))
-                       panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! 
*/
-               req->seg[seg].gref = xbdreq->req_gntref[seg];
-               seg++;
-               KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
-               va += PAGE_SIZE;
-               off = 0;
-               bcount -= nbytes;
-       }
-       xbdreq->req_nr_segments = req->nr_segments = seg;
-       sc->sc_ring.req_prod_pvt++;
-       if (bufq_peek(sc->sc_dksc.sc_bufq)) {
-                /* we will be called again; don't notify guest yet */
-               runqueue = 0;
+
+               if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
+                       DPRINTF(("xbdstart: ring_full\n"));
+                       goto out;
+               }
+
+               xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
+               if (__predict_false(xbdreq == NULL)) {
+                       DPRINTF(("xbdstart: no req\n"));
+                       goto out;
+               }
+
+               xbdreq->req_bp = bp;
+               xbdreq->req_data = bp->b_data;
+               if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
+                       if (__predict_false(xbd_map_align(xbdreq) != 0)) {
+                               DPRINTF(("xbdstart: no align\n"));
+                               goto out;
+                       }
+               }
+               /* now we're sure we'll send this buf */
+               (void)bufq_get(dksc->sc_bufq);
+               disk_busy(&dksc->sc_dkdev);
+
+               SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+               req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
+               req->id = xbdreq->req_id;
+               req->operation =
+                   bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
+               req->sector_number = bp->b_rawblkno;
+               req->handle = sc->sc_handle;
+
+               va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
+               off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
+               if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >=
+                   sc->sc_xbdsize) {
+                       bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE;
+                       bp->b_resid = bp->b_bcount - bcount;
+               } else {
+                       bcount = bp->b_bcount;
+                       bp->b_resid = 0;
+               }
+               if (bcount > XBD_MAX_XFER) {
+                       bp->b_resid += bcount - XBD_MAX_XFER;
+                       bcount = XBD_MAX_XFER;
+               }
+               for (seg = 0; bcount > 0;) {
+                       pmap_extract_ma(pmap_kernel(), va, &ma);
+                       KASSERT((ma & (XEN_BSIZE - 1)) == 0);
+                       if (bcount > PAGE_SIZE - off)
+                               nbytes = PAGE_SIZE - off;
+                       else
+                               nbytes = bcount;
+                       nsects = nbytes >> XEN_BSHIFT;
+                       req->seg[seg].first_sect = off >> XEN_BSHIFT;
+                       req->seg[seg].last_sect =
+                           (off >> XEN_BSHIFT) + nsects - 1;
+                       KASSERT(req->seg[seg].first_sect <=
+                           req->seg[seg].last_sect);
+                       KASSERT(req->seg[seg].last_sect < 8);
+                       if (__predict_false(xengnt_grant_access(
+                           sc->sc_xbusd->xbusd_otherend_id, ma,
+                           (bp->b_flags & B_READ) == 0,
+                           &xbdreq->req_gntref[seg])))
+                               panic("xbdstart: xengnt_grant_access"); /* XXX 
XXX !!! */
+                       req->seg[seg].gref = xbdreq->req_gntref[seg];
+                       seg++;
+                       KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                       va += PAGE_SIZE;
+                       off = 0;
+                       bcount -= nbytes;
+               }
+               xbdreq->req_nr_segments = req->nr_segments = seg;
+               sc->sc_ring.req_prod_pvt++;
        }
 
 out:
-       if (runqueue) {
-               RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
-               if (notify)
-                       hypervisor_notify_via_evtchn(sc->sc_evtchn);
-       }
-       
-       return ret;
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
+       if (notify)
+               hypervisor_notify_via_evtchn(sc->sc_evtchn);
+       return;
 
 err:
+       (void)bufq_get(dksc->sc_bufq);
        bp->b_resid = bp->b_bcount;
        biodone(bp);
-       return 0;
+       return;
 }
 
 static int
Index: sys/dev/cgd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/cgd.c,v
retrieving revision 1.85
diff -u -p -u -b -r1.85 cgd.c
--- sys/dev/cgd.c       18 Mar 2014 15:44:37 -0000      1.85
+++ sys/dev/cgd.c       21 May 2014 14:01:10 -0000
@@ -100,7 +100,7 @@ static int cgd_destroy(device_t);
 
 /* Internal Functions */
 
-static int     cgdstart(struct dk_softc *, struct buf *);
+static void    cgdstart(struct dk_softc *);
 static void    cgdiodone(struct buf *);
 
 static int     cgd_ioctl_set(struct cgd_softc *, void *, struct lwp *);
@@ -381,18 +381,20 @@ cgd_putdata(struct dk_softc *dksc, void 
        }
 }
 
-static int
-cgdstart(struct dk_softc *dksc, struct buf *bp)
+static void
+cgdstart(struct dk_softc *dksc)
 {
        struct  cgd_softc *cs = (struct cgd_softc *)dksc;
-       struct  buf *nbp;
+       struct  buf *bp, *nbp;
        void *  addr;
        void *  newaddr;
        daddr_t bn;
        struct  vnode *vp;
 
+       while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
+
        DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp));
-       disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */
+               disk_busy(&dksc->sc_dkdev);
 
        bn = bp->b_rawblkno;
 
@@ -400,17 +402,15 @@ cgdstart(struct dk_softc *dksc, struct b
         * We attempt to allocate all of our resources up front, so that
         * we can fail quickly if they are unavailable.
         */
-
        nbp = getiobuf(cs->sc_tvn, false);
        if (nbp == NULL) {
                disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-               return -1;
+                       break;
        }
 
        /*
         * If we are writing, then we need to encrypt the outgoing
-        * block into a new block of memory.  If we fail, then we
-        * return an error and let the dksubr framework deal with it.
+                * block into a new block of memory.
         */
        newaddr = addr = bp->b_data;
        if ((bp->b_flags & B_READ) == 0) {
@@ -418,12 +418,13 @@ cgdstart(struct dk_softc *dksc, struct b
                if (!newaddr) {
                        putiobuf(nbp);
                        disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-                       return -1;
+                               break;
                }
                cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn,
                    DEV_BSIZE, CGD_CIPHER_ENCRYPT);
        }
-
+               /* we now have all needed resources to process this buf */
+               (void)bufq_get(dksc->sc_bufq);
        nbp->b_data = newaddr;
        nbp->b_flags = bp->b_flags;
        nbp->b_oflags = bp->b_oflags;
@@ -443,7 +444,7 @@ cgdstart(struct dk_softc *dksc, struct b
                mutex_exit(vp->v_interlock);
        }
        VOP_STRATEGY(cs->sc_tvn, nbp);
-       return 0;
+       }
 }
 
 static void
@@ -492,7 +493,7 @@ cgdiodone(struct buf *nbp)
        disk_unbusy(&dksc->sc_dkdev, obp->b_bcount - obp->b_resid,
            (obp->b_flags & B_READ));
        biodone(obp);
-       dk_iodone(di, dksc);
+       cgdstart(dksc);
        splx(s);
 }
 
Index: sys/dev/dksubr.c
===================================================================
RCS file: /cvsroot/src/sys/dev/dksubr.c,v
retrieving revision 1.49
diff -u -p -u -b -r1.49 dksubr.c
--- sys/dev/dksubr.c    28 Dec 2013 19:25:07 -0000      1.49
+++ sys/dev/dksubr.c    21 May 2014 14:01:10 -0000
@@ -224,37 +224,11 @@ dk_strategy(struct dk_intf *di, struct d
         */
        s = splbio();
        bufq_put(dksc->sc_bufq, bp);
-       dk_start(di, dksc);
+       di->di_diskstart(dksc);
        splx(s);
        return;
 }
 
-void
-dk_start(struct dk_intf *di, struct dk_softc *dksc)
-{
-       struct  buf *bp;
-
-       DPRINTF_FOLLOW(("dk_start(%s, %p)\n", di->di_dkname, dksc));
-
-       /* Process the work queue */
-       while ((bp = bufq_get(dksc->sc_bufq)) != NULL) {
-               if (di->di_diskstart(dksc, bp) != 0) {
-                       bufq_put(dksc->sc_bufq, bp);
-                       break;
-               }
-       }
-}
-
-void
-dk_iodone(struct dk_intf *di, struct dk_softc *dksc)
-{
-
-       DPRINTF_FOLLOW(("dk_iodone(%s, %p)\n", di->di_dkname, dksc));
-
-       /* We kick the queue in case we are able to get more work done */
-       dk_start(di, dksc);
-}
-
 int
 dk_size(struct dk_intf *di, struct dk_softc *dksc, dev_t dev)
 {
Index: sys/dev/dkvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/dkvar.h,v
retrieving revision 1.18
diff -u -p -u -b -r1.18 dkvar.h
--- sys/dev/dkvar.h     29 May 2013 23:25:55 -0000      1.18
+++ sys/dev/dkvar.h     21 May 2014 14:01:10 -0000
@@ -74,7 +74,7 @@ struct dk_intf {
        int     (*di_open)(dev_t, int, int, struct lwp *);
        int     (*di_close)(dev_t, int, int, struct lwp *);
        void    (*di_strategy)(struct buf *);
-       int     (*di_diskstart)(struct dk_softc *, struct buf *);
+       void    (*di_diskstart)(struct dk_softc *);
 };
 
 #define DK_BUSY(_dksc, _pmask)                         \
@@ -93,8 +93,6 @@ int   dk_open(struct dk_intf *, struct dk_
 int    dk_close(struct dk_intf *, struct dk_softc *, dev_t,
                 int, int, struct lwp *);
 void   dk_strategy(struct dk_intf *, struct dk_softc *, struct buf *);
-void   dk_start(struct dk_intf *, struct dk_softc *);
-void   dk_iodone(struct dk_intf *, struct dk_softc *);
 int    dk_size(struct dk_intf *, struct dk_softc *, dev_t);
 int    dk_ioctl(struct dk_intf *, struct dk_softc *, dev_t,
                 u_long, void *, int, struct lwp *);
Index: sys/arch/xen/xen/xbd_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xbd_xenbus.c,v
retrieving revision 1.62
diff -u -p -u -b -r1.62 xbd_xenbus.c
--- sys/arch/xen/xen/xbd_xenbus.c       16 Mar 2014 05:20:26 -0000      1.62
+++ sys/arch/xen/xen/xbd_xenbus.c       21 May 2014 14:01:10 -0000
@@ -168,7 +168,7 @@ static bool xbd_xenbus_suspend(device_t,
 static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
 
 static int  xbd_handler(void *);
-static int  xbdstart(struct dk_softc *, struct buf *);
+static void xbdstart(struct dk_softc *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
 
@@ -722,9 +722,10 @@ done:
        if (more_to_do)
                goto again;
 
-       dk_iodone(di, &sc->sc_dksc);
        if (sc->sc_xbdreq_wait)
                wakeup(&sc->sc_xbdreq_wait);
+       else
+               xbdstart(&sc->sc_dksc);
        return 1;
 }
 
@@ -926,20 +927,23 @@ xbddump(dev_t dev, daddr_t blkno, void *
        return dk_dump(di, &sc->sc_dksc, dev, blkno, va, size);
 }
 
-static int
-xbdstart(struct dk_softc *dksc, struct buf *bp)
+static void
+xbdstart(struct dk_softc *dksc)
 {
        struct xbd_xenbus_softc *sc = (struct xbd_xenbus_softc *)dksc;
+       struct buf *bp;
        struct xbd_req *xbdreq;
        blkif_request_t *req;
-       int ret = 0, runqueue = 1;
        size_t bcount, off;
        paddr_t ma;
        vaddr_t va;
        int nsects, nbytes, seg;
        int notify;
 
-       DPRINTF(("xbdstart(%p): b_bcount = %ld\n", bp, (long)bp->b_bcount));
+       while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
+
+               DPRINTF(("xbdstart(%p): b_bcount = %ld\n",
+                   bp, (long)bp->b_bcount));
 
        if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
                bp->b_error = EIO;
@@ -955,28 +959,27 @@ xbdstart(struct dk_softc *dksc, struct b
        if (bp->b_rawblkno == sc->sc_xbdsize) {
                /* at end of disk; return short read */
                bp->b_resid = bp->b_bcount;
+                       (void)bufq_get(dksc->sc_bufq);
                biodone(bp);
-               return 0;
+                       continue;
        }
 
-       if (__predict_false(sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
+               if (__predict_false(
+                   sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
                /* device is suspended, do not consume buffer */
                DPRINTF(("%s: (xbdstart) device suspended\n",
                    device_xname(sc->sc_dksc.sc_dev)));
-               ret = -1;
                goto out;
        }
 
        if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
                DPRINTF(("xbdstart: ring_full\n"));
-               ret = -1;
                goto out;
        }
 
        xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
        if (__predict_false(xbdreq == NULL)) {
                DPRINTF(("xbdstart: no req\n"));
-               ret = -1; /* dk_start should not remove bp from queue */
                goto out;
        }
 
@@ -984,22 +987,26 @@ xbdstart(struct dk_softc *dksc, struct b
        xbdreq->req_data = bp->b_data;
        if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
                if (__predict_false(xbd_map_align(xbdreq) != 0)) {
-                       ret = -1;
+                               DPRINTF(("xbdstart: no align\n"));
                        goto out;
                }
        }
        /* now we're sure we'll send this buf */
+               (void)bufq_get(dksc->sc_bufq);
        disk_busy(&dksc->sc_dkdev);
+
        SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
        req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
        req->id = xbdreq->req_id;
-       req->operation = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
+               req->operation =
+                   bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
        req->sector_number = bp->b_rawblkno;
        req->handle = sc->sc_handle;
 
        va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
        off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
-       if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >= sc->sc_xbdsize) {
+               if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >=
+                   sc->sc_xbdsize) {
                bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE;
                bp->b_resid = bp->b_bcount - bcount;
        } else {
@@ -1019,12 +1026,15 @@ xbdstart(struct dk_softc *dksc, struct b
                        nbytes = bcount;
                nsects = nbytes >> XEN_BSHIFT;
                req->seg[seg].first_sect = off >> XEN_BSHIFT;
-               req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
-               KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
+                       req->seg[seg].last_sect =
+                           (off >> XEN_BSHIFT) + nsects - 1;
+                       KASSERT(req->seg[seg].first_sect <=
+                           req->seg[seg].last_sect);
                KASSERT(req->seg[seg].last_sect < 8);
                if (__predict_false(xengnt_grant_access(
                    sc->sc_xbusd->xbusd_otherend_id, ma,
-                   (bp->b_flags & B_READ) == 0, &xbdreq->req_gntref[seg])))
+                           (bp->b_flags & B_READ) == 0,
+                           &xbdreq->req_gntref[seg])))
                        panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! 
*/
                req->seg[seg].gref = xbdreq->req_gntref[seg];
                seg++;
@@ -1035,24 +1045,19 @@ xbdstart(struct dk_softc *dksc, struct b
        }
        xbdreq->req_nr_segments = req->nr_segments = seg;
        sc->sc_ring.req_prod_pvt++;
-       if (bufq_peek(sc->sc_dksc.sc_bufq)) {
-                /* we will be called again; don't notify guest yet */
-               runqueue = 0;
        }
 
 out:
-       if (runqueue) {
                RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
                if (notify)
                        hypervisor_notify_via_evtchn(sc->sc_evtchn);
-       }
-       
-       return ret;
+       return;
 
 err:
+       (void)bufq_get(dksc->sc_bufq);
        bp->b_resid = bp->b_bcount;
        biodone(bp);
-       return 0;
+       return;
 }
 
 static int


Home | Main Index | Thread Index | Old Index