Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen xbdback: implement and publish "feature-flu...



details:   https://anonhg.NetBSD.org/src/rev/67289c8a7549
branches:  trunk
changeset: 747603:67289c8a7549
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Wed Sep 23 17:48:55 2009 +0000

description:
xbdback: implement and publish "feature-flush-cache".
xbd: if feature-flush-cache is present, use it for DIOCCACHESYNC.
 If not present, make DIOCCACHESYNC return EOPNOTSUPP and warn on
 first call.
Should improve WAPBL reliability of Xen guests on a NetBSD dom0.
Unfortunately, not all Linux guests seem to support this feature, and using
feature-write-barrier would require a B_BARRIER flag in the buffer.
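
For context (not part of the commit itself), the guest-visible effect is that a
cache sync request issued from userland now reaches the backend's disk. A
minimal sketch of such a caller, assuming /dev/rxbd0d as an example raw xbd
device node:

#include <sys/ioctl.h>
#include <sys/dkio.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	int fd, force = 0;

	/* Example device node; use the actual xbd disk/partition. */
	fd = open("/dev/rxbd0d", O_RDWR);
	if (fd == -1)
		err(1, "open");
	/*
	 * With this change the frontend turns DIOCCACHESYNC into a
	 * BLKIF_OP_FLUSH_DISKCACHE ring request when the backend
	 * advertises feature-flush-cache; otherwise the ioctl fails
	 * with EOPNOTSUPP and the driver warns once.
	 */
	if (ioctl(fd, DIOCCACHESYNC, &force) == -1)
		warn("DIOCCACHESYNC");
	close(fd);
	return 0;
}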

diffstat:

 sys/arch/xen/xen/xbd_xenbus.c     |  108 +++++++++++++++++++++++--
 sys/arch/xen/xen/xbdback_xenbus.c |  159 +++++++++++++++++++++++++++++++++++--
 2 files changed, 247 insertions(+), 20 deletions(-)

diffs (truncated from 461 to 300 lines):

diff -r d7ec97666d10 -r 67289c8a7549 sys/arch/xen/xen/xbd_xenbus.c
--- a/sys/arch/xen/xen/xbd_xenbus.c     Wed Sep 23 11:17:58 2009 +0000
+++ b/sys/arch/xen/xen/xbd_xenbus.c     Wed Sep 23 17:48:55 2009 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbd_xenbus.c,v 1.42 2009/09/21 21:59:30 bouyer Exp $      */
+/*      $NetBSD: xbd_xenbus.c,v 1.43 2009/09/23 17:48:55 bouyer Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.42 2009/09/21 21:59:30 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.43 2009/09/23 17:48:55 bouyer Exp $");
 
 #include "opt_xen.h"
 #include "rnd.h"
@@ -84,11 +84,24 @@
 struct xbd_req {
        SLIST_ENTRY(xbd_req) req_next;
        uint16_t req_id; /* ID passed to backend */
-       grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       int req_nr_segments; /* number of segments in this request */
-       struct buf *req_bp; /* buffer associated with this request */
-       void *req_data; /* pointer to the data buffer */
+       union {
+           struct {
+               grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+               int req_nr_segments; /* number of segments in this request */
+               struct buf *req_bp; /* buffer associated with this request */
+               void *req_data; /* pointer to the data buffer */
+           } req_rw;
+           struct {
+                   int s_error;
+                   volatile int s_done;
+           } req_sync;
+       } u;
 };
+#define req_gntref     u.req_rw.req_gntref
+#define req_nr_segments        u.req_rw.req_nr_segments
+#define req_bp         u.req_rw.req_bp
+#define req_data       u.req_rw.req_data
+#define req_sync       u.req_sync
 
 struct xbd_xenbus_softc {
        device_t sc_dev;
@@ -104,6 +117,7 @@
 
        struct xbd_req sc_reqs[XBD_RING_SIZE];
        SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
+       bool sc_xbdreq_wait; /* special waiting on xbd_req */
 
        int sc_backend_status; /* our status with backend */
 #define BLKIF_STATE_DISCONNECTED 0
@@ -119,6 +133,7 @@
        uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
        u_long sc_info; /* VDISK_* */
        u_long sc_handle; /* from backend */
+       int sc_cache_flush; /* backend supports BLKIF_OP_FLUSH_DISKCACHE */
 #if NRND > 0
        rndsource_element_t     sc_rnd_source;
 #endif
@@ -518,6 +533,7 @@
 {
        int err;
        unsigned long long sectors;
+       u_long cache_flush;
 
        err = xenbus_read_ul(NULL,
            sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
@@ -541,6 +557,14 @@
        if (err)
                panic("%s: can't read number from %s/sector-size\n", 
                    device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_otherend);
+       err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+           "feature-flush-cache", &cache_flush, 10);
+       if (err)
+               cache_flush = 0;
+       if (cache_flush > 0)
+               sc->sc_cache_flush = 1;
+       else
+               sc->sc_cache_flush = 0;
 
        xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
 }
@@ -564,9 +588,16 @@
        for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
                blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
                struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
+               DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
+                   xbdreq->req_bp, (long)bp->b_bcount));
                bp = xbdreq->req_bp;
-               DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
-                   bp, (long)bp->b_bcount));
+               if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
+                       xbdreq->req_sync.s_error = rep->status;
+                       xbdreq->req_sync.s_done = 1;
+                       wakeup(xbdreq);
+                       /* caller will free the req */
+                       continue;
+               }
                for (seg = xbdreq->req_nr_segments - 1; seg >= 0; seg--) {
                        if (__predict_false(
                            xengnt_status(xbdreq->req_gntref[seg]))) {
@@ -608,13 +639,15 @@
                biodone(bp);
                SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
        }
+done:
        xen_rmb();
        sc->sc_ring.rsp_cons = i;
        RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
        if (more_to_do)
                goto again;
-done:
        dk_iodone(sc->sc_di, &sc->sc_dksc);
+       if (sc->sc_xbdreq_wait)
+               wakeup(&sc->sc_xbdreq_wait);
        return 1;
 }
 
@@ -717,6 +750,10 @@
        struct  dk_softc *dksc;
        int     error;
        struct  disk *dk;
+       int s;
+       struct xbd_req *xbdreq;
+       blkif_request_t *req;
+       int notify;
 
        DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
            dev, cmd, data, flag, l));
@@ -731,6 +768,57 @@
        case DIOCSSTRATEGY:
                error = EOPNOTSUPP;
                break;
+       case DIOCCACHESYNC:
+               if (sc->sc_cache_flush <= 0) {
+                       if (sc->sc_cache_flush == 0) {
+                               aprint_error_dev(sc->sc_dev,
+                                   "WARNING: cache flush not supported "
+                                   "by backend\n");
+                               sc->sc_cache_flush = -1;
+                       }
+                       return EOPNOTSUPP;
+               }
+
+               s = splbio();
+
+               while (RING_FULL(&sc->sc_ring)) {
+                       sc->sc_xbdreq_wait = 1;
+                       tsleep(&sc->sc_xbdreq_wait, PRIBIO, "xbdreq", 0);
+               }
+               sc->sc_xbdreq_wait = 0;
+
+               xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
+               if (__predict_false(xbdreq == NULL)) {
+                       DPRINTF(("xbdioctl: no req\n"));
+                       error = ENOMEM;
+               } else {
+                       SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+                       req = RING_GET_REQUEST(&sc->sc_ring,
+                           sc->sc_ring.req_prod_pvt);
+                       req->id = xbdreq->req_id;
+                       req->operation = BLKIF_OP_FLUSH_DISKCACHE;
+                       req->handle = sc->sc_handle;
+                       xbdreq->req_sync.s_done = 0;
+                       sc->sc_ring.req_prod_pvt++;
+                       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring,
+                           notify);
+                       if (notify)
+                               hypervisor_notify_via_evtchn(sc->sc_evtchn);
+                       /* request sent, no wait for completion */
+                       while (xbdreq->req_sync.s_done == 0) {
+                               tsleep(xbdreq, PRIBIO, "xbdsync", 0);
+                       }
+                       if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
+                               error = EOPNOTSUPP;
+                       else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
+                               error = 0;
+                       else
+                               error = EIO;
+                       SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
+                           req_next);
+               }
+               splx(s);
+               break;
        default:
                error = dk_ioctl(sc->sc_di, dksc, dev, cmd, data, flag, l);
                break;
@@ -788,7 +876,7 @@
        }
                
 
-       if (RING_FULL(&sc->sc_ring)) {
+       if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
                DPRINTF(("xbdstart: ring_full\n"));
                ret = -1;
                goto out;
diff -r d7ec97666d10 -r 67289c8a7549 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Wed Sep 23 11:17:58 2009 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Wed Sep 23 17:48:55 2009 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbdback_xenbus.c,v 1.24 2009/01/21 09:55:53 cegger Exp $      */
+/*      $NetBSD: xbdback_xenbus.c,v 1.25 2009/09/23 17:48:55 bouyer Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.24 2009/01/21 09:55:53 cegger Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.25 2009/09/23 17:48:55 bouyer Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -91,6 +91,31 @@
  * it's finished, set xbdi->xbdi_cont (see below) to NULL and the return
  * doesn't matter.  Otherwise it's passed as the second parameter to
  * the new value of xbdi->xbdi_cont.
+ * Here's how the call graph is supposed to be for a single I/O:
+ * xbdback_co_main()   
+ *        |           |-> xbdback_co_cache_doflush() -> stall
+ *        |          xbdback_co_cache_flush2() <-  xbdback_co_flush_done() <-
+ *        |                              |                                   |
+ *        |              |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
+ * xbdback_co_main_loop() -> xbdback_co_main_done() -> xbdback_co_flush()
+ *        |                              |                      |
+ *        |                  xbdback_co_main_done2() <- xbdback_co_flush_done()
+ *        |                              |
+ *        |                  xbdback_co_main() or NULL
+ *   xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
+ *        |
+ *   xbdback_co_io_gotreq() -> xbdback_co_flush() -> xbdback_co_flush()
+ *        |                |                                |
+ *   xbdback_co_io_loop() ---        <---------------- xbdback_co_flush_done()
+ *        |                 |
+ *   xbdback_co_io_gotio()  |
+ *        |                 |
+ *   xbdback_co_io_gotio2()<-
+ *        |              |-------->  xbdback_co_io_gotfrag
+ *        |                              |
+ *   xbdback_co_io_gotfrag2() <----------|
+ *        |                 |--> xbdback_co_io_loop()
+ *   xbdback_co_main_incr()
  */
 typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
 
@@ -144,6 +169,7 @@
        grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
        /* other state */
        int xbdi_same_page; /* are we merging two segments on the same page? */
+       uint xbdi_pendingreqs; /* number of I/O in fly */
 };
 /* Manipulation of the above reference count. */
 /* XXXjld%panix.com@localhost: not MP-safe, and move the i386 asm elsewhere. */
@@ -180,16 +206,35 @@
  */
 struct xbdback_io {
        struct work xio_work;
-       struct buf xio_buf; /* our I/O */
        /* The instance pointer is duplicated for convenience. */
        struct xbdback_instance *xio_xbdi; /* our xbd instance */
-       SLIST_HEAD(, xbdback_fragment) xio_rq; /* xbd requests involved */
-       vaddr_t xio_vaddr; /* the virtual address to map the request at */
-       grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST]; /* grants to map */
-       grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];/* grants release */
-       uint16_t xio_nrma; /* number of guest pages */
-       uint16_t xio_mapped;
+       uint8_t xio_operation;
+       union {
+               struct {
+                       struct buf xio_buf; /* our I/O */
+                       /* xbd requests involved */
+                       SLIST_HEAD(, xbdback_fragment) xio_rq;
+                       /* the virtual address to map the request at */
+                       vaddr_t xio_vaddr;
+                       /* grants to map */
+                       grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
+                       /* grants release */
+                       grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
+                       uint16_t xio_nrma; /* number of guest pages */
+                       uint16_t xio_mapped;
+               } xio_rw;
+               uint64_t xio_flush_id;
+       } u;
 };
+#define xio_buf                u.xio_rw.xio_buf
+#define xio_rq         u.xio_rw.xio_rq
+#define xio_vaddr      u.xio_rw.xio_vaddr
+#define xio_gref       u.xio_rw.xio_gref
+#define xio_gh         u.xio_rw.xio_gh
+#define xio_nrma       u.xio_rw.xio_nrma
+#define xio_mapped     u.xio_rw.xio_mapped
+
+#define xio_flush_id   u.xio_flush_id
 
 /*
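
The truncated portion of the xbdback diff contains the xenstore side of the
negotiation. As a rough sketch only (xbt and xbusd stand for the transaction
and backend device handle the surrounding connect code already holds, and the
helper is the xenbus_printf() used elsewhere in this directory), publishing the
feature amounts to:

	/*
	 * Advertise BLKIF_OP_FLUSH_DISKCACHE support so the frontend's
	 * probe of "feature-flush-cache" (added above) finds it.
	 */
	error = xenbus_printf(xbt, xbusd->xbusd_path,
	    "feature-flush-cache", "%d", 1);
	if (error)
		printf("xbdback: failed to write feature-flush-cache: %d\n",
		    error);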


