Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen if backend supports it, use indirect segmen...



details:   https://anonhg.NetBSD.org/src/rev/c29d699eaf19
branches:  trunk
changeset: 971173:c29d699eaf19
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Thu Apr 16 16:38:43 2020 +0000

description:
if the backend supports it, use indirect segments for I/O requests over 32KB

there is some overhead for backends without persistent mappings
(the backend needs to map the extra indirect segment page), but this
makes it possible to queue more I/O when using a 64KB block size

tested on NetBSD/amd64 DomU against Linux/amd64 Dom0
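
The negotiation itself is visible in the diff below: the new xbd_features()
reads the backend's "feature-max-indirect-segments" xenstore node and only
turns on BLKIF_FEATURE_INDIRECT when the advertised limit covers a full
MAXPHYS transfer. A minimal sketch of that check, assuming 4 KB pages and a
64 KB MAXPHYS; the names mirror the driver, but treat this as an
illustration, not the committed code:

    /*
     * Illustrative sketch only (not the committed code): decide whether
     * indirect segments can be used, based on what the backend advertises.
     * Assumes PAGE_SHIFT == 12 and MAXPHYS == 64 KB, so a maximal transfer
     * needs 16 data pages, more than fit in a single inline request.
     */
    u_long max_indirect;

    if (xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
        "feature-max-indirect-segments", &max_indirect, 10) != 0)
            max_indirect = 0;   /* node absent: no indirect support */

    if (max_indirect > (MAXPHYS >> PAGE_SHIFT)) {
            /* Backend can take all of MAXPHYS in one indirect request. */
            sc->sc_features |= BLKIF_FEATURE_INDIRECT;
    }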

diffstat:

 sys/arch/xen/xen/xbd_xenbus.c |  162 +++++++++++++++++++++++++++++++++++++++--
 1 files changed, 152 insertions(+), 10 deletions(-)

diffs (truncated from 344 to 300 lines):

diff -r 00efc2bdf04a -r c29d699eaf19 sys/arch/xen/xen/xbd_xenbus.c
--- a/sys/arch/xen/xen/xbd_xenbus.c     Thu Apr 16 15:58:13 2020 +0000
+++ b/sys/arch/xen/xen/xbd_xenbus.c     Thu Apr 16 16:38:43 2020 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbd_xenbus.c,v 1.115 2020/04/16 09:51:40 jdolecek Exp $      */
+/*      $NetBSD: xbd_xenbus.c,v 1.116 2020/04/16 16:38:43 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.115 2020/04/16 09:51:40 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.116 2020/04/16 16:38:43 jdolecek Exp $");
 
 #include "opt_xen.h"
 
@@ -103,6 +103,12 @@
 CTASSERT((MAXPHYS <= 2*XBD_MAX_CHUNK));
 CTASSERT(XEN_BSIZE == DEV_BSIZE);
 
+struct xbd_indirect {
+       SLIST_ENTRY(xbd_indirect) in_next;
+       struct blkif_request_segment *in_addr;
+       grant_ref_t in_gntref;
+};
+
 struct xbd_req {
        SLIST_ENTRY(xbd_req) req_next;
        uint16_t req_id; /* ID passed to backend */
@@ -114,6 +120,7 @@
                grant_ref_t req_gntref[XBD_XFER_LIMIT >> PAGE_SHIFT];
                struct buf *req_bp; /* buffer associated with this request */
                void *req_data; /* pointer to the data buffer */
+               struct xbd_indirect *req_indirect;      /* indirect page */
            } req_rw;
            struct {
                int s_error;
@@ -124,6 +131,7 @@
 #define req_gntref     u.req_rw.req_gntref
 #define req_bp         u.req_rw.req_bp
 #define req_data       u.req_rw.req_data
+#define req_indirect   u.req_rw.req_indirect
 #define req_sync       u.req_sync
 
 struct xbd_xenbus_softc {
@@ -144,6 +152,9 @@
        struct xbd_req sc_reqs[XBD_RING_SIZE];
        SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
 
+       struct xbd_indirect sc_indirect[XBD_RING_SIZE];
+       SLIST_HEAD(,xbd_indirect) sc_indirect_head;
+
        vmem_addr_t sc_unalign_buffer;
        struct xbd_req *sc_unalign_used;
 
@@ -166,11 +177,13 @@
 #define BLKIF_FEATURE_CACHE_FLUSH      0x1
 #define BLKIF_FEATURE_BARRIER          0x2
 #define BLKIF_FEATURE_PERSISTENT       0x4
+#define BLKIF_FEATURE_INDIRECT         0x8
 #define BLKIF_FEATURE_BITS             \
-       "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT"
+       "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT\4INDIRECT"
        struct evcnt sc_cnt_map_unalign;
        struct evcnt sc_cnt_unalign_busy;
        struct evcnt sc_cnt_queue_full;
+       struct evcnt sc_cnt_indirect;
 };
 
 #if 0
@@ -191,9 +204,12 @@
 static void xbd_iosize(device_t, int *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
+static void xbd_features(struct xbd_xenbus_softc *);
 
 static void xbd_diskstart_submit(struct xbd_xenbus_softc *, int,
        struct buf *bp, int, bus_dmamap_t, grant_ref_t *);
+static void xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *,
+       struct xbd_req *, struct buf *bp);
 static int  xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *);
 static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *, bool);
 
@@ -293,6 +309,8 @@
        cv_init(&sc->sc_detach_cv, "xbddetach");
        cv_init(&sc->sc_suspend_cv, "xbdsuspend");
 
+       xbd_features(sc);
+
        /* initialize free requests list */
        SLIST_INIT(&sc->sc_xbdreq_head);
        for (i = 0; i < XBD_RING_SIZE; i++) {
@@ -301,6 +319,22 @@
                    req_next);
        }
 
+       if (sc->sc_features & BLKIF_FEATURE_INDIRECT) {
+               /* initialize indirect page list */
+               for (i = 0; i < XBD_RING_SIZE; i++) {
+                       vmem_addr_t va;
+                       if (uvm_km_kmem_alloc(kmem_va_arena,
+                           PAGE_SIZE, VM_SLEEP | VM_INSTANTFIT, &va) != 0) {
+                               aprint_error_dev(self,
+                                   "can't alloc indirect pages\n");
+                               return;
+                       }
+                       sc->sc_indirect[i].in_addr = (void *)va;
+                       SLIST_INSERT_HEAD(&sc->sc_indirect_head,
+                           &sc->sc_indirect[i], in_next);
+               }
+       }
+
        sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
        sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
 
@@ -315,6 +349,8 @@
            NULL, device_xname(self), "map unaligned");
        evcnt_attach_dynamic(&sc->sc_cnt_queue_full, EVCNT_TYPE_MISC,
            NULL, device_xname(self), "queue full");
+       evcnt_attach_dynamic(&sc->sc_cnt_indirect, EVCNT_TYPE_MISC,
+           NULL, device_xname(self), "indirect segment");
 
        for (i = 0; i < XBD_RING_SIZE; i++) {
                if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
@@ -341,7 +377,6 @@
 
        if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
                aprint_error_dev(self, "couldn't establish power handler\n");
-
 }
 
 static int
@@ -441,6 +476,7 @@
        evcnt_detach(&sc->sc_cnt_map_unalign);
        evcnt_detach(&sc->sc_cnt_unalign_busy);
        evcnt_detach(&sc->sc_cnt_queue_full);
+       evcnt_detach(&sc->sc_cnt_indirect);
 
        pmf_device_deregister(dev);
 
@@ -509,6 +545,22 @@
        if (error)
                goto abort_resume;
 
+       if (sc->sc_features & BLKIF_FEATURE_INDIRECT) {
+               for (int i = 0; i < XBD_RING_SIZE; i++) {
+                       vaddr_t va = (vaddr_t)sc->sc_indirect[i].in_addr;
+                       KASSERT(va != 0);
+                       KASSERT((va & PAGE_MASK) == 0);
+                       (void)pmap_extract_ma(pmap_kernel(), va, &ma);
+                       if (xengnt_grant_access(
+                           sc->sc_xbusd->xbusd_otherend_id,
+                           ma, true, &sc->sc_indirect[i].in_gntref)) {
+                               aprint_error_dev(dev,
+                                   "indirect page grant failed\n");
+                               goto abort_resume;
+                       }
+               }
+       }
+
        error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
        if (error)
                goto abort_resume;
@@ -581,7 +633,7 @@
        struct xbd_xenbus_softc *sc = device_private((device_t)arg);
        struct disk_geom *dg;
 
-       char buf[32];
+       char buf[64];
        DPRINTF(("%s: new backend state %d\n",
            device_xname(sc->sc_dksc.sc_dev), new_state));
 
@@ -662,7 +714,6 @@
 {
        int err;
        unsigned long long sectors;
-       u_long val;
 
        err = xenbus_read_ul(NULL,
            sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
@@ -691,6 +742,15 @@
                    device_xname(sc->sc_dksc.sc_dev),
                    sc->sc_xbusd->xbusd_otherend);
 
+       xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
+}
+
+static void
+xbd_features(struct xbd_xenbus_softc *sc)
+{
+       int err;
+       u_long val;
+
        err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
            "feature-flush-cache", &val, 10);
        if (err)
@@ -712,7 +772,14 @@
        if (val > 0)
                sc->sc_features |= BLKIF_FEATURE_PERSISTENT;
 
-       xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
+       err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+           "feature-max-indirect-segments", &val, 10);
+       if (err)
+               val = 0;
+       if (val > (MAXPHYS >> PAGE_SHIFT)) {
+               /* We can use indirect segments, the limit is big enough */
+               sc->sc_features |= BLKIF_FEATURE_INDIRECT;
+       }
 }
 
 static int
@@ -723,6 +790,7 @@
        RING_IDX resp_prod, i;
        int more_to_do;
        int seg;
+       grant_ref_t gntref;
 
        DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev)));
 
@@ -795,8 +863,13 @@
                         * expect the backend to release the grant
                         * immediately.
                         */
-                       KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0);
-                       xengnt_revoke_access(xbdreq->req_gntref[seg]);
+                       if (xbdreq->req_indirect) {
+                               gntref =
+                                   xbdreq->req_indirect->in_addr[seg].gref;
+                       } else
+                               gntref = xbdreq->req_gntref[seg];
+                       KASSERT(xengnt_status(gntref) == 0);
+                       xengnt_revoke_access(gntref);
                }
 
                bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap);
@@ -807,6 +880,16 @@
 
                dk_done(&sc->sc_dksc, bp);
 
+               if (xbdreq->req_indirect) {
+                       /* No persistent mappings, so check that
+                        * backend unmapped the indirect segment grant too.
+                        */
+                       KASSERT(xengnt_status(xbdreq->req_indirect->in_gntref)
+                           == 0);
+                       SLIST_INSERT_HEAD(&sc->sc_indirect_head,
+                           xbdreq->req_indirect, in_next);
+                       xbdreq->req_indirect = NULL;
+               }
                SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
        }
 
@@ -1064,7 +1147,8 @@
                goto out;
        }
 
-       if (bp->b_bcount > XBD_MAX_CHUNK) {
+       if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) == 0
+           && bp->b_bcount > XBD_MAX_CHUNK) {
                if (!SLIST_NEXT(xbdreq, req_next)) {
                        DPRINTF(("%s: need extra req\n", __func__));
                        error = EAGAIN;
@@ -1124,6 +1208,13 @@
 
        /* We are now committed to the transfer */
        SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+
+       if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) != 0 &&
+           bp->b_bcount > XBD_MAX_CHUNK) {
+               xbd_diskstart_submit_indirect(sc, xbdreq, bp);
+               goto push;
+       }
+
        xbd_diskstart_submit(sc, xbdreq->req_id,
            bp, 0, xbdreq->req_dmamap, xbdreq->req_gntref);
 
@@ -1141,6 +1232,7 @@
                    xbdreq->req_gntref);
        }
 
+push:
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
        if (notify)
                hypervisor_notify_via_evtchn(sc->sc_evtchn);
@@ -1204,6 +1296,56 @@
        sc->sc_ring.req_prod_pvt++;
 }
 
+static void
+xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *sc,
+    struct xbd_req *xbdreq, struct buf *bp)
+{
+       blkif_request_indirect_t *req;
+       paddr_t ma;
+       int nsects, nbytes, dmaseg, first_sect;
+       struct blkif_request_segment *reqseg;
+



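A quick back-of-the-envelope on why the indirect path helps (these figures are
mine, not part of the commit): a native blkif read/write request carries at
most BLKIF_MAX_SEGMENTS_PER_REQUEST (11) inline segments, and the NetBSD
frontend previously split anything above XBD_MAX_CHUNK (32 KB) across two ring
slots, as the removed b_bcount > XBD_MAX_CHUNK branch shows. An indirect
request instead points at extra grant-mapped pages, each holding an array of
struct blkif_request_segment, so a whole MAXPHYS transfer fits in one request.
The small standalone program below works through those numbers; the segment
struct layout follows Xen's public io/blkif.h and is an assumption about what
the NetBSD headers define, not code from this commit.

    /*
     * Standalone illustration of the blkif segment arithmetic; the struct
     * layout mirrors Xen's public io/blkif.h and is assumed, not taken
     * from the NetBSD headers touched by this commit.
     */
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t grant_ref_t;

    struct blkif_request_segment {
            grant_ref_t gref;               /* grant for one data page */
            uint8_t first_sect, last_sect;  /* 512-byte sector range in page */
    };

    #define PAGE_SIZE                       4096
    #define BLKIF_MAX_SEGMENTS_PER_REQUEST  11      /* inline segment limit */

    int
    main(void)
    {
            size_t per_page = PAGE_SIZE / sizeof(struct blkif_request_segment);

            printf("inline request covers at most %d KB\n",
                BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE / 1024);
            printf("one indirect page holds %zu segments (%zu KB of data)\n",
                per_page, per_page * PAGE_SIZE / 1024);
            printf("a 64 KB MAXPHYS transfer needs %d segments\n",
                64 * 1024 / PAGE_SIZE);
            return 0;
    }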