Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen add support for indirect segments, which ma...



details:   https://anonhg.NetBSD.org/src/rev/a97b64227c67
branches:  trunk
changeset: 931071:a97b64227c67
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Tue Apr 21 13:56:18 2020 +0000

description:
add support for indirect segments, which makes it possible to pass
up to MAXPHYS (an implementation limit; the interface allows more) in a
single request
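
As a rough illustration of the size limits involved, here is a standalone
sketch (not part of the commit) that works out the per-request byte counts,
assuming the usual x86 PAGE_SIZE of 4 KB, NetBSD's default MAXPHYS of 64 KB
and the standard blkif limit of 11 direct segments per request:

/*
 * Worked arithmetic for the request-size limits: a direct request tops
 * out at 11 pages (44 KB), short of MAXPHYS, while the indirect limit
 * added by this change covers a full MAXPHYS transfer.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1 << PAGE_SHIFT)	/* 4096, assumed */
#define MAXPHYS		(64 * 1024)		/* NetBSD default, assumed */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST	11	/* direct-segment ring limit */

/* mirrors the definitions added in the diff (parenthesized for standalone use) */
#define VBD_VA_SIZE			(MAXPHYS + PAGE_SIZE)
#define VBD_MAX_INDIRECT_SEGMENTS	(VBD_VA_SIZE >> PAGE_SHIFT)

int
main(void)
{
	printf("direct request max:   %d bytes (%d segments)\n",
	    BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE,
	    BLKIF_MAX_SEGMENTS_PER_REQUEST);
	printf("indirect request max: %d bytes (%d segments), MAXPHYS = %d\n",
	    VBD_MAX_INDIRECT_SEGMENTS * PAGE_SIZE,
	    VBD_MAX_INDIRECT_SEGMENTS, MAXPHYS);
	return 0;
}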

a request using indirect segments requires 1 extra copy hypercall per
request, but saves 2 shared memory hypercalls (map_grant/unmap_grant),
so it should be a net performance boost due to less TLB flushing
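
The extra copy hypercall pulls the frontend's page of segment descriptors
into the backend's local xbdi_seg[] array.  Below is a minimal sketch of
that step, assuming the standard Xen GNTTABOP_copy interface and the
xbdi_seg_dmamap loaded over xbdi_seg in the diff; the helper name, include
paths and error handling are illustrative assumptions, not the committed
code:

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

#include <xen/hypervisor.h>

/*
 * Fetch 'nsegs' blkif_request_segment entries from the frontend's grant
 * 'gref' into xbdi->xbdi_seg[] with one GNTTABOP_copy, instead of a
 * map_grant/unmap_grant pair.
 */
static int
fetch_indirect_segments(struct xbdback_instance *xbdi, grant_ref_t gref,
    unsigned int nsegs)
{
	gnttab_copy_t gop;

	memset(&gop, 0, sizeof(gop));
	gop.flags = GNTCOPY_source_gref;	/* source is a grant reference */
	gop.len = nsegs * sizeof(struct blkif_request_segment);

	gop.source.u.ref = gref;		/* frontend's indirect page */
	gop.source.domid = xbdi->xbdi_domid;
	gop.source.offset = 0;

	/* destination: the machine frame backing the local xbdi_seg[] */
	gop.dest.u.gmfn =
	    xbdi->xbdi_seg_dmamap->dm_segs[0].ds_addr >> PAGE_SHIFT;
	gop.dest.domid = DOMID_SELF;
	gop.dest.offset = 0;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0 ||
	    gop.status != GNTST_okay)
		return EINVAL;
	return 0;
}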

this also effectively doubles the disk queue size for xbd(4)

diffstat:

 sys/arch/xen/xen/xbdback_xenbus.c |  224 +++++++++++++++++++++++++++++--------
 1 files changed, 176 insertions(+), 48 deletions(-)

diffs (truncated from 383 to 300 lines):

diff -r 600bb71577f6 -r a97b64227c67 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Tue Apr 21 13:39:26 2020 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Tue Apr 21 13:56:18 2020 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbdback_xenbus.c,v 1.85 2020/04/20 19:29:09 jdolecek Exp $      */
+/*      $NetBSD: xbdback_xenbus.c,v 1.86 2020/04/21 13:56:18 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.85 2020/04/20 19:29:09 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.86 2020/04/21 13:56:18 jdolecek Exp $");
 
 #include <sys/atomic.h>
 #include <sys/buf.h>
@@ -73,6 +73,9 @@
 
 /* Need to alloc one extra page to account for possible mapping offset */
 #define VBD_VA_SIZE    (MAXPHYS + PAGE_SIZE)
+#define VBD_MAX_INDIRECT_SEGMENTS      VBD_VA_SIZE >> PAGE_SHIFT
+
+CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);
 
 struct xbdback_io;
 struct xbdback_instance;
@@ -179,8 +182,10 @@
        RING_IDX xbdi_req_prod; /* limit on request indices */
        xbdback_cont_t xbdi_cont;
        /* _request state: track requests fetched from ring */
-       struct xbdback_request *xbdi_req; /* if NULL, ignore following */
        blkif_request_t xbdi_xen_req;
+       struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS];
+       bus_dmamap_t xbdi_seg_dmamap;
+       grant_ref_t xbdi_in_gntref;
        /* _io state: I/O associated to this instance */
        struct xbdback_io *xbdi_io;
        /* other state */
@@ -221,9 +226,9 @@
                        struct xbdback_va *xio_xv;
                        vaddr_t xio_start_offset;       /* I/O start offset */
                        /* grants to map */
-                       grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
+                       grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
                        /* grants release */
-                       grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
+                       grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
                        uint16_t xio_nrma; /* number of guest pages */
                } xio_rw;
        } u;
@@ -374,6 +379,22 @@
        xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
        xbdi->xbdi_xbusd = xbusd;
 
+       if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE,
+           1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
+           &xbdi->xbdi_seg_dmamap) != 0) {
+               printf("%s: can't create dma map for indirect segments\n",
+                   xbdi->xbdi_name);
+               goto fail;
+       }
+       if (bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
+           xbdi->xbdi_seg_dmamap, xbdi->xbdi_seg,
+           sizeof(xbdi->xbdi_seg), NULL, BUS_DMA_WAITOK) != 0) {
+               printf("%s: can't load dma map for indirect segments\n",
+                   xbdi->xbdi_name);
+               goto fail;
+       }
+       KASSERT(xbdi->xbdi_seg_dmamap->dm_nsegs == 1);
+
        SLIST_INIT(&xbdi->xbdi_va_free);
        for (i = 0; i < BLKIF_RING_SIZE; i++) {
                xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
@@ -457,6 +478,9 @@
                }
        }
 
+       bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
+       bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
+
        mutex_destroy(&xbdi->xbdi_lock);
        cv_destroy(&xbdi->xbdi_cv);
        kmem_free(xbdi, sizeof(*xbdi));
@@ -804,6 +828,13 @@
                    xbusd->xbusd_path, err);
                goto abort;
        }
+       err = xenbus_printf(xbt, xbusd->xbusd_path,
+           "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
+       if (err) {
+               printf("xbdback: failed to write %s/feature-indirect: %d\n",
+                   xbusd->xbusd_path, err);
+               goto abort;
+       }
        err = xenbus_transaction_end(xbt, 0);
        if (err == EAGAIN)
                goto again;
@@ -936,39 +967,35 @@
  * the ring.
  */
 static void *
-xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj) 
+xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused) 
 {
-       blkif_request_t *req;
+       blkif_request_t *req, *reqn;
        blkif_x86_32_request_t *req32;
        blkif_x86_64_request_t *req64;
+       blkif_request_indirect_t *rin;
 
-       (void)obj;
-       req = &xbdi->xbdi_xen_req;
        if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
+               req = &xbdi->xbdi_xen_req;
+               memset(req, 0, sizeof(*req));
+
                switch(xbdi->xbdi_proto) {
                case XBDIP_NATIVE:
-                       memcpy(req, RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
-                           xbdi->xbdi_ring.ring_n.req_cons),
-                           sizeof(blkif_request_t));
+                       reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
+                           xbdi->xbdi_ring.ring_n.req_cons);
+                       req->operation = reqn->operation;
+                       req->id = reqn->id;
                        break;
                case XBDIP_32:
                        req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
                            xbdi->xbdi_ring.ring_n.req_cons);
                        req->operation = req32->operation;
-                       req->nr_segments = req32->nr_segments;
-                       req->handle = req32->handle;
                        req->id = req32->id;
-                       req->sector_number = req32->sector_number;
                        break;
-                           
                case XBDIP_64:
                        req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
                            xbdi->xbdi_ring.ring_n.req_cons);
                        req->operation = req64->operation;
-                       req->nr_segments = req64->nr_segments;
-                       req->handle = req64->handle;
                        req->id = req64->id;
-                       req->sector_number = req64->sector_number;
                        break;
                }
                __insn_barrier();
@@ -978,7 +1005,23 @@
                        xbdi->xbdi_req_prod,
                        xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
                        req->id));
-               switch(req->operation) {
+               switch (req->operation) {
+               case BLKIF_OP_INDIRECT:
+                       /* just check indirect_op, rest is handled later */
+                       rin = (blkif_request_indirect_t *)
+                           RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
+                               xbdi->xbdi_ring.ring_n.req_cons);
+                       if (rin->indirect_op != BLKIF_OP_READ &&
+                           rin->indirect_op != BLKIF_OP_WRITE) {
+                               if (ratecheck(&xbdi->xbdi_lasterr_time,
+                                   &xbdback_err_intvl)) {
+                                       printf("%s: unknown ind operation %d\n",
+                                           xbdi->xbdi_name,
+                                           rin->indirect_op);
+                               }
+                               goto fail;
+                       }
+                       /* FALLTHROUGH */
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        xbdi->xbdi_cont = xbdback_co_io;
@@ -993,6 +1036,7 @@
                                printf("%s: unknown operation %d\n",
                                    xbdi->xbdi_name, req->operation);
                        }
+fail:
                        xbdback_send_reply(xbdi, req->id, req->operation,
                            BLKIF_RSP_ERROR);
                        xbdi->xbdi_cont = xbdback_co_main_incr;
@@ -1046,6 +1090,7 @@
 {
        int work_to_do;
 
+       KASSERT(xbdi->xbdi_io == NULL);
        RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
        if (work_to_do)
                xbdi->xbdi_cont = xbdback_co_main;
@@ -1094,31 +1139,22 @@
  * then get the segment information directly from the ring request.
  */
 static void *
-xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
+xbdback_co_io(struct xbdback_instance *xbdi, void *obj __unused)
 {      
        int i, error;
-       blkif_request_t *req;
+       blkif_request_t *req, *reqn;
        blkif_x86_32_request_t *req32;
        blkif_x86_64_request_t *req64;
+       blkif_request_indirect_t *rinn;
+       blkif_x86_32_request_indirect_t *rin32;
+       blkif_x86_64_request_indirect_t *rin64;
 
-       (void)obj;
+       req = &xbdi->xbdi_xen_req;
 
        /* some sanity checks */
-       req = &xbdi->xbdi_xen_req;
-       if (req->nr_segments < 1 ||
-           req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
-               if (ratecheck(&xbdi->xbdi_lasterr_time,
-                   &xbdback_err_intvl)) {
-                       printf("%s: invalid number of segments: %d\n",
-                              xbdi->xbdi_name,
-                              xbdi->xbdi_xen_req.nr_segments);
-               }
-               error = EINVAL;
-               goto end;
-       }
-
        KASSERT(req->operation == BLKIF_OP_READ ||
-           req->operation == BLKIF_OP_WRITE);
+           req->operation == BLKIF_OP_WRITE ||
+           req->operation == BLKIF_OP_INDIRECT);
        if (req->operation == BLKIF_OP_WRITE) {
                if (xbdi->xbdi_ro) {
                        error = EROFS;
@@ -1127,28 +1163,90 @@
        }
 
        /* copy request segments */
-       switch(xbdi->xbdi_proto) {
+       switch (xbdi->xbdi_proto) {
        case XBDIP_NATIVE:
-               /* already copied in xbdback_co_main_loop */
+               reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
+                   xbdi->xbdi_ring.ring_n.req_cons);
+               req->handle = reqn->handle;
+               req->sector_number = reqn->sector_number;
+               if (reqn->operation == BLKIF_OP_INDIRECT) {
+                       rinn = (blkif_request_indirect_t *)reqn;
+                       req->operation = rinn->indirect_op;
+                       req->nr_segments = (uint8_t)rinn->nr_segments;
+                       if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
+                               goto bad_nr_segments;
+                       xbdi->xbdi_in_gntref = rinn->indirect_grefs[0];
+                       /* first_sect and segment grefs fetched later */
+               } else {
+                       req->nr_segments = reqn->nr_segments;
+                       if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                               goto bad_nr_segments;
+                       for (i = 0; i < req->nr_segments; i++)
+                               xbdi->xbdi_seg[i] = reqn->seg[i];
+                       xbdi->xbdi_in_gntref = 0;
+               }
                break;
        case XBDIP_32:
                req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
                    xbdi->xbdi_ring.ring_n.req_cons);
-               for (i = 0; i < req->nr_segments; i++)
-                       req->seg[i] = req32->seg[i];
+               req->handle = req32->handle;
+               req->sector_number = req32->sector_number;
+               if (req32->operation == BLKIF_OP_INDIRECT) {
+                       rin32 = (blkif_x86_32_request_indirect_t *)req32;
+                       req->operation = rin32->indirect_op;
+                       req->nr_segments = (uint8_t)rin32->nr_segments;
+                       if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
+                               goto bad_nr_segments;
+                       xbdi->xbdi_in_gntref = rin32->indirect_grefs[0];
+                       /* first_sect and segment grefs fetched later */
+               } else {
+                       req->nr_segments = req32->nr_segments;
+                       if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                               goto bad_nr_segments;
+                       for (i = 0; i < req->nr_segments; i++)
+                               xbdi->xbdi_seg[i] = req32->seg[i];
+                       xbdi->xbdi_in_gntref = 0;
+               }
                break;
        case XBDIP_64:
                req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
                    xbdi->xbdi_ring.ring_n.req_cons);
-               for (i = 0; i < req->nr_segments; i++)
-                       req->seg[i] = req64->seg[i];
+               req->handle = req64->handle;
+               req->sector_number = req64->sector_number;
+               if (req64->operation == BLKIF_OP_INDIRECT) {
+                       rin64 = (blkif_x86_64_request_indirect_t *)req64;
+                       req->nr_segments = (uint8_t)rin64->nr_segments;
+                       if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
+                               goto bad_nr_segments;
+                       xbdi->xbdi_in_gntref = rin64->indirect_grefs[0];
+                       /* first_sect and segment grefs fetched later */
+               } else {
+                       req->nr_segments = req64->nr_segments;
+                       if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)


