Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/ic Fix a performance issue where one busy queue can ...



details:   https://anonhg.NetBSD.org/src/rev/617bc709100a
branches:  trunk
changeset: 452336:617bc709100a
user:      jmcneill <jmcneill%NetBSD.org@localhost>
date:      Fri Jun 28 15:08:47 2019 +0000

description:
Fix a performance issue where one busy queue can starve all other queues.

In normal operations with multiple queues, the nvme driver will attempt
to schedule I/O requests on the submitting CPU. This breaks down when any
one of the queues becomes full; the driver returns EAGAIN to the disk
layer, which causes the disk layer to stop submitting more requests until
the blocked request is consumed. When space becomes available in the full
queue, it pulls the next buffer from the bufq and fills the queue again,
until finally hitting EAGAIN and preventing other queues from processing
requests.

Two changes here to fix the problem:

 - When processing requests from the bufq, attempt to assign them to the
   queue associated with the CPU that originated the request.
 - If that queue is busy, try to find another queue with available space
   before returning EAGAIN. This way, only when all queues are full will
   the disk layer stop submitting more requests.

Now for some real numbers. On a Rockchip RK3399 board (6 CPUs), with 6
concurrent readers:

Old code:
        4294967296 bytes transferred in 52.420 secs (81933752 bytes/sec)
        4294967296 bytes transferred in 53.969 secs (79582117 bytes/sec)
        4294967296 bytes transferred in 55.391 secs (77539082 bytes/sec)
        4294967296 bytes transferred in 55.649 secs (77179595 bytes/sec)
        4294967296 bytes transferred in 56.102 secs (76556402 bytes/sec)
        4294967296 bytes transferred in 72.901 secs (58915066 bytes/sec)

New code:
        4294967296 bytes transferred in 37.171 secs (115546186 bytes/sec)
        4294967296 bytes transferred in 37.611 secs (114194445 bytes/sec)
        4294967296 bytes transferred in 37.655 secs (114061009 bytes/sec)
        4294967296 bytes transferred in 38.247 secs (112295534 bytes/sec)
        4294967296 bytes transferred in 38.496 secs (111569183 bytes/sec)
        4294967296 bytes transferred in 38.595 secs (111282997 bytes/sec)

diffstat:

 sys/dev/ic/nvme.c    |  13 ++++++-------
 sys/dev/ic/nvmevar.h |  18 +++++++++++++++---
 2 files changed, 21 insertions(+), 10 deletions(-)

diffs (95 lines):

diff -r c9df81eaaf51 -r 617bc709100a sys/dev/ic/nvme.c
--- a/sys/dev/ic/nvme.c Fri Jun 28 14:56:45 2019 +0000
+++ b/sys/dev/ic/nvme.c Fri Jun 28 15:08:47 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvme.c,v 1.43 2019/06/14 04:48:34 mrg Exp $    */
+/*     $NetBSD: nvme.c,v 1.44 2019/06/28 15:08:47 jmcneill Exp $       */
 /*     $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.43 2019/06/14 04:48:34 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.44 2019/06/28 15:08:47 jmcneill Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -645,7 +645,7 @@
     struct buf *bp, void *data, size_t datasize,
     int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
 {
-       struct nvme_queue *q = nvme_get_q(sc);
+       struct nvme_queue *q = nvme_get_q(sc, bp, false);
        struct nvme_ccb *ccb;
        bus_dmamap_t dmap;
        int i, error;
@@ -787,7 +787,7 @@
 int
 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags)
 {
-       struct nvme_queue *q = nvme_get_q(sc);
+       struct nvme_queue *q = nvme_get_q(sc, NULL, true);
        struct nvme_ccb *ccb;
        int result = 0;
 
@@ -797,8 +797,7 @@
        }
 
        ccb = nvme_ccb_get(q, true);
-       if (ccb == NULL)
-               return EAGAIN;
+       KASSERT(ccb != NULL);
 
        ccb->ccb_done = nvme_ns_sync_done;
        ccb->ccb_cookie = &result;
@@ -1155,7 +1154,7 @@
            (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts)))
                return EINVAL;
 
-       q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
+       q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc, NULL, true);
        ccb = nvme_ccb_get(q, true);
        KASSERT(ccb != NULL);
 
diff -r c9df81eaaf51 -r 617bc709100a sys/dev/ic/nvmevar.h
--- a/sys/dev/ic/nvmevar.h      Fri Jun 28 14:56:45 2019 +0000
+++ b/sys/dev/ic/nvmevar.h      Fri Jun 28 15:08:47 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvmevar.h,v 1.19 2019/04/24 23:39:23 mlelstv Exp $     */
+/*     $NetBSD: nvmevar.h,v 1.20 2019/06/28 15:08:47 jmcneill Exp $    */
 /*     $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
 
 /*
@@ -23,6 +23,7 @@
 #include <sys/mutex.h>
 #include <sys/pool.h>
 #include <sys/queue.h>
+#include <sys/buf.h>
 
 struct nvme_dmamem {
        bus_dmamap_t            ndm_map;
@@ -167,9 +168,20 @@
 void   nvme_softintr_msi(void *);
 
 static __inline struct nvme_queue *
-nvme_get_q(struct nvme_softc *sc)
+nvme_get_q(struct nvme_softc *sc, struct buf *bp, bool waitok)
 {
-       return sc->sc_q[cpu_index(curcpu()) % sc->sc_nq];
+       struct cpu_info *ci = (bp && bp->b_ci) ? bp->b_ci : curcpu();
+
+       /*
+        * Find a queue with available ccbs, preferring the originating CPU's queue.
+        */
+
+       for (u_int qoff = 0; qoff < sc->sc_nq; qoff++) {
+               struct nvme_queue *q = sc->sc_q[(cpu_index(ci) + qoff) % sc->sc_nq];
+               if (!SIMPLEQ_EMPTY(&q->q_ccb_list) || waitok)
+                       return q;
+       }
+       return NULL;
 }
 
 /*



Home | Main Index | Thread Index | Old Index