Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Enhance disk metrics by calculating a weighted sum that is i...



details:   https://anonhg.NetBSD.org/src/rev/4800e3841085
branches:  trunk
changeset: 822170:4800e3841085
user:      mlelstv <mlelstv%NetBSD.org@localhost>
date:      Sun Mar 05 23:07:12 2017 +0000

description:
Enhance disk metrics by calculating a weighted sum that is incremented
by the number of concurrent I/O requests. Also introduce a new disk_wait()
function to measure requests waiting in a bufq.
iostat -y now reports data about waiting and active requests.

So far only drivers using dksubr and dk, ccd, wd and xbd collect data about
waiting requests.

diffstat:

 share/man/man9/disk.9         |   76 ++++++++++++++---------
 sys/arch/xen/xen/xbd_xenbus.c |   10 +-
 sys/dev/ata/wd.c              |    5 +-
 sys/dev/ccd.c                 |    7 +-
 sys/dev/dksubr.c              |   10 ++-
 sys/dev/dkwedge/dk.c          |    5 +-
 sys/kern/subr_disk.c          |   14 +++-
 sys/kern/subr_iostat.c        |  135 ++++++++++++++++++++++++++++++++++-------
 sys/sys/disk.h                |    3 +-
 sys/sys/iostat.h              |   23 ++++++-
 usr.bin/vmstat/drvstats.c     |   72 +++++++++++++++------
 usr.bin/vmstat/drvstats.h     |    7 +-
 usr.sbin/iostat/iostat.8      |   28 ++++++++-
 usr.sbin/iostat/iostat.c      |  112 ++++++++++++++++++++++++++++++++--
 14 files changed, 400 insertions(+), 107 deletions(-)

diffs (truncated from 1050 to 300 lines):

diff -r f96f9b08d6f2 -r 4800e3841085 share/man/man9/disk.9
--- a/share/man/man9/disk.9     Sun Mar 05 22:24:29 2017 +0000
+++ b/share/man/man9/disk.9     Sun Mar 05 23:07:12 2017 +0000
@@ -1,4 +1,4 @@
-.\"    $NetBSD: disk.9,v 1.43 2017/01/23 11:42:03 abhinav Exp $
+.\"    $NetBSD: disk.9,v 1.44 2017/03/05 23:07:12 mlelstv Exp $
 .\"
 .\" Copyright (c) 1995, 1996 Jason R. Thorpe.
 .\" All rights reserved.
@@ -40,6 +40,7 @@
 .Nm disk_begindetach ,
 .Nm disk_detach ,
 .Nm disk_destroy ,
+.Nm disk_wait ,
 .Nm disk_busy ,
 .Nm disk_unbusy ,
 .Nm disk_isbusy ,
@@ -61,6 +62,8 @@
 .Ft void
 .Fn disk_destroy "struct disk *"
 .Ft void
+.Fn disk_wait "struct disk *"
+.Ft void
 .Fn disk_busy "struct disk *"
 .Ft void
 .Fn disk_unbusy "struct disk *" "long bcount" "int read"
@@ -167,19 +170,25 @@
 .It Fn disk_destroy
 Release resources used by the disk structure when it is no longer
 required.
+.It Fn disk_wait
+Disk timings are measured by counting the number of queued
+requests (wait counter) and requests issued to the hardware (busy counter)
+and keeping a timestamp when the counters change. The time interval between
+two changes of a counter is accumulated into a total and also multiplied
+by the counter value and then accumulated into a sum. Both values can be
+used to determine how much time is spent in the driver queue or in-flight
+to the hardware as well as the average number of requests in either state.
+.Fn disk_wait
+increments the disk's wait counter and handles the accumulation.
 .It Fn disk_busy
-Increment the disk's
-.Dq busy counter .
-If this counter goes from 0 to 1, set the timestamp corresponding to
-this transfer.
+Decrements the disk's wait counter and increments the disk's
+.Dq busy counter ,
+and handles either accumulation. If the wait counter is still zero, it
+is assumed that the driver hasn't been updated to call
+.Fn disk_wait ,
+then only the values from the busy counter are available.
 .It Fn disk_unbusy
-Decrement a disk's busy counter.
-If the count drops below zero, panic.
-Get the current time, subtract it from the disk's timestamp, and add
-the difference to the disk's running total.
-Set the disk's timestamp to the current time.
-If the provided byte count is greater than 0, add it to the disk's
-running total and increment the number of transfers performed by the disk.
+Decrements the disk's busy counter and handles the accumulation.
 The third argument
 .Ar read
 specifies the direction of I/O;
@@ -212,6 +221,7 @@
 .Fn disk_begindetach ,
 .Fn disk_detach ,
 .Fn disk_destroy ,
+.Fn disk_wait ,
 .Fn disk_busy ,
 .Fn disk_unbusy ,
 and
@@ -403,8 +413,9 @@
 .Pp
 Once the disk is attached, metrics may be gathered on that disk.
 In order to gather metrics data, the driver must tell the framework when
-the disk starts and stops operations.
+the disk queues, starts and stops operations.
 This functionality is provided by the
+.Fn disk_wait ,
 .Fn disk_busy
 and
 .Fn disk_unbusy
@@ -413,6 +424,7 @@
 .Nm struct disk
 is part of device driver private data it needs to be guarded.
 Mutual exclusion must be done by driver
+.Fn disk_wait ,
 .Fn disk_busy
 and
 .Fn disk_unbusy
@@ -423,8 +435,22 @@
 sent, e.g.:
 .Bd -literal
 void
-foostart(sc)
-       struct foo_softc *sc;
+foostrategy(struct buf *bp)
+{
+       [ . . . ]
+
+       mutex_enter(\*[Am]sc-\*[Gt]sc_dk_mtx);
+       disk_wait(\*[Am]sc-\*[Gt]sc_dk);
+
+       /* Put buffer onto drive's transfer queue */
+
+       mutex_exit(\*[Am]sc-\*[Gt]sc_dk_mtx);
+
+       foostart(sc);
+}
+
+void
+foostart(struct foo_softc *sc)
 {
        [ . . . ]
 
@@ -444,26 +470,15 @@
 }
 .Ed
 .Pp
-When
-.Fn disk_busy
-is called, a timestamp is taken if the disk's busy counter moves from
-0 to 1, indicating the disk has gone from an idle to non-idle state.
-At the end of a transaction, the
+The routine
 .Fn disk_unbusy
-routine should be called.
-This routine performs some consistency checks,
-such as ensuring that the calls to
+performs some consistency checks, such as ensuring that the calls to
 .Fn disk_busy
 and
 .Fn disk_unbusy
 are balanced.
-This routine also performs the actual metrics calculation.
-A timestamp is taken and the difference from the timestamp taken in
-.Fn disk_busy
-is added to the disk's total running time.
-The disk's timestamp is then updated in case there is more than one
-pending transfer on the disk.
-A byte count is also added to the disk's running total, and if greater than
+It also performs the final steps of the metrics calculation.
+A byte count is added to the disk's running total, and if greater than
 zero, the number of transfers the disk has performed is incremented.
 The third argument
 .Ar read
@@ -506,6 +521,7 @@
 is used to get status of disk device it returns true if device is
 currently busy and false if it is not.
 Like
+.Fn disk_wait ,
 .Fn disk_busy
 and
 .Fn disk_unbusy
diff -r f96f9b08d6f2 -r 4800e3841085 sys/arch/xen/xen/xbd_xenbus.c
--- a/sys/arch/xen/xen/xbd_xenbus.c     Sun Mar 05 22:24:29 2017 +0000
+++ b/sys/arch/xen/xen/xbd_xenbus.c     Sun Mar 05 23:07:12 2017 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbd_xenbus.c,v 1.75 2015/10/25 07:51:16 maxv Exp $      */
+/*      $NetBSD: xbd_xenbus.c,v 1.76 2017/03/05 23:07:12 mlelstv Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.75 2015/10/25 07:51:16 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.76 2017/03/05 23:07:12 mlelstv Exp $");
 
 #include "opt_xen.h"
 
@@ -327,7 +327,7 @@
                sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL;
                /* wait for requests to complete */
                while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
-                   sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
+                   disk_isbusy(&sc->sc_dksc.sc_dkdev))
                        tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach", hz/2);
 
                xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
@@ -392,7 +392,7 @@
        s = splbio();
        /* wait for requests to complete, then suspend device */
        while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
-           sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
+           disk_isbusy(&sc->sc_dksc.sc_dkdev))
                tsleep(xbd_xenbus_suspend, PRIBIO, "xbdsuspend", hz/2);
 
        hypervisor_mask_event(sc->sc_evtchn);
@@ -530,7 +530,7 @@
                        sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
                /* wait for requests to complete */
                while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
-                   sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
+                   disk_isbusy(&sc->sc_dksc.sc_dkdev))
                        tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach", hz/2);
                splx(s);
                xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
diff -r f96f9b08d6f2 -r 4800e3841085 sys/dev/ata/wd.c
--- a/sys/dev/ata/wd.c  Sun Mar 05 22:24:29 2017 +0000
+++ b/sys/dev/ata/wd.c  Sun Mar 05 23:07:12 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wd.c,v 1.427 2016/11/20 02:35:19 pgoyette Exp $ */
+/*     $NetBSD: wd.c,v 1.428 2017/03/05 23:07:12 mlelstv Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.427 2016/11/20 02:35:19 pgoyette Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.428 2017/03/05 23:07:12 mlelstv Exp $");
 
 #include "opt_ata.h"
 
@@ -605,6 +605,7 @@
 
        /* Queue transfer on drive, activate drive and controller if idle. */
        s = splbio();
+       disk_wait(&wd->sc_dk);
        bufq_put(wd->sc_q, bp);
        wdstart(wd);
        splx(s);
diff -r f96f9b08d6f2 -r 4800e3841085 sys/dev/ccd.c
--- a/sys/dev/ccd.c     Sun Mar 05 22:24:29 2017 +0000
+++ b/sys/dev/ccd.c     Sun Mar 05 23:07:12 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ccd.c,v 1.168 2016/11/20 02:35:19 pgoyette Exp $       */
+/*     $NetBSD: ccd.c,v 1.169 2017/03/05 23:07:12 mlelstv Exp $        */
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc.
@@ -88,7 +88,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.168 2016/11/20 02:35:19 pgoyette Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.169 2017/03/05 23:07:12 mlelstv Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_compat_netbsd.h"
@@ -815,10 +815,11 @@
 
        KASSERT(mutex_owned(cs->sc_iolock));
 
-       disk_busy(&cs->sc_dkdev);
        bp = bufq_get(cs->sc_bufq);
        KASSERT(bp != NULL);
 
+       disk_busy(&cs->sc_dkdev);
+
 #ifdef DEBUG
        if (ccddebug & CCDB_FOLLOW)
                printf("ccdstart(%s, %p)\n", cs->sc_xname, bp);
diff -r f96f9b08d6f2 -r 4800e3841085 sys/dev/dksubr.c
--- a/sys/dev/dksubr.c  Sun Mar 05 22:24:29 2017 +0000
+++ b/sys/dev/dksubr.c  Sun Mar 05 23:07:12 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: dksubr.c,v 1.95 2017/02/25 15:19:00 mlelstv Exp $ */
+/* $NetBSD: dksubr.c,v 1.96 2017/03/05 23:07:12 mlelstv Exp $ */
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2008 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.95 2017/02/25 15:19:00 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.96 2017/03/05 23:07:12 mlelstv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -339,6 +339,7 @@
         * Queue buffer only
         */
        mutex_enter(&dksc->sc_iolock);
+       disk_wait(&dksc->sc_dkdev);
        bufq_put(dksc->sc_bufq, bp);
        mutex_exit(&dksc->sc_iolock);
 
@@ -375,8 +376,10 @@
 
        mutex_enter(&dksc->sc_iolock);
 
-       if (bp != NULL)
+       if (bp != NULL) {
+               disk_wait(&dksc->sc_dkdev);
                bufq_put(dksc->sc_bufq, bp);
+       }
 
        /*
         * If another thread is running the queue, increment
@@ -417,6 +420,7 @@
                        if (error == EAGAIN) {
                                dksc->sc_deferred = bp;
                                disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
+                               disk_wait(&dksc->sc_dkdev);
                                break;



Home | Main Index | Thread Index | Old Index