Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/kern according to benchmark extracting pkgsrc.tar, using...



details:   https://anonhg.NetBSD.org/src/rev/70bb9a1dd9d8
branches:  trunk
changeset: 357908:70bb9a1dd9d8
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Sat Dec 02 17:29:55 2017 +0000

description:
According to a benchmark extracting pkgsrc.tar, using FUA, and hence waiting
for each transfer to be written through to the medium, is way slower than just
letting the drive use cached writes and doing DIOCCACHESYNC at the end.

Results were (fs block 32KB / frag 4KB, partition aligned on 32KB boundary):
HDD at siisata(4):  no-FUA: 108 sec w/FUA: 294 sec
SSD at ahcisata(4): no-FUA:  73 sec w/FUA: 502 sec

Change the flags so that FUA is only used for the commit block write;
for journal data writes, only pass DPO and rely on the cache flush to get
them to the media.

diffstat:

 sys/kern/vfs_wapbl.c |  51 +++++++++++++++++++++++++++------------------------
 1 files changed, 27 insertions(+), 24 deletions(-)

diffs (161 lines):

diff -r 8b3c39ab2473 -r 70bb9a1dd9d8 sys/kern/vfs_wapbl.c
--- a/sys/kern/vfs_wapbl.c      Sat Dec 02 15:36:24 2017 +0000
+++ b/sys/kern/vfs_wapbl.c      Sat Dec 02 17:29:55 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vfs_wapbl.c,v 1.100 2017/10/27 12:25:15 joerg Exp $    */
+/*     $NetBSD: vfs_wapbl.c,v 1.101 2017/12/02 17:29:55 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.100 2017/10/27 12:25:15 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.101 2017/12/02 17:29:55 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -239,10 +239,9 @@
                (wapbl_allow_dpofua && ISSET((wl)->wl_dkcache, DKCACHE_FUA))
 #define WAPBL_JFLAGS(wl)       \
                (WAPBL_USE_FUA(wl) ? (wl)->wl_jwrite_flags : 0)
-#define WAPBL_MFLAGS(wl)       \
-               (WAPBL_USE_FUA(wl) ? (wl)->wl_mwrite_flags : 0)
+#define WAPBL_JDATA_FLAGS(wl)  \
+               (WAPBL_JFLAGS(wl) & B_MEDIA_DPO)        /* only DPO */
        int wl_jwrite_flags;    /* r:   journal write flags */
-       int wl_mwrite_flags;    /* r:   metadata write flags */
 };
 
 #ifdef WAPBL_DEBUG_PRINT
@@ -444,10 +443,8 @@
        }
 
        /* Use FUA instead of cache flush if available */
-       if (ISSET(wl->wl_dkcache, DKCACHE_FUA)) {
+       if (ISSET(wl->wl_dkcache, DKCACHE_FUA))
                wl->wl_jwrite_flags |= B_MEDIA_FUA;
-               wl->wl_mwrite_flags |= B_MEDIA_FUA;
-       }
 
        /* Use DPO for journal writes if available */
        if (ISSET(wl->wl_dkcache, DKCACHE_DPO))
@@ -998,7 +995,7 @@
        KASSERT(TAILQ_FIRST(&wl->wl_iobufs) == bp);
        TAILQ_REMOVE(&wl->wl_iobufs, bp, b_wapbllist);
 
-       bp->b_flags = B_WRITE | WAPBL_JFLAGS(wl);
+       bp->b_flags |= B_WRITE;
        bp->b_cflags = BC_BUSY; /* mandatory, asserted by biowait() */
        bp->b_oflags = 0;
        bp->b_bcount = bp->b_resid;
@@ -1043,7 +1040,7 @@
                error = biowait(bp);
 
                /* reset for reuse */
-               bp->b_blkno = bp->b_resid = 0;
+               bp->b_blkno = bp->b_resid = bp->b_flags = 0;
                TAILQ_INSERT_TAIL(&wl->wl_iobufs, bp, b_wapbllist);
                found = true;
 
@@ -1067,7 +1064,8 @@
  *     wapbl_buffered_flush.
  */
 static int
-wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn,
+    int bflags)
 {
        size_t resid;
        struct buf *bp;
@@ -1096,8 +1094,10 @@
         * If this write goes to an empty buffer we have to
         * save the disk block address first.
         */
-       if (bp->b_blkno == 0)
+       if (bp->b_blkno == 0) {
                bp->b_blkno = pbn;
+               bp->b_flags |= bflags;
+       }
 
        /*
         * Remaining space so this buffer ends on a buffer size boundary.
@@ -1164,7 +1164,8 @@
 #ifdef _KERNEL
                pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-               error = wapbl_buffered_write(data, slen, wl, pbn);
+               error = wapbl_buffered_write(data, slen, wl, pbn,
+                   WAPBL_JDATA_FLAGS(wl));
                if (error)
                        return error;
                data = (uint8_t *)data + slen;
@@ -1175,7 +1176,8 @@
 #ifdef _KERNEL
        pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-       error = wapbl_buffered_write(data, len, wl, pbn);
+       error = wapbl_buffered_write(data, len, wl, pbn,
+           WAPBL_JDATA_FLAGS(wl));
        if (error)
                return error;
        off += len;
@@ -1925,9 +1927,6 @@
                bp->b_iodone = wapbl_biodone;
                bp->b_private = we;
 
-               /* make sure the block is saved sync when FUA in use */
-               bp->b_flags |= WAPBL_MFLAGS(wl);
-
                bremfree(bp);
                wapbl_remove_buf_locked(wl, bp);
                mutex_exit(&wl->wl_mtx);
@@ -2399,8 +2398,8 @@
        int force = 1;
        int error;
 
-       /* Skip full cache sync if disabled, or when using FUA */
-       if (!wapbl_flush_disk_cache || WAPBL_USE_FUA(wl)) {
+       /* Skip full cache sync if disabled */
+       if (!wapbl_flush_disk_cache) {
                return 0;
        }
        if (verbose) {
@@ -2459,8 +2458,10 @@
        if (error)
                return error;
        /*
-        * flush disk cache to ensure that blocks we've written are actually
+        * Flush disk cache to ensure that blocks we've written are actually
         * written to the stable storage before the commit header.
+        * This flushes to disk not only journal blocks, but also all
+        * metadata blocks, written asynchronously since previous commit.
         *
         * XXX Calc checksum here, instead we do this for now
         */
@@ -2489,7 +2490,7 @@
 #ifdef _KERNEL
        pbn = btodb(pbn << wc->wc_log_dev_bshift);
 #endif
-       error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+       error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn, WAPBL_JFLAGS(wl));
        if (error)
                return error;
        error = wapbl_buffered_flush(wl, true);
@@ -2497,10 +2498,12 @@
                return error;
 
        /*
-        * flush disk cache to ensure that the commit header is actually
-        * written before meta data blocks.
+        * Flush disk cache to ensure that the commit header is actually
+        * written before meta data blocks. Commit block is written using
+        * FUA when enabled, in that case this flush is not needed.
         */
-       wapbl_cache_sync(wl, "2");
+       if (!WAPBL_USE_FUA(wl))
+               wapbl_cache_sync(wl, "2");
 
        /*
         * If the generation number was zero, write it out a second time.



Home | Main Index | Thread Index | Old Index