tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

wapbl_flush() speedup



The attached diff tries to coalesce writes to the journal into
MAXPHYS-sized, MAXPHYS-aligned blocks.

The background is that Edgar Fuß (see some long threads on tech-kern) is
seeing very bad I/O speed for a wapbl-enabled file system on a raid5
when wapbl_flush() has to emit a long journal.

On a small test raid5 I get a speedup by a factor of 3-4.

Comments or objections anyone?

--
J. Hannken-Illjes - hannken%eis.cs.tu-bs.de@localhost - TU Braunschweig 
(Germany)
Index: vfs_wapbl.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_wapbl.c,v
retrieving revision 1.53
diff -p -u -2 -r1.53 vfs_wapbl.c
--- vfs_wapbl.c 17 Nov 2012 10:10:17 -0000      1.53
+++ vfs_wapbl.c 30 Nov 2012 13:39:23 -0000
@@ -185,4 +185,8 @@ struct wapbl {
        SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
                                                   accounting */
+
+       u_char *wl_buffer;      /* l:   buffer for wapbl_buffered_write() */
+       daddr_t wl_buffer_addr; /* l:   buffer base address */
+       size_t wl_buffer_len;   /* l:   buffer current use */
 };
 
@@ -490,4 +494,7 @@ wapbl_start(struct wapbl ** wlp, struct 
            wl->wl_dealloclim);
 
+       wl->wl_buffer = wapbl_alloc(MAXPHYS);
+       wl->wl_buffer_len = 0;
+
        wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
 
@@ -538,4 +545,5 @@ wapbl_start(struct wapbl ** wlp, struct 
        wapbl_free(wl->wl_dealloclens,
            sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+       wapbl_free(wl->wl_buffer, MAXPHYS);
        wapbl_inodetrk_free(wl);
        wapbl_free(wl, sizeof(*wl));
@@ -717,4 +725,5 @@ wapbl_stop(struct wapbl *wl, int force)
        wapbl_free(wl->wl_dealloclens,
            sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+       wapbl_free(wl->wl_buffer, MAXPHYS);
        wapbl_inodetrk_free(wl);
 
@@ -792,4 +801,70 @@ wapbl_read(void *data, size_t len, struc
 
 /*
+ * Flush buffered data if any.
+ */
+static int
+wapbl_buffered_flush(struct wapbl *wl)
+{
+       int error;
+
+       if (wl->wl_buffer_len == 0)
+               return 0;
+
+       error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_len,
+           wl->wl_devvp, wl->wl_buffer_addr, B_WRITE);
+       wl->wl_buffer_len = 0;
+
+       return error;
+}
+
+/*
+ * Write data to the log.
+ * Try to coalesce writes and emit MAXPHYS aligned blocks.
+ */
+static int
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+{
+       int error;
+       size_t resid;
+
+       /*
+        * If not adjacent to buffered data, flush first.
+        */
+       if (wl->wl_buffer_len > 0 &&
+           pbn != wl->wl_buffer_addr + btodb(wl->wl_buffer_len)) {
+               error = wapbl_buffered_flush(wl);
+               if (error)
+                       return error;
+       }
+       if (wl->wl_buffer_len == 0)
+               wl->wl_buffer_addr = pbn;
+       /*
+        * Remaining space so this buffer ends on a MAXPHYS boundary.
+        */
+       resid = MAXPHYS - dbtob(wl->wl_buffer_addr % btodb(MAXPHYS)) -
+           wl->wl_buffer_len;
+       KASSERT(resid > 0);
+       KASSERT(dbtob(btodb(resid)) == resid);
+       if (len >= resid) {
+               memcpy(wl->wl_buffer + wl->wl_buffer_len, data, resid);
+               wl->wl_buffer_len += resid;
+               error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_len,
+                   wl->wl_devvp, wl->wl_buffer_addr, B_WRITE);
+               data = (uint8_t *)data + resid;
+               len -= resid;
+               wl->wl_buffer_addr = pbn + btodb(resid);
+               wl->wl_buffer_len = 0;
+               if (error)
+                       return error;
+       }
+       if (len > 0) {
+               memcpy(wl->wl_buffer + wl->wl_buffer_len, data, len);
+               wl->wl_buffer_len += len;
+       }
+
+       return 0;
+}
+
+/*
  * Off is byte offset returns new offset for next write
  * handles log wraparound
@@ -814,5 +889,5 @@ wapbl_circ_write(struct wapbl *wl, void 
                pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-               error = wapbl_write(data, slen, wl->wl_devvp, pbn);
+               error = wapbl_buffered_write(data, slen, wl, pbn);
                if (error)
                        return error;
@@ -825,5 +900,5 @@ wapbl_circ_write(struct wapbl *wl, void 
        pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-       error = wapbl_write(data, len, wl->wl_devvp, pbn);
+       error = wapbl_buffered_write(data, len, wl, pbn);
        if (error)
                return error;
@@ -1968,4 +2043,7 @@ wapbl_write_commit(struct wapbl *wl, off
        daddr_t pbn;
 
+       error = wapbl_buffered_flush(wl);
+       if (error)
+               return error;
        /*
         * flush disk cache to ensure that blocks we've written are actually
@@ -1999,5 +2077,8 @@ wapbl_write_commit(struct wapbl *wl, off
        pbn = btodb(pbn << wc->wc_log_dev_bshift);
 #endif
-       error = wapbl_write(wc, wc->wc_len, wl->wl_devvp, pbn);
+       error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+       if (error)
+               return error;
+       error = wapbl_buffered_flush(wl);
        if (error)
                return error;


Home | Main Index | Thread Index | Old Index