tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
wapbl_flush() speedup
The attached diff tries to coalesce writes to the journal into
MAXPHYS-sized and MAXPHYS-aligned blocks.
Background is Edgar Fuß (see some long threads on tech-kern) seeing
very bad I/O speed for a wapbl-enabled file system on a raid5
when wapbl_flush() has to emit a long journal.
On a small test raid5 I get a speed increase by a factor of 3-4.
Comments or objections anyone?
--
J. Hannken-Illjes - hannken%eis.cs.tu-bs.de@localhost - TU Braunschweig
(Germany)
Index: vfs_wapbl.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_wapbl.c,v
retrieving revision 1.53
diff -p -u -2 -r1.53 vfs_wapbl.c
--- vfs_wapbl.c 17 Nov 2012 10:10:17 -0000 1.53
+++ vfs_wapbl.c 30 Nov 2012 13:39:23 -0000
@@ -185,4 +185,8 @@ struct wapbl {
SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
accounting */
+
+ u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */
+ daddr_t wl_buffer_addr; /* l: buffer base address */
+ size_t wl_buffer_len; /* l: buffer current use */
};
@@ -490,4 +494,7 @@ wapbl_start(struct wapbl ** wlp, struct
wl->wl_dealloclim);
+ wl->wl_buffer = wapbl_alloc(MAXPHYS);
+ wl->wl_buffer_len = 0;
+
wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
@@ -538,4 +545,5 @@ wapbl_start(struct wapbl ** wlp, struct
wapbl_free(wl->wl_dealloclens,
sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+ wapbl_free(wl->wl_buffer, MAXPHYS);
wapbl_inodetrk_free(wl);
wapbl_free(wl, sizeof(*wl));
@@ -717,4 +725,5 @@ wapbl_stop(struct wapbl *wl, int force)
wapbl_free(wl->wl_dealloclens,
sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+ wapbl_free(wl->wl_buffer, MAXPHYS);
wapbl_inodetrk_free(wl);
@@ -792,4 +801,70 @@ wapbl_read(void *data, size_t len, struc
/*
+ * Flush buffered data if any.  Returns 0 if nothing is buffered, else the wapbl_doio() result.
+ */
+static int
+wapbl_buffered_flush(struct wapbl *wl)
+{
+ int error;
+
+ if (wl->wl_buffer_len == 0)
+ return 0; /* nothing buffered, no I/O issued */
+
+ error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_len,
+ wl->wl_devvp, wl->wl_buffer_addr, B_WRITE);
+ wl->wl_buffer_len = 0; /* buffer is marked empty even if the write failed */
+
+ return error;
+}
+
+/*
+ * Write len bytes of data to the log at block address pbn; returns 0 or a write error.
+ * Adjacent writes are coalesced in wl_buffer and emitted so they end on a MAXPHYS boundary.
+ */
+static int
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+{
+ int error;
+ size_t resid;
+
+ /*
+ * If not adjacent to buffered data, flush first.
+ */
+ if (wl->wl_buffer_len > 0 &&
+ pbn != wl->wl_buffer_addr + btodb(wl->wl_buffer_len)) {
+ error = wapbl_buffered_flush(wl);
+ if (error)
+ return error;
+ }
+ if (wl->wl_buffer_len == 0)
+ wl->wl_buffer_addr = pbn; /* empty buffer: start a new run at pbn */
+ /*
+ * Remaining space so this buffer ends on a MAXPHYS boundary.
+ */
+ resid = MAXPHYS - dbtob(wl->wl_buffer_addr % btodb(MAXPHYS)) -
+ wl->wl_buffer_len;
+ KASSERT(resid > 0);
+ KASSERT(dbtob(btodb(resid)) == resid); /* presumably: resid is a whole number of device blocks */
+ if (len >= resid) { /* data reaches the boundary: fill the buffer and write it now */
+ memcpy(wl->wl_buffer + wl->wl_buffer_len, data, resid);
+ wl->wl_buffer_len += resid;
+ error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_len,
+ wl->wl_devvp, wl->wl_buffer_addr, B_WRITE);
+ data = (uint8_t *)data + resid; /* skip the part just written */
+ len -= resid;
+ wl->wl_buffer_addr = pbn + btodb(resid); /* block address of the remaining data */
+ wl->wl_buffer_len = 0;
+ if (error) /* checked only after the buffer state has been reset */
+ return error;
+ }
+ if (len > 0) { /* NOTE(review): no check here that the remaining len fits in wl_buffer */
+ memcpy(wl->wl_buffer + wl->wl_buffer_len, data, len);
+ wl->wl_buffer_len += len;
+ }
+
+ return 0;
+}
+
+/*
* Off is byte offset returns new offset for next write
* handles log wraparound
@@ -814,5 +889,5 @@ wapbl_circ_write(struct wapbl *wl, void
pbn = btodb(pbn << wl->wl_log_dev_bshift);
#endif
- error = wapbl_write(data, slen, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(data, slen, wl, pbn);
if (error)
return error;
@@ -825,5 +900,5 @@ wapbl_circ_write(struct wapbl *wl, void
pbn = btodb(pbn << wl->wl_log_dev_bshift);
#endif
- error = wapbl_write(data, len, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(data, len, wl, pbn);
if (error)
return error;
@@ -1968,4 +2043,7 @@ wapbl_write_commit(struct wapbl *wl, off
daddr_t pbn;
+ error = wapbl_buffered_flush(wl);
+ if (error)
+ return error;
/*
* flush disk cache to ensure that blocks we've written are actually
@@ -1999,5 +2077,8 @@ wapbl_write_commit(struct wapbl *wl, off
pbn = btodb(pbn << wc->wc_log_dev_bshift);
#endif
- error = wapbl_write(wc, wc->wc_len, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+ if (error)
+ return error;
+ error = wapbl_buffered_flush(wl);
if (error)
return error;
Home |
Main Index |
Thread Index |
Old Index