tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: wapbl_flush() speedup
On Dec 5, 2012, at 4:48 PM, Edgar Fuß <ef%math.uni-bonn.de@localhost> wrote:
>> The attached diff tries to coalesce writes to the journal in MAXPHYS
>> sized and aligned blocks.
> First, many thanks to hannken@ for ending my monthlong performance nightmare!
>
> Btw., this also explains my tstile lockups: writing the journal could take
> tens of seconds; because the journal was locked, most actions on the fs would
> stall for that time.
>
>> Comments or objections anyone?
> I have a few questions. But I am by far no file system expert. So identifiers
> confusing me may be evident for an expert or it may be just my ignorance that
> some invariants are not obvious to me.
>
>> + daddr_t wl_buffer_addr; /* l: buffer base address */
> With "base address" I was thinking of a memory address, not a disc address.
> Is it common practice to use xxx_addr for daddrs?
>
>> + size_t wl_buffer_len; /* l: buffer current use */
> "len" made me think of allocated length, not used length.
> Again, that use may be common practice.
>
>> + * If not adjacent to buffered dat flush first.
> s/dat/data/
>
>> + pbn != wl->wl_buffer_addr + btodb(wl->wl_buffer_len)) {
> Is it obvious to anyone but me why wl_buffer_addr is initialized at this
> point?
> I'm now sure it is, it just took me some time and a private mail to learn.
>
>> + KASSERT(resid > 0);
> It took me even more time to learn why resid is non-negative here.
> Would the explanation be worth a comment?
> However, I'm still not sure why it can't be zero.
>
>> + error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_len,
>> + wl->wl_devvp, wl->wl_buffer_addr, B_WRITE);
> I suppose it's on purpose not to use wapbl_write() here.
Yes, wapbl_buffered_write is a helper like wapbl_write and there is
no need to go through wapbl_write.
An updated diff (names and comments) is attached.
--
J. Hannken-Illjes - hannken%eis.cs.tu-bs.de@localhost - TU Braunschweig
(Germany)
Index: vfs_wapbl.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_wapbl.c,v
retrieving revision 1.53
diff -p -u -2 -r1.53 vfs_wapbl.c
--- vfs_wapbl.c 17 Nov 2012 10:10:17 -0000 1.53
+++ vfs_wapbl.c 5 Dec 2012 17:08:33 -0000
@@ -185,4 +185,8 @@ struct wapbl {
SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
accounting */
+
+ u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */
+ daddr_t wl_buffer_dblk; /* l: buffer disk block address */
+ size_t wl_buffer_used; /* l: buffer current use */
};
@@ -490,4 +494,7 @@ wapbl_start(struct wapbl ** wlp, struct
wl->wl_dealloclim);
+ wl->wl_buffer = wapbl_alloc(MAXPHYS);
+ wl->wl_buffer_used = 0;
+
wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
@@ -538,4 +545,5 @@ wapbl_start(struct wapbl ** wlp, struct
wapbl_free(wl->wl_dealloclens,
sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+ wapbl_free(wl->wl_buffer, MAXPHYS);
wapbl_inodetrk_free(wl);
wapbl_free(wl, sizeof(*wl));
@@ -717,4 +725,5 @@ wapbl_stop(struct wapbl *wl, int force)
wapbl_free(wl->wl_dealloclens,
sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+ wapbl_free(wl->wl_buffer, MAXPHYS);
wapbl_inodetrk_free(wl);
@@ -792,4 +801,78 @@ wapbl_read(void *data, size_t len, struc
/*
+ * Flush buffered data if any.
+ */
+static int
+wapbl_buffered_flush(struct wapbl *wl)
+{
+ int error;
+
+ if (wl->wl_buffer_used == 0)
+ return 0;
+
+ error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
+ wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
+ wl->wl_buffer_used = 0;
+
+ return error;
+}
+
+/*
+ * Write data to the log.
+ * Try to coalesce writes and emit MAXPHYS aligned blocks.
+ */
+static int
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+{
+ int error;
+ size_t resid;
+
+ /*
+ * If not adjacent to buffered data flush first. Disk block
+ * address is always valid for non-empty buffer.
+ */
+ if (wl->wl_buffer_used > 0 &&
+ pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) {
+ error = wapbl_buffered_flush(wl);
+ if (error)
+ return error;
+ }
+ /*
+ * If this write goes to an empty buffer we have to
+ * save the disk block address first.
+ */
+ if (wl->wl_buffer_used == 0)
+ wl->wl_buffer_dblk = pbn;
+ /*
+ * Remaining space so this buffer ends on a MAXPHYS boundary.
+ *
+ * Cannot become less or equal zero as the buffer would have been
+ * flushed on the last call then.
+ */
+ resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) -
+ wl->wl_buffer_used;
+ KASSERT(resid > 0);
+ KASSERT(dbtob(btodb(resid)) == resid);
+ if (len >= resid) {
+ memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid);
+ wl->wl_buffer_used += resid;
+ error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
+ wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
+ data = (uint8_t *)data + resid;
+ len -= resid;
+ wl->wl_buffer_dblk = pbn + btodb(resid);
+ wl->wl_buffer_used = 0;
+ if (error)
+ return error;
+ }
+ if (len > 0) {
+ memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len);
+ wl->wl_buffer_used += len;
+ }
+
+ return 0;
+}
+
+/*
* Off is byte offset returns new offset for next write
* handles log wraparound
@@ -814,5 +897,5 @@ wapbl_circ_write(struct wapbl *wl, void
pbn = btodb(pbn << wl->wl_log_dev_bshift);
#endif
- error = wapbl_write(data, slen, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(data, slen, wl, pbn);
if (error)
return error;
@@ -825,5 +908,5 @@ wapbl_circ_write(struct wapbl *wl, void
pbn = btodb(pbn << wl->wl_log_dev_bshift);
#endif
- error = wapbl_write(data, len, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(data, len, wl, pbn);
if (error)
return error;
@@ -1968,4 +2051,7 @@ wapbl_write_commit(struct wapbl *wl, off
daddr_t pbn;
+ error = wapbl_buffered_flush(wl);
+ if (error)
+ return error;
/*
* flush disk cache to ensure that blocks we've written are actually
@@ -1999,5 +2085,8 @@ wapbl_write_commit(struct wapbl *wl, off
pbn = btodb(pbn << wc->wc_log_dev_bshift);
#endif
- error = wapbl_write(wc, wc->wc_len, wl->wl_devvp, pbn);
+ error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+ if (error)
+ return error;
+ error = wapbl_buffered_flush(wl);
if (error)
return error;
Home |
Main Index |
Thread Index |
Old Index