Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/rump/librump/rumpvfs rework bio hypercalls, part 2:



details:   https://anonhg.NetBSD.org/src/rev/dbd19be41221
branches:  trunk
changeset: 786466:dbd19be41221
user:      pooka <pooka%NetBSD.org@localhost>
date:      Mon Apr 29 13:07:37 2013 +0000

description:
rework bio hypercalls, part 2:

Nuke all the policy hacks (r/w, mmap, directio) from the paravirtualized
block driver and let the hypervisor decide how it wants to optimize
the I/O.  It can prepare for this based on whether a file is opened
with the RUMPUSER_OPEN_BIO flag.

mmap was not faster than r/w except in a niche case (yes, it made a
good measurement), and directio was never on by default since
it was tricky at best to decide on the kernel side of things whether
directio would do the right thing.

diffstat:

 sys/rump/librump/rumpvfs/rumpblk.c |  307 +-----------------------------------
 1 files changed, 10 insertions(+), 297 deletions(-)

diffs (truncated from 424 to 300 lines):

diff -r 64b7861686bc -r dbd19be41221 sys/rump/librump/rumpvfs/rumpblk.c
--- a/sys/rump/librump/rumpvfs/rumpblk.c        Mon Apr 29 12:59:55 2013 +0000
+++ b/sys/rump/librump/rumpvfs/rumpblk.c        Mon Apr 29 13:07:37 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rumpblk.c,v 1.49 2013/04/29 12:56:03 pooka Exp $       */
+/*     $NetBSD: rumpblk.c,v 1.50 2013/04/29 13:07:37 pooka Exp $       */
 
 /*
  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
@@ -34,25 +34,10 @@
  *
  * We provide fault injection.  The driver can be made to fail
  * I/O occasionally.
- *
- * The driver also provides an optimization for regular files by
- * using memory-mapped I/O.  This avoids kernel access for every
- * I/O operation.  It also gives finer-grained control of how to
- * flush data.  Additionally, in case the rump kernel dumps core,
- * we get way less carnage.
- *
- * However, it is quite costly in writing large amounts of
- * file data, since old contents cannot merely be overwritten, but
- * must be paged in first before replacing (i.e. r/m/w).  Ideally,
- * we should use directio.  The problem is that directio can fail
- * silently causing improper file system semantics (i.e. unflushed
- * data).  Therefore, default to mmap for now.  Even so, directio
- * _should_ be safe and can be enabled by compiling this module
- * with -DHAS_DIRECTIO.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.49 2013/04/29 12:56:03 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.50 2013/04/29 13:07:37 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/buf.h>
@@ -72,68 +57,29 @@
 #include "rump_private.h"
 #include "rump_vfs_private.h"
 
-/*
- * O_DIRECT is the fastest alternative, but since it falls back to
- * non-direct writes silently, I am not sure it will always be 100% safe.
- * Use it and play with it, but do that with caution.
- */
-#if 0
-#define HAS_ODIRECT
-#endif
-
 #if 0
 #define DPRINTF(x) printf x
 #else
 #define DPRINTF(x)
 #endif
 
-/* Default: 16 x 1MB windows */
-unsigned memwinsize = (1<<20);
-unsigned memwincnt = 16;
-
-#define STARTWIN(off)          ((off) & ~((off_t)memwinsize-1))
-#define INWIN(win,off)         ((win)->win_off == STARTWIN(off))
-#define WINSIZE(rblk, win)     (MIN((rblk->rblk_hostsize-win->win_off), \
-                                     memwinsize))
-#define WINVALID(win)          ((win)->win_off != (off_t)-1)
-#define WINVALIDATE(win)       ((win)->win_off = (off_t)-1)
-struct blkwin {
-       off_t win_off;
-       void *win_mem;
-       int win_refcnt;
-
-       TAILQ_ENTRY(blkwin) win_lru;
-};
-
 #define RUMPBLK_SIZE 16
 static struct rblkdev {
        char *rblk_path;
        int rblk_fd;
        int rblk_mode;
-#ifdef HAS_ODIRECT
-       int rblk_dfd;
-#endif
+
        uint64_t rblk_size;
        uint64_t rblk_hostoffset;
        uint64_t rblk_hostsize;
        int rblk_ftype;
 
-       /* for mmap */
-       int rblk_mmflags;
-       kmutex_t rblk_memmtx;
-       kcondvar_t rblk_memcv;
-       TAILQ_HEAD(winlru, blkwin) rblk_lruq;
-       bool rblk_waiting;
-
        struct disklabel rblk_label;
 } minors[RUMPBLK_SIZE];
 
 static struct evcnt ev_io_total;
 static struct evcnt ev_io_async;
 
-static struct evcnt ev_memblk_hits;
-static struct evcnt ev_memblk_busy;
-
 static struct evcnt ev_bwrite_total;
 static struct evcnt ev_bwrite_async;
 static struct evcnt ev_bread_total;
@@ -209,105 +155,6 @@
        lp->d_checksum = 0; /* XXX */
 }
 
-static struct blkwin *
-getwindow(struct rblkdev *rblk, off_t off, int *wsize, int *error)
-{
-       struct blkwin *win;
-
-       mutex_enter(&rblk->rblk_memmtx);
- retry:
-       /* search for window */
-       TAILQ_FOREACH(win, &rblk->rblk_lruq, win_lru) {
-               if (INWIN(win, off) && WINVALID(win))
-                       break;
-       }
-
-       /* found?  return */
-       if (win) {
-               ev_memblk_hits.ev_count++;
-               TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru);
-               goto good;
-       }
-
-       /*
-        * Else, create new window.  If the least recently used is not
-        * currently in use, reuse that.  Otherwise we need to wait.
-        */
-       win = TAILQ_LAST(&rblk->rblk_lruq, winlru);
-       if (win->win_refcnt == 0) {
-               TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru);
-               mutex_exit(&rblk->rblk_memmtx);
-
-               if (WINVALID(win)) {
-                       DPRINTF(("win %p, unmap mem %p, off 0x%" PRIx64 "\n",
-                           win, win->win_mem, win->win_off));
-                       rumpuser_unmap(win->win_mem, WINSIZE(rblk, win));
-                       WINVALIDATE(win);
-               }
-
-               win->win_off = STARTWIN(off);
-               win->win_mem = rumpuser_filemmap(rblk->rblk_fd, win->win_off,
-                   WINSIZE(rblk, win), rblk->rblk_mmflags, error);
-               DPRINTF(("win %p, off 0x%" PRIx64 ", mem %p\n",
-                   win, win->win_off, win->win_mem));
-
-               mutex_enter(&rblk->rblk_memmtx);
-               if (win->win_mem == NULL) {
-                       WINVALIDATE(win);
-                       TAILQ_INSERT_TAIL(&rblk->rblk_lruq, win, win_lru);
-                       mutex_exit(&rblk->rblk_memmtx);
-                       return NULL;
-               }
-       } else {
-               DPRINTF(("memwin wait\n"));
-               ev_memblk_busy.ev_count++;
-
-               rblk->rblk_waiting = true;
-               cv_wait(&rblk->rblk_memcv, &rblk->rblk_memmtx);
-               goto retry;
-       }
-
- good:
-       KASSERT(win);
-       win->win_refcnt++;
-       TAILQ_INSERT_HEAD(&rblk->rblk_lruq, win, win_lru);
-       mutex_exit(&rblk->rblk_memmtx);
-       *wsize = MIN(*wsize, memwinsize - (off-win->win_off));
-       KASSERT(*wsize);
-
-       return win;
-}
-
-static void
-putwindow(struct rblkdev *rblk, struct blkwin *win)
-{
-
-       mutex_enter(&rblk->rblk_memmtx);
-       if (--win->win_refcnt == 0 && rblk->rblk_waiting) {
-               rblk->rblk_waiting = false;
-               cv_broadcast(&rblk->rblk_memcv);
-       }
-       KASSERT(win->win_refcnt >= 0);
-       mutex_exit(&rblk->rblk_memmtx);
-}
-
-static void
-wincleanup(struct rblkdev *rblk)
-{
-       struct blkwin *win;
-
-       while ((win = TAILQ_FIRST(&rblk->rblk_lruq)) != NULL) {
-               TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru);
-               if (WINVALID(win)) {
-                       DPRINTF(("cleanup win %p addr %p\n",
-                           win, win->win_mem));
-                       rumpuser_unmap(win->win_mem, WINSIZE(rblk, win));
-               }
-               kmem_free(win, sizeof(*win));
-       }
-       rblk->rblk_mmflags = 0;
-}
-
 int
 rumpblk_init(void)
 {
@@ -335,24 +182,6 @@
                blkfail = 0;
        }
 
-       if (rumpuser_getenv("RUMP_BLKWINSIZE", buf, sizeof(buf), &error) == 0) {
-               printf("rumpblk: ");
-               tmp = strtoul(buf, NULL, 10);
-               if (tmp && !(tmp & (tmp-1)))
-                       memwinsize = tmp;
-               else
-                       printf("invalid RUMP_BLKWINSIZE %d, ", tmp);
-               printf("using %d for memwinsize\n", memwinsize);
-       }
-       if (rumpuser_getenv("RUMP_BLKWINCOUNT", buf, sizeof(buf), &error) == 0){
-               printf("rumpblk: ");
-               tmp = strtoul(buf, NULL, 10);
-               if (tmp)
-                       memwincnt = tmp;
-               else
-                       printf("invalid RUMP_BLKWINCOUNT %d, ", tmp);
-               printf("using %d for memwincount\n", memwincnt);
-       }
        if (rumpuser_getenv("RUMP_BLKSECTSHIFT", buf, sizeof(buf), &error)==0){
                printf("rumpblk: ");
                tmp = strtoul(buf, NULL, 10);
@@ -367,8 +196,6 @@
 
        memset(minors, 0, sizeof(minors));
        for (i = 0; i < RUMPBLK_SIZE; i++) {
-               mutex_init(&minors[i].rblk_memmtx, MUTEX_DEFAULT, IPL_NONE);
-               cv_init(&minors[i].rblk_memcv, "rblkmcv");
                minors[i].rblk_fd = -1;
        }
 
@@ -384,11 +211,6 @@
        evcnt_attach_dynamic(&ev_bwrite_async, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "bytes written async");
 
-       evcnt_attach_dynamic(&ev_memblk_hits, EVCNT_TYPE_MISC, NULL,
-           "rumpblk", "window hits");
-       evcnt_attach_dynamic(&ev_memblk_busy, EVCNT_TYPE_MISC, NULL,
-           "rumpblk", "all windows busy");
-
        if (blkfail) {
                return devsw_attach("rumpblk",
                    &rumpblk_bdevsw_fail, &rumpblkmaj,
@@ -490,7 +312,6 @@
        rblk = &minors[i];
        backend_close(rblk);
 
-       wincleanup(rblk);
        free(rblk->rblk_path, M_TEMP);
        memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label));
        rblk->rblk_path = NULL;
@@ -512,74 +333,11 @@
                if (error)
                        return error;
                rblk->rblk_mode = FREAD;
-
-#ifdef HAS_ODIRECT
-               rblk->rblk_dfd = rumpuser_open(path,
-                   RUMPUSER_OPEN_RDONLY | RUMPUSER_OPEN_DIRECT, &error);
-               if (error) {
-                       close(fd);
-                       return error;
-               }
-#endif
        } else {
                rblk->rblk_mode = FREAD|FWRITE;
-
-#ifdef HAS_ODIRECT
-               rblk->rblk_dfd = rumpuser_open(path,
-                   RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_DIRECT, &error);
-               if (error) {
-                       close(fd);
-                       return error;
-               }
-#endif
        }
 
-       if (rblk->rblk_ftype == RUMPUSER_FT_REG) {
-               uint64_t fsize= rblk->rblk_hostsize, off= rblk->rblk_hostoffset;
-               struct blkwin *win;
-               int i, winsize;
-
-               /*
-                * Use mmap to access a regular file.  Allocate and
-                * cache initial windows here.  Failure to allocate one



Home | Main Index | Thread Index | Old Index