Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/uvm change code to take advantage of direct...



details:   https://anonhg.NetBSD.org/src/rev/79bf7a6f6375
branches:  trunk
changeset: 319170:79bf7a6f6375
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Sat May 19 15:13:26 2018 +0000

description:
Change the code to take advantage of the direct map when available, avoiding
the need to map pages into the kernel.

This improves the performance of UBC-based (read(2)/write(2)) I/O, especially
for cached block I/O: sequential read on my NVMe goes from 1.7 GB/s to 1.9 GB/s
for non-cached reads, and from 2.2 GB/s to 5.6 GB/s for cached reads.

The new code is conditional and disabled by default for now, so that it can be
tested further; it can be turned on by setting the ubc_direct variable to true.

Part of the fix for PR kern/53124.

diffstat:

 sys/uvm/uvm_bio.c |  241 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 223 insertions(+), 18 deletions(-)

diffs (truncated from 340 to 300 lines):

diff -r 6f5e4dcfe2b8 -r 79bf7a6f6375 sys/uvm/uvm_bio.c
--- a/sys/uvm/uvm_bio.c Sat May 19 15:03:26 2018 +0000
+++ b/sys/uvm/uvm_bio.c Sat May 19 15:13:26 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $    */
+/*     $NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $    */
 
 /*
  * Copyright (c) 1998 Chuck Silvers.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_ubc.h"
@@ -48,9 +48,9 @@
 
 #include <uvm/uvm.h>
 
-/*
- * global data structures
- */
+#ifdef PMAP_DIRECT
+#  define UBC_USE_PMAP_DIRECT
+#endif
 
 /*
  * local functions
@@ -59,6 +59,13 @@
 static int     ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
                          int, int, vm_prot_t, int);
 static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t);
+#ifdef UBC_USE_PMAP_DIRECT
+static int __noinline ubc_uiomove_direct(struct uvm_object *, struct uio *, vsize_t,
+                         int, int);
+static void __noinline ubc_zerorange_direct(struct uvm_object *, off_t, size_t, int);
+
+bool ubc_direct = false; /* XXX */
+#endif
 
 /*
  * local data structues
@@ -149,15 +156,12 @@
 void
 ubc_init(void)
 {
-       struct ubc_map *umap;
-       vaddr_t va;
-       int i;
-
        /*
         * Make sure ubc_winshift is sane.
         */
        if (ubc_winshift < PAGE_SHIFT)
                ubc_winshift = PAGE_SHIFT;
+       ubc_winsize = 1 << ubc_winshift;
 
        /*
         * init ubc_object.
@@ -174,10 +178,7 @@
        if (ubc_object.umap == NULL)
                panic("ubc_init: failed to allocate ubc_map");
 
-       if (ubc_winshift < PAGE_SHIFT) {
-               ubc_winshift = PAGE_SHIFT;
-       }
-       va = (vaddr_t)1L;
+       vaddr_t va = (vaddr_t)1L;
 #ifdef PMAP_PREFER
        PMAP_PREFER(0, &va, 0, 0);      /* kernel is never topdown */
        ubc_nqueues = va >> ubc_winshift;
@@ -185,13 +186,13 @@
                ubc_nqueues = 1;
        }
 #endif
-       ubc_winsize = 1 << ubc_winshift;
        ubc_object.inactive = kmem_alloc(UBC_NQUEUES *
            sizeof(struct ubc_inactive_head), KM_SLEEP);
-       for (i = 0; i < UBC_NQUEUES; i++) {
+       for (int i = 0; i < UBC_NQUEUES; i++) {
                TAILQ_INIT(&ubc_object.inactive[i]);
        }
-       for (i = 0; i < ubc_nwins; i++) {
+       for (int i = 0; i < ubc_nwins; i++) {
+               struct ubc_map *umap;
                umap = &ubc_object.umap[i];
                TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
                                  umap, inactive);
@@ -199,7 +200,7 @@
 
        ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true,
            &ubc_object.hashmask);
-       for (i = 0; i <= ubc_object.hashmask; i++) {
+       for (int i = 0; i <= ubc_object.hashmask; i++) {
                LIST_INIT(&ubc_object.hash[i]);
        }
 
@@ -562,6 +563,7 @@
            (uintptr_t)umap, umap->refcount, (uintptr_t)va, flags);
 
        if (flags & UBC_FAULTBUSY) {
+               // XXX add offset from slot_offset?
                int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
                struct vm_page *pgs[npages];
                int gpflags =
@@ -732,6 +734,12 @@
        KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
            ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
 
+#ifdef UBC_USE_PMAP_DIRECT
+       if (ubc_direct) {
+               return ubc_uiomove_direct(uobj, uio, todo, advice, flags);
+       }
+#endif
+
        off = uio->uio_offset;
        error = 0;
        while (todo > 0) {
@@ -769,13 +777,20 @@
 void
 ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
 {
-       void *win;
+
+#ifdef UBC_USE_PMAP_DIRECT
+       if (ubc_direct) {
+               ubc_zerorange_direct(uobj, off, len, flags);
+               return;
+       }
+#endif
 
        /*
         * XXXUBC invent kzero() and use it
         */
 
        while (len) {
+               void *win;
                vsize_t bytelen = len;
 
                win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, UBC_WRITE);
@@ -787,6 +802,196 @@
        }
 }
 
+#ifdef UBC_USE_PMAP_DIRECT
+/* Copy data using direct map */
+
+/*
+ * ubc_alloc_direct:  allocate a file mapping window using direct map
+ */
+static int __noinline
+ubc_alloc_direct(struct uvm_object *uobj, voff_t offset, vsize_t *lenp,
+    int advice, int flags, struct vm_page **pgs, int *npages)
+{
+       voff_t pgoff;
+       int error;
+       int gpflags = flags | PGO_NOTIMESTAMP | PGO_SYNCIO | PGO_ALLPAGES;
+       int access_type = VM_PROT_READ;
+
+       if (flags & UBC_WRITE) {
+               if (flags & UBC_FAULTBUSY)
+                       gpflags |= PGO_OVERWRITE;
+#if 0
+               KASSERT(!UVM_OBJ_NEEDS_WRITEFAULT(uobj));
+#endif
+
+               gpflags |= PGO_PASTEOF;
+               access_type |= VM_PROT_WRITE;
+       }
+
+       pgoff = (offset & PAGE_MASK);
+       *lenp = MIN(*lenp, ubc_winsize - pgoff);
+
+again:
+       *npages = (*lenp + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       KASSERT((*npages * PAGE_SIZE) <= ubc_winsize);
+       KASSERT(*lenp + pgoff <= ubc_winsize);
+       memset(pgs, 0, *npages * sizeof(pgs[0]));
+
+       mutex_enter(uobj->vmobjlock);
+       error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
+           npages, 0, access_type, advice, gpflags);
+       UVMHIST_LOG(ubchist, "alloc_direct getpages %jd", error, 0, 0, 0);
+       if (error) {
+               if (error == EAGAIN) {
+                       kpause("ubc_alloc_directg", false, hz >> 2, NULL);
+                       goto again;
+               }
+               return error;
+       }
+
+       mutex_enter(uobj->vmobjlock);
+       for (int i = 0; i < *npages; i++) {
+               struct vm_page *pg = pgs[i];
+
+               KASSERT(pg != NULL);
+               KASSERT(pg != PGO_DONTCARE);
+               KASSERT((pg->flags & PG_FAKE) == 0 || (gpflags & PGO_OVERWRITE));
+               KASSERT(pg->uobject->vmobjlock == uobj->vmobjlock);
+
+               /* Avoid breaking loan if possible, only do it on write */
+               if ((flags & UBC_WRITE) && pg->loan_count != 0) {
+                       pg = uvm_loanbreak(pg);
+                       if (pg == NULL) {
+                               uvm_page_unbusy(pgs, *npages);
+                               mutex_exit(uobj->vmobjlock);
+                               uvm_wait("ubc_alloc_directl");
+                               goto again;
+                       }
+                       pgs[i] = pg;
+               }
+
+               /* Page must be writable by now */
+               KASSERT((pg->flags & PG_RDONLY) == 0 || (flags & UBC_WRITE) == 0);
+
+               mutex_enter(&uvm_pageqlock);
+               uvm_pageactivate(pg);
+               mutex_exit(&uvm_pageqlock);
+
+               /* Page will be changed, no longer clean */
+               /* XXX do this AFTER the write? */
+               if (flags & UBC_WRITE)
+                       pg->flags &= ~(PG_FAKE|PG_CLEAN);
+       }
+       mutex_exit(uobj->vmobjlock);
+
+       return 0;
+}
+
+static int
+ubc_uiomove_process(void *win, size_t len, void *arg)
+{
+       struct uio *uio = (struct uio *)arg;
+
+       return uiomove(win, len, uio);
+}
+
+static int
+ubc_zerorange_process(void *win, size_t len, void *arg)
+{
+       memset(win, 0, len);
+       return 0;
+}
+
+static int __noinline
+ubc_uiomove_direct(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice,
+    int flags)
+{
+       const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
+       voff_t off;
+       int error, npages;
+       struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT];
+
+       KASSERT(todo <= uio->uio_resid);
+       KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
+           ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
+
+       off = uio->uio_offset;
+       error = 0;
+       while (todo > 0) {
+               vsize_t bytelen = todo;
+
+               error = ubc_alloc_direct(uobj, off, &bytelen, advice, flags,
+                   pgs, &npages);
+               if (error != 0) {
+                       /* can't do anything, failed to get the pages */
+                       break;
+               }
+
+               if (error == 0) {
+                       error = uvm_direct_process(pgs, npages, off, bytelen,
+                           ubc_uiomove_process, uio);
+               }
+               if (error != 0 && overwrite) {
+                       /*
+                        * if we haven't initialized the pages yet,
+                        * do it now.  it's safe to use memset here
+                        * because we just mapped the pages above.
+                        */
+                       printf("%s: error=%d\n", __func__, error);
+                       (void) uvm_direct_process(pgs, npages, off, bytelen,
+                           ubc_zerorange_process, NULL);
+               }
+
+               mutex_enter(uobj->vmobjlock);
+               uvm_page_unbusy(pgs, npages);
+               mutex_exit(uobj->vmobjlock);
+
+               off += bytelen;
+               todo -= bytelen;
+
+               if (error != 0 && ISSET(flags, UBC_PARTIALOK)) {
+                       break;
+               }
+       }
+
+       return error;



Home | Main Index | Thread Index | Old Index