Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Add some experimental page-loaning for writes on sockets.



details:   https://anonhg.NetBSD.org/src/rev/3a80fa4d495b
branches:  trunk
changeset: 526358:3a80fa4d495b
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Thu May 02 17:55:48 2002 +0000

description:
Add some experimental page-loaning for writes on sockets.  It is disabled
by default, and can be enabled by adding the SOSEND_LOAN option to your
kernel config.  The SOSEND_COUNTERS option can be used to provide some
instrumentation.

Use of this option, combined with an application that does large enough
writes, gets us zero-copy on the TCP and UDP transmit path.

diffstat:

 sys/conf/files         |    5 +-
 sys/kern/uipc_socket.c |  285 +++++++++++++++++++++++++++++++++++++++++++++++-
 sys/sys/socketvar.h    |    3 +-
 3 files changed, 284 insertions(+), 9 deletions(-)

diffs (truncated from 417 to 300 lines):

diff -r d352786a8cbc -r 3a80fa4d495b sys/conf/files
--- a/sys/conf/files    Thu May 02 17:44:32 2002 +0000
+++ b/sys/conf/files    Thu May 02 17:55:48 2002 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files,v 1.523 2002/04/26 02:05:09 ad Exp $
+#      $NetBSD: files,v 1.524 2002/05/02 17:55:48 thorpej Exp $
 
 #      @(#)files.newconf       7.5 (Berkeley) 5/10/93
 
@@ -14,6 +14,9 @@
 defflag                                UCONSOLE
 defflag        opt_pipe.h              PIPE_SOCKETPAIR PIPE_NODIRECT
 
+defflag        opt_sock_counters.h     SOSEND_COUNTERS
+defflag                                SOSEND_LOAN
+
 defflag                                MULTIPROCESSOR
 
 defflag        opt_config.h            INCLUDE_CONFIG_FILE INCLUDE_JUST_CONFIG
diff -r d352786a8cbc -r 3a80fa4d495b sys/kern/uipc_socket.c
--- a/sys/kern/uipc_socket.c    Thu May 02 17:44:32 2002 +0000
+++ b/sys/kern/uipc_socket.c    Thu May 02 17:55:48 2002 +0000
@@ -1,4 +1,40 @@
-/*     $NetBSD: uipc_socket.c,v 1.63 2002/04/06 08:04:17 matt Exp $    */
+/*     $NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the NetBSD
+ *     Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
@@ -36,7 +72,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.63 2002/04/06 08:04:17 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $");
+
+#include "opt_sock_counters.h"
+#include "opt_sosend_loan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -53,19 +92,219 @@
 #include <sys/resourcevar.h>
 #include <sys/pool.h>
 
+#include <uvm/uvm.h>
+
 struct pool    socket_pool;
 
 extern int     somaxconn;                      /* patchable (XXX sysctl) */
 int            somaxconn = SOMAXCONN;
 
+#ifdef SOSEND_COUNTERS
+#include <sys/device.h>
+
+struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "sosend", "loan big");
+struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "sosend", "copy big");
+struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "sosend", "copy small");
+struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "sosend", "kva limit");
+
+#define        SOSEND_COUNTER_INCR(ev)         (ev)->ev_count++
+
+#else
+
+#define        SOSEND_COUNTER_INCR(ev)         /* nothing */
+
+#endif /* SOSEND_COUNTERS */
+
 void
 soinit(void)
 {
 
        pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
            "sockpl", NULL);
+
+#ifdef SOSEND_COUNTERS
+       evcnt_attach_static(&sosend_loan_big);
+       evcnt_attach_static(&sosend_copy_big);
+       evcnt_attach_static(&sosend_copy_small);
+       evcnt_attach_static(&sosend_kvalimit);
+#endif /* SOSEND_COUNTERS */
 }
 
+#ifdef SOSEND_LOAN
+
+struct mbuf *so_pendfree;
+
+int somaxkva = 16 * 1024 * 1024;
+int socurkva;
+int sokvawaiters;
+
+#define        SOCK_LOAN_THRESH        4096
+#define        SOCK_LOAN_CHUNK         65536
+
+static void
+sodoloanfree(caddr_t buf, u_int size)
+{
+       struct vm_page **pgs;
+       vaddr_t va, sva, eva;
+       vsize_t len;
+       paddr_t pa;
+       int i, npgs;
+
+       eva = round_page((vaddr_t) buf + size);
+       sva = trunc_page((vaddr_t) buf);
+       len = eva - sva;
+       npgs = len >> PAGE_SHIFT;
+
+       pgs = alloca(npgs * sizeof(*pgs));
+
+       for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
+               if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
+                       panic("sodoloanfree: va 0x%lx not mapped", va);
+               pgs[i] = PHYS_TO_VM_PAGE(pa);
+       }
+
+       pmap_kremove(sva, len);
+       pmap_update(pmap_kernel());
+       uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
+       uvm_km_free(kernel_map, sva, len);
+       socurkva -= len;
+       if (sokvawaiters)
+               wakeup(&socurkva);
+}
+
+static size_t
+sodopendfree(struct socket *so)
+{
+       struct mbuf *m;
+       size_t rv = 0;
+       int s;
+
+       s = splvm();
+
+       for (;;) {
+               m = so_pendfree;
+               if (m == NULL)
+                       break;
+               so_pendfree = m->m_next;
+               splx(s);
+
+               rv += m->m_ext.ext_size;
+               sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
+               s = splvm();
+               pool_cache_put(&mbpool_cache, m);
+       }
+
+       for (;;) {
+               m = so->so_pendfree;
+               if (m == NULL)
+                       break;
+               so->so_pendfree = m->m_next;
+               splx(s);
+
+               rv += m->m_ext.ext_size;
+               sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size);
+               s = splvm();
+               pool_cache_put(&mbpool_cache, m);
+       }
+
+       splx(s);
+       return (rv);
+}
+
+static void
+soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg)
+{
+       struct socket *so = arg;
+       int s;
+
+       if (m == NULL) {
+               sodoloanfree(buf, size);
+               return;
+       }
+
+       s = splvm();
+       m->m_next = so->so_pendfree;
+       so->so_pendfree = m;
+       splx(s);
+       if (sokvawaiters)
+               wakeup(&socurkva);
+}
+
+static long
+sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
+{
+       struct iovec *iov = uio->uio_iov;
+       vaddr_t sva, eva;
+       vsize_t len;
+       struct vm_page **pgs;
+       vaddr_t lva, va;
+       int npgs, s, i, error;
+
+       if (uio->uio_segflg != UIO_USERSPACE)
+               return (0);
+
+       if (iov->iov_len < (size_t) space)
+               space = iov->iov_len;
+       if (space > SOCK_LOAN_CHUNK)
+               space = SOCK_LOAN_CHUNK;
+
+       eva = round_page((vaddr_t) iov->iov_base + space);
+       sva = trunc_page((vaddr_t) iov->iov_base);
+       len = eva - sva;
+       npgs = len >> PAGE_SHIFT;
+
+       while (socurkva + len > somaxkva) {
+               if (sodopendfree(so))
+                       continue;
+               SOSEND_COUNTER_INCR(&sosend_kvalimit);
+               s = splvm();
+               sokvawaiters++;
+               (void) tsleep(&socurkva, PVM, "sokva", 0);
+               sokvawaiters--;
+               splx(s);
+       }
+
+       lva = uvm_km_valloc_wait(kernel_map, len);
+       if (lva == 0)
+               return (0);
+       socurkva += len;
+
+       pgs = alloca(npgs * sizeof(*pgs));
+
+       error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
+           pgs, UVM_LOAN_TOPAGE);
+       if (error) {
+               uvm_km_free(kernel_map, lva, len);
+               socurkva -= len;
+               return (0);
+       }
+
+       for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
+               pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ);
+       pmap_update(pmap_kernel());
+
+       lva += (vaddr_t) iov->iov_base & PAGE_MASK;
+
+       MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
+
+       uio->uio_resid -= space;
+       /* uio_offset not updated, not set/used for write(2) */
+       uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
+       uio->uio_iov->iov_len -= space;
+       if (uio->uio_iov->iov_len == 0) {
+               uio->uio_iov++;
+               uio->uio_iovcnt--;
+       }
+
+       return (space);
+}
+
+#endif /* SOSEND_LOAN */
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -151,6 +390,9 @@
 void
 sofree(struct socket *so)
 {



Home | Main Index | Thread Index | Old Index