Source-Changes-HG archive


[src/trunk]: src/sys/rump/librump/rumpkern Add uniprocessor versions of mutex...



details:   https://anonhg.NetBSD.org/src/rev/a55a98ddd21e
branches:  trunk
changeset: 754982:a55a98ddd21e
user:      pooka <pooka%NetBSD.org@localhost>
date:      Tue May 18 16:29:36 2010 +0000

description:
Add uniprocessor versions of mutex/rw/cv.  They work only on virtual
unicpu configurations (i.e. RUMP_NCPU==1), but are massively faster
than the multiprocessor versions since the fast path does not have
to perform any cache-coherent operations.  _Applications_ with
lock-happy kernel paths, i.e. _not_ lock microbenchmarks, measure
speedups of up to tens of percent on my Core2 Duo.  Any globally
atomic state required by normal locks/atomic ops implies a hideous
speed penalty even for the fast path.

While this requires a unicpu configuration, it should be noted that
we are talking about a virtual unicpu configuration.  The host can
have as many processors as it desires, and the speed benefit of
virtual unicpu is still there.  It's pretty obvious that, in terms
of scalability, simple workload partitioning and replication into
multiple kernels wins hands down over complicated locking or
lockless algorithms that depend on globally atomic state.
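
To make the fast-path argument concrete, here is a rough sketch (not
part of the change; the structures and function names are made up)
contrasting an MP-style try-enter, which needs a cache-coherent atomic
operation, with the virtual-unicpu one, which can use plain loads and
stores because only one virtual CPU ever executes kernel code:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct mp_mutex { _Atomic(void *) mpm_owner; };   /* hypothetical */
    struct up_mutex { void *upm_owner; };             /* hypothetical */

    /*
     * MP fast path: the compare-and-swap must gain exclusive ownership
     * of the cache line on every acquisition, even when uncontended.
     */
    static bool
    mp_tryenter(struct mp_mutex *m, void *self)
    {
        void *expected = NULL;

        return atomic_compare_exchange_strong(&m->mpm_owner, &expected, self);
    }

    /*
     * Virtual-unicpu fast path: a plain load and store suffice, since
     * no other (virtual) CPU can be in the kernel at the same time.
     */
    static bool
    up_tryenter(struct up_mutex *m, void *self)
    {
        if (m->upm_owner != NULL)
            return false;
        m->upm_owner = self;
        return true;
    }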

diffstat:

 sys/rump/librump/rumpkern/Makefile.rumpkern |   11 +-
 sys/rump/librump/rumpkern/locks_up.c        |  430 ++++++++++++++++++++++++++++
 2 files changed, 438 insertions(+), 3 deletions(-)

diffs (truncated from 459 to 300 lines):

diff -r 601148dd515c -r a55a98ddd21e sys/rump/librump/rumpkern/Makefile.rumpkern
--- a/sys/rump/librump/rumpkern/Makefile.rumpkern       Tue May 18 15:16:10 2010 +0000
+++ b/sys/rump/librump/rumpkern/Makefile.rumpkern       Tue May 18 16:29:36 2010 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile.rumpkern,v 1.85 2010/05/18 15:12:19 pooka Exp $
+#      $NetBSD: Makefile.rumpkern,v 1.86 2010/05/18 16:29:36 pooka Exp $
 #
 
 .include "${RUMPTOP}/Makefile.rump"
@@ -16,8 +16,13 @@
 # Source modules, first the ones specifically implemented for librump.
 # 
 SRCS=  rump.c rumpcopy.c emul.c intr.c klock.c kobj_rename.c   \
-       locks.c ltsleep.c memalloc.c scheduler.c signals.c      \
-       sleepq.c sysproxy_socket.c threads.c vm.c
+       ltsleep.c memalloc.c scheduler.c signals.c sleepq.c     \
+       sysproxy_socket.c threads.c vm.c
+
+# Multiprocessor or uniprocessor locking.  TODO: select right
+# locking at runtime.
+#SRCS+=        locks.c
+SRCS+= locks_up.c
 
 vers.c: ${RUMPTOP}/../conf/newvers.sh ${RUMPTOP}/../conf/osrelease.sh
        ${_MKMSG_CREATE} vers.c
diff -r 601148dd515c -r a55a98ddd21e sys/rump/librump/rumpkern/locks_up.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/rump/librump/rumpkern/locks_up.c      Tue May 18 16:29:36 2010 +0000
@@ -0,0 +1,430 @@
+/*     $NetBSD: locks_up.c,v 1.1 2010/05/18 16:29:36 pooka Exp $       */
+
+/*
+ * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Virtual uniprocessor rump kernel version of locks.  Since the entire
+ * kernel is running on only one CPU in the system, there is no need 
+ * to perform slow cache-coherent MP locking operations.  This speeds
+ * up things quite dramatically and is a good example of how two
+ * disjoint kernels running simultaneously in an MP system can be
+ * massively faster than one with fine-grained locking.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.1 2010/05/18 16:29:36 pooka Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+
+#include <rump/rumpuser.h>
+
+#include "rump_private.h"
+
+struct upmtx {
+       struct lwp *upm_owner;
+       int upm_wanted;
+       struct rumpuser_cv *upm_rucv;
+};
+#define UPMTX(mtx) struct upmtx *upm = *(struct upmtx **)mtx
+
+static inline void
+checkncpu(void)
+{
+
+       if (__predict_false(ncpu != 1))
+               panic("UP lock implementation requires RUMP_NCPU == 1");
+}
+
+void
+mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
+{
+       struct upmtx *upm;
+
+       CTASSERT(sizeof(kmutex_t) >= sizeof(void *));
+       checkncpu();
+
+       /*
+        * XXX: pool_cache would be nice, but not easily possible,
+        * as pool cache init wants to call mutex_init() ...
+        */
+       upm = rumpuser_malloc(sizeof(*upm), 1);
+       memset(upm, 0, sizeof(*upm));
+       rumpuser_cv_init(&upm->upm_rucv);
+       memcpy(mtx, &upm, sizeof(void *));
+}
+
+void
+mutex_destroy(kmutex_t *mtx)
+{
+       UPMTX(mtx);
+
+       KASSERT(upm->upm_owner == NULL);
+       KASSERT(upm->upm_wanted == 0);
+       rumpuser_cv_destroy(upm->upm_rucv);
+       rumpuser_free(upm);
+}
+
+void
+mutex_enter(kmutex_t *mtx)
+{
+       UPMTX(mtx);
+
+       /* fastpath? */
+       if (mutex_tryenter(mtx))
+               return;
+
+       /*
+        * No?  bummer, do it the slow and painful way then.
+        */
+       upm->upm_wanted++;
+       while (!mutex_tryenter(mtx)) {
+               rump_schedlock_cv_wait(upm->upm_rucv);
+       }
+       upm->upm_wanted--;
+
+       KASSERT(upm->upm_wanted >= 0);
+}
+
+void
+mutex_spin_enter(kmutex_t *mtx)
+{
+
+       mutex_enter(mtx);
+}
+
+int
+mutex_tryenter(kmutex_t *mtx)
+{
+       UPMTX(mtx);
+
+       if (upm->upm_owner)
+               return 0;
+
+       upm->upm_owner = curlwp;
+       return 1;
+}
+
+void
+mutex_exit(kmutex_t *mtx)
+{
+       UPMTX(mtx);
+
+       if (upm->upm_wanted) {
+               rumpuser_cv_signal(upm->upm_rucv); /* CPU is our interlock */
+       }
+       upm->upm_owner = NULL;
+}
+
+void
+mutex_spin_exit(kmutex_t *mtx)
+{
+
+       mutex_exit(mtx);
+}
+
+int
+mutex_owned(kmutex_t *mtx)
+{
+       UPMTX(mtx);
+
+       return upm->upm_owner == curlwp;
+}
+
+struct uprw {
+       struct lwp *uprw_owner;
+       int uprw_readers;
+       uint16_t uprw_rwant;
+       uint16_t uprw_wwant;
+       struct rumpuser_cv *uprw_rucv_reader;
+       struct rumpuser_cv *uprw_rucv_writer;
+};
+
+#define UPRW(rw) struct uprw *uprw = *(struct uprw **)rw
+
+/* reader/writer locks */
+
+void
+rw_init(krwlock_t *rw)
+{
+       struct uprw *uprw;
+
+       CTASSERT(sizeof(krwlock_t) >= sizeof(void *));
+       checkncpu();
+
+       uprw = rumpuser_malloc(sizeof(*uprw), 0);
+       memset(uprw, 0, sizeof(*uprw));
+       rumpuser_cv_init(&uprw->uprw_rucv_reader);
+       rumpuser_cv_init(&uprw->uprw_rucv_writer);
+       memcpy(rw, &uprw, sizeof(void *));
+}
+
+void
+rw_destroy(krwlock_t *rw)
+{
+       UPRW(rw);
+
+       rumpuser_cv_destroy(uprw->uprw_rucv_reader);
+       rumpuser_cv_destroy(uprw->uprw_rucv_writer);
+       rumpuser_free(uprw);
+}
+
+/* take rwlock.  prefer writers over readers (see rw_tryenter and rw_exit) */
+void
+rw_enter(krwlock_t *rw, const krw_t op)
+{
+       UPRW(rw);
+       struct rumpuser_cv *rucv;
+       uint16_t *wp;
+
+       if (rw_tryenter(rw, op))
+               return;
+
+       /* lagpath */
+       if (op == RW_READER) {
+               rucv = uprw->uprw_rucv_reader;
+               wp = &uprw->uprw_rwant;
+       } else {
+               rucv = uprw->uprw_rucv_writer;
+               wp = &uprw->uprw_wwant;
+       }
+
+       (*wp)++;
+       while (!rw_tryenter(rw, op)) {
+               rump_schedlock_cv_wait(rucv);
+       }
+       (*wp)--;
+}
+
+int
+rw_tryenter(krwlock_t *rw, const krw_t op)
+{
+       UPRW(rw);
+
+       switch (op) {
+       case RW_READER:
+               if (uprw->uprw_owner == NULL && uprw->uprw_wwant == 0) {
+                       uprw->uprw_readers++;
+                       return 1;
+               }
+               break;
+       case RW_WRITER:
+               if (uprw->uprw_owner == NULL && uprw->uprw_readers == 0) {
+                       uprw->uprw_owner = curlwp;
+                       return 1;
+               }
+               break;
+       }
+
+       return 0;
+}
+
+void
+rw_exit(krwlock_t *rw)
+{
+       UPRW(rw);
+
+       if (uprw->uprw_readers > 0) {
+               uprw->uprw_readers--;
+       } else {
+               KASSERT(uprw->uprw_owner == curlwp);
+               uprw->uprw_owner = NULL;
+       }
+
+       if (uprw->uprw_wwant) {
+               rumpuser_cv_signal(uprw->uprw_rucv_writer);
+       } else if (uprw->uprw_rwant) {
+               rumpuser_cv_signal(uprw->uprw_rucv_reader);
+       }
+}
+
+int
+rw_tryupgrade(krwlock_t *rw)
+{
+       UPRW(rw);
+
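
The diff is truncated above.  For context, nothing changes for lock
consumers: whichever of locks.c (multiprocessor) or locks_up.c
(uniprocessor) is compiled in per the Makefile, callers keep using the
standard mutex(9) interface.  A minimal, illustrative caller (the
structure and functions below are made up) looks like this:

    #include <sys/param.h>
    #include <sys/mutex.h>

    struct frobber {                        /* hypothetical consumer */
        kmutex_t frob_lock;
        int frob_count;
    };

    static void
    frob_init(struct frobber *f)
    {

        mutex_init(&f->frob_lock, MUTEX_DEFAULT, IPL_NONE);
    }

    static void
    frob(struct frobber *f)
    {

        /* With locks_up.c the uncontended enter/exit does no atomic op. */
        mutex_enter(&f->frob_lock);
        f->frob_count++;
        mutex_exit(&f->frob_lock);
    }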


