Subject: Re: mutex fault
To: Manuel Bouyer <bouyer@antioche.eu.org>
From: Kazushi (Jam) Marukawa <jam@pobox.com>
List: tech-kern
Date: 12/19/2007 00:04:25
Hi Manuel,

   > > Anyway, it's running, so I will leave it as is for a while
   > > to see whether it crashes or not.  If you have any idea to
   > > try, please let me know.  Thanks.
   > 
   > I'm running a LOCKDEBUG kernel now as well, and I didn't see a crash
   > either.

Is your kernel crashed?  Mine didn't crash.  It looks like
this LOCKDEBUG kernel won't crash, so I stopped this test.

I'm now thinking Xen3 dom0 kernel on multi cores machine is
having race condition issues.  Therefore, I made following
patch to enable some pre-exist codes to avoid lock race
conditions.  With this patch, my machine has not been
crashed yet for 2 days, looks like pretty working.

I know this is not the answer.  I just patched a hole.  This
doesn't fix the source of problem.  Manuel, is it possible
to investigate race condition issues in Xen3 dom0 kernel?
Thanks.

----

Little bit of descriptions, why I started thinking Xen3 dom0
kernel on multi cores machine is having race condition issues:

I sent small patch for locore.S at Jun 27th.  My machine
needed CLI and STI to avoid race conditions although those
instructions are removed with a decision like those were not
required anymore after kernel modifications.  I don't know
why, but my machine just needed them to run correctly.
Others didn't have problem without CLI and STI, but only my
machine had the problem.

I once asked whether I need to enable MULTIPROCESSOR to use
netbsd dom0 kernel since my machine was been having dual
cores.  The answer was no.  I heard Xen3 was taking care of
race conditions.

Now, I'm doubting it since I had CLI/STI race condition
problem before, and I've been having this mutex race
condition problem.  That's all.

---

Here is my patch.  I just enabled codes for MULTIPROCESSOR
environment.  I modified i386 and amd64 codes, but never
tested it on amd64.  I also modified ic/XXX codes.  I guess
ic/XXX stuff are not needed, but I'm not sure.  So, I just
left them as what I currently modified.

Index: arch/amd64/amd64/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/lock_stubs.S,v
retrieving revision 1.8
diff -u -r1.8 lock_stubs.S
--- arch/amd64/amd64/lock_stubs.S	22 Nov 2007 16:16:42 -0000	1.8
+++ arch/amd64/amd64/lock_stubs.S	18 Dec 2007 14:30:54 -0000
@@ -53,7 +53,7 @@
 
 #include "assym.h"
 
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 #define	FULL
 #endif
 
Index: arch/i386/i386/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/lock_stubs.S,v
retrieving revision 1.8
diff -u -r1.8 lock_stubs.S
--- arch/i386/i386/lock_stubs.S	11 Nov 2007 01:27:43 -0000	1.8
+++ arch/i386/i386/lock_stubs.S	18 Dec 2007 14:30:56 -0000
@@ -52,7 +52,7 @@
 
 #include "assym.h"
 
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 #define	FULL
 #endif
 
Index: dev/ic/atppc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/atppc.c,v
retrieving revision 1.24
diff -u -r1.24 atppc.c
--- dev/ic/atppc.c	19 Oct 2007 11:59:48 -0000	1.24
+++ dev/ic/atppc.c	18 Dec 2007 14:31:02 -0000
@@ -181,7 +181,7 @@
 			lsc->sc_dev.dv_xname));
 	}
 
-#if defined (MULTIPROCESSOR) || defined (LOCKDEBUG)
+#if defined (MULTIPROCESSOR) || defined (LOCKDEBUG) || defined(XEN3)
 	/* Initialize lock structure */
 	simple_lock_init(&(lsc->sc_lock));
 #endif
Index: dev/ic/atppcvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/atppcvar.h,v
retrieving revision 1.9
diff -u -r1.9 atppcvar.h
--- dev/ic/atppcvar.h	19 Oct 2007 11:59:48 -0000	1.9
+++ dev/ic/atppcvar.h	18 Dec 2007 14:31:02 -0000
@@ -79,18 +79,18 @@
 
 
 /* Locking for atppc device */
-#if defined(MULTIPROCESSOR) || defined (LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined (LOCKDEBUG) || defined(XEN3)
 #include <sys/lock.h>
 #define ATPPC_SC_LOCK(sc) (&((sc)->sc_lock))
 #define ATPPC_LOCK_INIT(sc) simple_lock_init(ATPPC_SC_LOCK((sc)))
 #define ATPPC_LOCK(sc) simple_lock(ATPPC_SC_LOCK((sc)))
 #define ATPPC_UNLOCK(sc) simple_unlock(ATPPC_SC_LOCK((sc)))
-#else /* !(MULTIPROCESSOR) && !(LOCKDEBUG) */
+#else /* !(MULTIPROCESSOR) && !(LOCKDEBUG) && !(XEN3) */
 #define ATPPC_LOCK_INIT(sc)
 #define ATPPC_LOCK(sc)
 #define ATPPC_UNLOCK(sc)
 #define ATPPC_SC_LOCK(sc) NULL
-#endif /* MULTIPROCESSOR || LOCKDEBUG */
+#endif /* MULTIPROCESSOR || LOCKDEBUG || XEN3 */
 
 /* Single softintr callback entry */
 struct atppc_handler_node {
@@ -104,7 +104,7 @@
 	/* Generic device attributes */
 	struct device sc_dev;
 
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 	/* Simple lock */
 	struct simplelock sc_lock;
 #endif
Index: dev/ic/sl811hs.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/sl811hs.c,v
retrieving revision 1.16
diff -u -r1.16 sl811hs.c
--- dev/ic/sl811hs.c	6 Nov 2007 21:51:07 -0000	1.16
+++ dev/ic/sl811hs.c	18 Dec 2007 14:31:03 -0000
@@ -300,7 +300,7 @@
 	uint8_t 	ptype;		/* Pipe type */
 };
 
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 #define SLHCI_WAITLOCK 1
 #endif
 
Index: kern/kern_mutex.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_mutex.c,v
retrieving revision 1.23
diff -u -r1.23 kern_mutex.c
--- kern/kern_mutex.c	21 Nov 2007 10:19:10 -0000	1.23
+++ kern/kern_mutex.c	18 Dec 2007 14:31:04 -0000
@@ -69,7 +69,7 @@
  * more than an splraiseipl() and splx() pair.
  */
 
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 #define	FULL
 #endif
 
@@ -456,7 +456,7 @@
 	 * Handle spin mutexes.
 	 */
 	if (MUTEX_SPIN_P(mtx)) {
-#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
+#if defined(LOCKDEBUG) && (defined(MULTIPROCESSOR) || defined(XEN3))
 		u_int spins = 0;
 #endif
 		MUTEX_SPIN_SPLRAISE(mtx);
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.69
diff -u -r1.69 lwp.h
--- sys/lwp.h	12 Nov 2007 23:12:00 -0000	1.69
+++ sys/lwp.h	18 Dec 2007 14:31:05 -0000
@@ -294,7 +294,7 @@
 static inline void
 lwp_lock(lwp_t *l)
 {
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 	kmutex_t *old;
 
 	mutex_spin_enter(old = l->l_mutex);
Index: sys/simplelock.h
===================================================================
RCS file: /cvsroot/src/sys/sys/simplelock.h,v
retrieving revision 1.5
diff -u -r1.5 simplelock.h
--- sys/simplelock.h	11 Oct 2007 19:49:04 -0000	1.5
+++ sys/simplelock.h	18 Dec 2007 14:31:06 -0000
@@ -113,7 +113,7 @@
 
 #ifdef _KERNEL
 
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
 #define	simple_lock_init(alp)	__cpu_simple_lock_init(&(alp)->lock_data)
 #define	simple_lock(alp)	__cpu_simple_lock(&(alp)->lock_data)
 #define	simple_lock_held(alp)	(__SIMPLELOCK_LOCKED_P(&(alp)->lock_data))

-- Kazushi
UFO's are for real: the Air Force doesn't exist.