Subject: libpthread hacks
To: None <tech-kern@netbsd.org, tech-userlevel@netbsd.org>
From: Charles M. Hannum <abuse@spamalicious.com>
List: tech-kern
Date: 01/06/2005 03:46:32
--Boundary-00=_YSL3BTxtJ6Qm03U
Content-Type: text/plain;
  charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

Here's a set of hacks to libpthread that:

* add more assertions, and
* implement klugy workarounds for the two problems I've mentioned recently.

The mlock()s made Azureus stable for ~64 hours on a memory-starved machine 
where it only ran for ~10-15 minutes before.  I'm expecting the changed test 
in pthread__sched_bulk() to fix the infinite loop and deadlock problems I've 
seen with Azureus as well.

In addition, I am no longer seeing the UI lock up in the Furthur network 
client.

Obviously the mlock()s are a kluge.  There is also still potential for upcall 
exhaustion.  But if you need a quick fix, this might do it.

--Boundary-00=_YSL3BTxtJ6Qm03U
Content-Type: text/x-diff;
  charset="us-ascii";
  name="pthread.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
	filename="pthread.diff"

Index: pthread.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread.c,v
retrieving revision 1.36
diff -u -r1.36 pthread.c
--- pthread.c	12 Aug 2004 10:54:13 -0000	1.36
+++ pthread.c	6 Jan 2005 03:41:21 -0000
@@ -51,6 +51,7 @@
 #include <unistd.h>
 #include <sys/param.h>
 #include <sys/sysctl.h>
+#include <sys/mman.h>
 
 #include <sched.h>
 #include "pthread.h"
@@ -134,6 +135,7 @@
 	int i, mib[2], ncpu;
 	size_t len;
 	extern int __isthreaded;
+	int ret;
 
 	mib[0] = CTL_HW;
 	mib[1] = HW_NCPU; 
@@ -164,6 +166,8 @@
 	pthread_attr_init(&pthread_default_attr);
 	PTQ_INIT(&pthread__allqueue);
 	PTQ_INIT(&pthread__deadqueue);
+	ret = mlock(&pthread__deadqueue, sizeof(pthread__deadqueue));
+	pthread__assert(ret == 0);
 	PTQ_INIT(&pthread__runqueue);
 	PTQ_INIT(&pthread__idlequeue);
 	for (i = 0; i < pthread__maxconcurrency; i++)
@@ -360,6 +364,8 @@
 				free(name);
 			return ret;
 		}
+		ret = mlock(newthread, sizeof(struct __pthread_st));
+		pthread__assert(ret == 0);
 	}
 
 	/* 2. Set up state. */
@@ -1077,6 +1083,8 @@
 	char buf[1024];
 	int len;
 
+	SDPRINTF(("(af)\n"));
+
 	/*
 	 * snprintf should not acquire any locks, or we could
 	 * end up deadlocked if the assert caller held locks.
Index: pthread_debug.h
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread_debug.h,v
retrieving revision 1.8
diff -u -r1.8 pthread_debug.h
--- pthread_debug.h	14 Mar 2004 01:19:42 -0000	1.8
+++ pthread_debug.h	6 Jan 2005 03:41:21 -0000
@@ -93,7 +93,7 @@
 }
 
 #define PTHREAD__DEBUG_SHMKEY	(0x000f)
-#define PTHREAD__DEBUG_SHMSIZE	(1<<18)
+#define PTHREAD__DEBUG_SHMSIZE	(1<<20)
 
 struct	pthread_msgbuf {
 #define BUF_MAGIC	0x090976
Index: pthread_lock.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread_lock.c,v
retrieving revision 1.12
diff -u -r1.12 pthread_lock.c
--- pthread_lock.c	14 Mar 2004 12:49:31 -0000	1.12
+++ pthread_lock.c	6 Jan 2005 03:41:21 -0000
@@ -192,6 +192,7 @@
 		 */
 		if (thread->pt_next != NULL) {
 			PTHREADD_ADD(PTHREADD_SPINPREEMPT);
+			pthread__assert(thread->pt_blockgen == thread->pt_unblockgen);
 			pthread__switch(thread, thread->pt_next);
 		}
 		/* try again */
@@ -224,6 +225,7 @@
 		/* See above. */
 		if (thread->pt_next != NULL) {
 			PTHREADD_ADD(PTHREADD_SPINPREEMPT);
+			pthread__assert(thread->pt_blockgen == thread->pt_unblockgen);
 			pthread__switch(thread, thread->pt_next);
 		}
 	}
@@ -255,6 +257,7 @@
 	
 	if ((thread->pt_spinlocks == 0) && (thread->pt_next != NULL)) {
 		PTHREADD_ADD(PTHREADD_SPINPREEMPT);
+//		pthread__assert(thread->pt_blockgen == thread->pt_unblockgen);
 		pthread__switch(thread, thread->pt_next);
 	}
 }
Index: pthread_run.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread_run.c,v
retrieving revision 1.17
diff -u -r1.17 pthread_run.c
--- pthread_run.c	14 Mar 2004 01:19:42 -0000	1.17
+++ pthread_run.c	6 Jan 2005 03:41:24 -0000
@@ -265,7 +265,7 @@
 		next = qhead->pt_next;
 		pthread__assert(qhead->pt_spinlocks == 0);
 		pthread__assert(qhead->pt_type != PT_THREAD_UPCALL);
-		if (qhead->pt_unblockgen & 1)
+		if (qhead->pt_unblockgen != qhead->pt_blockgen)
 			qhead->pt_unblockgen++;
 		if (qhead->pt_type == PT_THREAD_NORMAL) {
 			qhead->pt_state = PT_STATE_RUNNABLE;
Index: pthread_sa.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread_sa.c,v
retrieving revision 1.33
diff -u -r1.33 pthread_sa.c
--- pthread_sa.c	12 Oct 2004 22:17:56 -0000	1.33
+++ pthread_sa.c	6 Jan 2005 03:41:25 -0000
@@ -134,6 +134,7 @@
 		pthread__assert(t->pt_vpid == sas[1]->sa_cpu);
 		t->pt_blockedlwp = sas[1]->sa_id;
 		t->pt_blockgen += 2;
+//		pthread__assert(t->pt_blockgen <= t->pt_unblockgen + 2);
 		if (t->pt_cancel)
 			_lwp_wakeup(t->pt_blockedlwp);
 #ifdef PTHREAD__DEBUG
@@ -181,6 +182,8 @@
 		 * a signal will be on a run queue, and not in upcall limbo.
 		 */
 		si = arg;
+		SDPRINTF(("(up %p) signal %d %p\n", self,
+			     ev ? sas[1]->sa_id : -1, si));
 		if (ev)
 			pthread__signal(self, pthread__sa_id(sas[1]), si);
 		else
@@ -220,6 +223,7 @@
 			     pthread__uc_pc(UC(self->pt_next))));
 		self->pt_switchtouc = UC(self);
 		self->pt_switchto = self;
+		pthread__assert(self->pt_blockgen == self->pt_unblockgen);
 		pthread__switch(self, self->pt_next);
 		/*NOTREACHED*/
 		pthread__abort();
@@ -272,7 +276,11 @@
 		}
 		if (victim->pt_type == PT_THREAD_UPCALL) {
 			/* Case 1: Upcall. Must be resumed. */
-				SDPRINTF((" upcall"));
+			SDPRINTF((" upcall"));
+			if (victim->pt_spinlocks > 0) {
+				SDPRINTF((" lockholder %d",
+				    victim->pt_spinlocks));
+			}
 			resume = 1;
 			if (victim->pt_next) {
 				/*
@@ -348,11 +356,15 @@
 			pthread__assert(victim->pt_next == NULL);
 			victim->pt_next = nextint;
 			nextint = victim;
+			for (; victim; victim = victim->pt_next)
+				pthread__assert(victim->pt_next != nextint);
 		} else {
 			pthread__assert(victim->pt_parent == NULL);
 			pthread__assert(victim->pt_next == NULL);
 			victim->pt_next = nextsched;
 			nextsched = victim;
+			for (; victim; victim = victim->pt_next)
+				pthread__assert(victim->pt_next != nextsched);
 		}
 		SDPRINTF(("\n"));
 	}
@@ -454,6 +466,8 @@
 				if (victim->pt_switchto) {
 					/* We're done with you. */
 					SDPRINTF((" recyclable"));
+					pthread__assert(victim != prev);
+					pthread__assert(!prev || prev->pt_next);
 					/*
 					 * Clear trap context, which is
 					 * no longer useful.
@@ -580,6 +594,7 @@
 				     self, intqueue, PUC(intqueue), 
 				     pthread__uc_pc(UC(intqueue)), 
 				     pthread__uc_sp(UC(intqueue))));
+			pthread__assert(self->pt_blockgen == self->pt_unblockgen);
 			pthread__switch(self, intqueue);
 			SDPRINTF(("(rl %p) returned from chain\n",
 			    self));
@@ -597,6 +612,7 @@
 				     PUC(self->pt_next),
 				     pthread__uc_pc(UC(self->pt_next)), 
 				     pthread__uc_sp(UC(self->pt_next))));
+			pthread__assert(self->pt_blockgen == self->pt_unblockgen);
 			pthread__switch(self, self->pt_next);
 		}
 

--Boundary-00=_YSL3BTxtJ6Qm03U--