Subject: Re: lock debugging
To: None <tech-kern@netbsd.org>
From: Pavel Cahyna <pavel@netbsd.org>
List: tech-kern
Date: 11/17/2006 19:04:27
--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Fri, Nov 17, 2006 at 05:49:54AM -0500, Jed Davis wrote:
> For diagnosing problems with locking (e.g., lots of processes getting
> stuck on vnlock), it would be nice to be able to dump, for each lock
> on the system that's being slept on, which lwp holds it and which are
> sleeping on it.  Finding the thread at the root (or, for vnlock, leaf)
> of the trouble should then be easier.

I took a somewhat different approach. If lockmgr() wants to sleep to wait
for a lock, we first note the LWP which currently holds the lock. We then
check whether that LWP is itself waiting for another lock. If it is, we
repeat until we hit an LWP which does not wait for a lock, or until we
come back to the original LWP. In the latter case this is a deadlock and
we panic.

I also added a function which traverses the chain of LWPs and prints the
last one (the one which does not wait for a lock -- what you've called
the root of the trouble). This could be useful in cases where the problem
is not caused by a deadlock. The function is lklast(struct lwp *); it can
be called from ddb.
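
To illustrate the idea, here is a simplified, iterative sketch of the
chain walk (the real code is the recursive lkfollow() in the attached
lockdebug.diff, which also prints every lock in the cycle; the helper
name and the absence of any locking around the walk are simplifications
of mine):

static int
lkfollow_sketch(volatile const struct lock *lkp, struct lwp *begin)
{
	struct lwp *l;

	for (;;) {
		if (lkp->lk_timo != 0 || (lkp->lk_flags & LK_SPIN))
			return 0;	/* timed sleep or spin lock: don't follow */
		l = lkp->lk_lwp_p;	/* current exclusive holder */
		if (l == NULL)
			return 0;	/* nobody holds it */
		if (l == begin)
			return 1;	/* back at the start: deadlock */
		if (l->l_stat != LSSLEEP || (l->l_flag & L_LOCK) == 0 ||
		    l->l_wchan == NULL)
			return 0;	/* holder is not sleeping on a lockmgr lock */
		/* L_LOCK guarantees l_wchan points to a struct lock */
		lkp = (volatile const struct lock *)l->l_wchan;
	}
}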

> Given a struck lock*, the identity of the holder is right there; but
> getting from a sleeping lwp to the lock it's sleeping on seems harder.
> A quick glance shows that the sleep address will point to the struct
> lock (except in the LK_DRAIN case when it's the lk_flags field, it
> looks like), but it might also point to something else entirely.

I've added a new flag to struct lwp, L_LOCK, which is set while the LWP
is waiting for a lock; when it is set, it is guaranteed that l_wchan
points to a struct lock.

I've also changed the reference to the lock holder in struct lock from a
{ pid_t, lwpid_t } pair to a struct lwp *, because given a struct lwp *,
getting the PID and LWPID is easy, while the inverse mapping is harder.
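
To illustrate the asymmetry (this just mirrors what the new
lk_lockholder/lk_locklwp macros in the patch expand to; "holder" is a
made-up local name):

	struct lwp *holder = lkp->lk_lwp_p;	/* lock -> holding LWP */
	pid_t pid = holder->l_proc->p_pid;	/* its process id */
	lwpid_t lid = holder->l_lid;		/* its LWP id within the process */

Going the other way, from a { pid, lid } pair back to a struct lwp *,
would mean searching the process and LWP lists.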

lockdebug.diff contains those changes. deadlock.diff adds two syscalls
designed to take the two locks in opposite order to provoke a deadlock:
uncomment the lockmgr() calls in the second one to get the actual
deadlock; as posted, it instead produces a process stuck on a lock held
by another process, which is good for trying the lklast() function.
deadlock.c is a test program to trigger them from userland; a rough
example session follows below.
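
Roughly, the intended use is (addresses and pids below are made up):

	$ cc -o deadlock deadlock.c
	$ ./deadlock

One process ends up sleeping on "lock2", which the other holds forever.
Break into ddb, find the sleeping LWP's address (e.g. with ps), and then:

	db> call lklast(0xc1234560)
	last lock 0xc0987654 held by:
	LWP 123.1=0xc0abcdef

With the lockmgr() calls in sys_lock_2() uncommented, the two locks are
taken in opposite order, lkfollow() finds the cycle and you get the
"acquire: deadlock detected" panic with the chain of locks printed.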

Pavel

--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="lockdebug.diff"

Index: kern/kern_lock.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lock.c,v
retrieving revision 1.99
diff -u -p -r1.99 kern_lock.c
--- kern/kern_lock.c	7 Sep 2006 02:06:47 -0000	1.99
+++ kern/kern_lock.c	17 Nov 2006 17:21:58 -0000
@@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,
 #include "opt_ddb.h"
 
 #include <sys/param.h>
+#include <sys/lwp.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/systm.h>
@@ -209,6 +210,56 @@ do {									\
 
 #define	RETURN_ADDRESS		((uintptr_t)__builtin_return_address(0))
 
+static int lkfollow(volatile const struct lock *, struct lwp *, int);
+void lklast(struct lwp *);
+
+void lklast(struct lwp *l)
+{
+	if (l->l_stat == LSSLEEP && (l->l_flag & L_LOCK) &&
+	    l->l_wchan ) {
+		if (lkfollow((volatile const struct lock *)l->l_wchan, l, 1))
+			printf("deadlock detected\n");
+	}
+}
+		
+/* For lkp, find the LWP which holds it. If that LWP is waiting for a
+ * lock, find that lock and repeat until we reach the LWP "begin", or we
+ * reach an LWP which is not waiting for a lock. If report_end, print
+ * the last LWP if it is not waiting for a lock; otherwise be silent
+ * in that case and print the whole chain of locks if we reached
+ * "begin". */
+
+static int lkfollow(volatile const struct lock *lkp, struct lwp* begin, int report_end)
+{
+	struct lwp *l;
+	if (lkp->lk_timo == 0 && !(lkp->lk_flags & LK_SPIN)) {
+		l = lkp->lk_lwp_p;
+		if (l == NULL)
+			return 0;
+		if (l == begin) {
+			printf("lock %p:", lkp);
+			lockmgr_printinfo(lkp);
+			printf("\n");
+			return 1;
+		}
+		if (l->l_stat == LSSLEEP && (l->l_flag & L_LOCK) &&
+		    l->l_wchan ) {
+			if (lkfollow((volatile const struct lock *)l->l_wchan, begin, report_end)) {
+				printf("lock %p:", lkp);
+				lockmgr_printinfo(lkp);
+				printf("\n");
+				return 1;
+			}
+		} else if (report_end) {
+			printf("last lock %p held by:\n", lkp);
+			printf("LWP %d.%d=%p\n", l->l_proc->p_pid, l->l_lid, l);
+		}
+	}
+	
+	return 0;
+}
+			
+		
 /*
  * Acquire a resource.
  */
@@ -261,12 +312,21 @@ acquire(volatile struct lock **lkpp, int
 				lkp->lk_waitcount++;
 				lkp->lk_flags |= LK_WAIT_NONZERO;
 			}
+			if (lkfollow(lkp, curlwp, 0))
+				panic("acquire: deadlock detected");
+
 			/* XXX Cast away volatile. */
 			LOCKSTAT_START_TIMER(slptime);
+			if (!drain) {
+				curlwp->l_flag |= L_LOCK;
+			}
 			error = ltsleep(drain ?
 			    (volatile const void *)&lkp->lk_flags :
 			    (volatile const void *)lkp, lkp->lk_prio,
 			    lkp->lk_wmesg, lkp->lk_timo, &lkp->lk_interlock);
+			if (!drain) {
+				curlwp->l_flag &= ~L_LOCK;
+			}
 			LOCKSTAT_STOP_TIMER(slptime);
 			LOCKSTAT_EVENT_RA((void *)(uintptr_t)lkp,
 			    LB_LOCKMGR | LB_SLEEP, 1, slptime, ra);
@@ -294,20 +354,20 @@ acquire(volatile struct lock **lkpp, int
 	return error;
 }
 
-#define	SETHOLDER(lkp, pid, lid, cpu_id)				\
-do {									\
-	if ((lkp)->lk_flags & LK_SPIN)					\
-		(lkp)->lk_cpu = cpu_id;					\
-	else {								\
-		(lkp)->lk_lockholder = pid;				\
-		(lkp)->lk_locklwp = lid;				\
-	}								\
+#define	SETHOLDER(lkp, lwp, cpu_id)		\
+do {						\
+	if ((lkp)->lk_flags & LK_SPIN)		\
+		(lkp)->lk_cpu = cpu_id;		\
+	else {					\
+		(lkp)->lk_lwp_p = lwp;		\
+	}					\
 } while (/*CONSTCOND*/0)
 
 #define	WEHOLDIT(lkp, pid, lid, cpu_id)					\
 	(((lkp)->lk_flags & LK_SPIN) != 0 ?				\
 	 ((lkp)->lk_cpu == (cpu_id)) :					\
-	 ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid)))
+	 ((lkp)->lk_lwp_p == NULL ? (pid) == LK_NOPROC && (lid) == 0 :	\
+	 ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid))))
 
 #define	WAKEUP_WAITER(lkp)						\
 do {									\
@@ -417,7 +477,7 @@ lockinit(struct lock *lkp, int prio, con
 	if (flags & LK_SPIN)
 		lkp->lk_cpu = LK_NOCPU;
 	else {
-		lkp->lk_lockholder = LK_NOPROC;
+		lkp->lk_lwp_p = NULL;
 		lkp->lk_newlock = NULL;
 		lkp->lk_prio = prio;
 		lkp->lk_timo = timo;
@@ -665,7 +725,7 @@ lockmgr(volatile struct lock *lkp, u_int
 		lkp->lk_exclusivecount = 0;
 		lkp->lk_recurselevel = 0;
 		lkp->lk_flags &= ~LK_HAVE_EXCL;
-		SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+		SETHOLDER(lkp, NULL, LK_NOCPU);
 #if defined(LOCKDEBUG)
 		lkp->lk_unlock_file = file;
 		lkp->lk_unlock_line = line;
@@ -729,7 +789,7 @@ lockmgr(volatile struct lock *lkp, u_int
 				break;
 			}
 			lkp->lk_flags |= LK_HAVE_EXCL;
-			SETHOLDER(lkp, pid, lid, cpu_num);
+			SETHOLDER(lkp, l, cpu_num);
 #if defined(LOCKDEBUG)
 			lkp->lk_lock_file = file;
 			lkp->lk_lock_line = line;
@@ -801,7 +861,7 @@ lockmgr(volatile struct lock *lkp, u_int
 			break;
 		}
 		lkp->lk_flags |= LK_HAVE_EXCL;
-		SETHOLDER(lkp, pid, lid, cpu_num);
+		SETHOLDER(lkp, l, cpu_num);
 #if defined(LOCKDEBUG)
 		lkp->lk_lock_file = file;
 		lkp->lk_lock_line = line;
@@ -835,7 +895,7 @@ lockmgr(volatile struct lock *lkp, u_int
 			COUNT(lkp, l, cpu_num, -1);
 			if (lkp->lk_exclusivecount == 0) {
 				lkp->lk_flags &= ~LK_HAVE_EXCL;
-				SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+				SETHOLDER(lkp, NULL, LK_NOCPU);
 #if defined(LOCKDEBUG)
 				lkp->lk_unlock_file = file;
 				lkp->lk_unlock_line = line;
@@ -880,7 +940,7 @@ lockmgr(volatile struct lock *lkp, u_int
 		if (error)
 			break;
 		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
-		SETHOLDER(lkp, pid, lid, cpu_num);
+		SETHOLDER(lkp, l, cpu_num);
 #if defined(LOCKDEBUG)
 		lkp->lk_lock_file = file;
 		lkp->lk_lock_line = line;
@@ -952,7 +1012,7 @@ spinlock_release_all(volatile struct loc
 		lkp->lk_exclusivecount = 0;
 		COUNT_CPU(cpu_num, -count);
 		lkp->lk_flags &= ~LK_HAVE_EXCL;
-		SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+		SETHOLDER(lkp, NULL, LK_NOCPU);
 #if defined(LOCKDEBUG)
 		lkp->lk_unlock_file = file;
 		lkp->lk_unlock_line = line;
@@ -1011,7 +1071,7 @@ spinlock_acquire_count(volatile struct l
 	    RETURN_ADDRESS);
 	lkp->lk_flags &= ~LK_WANT_EXCL;
 	lkp->lk_flags |= LK_HAVE_EXCL;
-	SETHOLDER(lkp, LK_NOPROC, 0, cpu_num);
+	SETHOLDER(lkp, NULL, cpu_num);
 #if defined(LOCKDEBUG)
 	lkp->lk_lock_file = file;
 	lkp->lk_lock_line = line;
@@ -1033,7 +1093,7 @@ spinlock_acquire_count(volatile struct l
  * routines to display ststus about contained locks.
  */
 void
-lockmgr_printinfo(volatile struct lock *lkp)
+lockmgr_printinfo(volatile const struct lock *lkp)
 {
 
 	if (lkp->lk_sharecount)
Index: sys/lock.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lock.h,v
retrieving revision 1.66
diff -u -p -r1.66 lock.h
--- sys/lock.h	7 Sep 2006 01:59:23 -0000	1.66
+++ sys/lock.h	17 Nov 2006 17:21:58 -0000
@@ -132,8 +132,7 @@ struct lock {
 	union {
 		struct {
 			/* pid of exclusive lock holder */
-			pid_t lk_sleep_lockholder;
-			lwpid_t lk_sleep_locklwp;
+			struct lwp * lk_sleep_lwp;
 
 			/* priority at which to sleep */
 			int lk_sleep_prio;
@@ -153,8 +152,9 @@ struct lock {
 		} lk_un_spin;
 	} lk_un;
 
-#define	lk_lockholder	lk_un.lk_un_sleep.lk_sleep_lockholder
-#define	lk_locklwp	lk_un.lk_un_sleep.lk_sleep_locklwp
+#define	lk_lockholder	lk_un.lk_un_sleep.lk_sleep_lwp->l_proc->p_pid
+#define	lk_locklwp	lk_un.lk_un_sleep.lk_sleep_lwp->l_lid
+#define	lk_lwp_p	lk_un.lk_un_sleep.lk_sleep_lwp
 #define	lk_prio		lk_un.lk_un_sleep.lk_sleep_prio
 #define	lk_timo		lk_un.lk_un_sleep.lk_sleep_timo
 #define	lk_newlock	lk_un.lk_un_sleep.lk_newlock
@@ -181,7 +181,7 @@ struct lock {
 	  0,				/* recurselevel */		\
 	  0,				/* waitcount */			\
 	  (wmesg),			/* waitmesg */			\
-	  { .lk_un_sleep = { 0, 0, (prio), (timo), NULL } },		\
+	  { .lk_un_sleep = { NULL, (prio), (timo), NULL } },		\
 	}
 #else
 #define	LOCK_INITIALIZER(prio, wmesg, timo, flags)			\
@@ -192,7 +192,7 @@ struct lock {
 	  0,				/* recurselevel */		\
 	  0,				/* waitcount */			\
 	  (wmesg),			/* waitmesg */			\
-	  { .lk_un_sleep = { 0, 0, (prio), (timo), NULL } },		\
+	  { .lk_un_sleep = { NULL, (prio), (timo), NULL } },		\
 	  NULL,				/* lk_lock_file */		\
 	  NULL,				/* lk_unlock_file */		\
 	  0,				/* lk_lock_line */		\
@@ -322,7 +322,7 @@ int	lockmgr(volatile struct lock *, u_in
 #endif /* LOCKDEBUG */
 void	transferlockers(struct lock *, struct lock *);
 int	lockstatus(struct lock *);
-void	lockmgr_printinfo(volatile struct lock *);
+void	lockmgr_printinfo(volatile const struct lock *);
 
 #if defined(LOCKDEBUG)
 void	spinlock_switchcheck(void);
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.41
diff -u -p -r1.41 lwp.h
--- sys/lwp.h	30 Jul 2006 21:58:11 -0000	1.41
+++ sys/lwp.h	17 Nov 2006 17:21:58 -0000
@@ -118,6 +118,8 @@ extern struct lwp lwp0;			/* LWP for pro
 #define	L_SELECT	0x00000040 /* [*] Selecting; wakeup/waiting danger. */
 #define	L_SINTR		0x00000080 /* [*] Sleep is interruptible. */
 #define	L_SA		0x00000400 /* [*] Scheduler activations LWP */
+#define L_LOCK		0x00000800 /* Waiting for a lock. l_wchan
+				    * points to a valid struct lock. */
 #define	L_SA_UPCALL	0x00200000 /* SA upcall is pending */
 #define	L_SA_BLOCKING	0x00400000 /* Blocking in tsleep() */
 #define	L_DETACHED	0x00800000 /* Won't be waited for. */

--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="deadlock.diff"

Index: sys/syscall.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscall.h,v
retrieving revision 1.174
diff -u -p -r1.174 syscall.h
--- sys/syscall.h	1 Sep 2006 21:04:45 -0000	1.174
+++ sys/syscall.h	17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscall.h,v 1.174 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
 
 /*
  * System call numbers.
@@ -1100,6 +1100,12 @@
 /* syscall: "__fhstat40" ret: "int" args: "const void *" "size_t" "struct stat *" */
 #define	SYS___fhstat40	398
 
-#define	SYS_MAXSYSCALL	399
+/* syscall: "lock_1" ret: "void" args: */
+#define	SYS_lock_1	399
+
+/* syscall: "lock_2" ret: "void" args: */
+#define	SYS_lock_2	400
+
+#define	SYS_MAXSYSCALL	401
 #define	SYS_NSYSENT	512
 #endif /* _SYS_SYSCALL_H_ */
Index: sys/syscallargs.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscallargs.h,v
retrieving revision 1.156
diff -u -p -r1.156 syscallargs.h
--- sys/syscallargs.h	1 Sep 2006 21:04:45 -0000	1.156
+++ sys/syscallargs.h	17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscallargs.h,v 1.156 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
 
 /*
  * System call argument lists.
@@ -2445,4 +2445,8 @@ int	sys___fhstatvfs140(struct lwp *, voi
 
 int	sys___fhstat40(struct lwp *, void *, register_t *);
 
+int	sys_lock_1(struct lwp *, void *, register_t *);
+
+int	sys_lock_2(struct lwp *, void *, register_t *);
+
 #endif /* _SYS_SYSCALLARGS_H_ */
Index: kern/init_sysent.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_sysent.c,v
retrieving revision 1.181
diff -u -p -r1.181 init_sysent.c
--- kern/init_sysent.c	1 Sep 2006 21:04:45 -0000	1.181
+++ kern/init_sysent.c	17 Nov 2006 17:21:57 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: init_sysent.c,v 1.181 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
 
 /*
  * System call switch table.
@@ -8,7 +8,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_sysent.c,v 1.181 2006/09/01 21:04:45 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
 
 #include "opt_ktrace.h"
 #include "opt_nfsserver.h"
@@ -1049,9 +1049,9 @@ struct sysent sysent[] = {
 	{ 3, s(struct sys___fhstat40_args), 0,
 	    sys___fhstat40 },			/* 398 = __fhstat40 */
 	{ 0, 0, 0,
-	    sys_nosys },			/* 399 = filler */
+	    sys_lock_1 },			/* 399 = lock_1 */
 	{ 0, 0, 0,
-	    sys_nosys },			/* 400 = filler */
+	    sys_lock_2 },			/* 400 = lock_2 */
 	{ 0, 0, 0,
 	    sys_nosys },			/* 401 = filler */
 	{ 0, 0, 0,
Index: kern/sys_deadlock.c
===================================================================
RCS file: kern/sys_deadlock.c
diff -N kern/sys_deadlock.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ kern/sys_deadlock.c	17 Nov 2006 17:21:58 -0000
@@ -0,0 +1,37 @@
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/syscallargs.h>
+#include <sys/lock.h>
+
+
+static struct lock lock1 = LOCK_INITIALIZER(PRIBIO, "lock1", 0, 0);
+
+static struct lock lock2 = LOCK_INITIALIZER(PRIBIO, "lock2", 0, 0);
+
+int
+sys_lock_1(struct lwp *l, void *v, register_t *retval)
+{
+	lockmgr(&lock1, LK_EXCLUSIVE, NULL);
+
+	while(1) {
+		lockmgr(&lock2, LK_EXCLUSIVE, NULL);
+		yield();
+		lockmgr(&lock2, LK_RELEASE, NULL);
+		yield();
+	}
+	return 0;
+}
+
+int
+sys_lock_2(struct lwp *l, void *v, register_t *retval)
+{
+	lockmgr(&lock2, LK_EXCLUSIVE, NULL);
+
+	while(1) {
+		// lockmgr(&lock1, LK_EXCLUSIVE, NULL);
+		yield();
+		// lockmgr(&lock1, LK_RELEASE, NULL);
+		yield();
+	}
+	return 0;
+}
Index: kern/syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.c,v
retrieving revision 1.177
diff -u -p -r1.177 syscalls.c
--- kern/syscalls.c	1 Sep 2006 22:23:18 -0000	1.177
+++ kern/syscalls.c	17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscalls.c,v 1.177 2006/09/01 22:23:18 matt Exp $ */
+/* $NetBSD$ */
 
 /*
  * System call names.
@@ -8,7 +8,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.177 2006/09/01 22:23:18 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ktrace.h"
@@ -534,4 +534,6 @@ const char *const syscallnames[] = {
 	"__fhopen40",			/* 396 = __fhopen40 */
 	"__fhstatvfs140",			/* 397 = __fhstatvfs140 */
 	"__fhstat40",			/* 398 = __fhstat40 */
+	"lock_1",			/* 399 = lock_1 */
+	"lock_2",			/* 400 = lock_2 */
 };
Index: kern/syscalls.master
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.master,v
retrieving revision 1.159
diff -u -p -r1.159 syscalls.master
--- kern/syscalls.master	1 Sep 2006 20:58:18 -0000	1.159
+++ kern/syscalls.master	17 Nov 2006 17:21:58 -0000
@@ -797,3 +797,5 @@
 			    size_t fh_size, struct statvfs *buf, int flags); }
 398	STD		{ int sys___fhstat40(const void *fhp, \
 			    size_t fh_size, struct stat *sb); }
+399	STD		{ void sys_lock_1(void); }
+400	STD		{ void sys_lock_2(void); }

--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="deadlock.c"

#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/*
	 * Parent and child each invoke one of the new syscalls from
	 * deadlock.diff by number (399 = lock_1, 400 = lock_2);
	 * neither call returns.
	 */
	if (fork()) {
		syscall(399);	/* parent: takes lock1, then loops on lock2 */
	} else {
		syscall(400);	/* child: takes lock2 and holds it */
	}
	return 1;
}

--YZ5djTAD1cGYuMQK--