Subject: kern/24551: switching with held simple_lock in sys_pipe.c on i386/MP system
To: None <gnats-bugs@gnats.netbsd.org>
From: None <he@netbsd.org>
List: netbsd-bugs
Date: 02/24/2004 20:48:12
>Number:         24551
>Category:       kern
>Synopsis:       switching with held simple_lock in sys_pipe.c on i386/MP system
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Feb 24 19:49:00 UTC 2004
>Closed-Date:
>Last-Modified:
>Originator:     Havard Eidnes
>Release:        NetBSD 1.6ZK Feb 24 2004 (i.e. current sources)
>Organization:
	
>Environment:
System: NetBSD splitter-pine.urc.uninett.no 1.6ZK NetBSD 1.6ZK (SPLITTER-PINE) #1: Tue Feb 24 18:58:03 CET 2004  he@splitter-pine.urc.uninett.no:/usr/obj/sys/arch/i386/compile/SPLITTER-PINE i386
Architecture: i386
Machine: i386
>Description:
	A GENERIC.MP + LOCKDEBUG + DIAGNOSTIC kernel just paniced with

switching with held simple_lock 0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359

	Console log from panic'ed system:

NetBSD/i386 (splitter-pine.urc.uninett.no) (console)

login: Feb 24 19:04:03 splitter-pine getty[537]: /dev/ttyE0: Device not configured

switching with held simple_lock 0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359
simple_lock_switchcheck(cda7a214,0,cdab5d9c,c0362585,c07816b4) at netbsd:simple_lock_switchcheck+0x15
mi_switch(cda7a214,0,c037096c,cda7a214,0) at netbsd:mi_switch+0x4e
ltsleep(c07e1f64,10,c06e7fab,64,0) at netbsd:ltsleep+0x47b
pipelock(c24b64cc,0,c24b6268,1,0) at netbsd:pipelock+0x7f
pipe_direct_write(cdb4f4b8,c24b64cc,cdab5ec4,c2224000,c24b64e8) at netbsd:pipe_direct_write+0x24e
pipe_write(cdb4f4b8,cdb4f4e0,cdab5ec4,c2523200,1) at netbsd:pipe_write+0x197
dofilewrite(cda7ec88,6,cdb4f4b8,8057000,2000) at netbsd:dofilewrite+0x85
sys_write(cda7a214,cdab5f64,cdab5f5c,4,0) at netbsd:sys_write+0x6f
syscall_plain(cdab5fa8,1f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
Stopped in pid 294.1 (scp) at   netbsd:cpu_Debugger+0x4:        leave
db{3}> machine cpu 0
using CPU 0
db{3}> trace
__cpu_simple_lock(c07816b4,ccb51a7c,c2224800,c0100f66,ccb284a4) at netbsd:__cpu_simple_lock+0x1d
_simple_lock(c07816b4,c07362c0,475,c2224800,c2224800) at netbsd:_simple_lock+0x74
schedclock(ccb284a4,ccb284a4,c0100f66,ccb51acc,ccb51acc) at netbsd:schedclock+0x58
hardclock(ccb51ae0,d,c07cbb80,ccb284a4,2562) at netbsd:hardclock+0x593
lapic_clockintr(0,0,10,30,400010) at netbsd:lapic_clockintr+0x48
Xintr_lapic_ltimer() at netbsd:Xintr_lapic_ltimer+0x76
kernel: page fault trap, code=0
Faulted in DDB; continuing...
db{3}> show reg
ds          0x10
es          0x10
fs          0x30
gs          0x10
edi         0x475
esi         0xc
ebp         0xccb51a34  usb_all_tasks+0xc353ddc
ebx         0xc07816b4  sched_lock
edx         0xc07816b4  sched_lock
ecx         0xc
eax         0x1
eip         0xc040d169  __cpu_simple_lock+0x1d
cs          0x8
eflags      0x246
esp         0xcc2bbfbc  usb_all_tasks+0xbabe364
ss          0xcc2b0010  usb_all_tasks+0xbab23b8
netbsd:__cpu_simple_lock+0x1d:  decl    %eax
db{3}> machine cpu 1
using CPU 1
db{3}> trace
__cpu_simple_lock(c07816b4,cdaefc8c,cdb1be18,c03715e4,c07816b4) at netbsd:__cpu_simple_lock+0x1d
_simple_lock(c07816b4,c07362c0,475,c2222000,c2222000) at netbsd:_simple_lock+0x74
schedclock(cda7a8c8,c0784980,c0740d00,94,ffffffff) at netbsd:schedclock+0x58
hardclock(cdb1be94,403ba13f,bda42,0,0) at netbsd:hardclock+0x593
lapic_clockintr(0,0,c07e0010,cdb10030,c0370010) at netbsd:lapic_clockintr+0x48
Xintr_lapic_ltimer() at netbsd:Xintr_lapic_ltimer+0x76
--- interrupt ---
0:
db{3}> show reg
ds          0x10
es          0x10
fs          0x30
gs          0x10
edi         0x475
esi         0xc
ebp         0xcdb1bde8  usb_all_tasks+0xd31e190
ebx         0xc07816b4  sched_lock
edx         0xc07816b4  sched_lock
ecx         0xc
eax         0x1
eip         0xc040d169  __cpu_simple_lock+0x1d
cs          0x8
eflags      0x246
esp         0xcc29ffbc  usb_all_tasks+0xbaa2364
ss          0xcc290010  usb_all_tasks+0xba923b8
netbsd:__cpu_simple_lock+0x1d:  decl    %eax
db{3}> machine cpu 2
using CPU 2
db{3}> trace
acquire(c07e0d20,cdb09d9c,400000,0,600) at netbsd:acquire+0x49
_spinlock_acquire_count(c07e0d20,1,c07362c0,3d2,c24b64e8) at netbsd:_spinlock_acquire_count+0x99
mi_switch(cda7a73c,0,330,0,c24b64cc) at netbsd:mi_switch+0x1a7
preempt(1,c040e640,cdb09e60,c041a683,c0784980) at netbsd:preempt+0x75
uiomove(cdbf3000,2000,cdb09ec4,0,0) at netbsd:uiomove+0x146
pipe_read(cdb4f788,cdb4f7b0,cdb09ec4,c2523200,1) at netbsd:pipe_read+0x1fd
dofileread(cda7e8f8,0,cdb4f788,bfbfd310,2000) at netbsd:dofileread+0x80
sys_read(cda7a73c,cdb09f64,cdb09f5c,3,0) at netbsd:sys_read+0x6c
syscall_plain(cdb09fa8,80a001f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
db{3}> show reg
ds          0x10
es          0x10
fs          0x30
gs          0x10
edi         0
esi         0x335f4fe
ebp         0xcdb09d6c  usb_all_tasks+0xd30c114
ebx         0xc07e0d20  kernel_lock
edx         0x80000000
ecx         0xd
eax         0x480400
eip         0xc0360e1d  acquire+0x49
cs          0x8
eflags      0x246
esp         0xcc2d7fbc  usb_all_tasks+0xbada364
ss          0xcc2d0010  usb_all_tasks+0xbad23b8
netbsd:acquire+0x49:    incl    %esi
db{3}> machine cpu 3
using CPU 3
db{3}> trace
cpu_Debugger(c07816b4,c06e6fd6,cdab5d9c,c03713f9,d) at netbsd:cpu_Debugger+0x4
simple_lock_switchcheck(cda7a214,0,cdab5d9c,c0362585,c07816b4) at netbsd:simple_lock_switchcheck+0x15
mi_switch(cda7a214,0,c037096c,cda7a214,0) at netbsd:mi_switch+0x4e
ltsleep(c07e1f64,10,c06e7fab,64,0) at netbsd:ltsleep+0x47b
pipelock(c24b64cc,0,c24b6268,1,0) at netbsd:pipelock+0x7f
pipe_direct_write(cdb4f4b8,c24b64cc,cdab5ec4,c2224000,c24b64e8) at netbsd:pipe_direct_write+0x24e
pipe_write(cdb4f4b8,cdb4f4e0,cdab5ec4,c2523200,1) at netbsd:pipe_write+0x197
dofilewrite(cda7ec88,6,cdb4f4b8,8057000,2000) at netbsd:dofilewrite+0x85
sys_write(cda7a214,cdab5f64,cdab5f5c,4,0) at netbsd:sys_write+0x6f
syscall_plain(cdab5fa8,1f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
db{3}> show reg
ds          0x10
es          0x10
fs          0x30
gs          0x10
edi         0xcda7ec88  usb_all_tasks+0xd281030
esi         0
ebp         0xcdab5d3c  usb_all_tasks+0xd2b80e4
ebx         0xc078c800  cpu_info_primary
edx         0xc06e5f25  copyright+0x10a45
ecx         0xc0821ec0  usb_all_tasks+0x24268
eax         0x1
eip         0xc04129c0  cpu_Debugger+0x4
cs          0x8
eflags      0x202
esp         0xcdab5d3c  usb_all_tasks+0xd2b80e4
ss          0x10
netbsd:cpu_Debugger+0x4:        leave
db{3}> call simple_lock_dump
all simple locks:
0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359
0xc07816b4 CPU 3 /usr/src/sys/kern/kern_synch.c:421
0xcdac1804 CPU 0 /usr/src/sys/miscfs/genfs/genfs_vnops.c:1180
0xc078c800
db{3}> reboot 4

>How-To-Repeat:
	I'm not sure what triggers this.

>Fix:
	I don't know how to fix this, but I think it is clear that the
	sequence of first doing simple_lock() and then doing tsleep()
	is what triggers this check with LOCKDEBUG.
>Release-Note:
>Audit-Trail:
>Unformatted: