Subject: kern/24551: switching with held simple_lock in sys_pipe.c on i386/MP system
To: None <gnats-bugs@gnats.netbsd.org>
From: None <he@netbsd.org>
List: netbsd-bugs
Date: 02/24/2004 20:48:12
>Number: 24551
>Category: kern
>Synopsis: switching with held simple_lock in sys_pipe.c on i386/MP system
>Confidential: no
>Severity: serious
>Priority: high
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Feb 24 19:49:00 UTC 2004
>Closed-Date:
>Last-Modified:
>Originator: Havard Eidnes
>Release: NetBSD 1.6ZK Feb 24 2004 (i.e. current sources)
>Organization:
>Environment:
System: NetBSD splitter-pine.urc.uninett.no 1.6ZK NetBSD 1.6ZK (SPLITTER-PINE) #1: Tue Feb 24 18:58:03 CET 2004 he@splitter-pine.urc.uninett.no:/usr/obj/sys/arch/i386/compile/SPLITTER-PINE i386
Architecture: i386
Machine: i386
>Description:
A GENERIC.MP + LOCKDEBUG + DIAGNOSTIC kernel just paniced with
switching with held simple_lock 0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359
Console log from panic'ed system:
NetBSD/i386 (splitter-pine.urc.uninett.no) (console)
login: Feb 24 19:04:03 splitter-pine getty[537]: /dev/ttyE0: Device not configured
switching with held simple_lock 0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359
simple_lock_switchcheck(cda7a214,0,cdab5d9c,c0362585,c07816b4) at netbsd:simple_lock_switchcheck+0x15
mi_switch(cda7a214,0,c037096c,cda7a214,0) at netbsd:mi_switch+0x4e
ltsleep(c07e1f64,10,c06e7fab,64,0) at netbsd:ltsleep+0x47b
pipelock(c24b64cc,0,c24b6268,1,0) at netbsd:pipelock+0x7f
pipe_direct_write(cdb4f4b8,c24b64cc,cdab5ec4,c2224000,c24b64e8) at netbsd:pipe_direct_write+0x24e
pipe_write(cdb4f4b8,cdb4f4e0,cdab5ec4,c2523200,1) at netbsd:pipe_write+0x197
dofilewrite(cda7ec88,6,cdb4f4b8,8057000,2000) at netbsd:dofilewrite+0x85
sys_write(cda7a214,cdab5f64,cdab5f5c,4,0) at netbsd:sys_write+0x6f
syscall_plain(cdab5fa8,1f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
Stopped in pid 294.1 (scp) at netbsd:cpu_Debugger+0x4: leave
db{3}> machine cpu 0
using CPU 0
db{3}> trace
__cpu_simple_lock(c07816b4,ccb51a7c,c2224800,c0100f66,ccb284a4) at netbsd:__cpu_simple_lock+0x1d
_simple_lock(c07816b4,c07362c0,475,c2224800,c2224800) at netbsd:_simple_lock+0x74
schedclock(ccb284a4,ccb284a4,c0100f66,ccb51acc,ccb51acc) at netbsd:schedclock+0x58
hardclock(ccb51ae0,d,c07cbb80,ccb284a4,2562) at netbsd:hardclock+0x593
lapic_clockintr(0,0,10,30,400010) at netbsd:lapic_clockintr+0x48
Xintr_lapic_ltimer() at netbsd:Xintr_lapic_ltimer+0x76
kernel: page fault trap, code=0
Faulted in DDB; continuing...
db{3}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0x475
esi 0xc
ebp 0xccb51a34 usb_all_tasks+0xc353ddc
ebx 0xc07816b4 sched_lock
edx 0xc07816b4 sched_lock
ecx 0xc
eax 0x1
eip 0xc040d169 __cpu_simple_lock+0x1d
cs 0x8
eflags 0x246
esp 0xcc2bbfbc usb_all_tasks+0xbabe364
ss 0xcc2b0010 usb_all_tasks+0xbab23b8
netbsd:__cpu_simple_lock+0x1d: decl %eax
db{3}> machine cpu 1
using CPU 1
db{3}> trace
__cpu_simple_lock(c07816b4,cdaefc8c,cdb1be18,c03715e4,c07816b4) at netbsd:__cpu_simple_lock+0x1d
_simple_lock(c07816b4,c07362c0,475,c2222000,c2222000) at netbsd:_simple_lock+0x74
schedclock(cda7a8c8,c0784980,c0740d00,94,ffffffff) at netbsd:schedclock+0x58
hardclock(cdb1be94,403ba13f,bda42,0,0) at netbsd:hardclock+0x593
lapic_clockintr(0,0,c07e0010,cdb10030,c0370010) at netbsd:lapic_clockintr+0x48
Xintr_lapic_ltimer() at netbsd:Xintr_lapic_ltimer+0x76
--- interrupt ---
0:
db{3}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0x475
esi 0xc
ebp 0xcdb1bde8 usb_all_tasks+0xd31e190
ebx 0xc07816b4 sched_lock
edx 0xc07816b4 sched_lock
ecx 0xc
eax 0x1
eip 0xc040d169 __cpu_simple_lock+0x1d
cs 0x8
eflags 0x246
esp 0xcc29ffbc usb_all_tasks+0xbaa2364
ss 0xcc290010 usb_all_tasks+0xba923b8
netbsd:__cpu_simple_lock+0x1d: decl %eax
db{3}> machine cpu 2
using CPU 2
db{3}> trace
acquire(c07e0d20,cdb09d9c,400000,0,600) at netbsd:acquire+0x49
_spinlock_acquire_count(c07e0d20,1,c07362c0,3d2,c24b64e8) at netbsd:_spinlock_acquire_count+0x99
mi_switch(cda7a73c,0,330,0,c24b64cc) at netbsd:mi_switch+0x1a7
preempt(1,c040e640,cdb09e60,c041a683,c0784980) at netbsd:preempt+0x75
uiomove(cdbf3000,2000,cdb09ec4,0,0) at netbsd:uiomove+0x146
pipe_read(cdb4f788,cdb4f7b0,cdb09ec4,c2523200,1) at netbsd:pipe_read+0x1fd
dofileread(cda7e8f8,0,cdb4f788,bfbfd310,2000) at netbsd:dofileread+0x80
sys_read(cda7a73c,cdb09f64,cdb09f5c,3,0) at netbsd:sys_read+0x6c
syscall_plain(cdb09fa8,80a001f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
db{3}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0
esi 0x335f4fe
ebp 0xcdb09d6c usb_all_tasks+0xd30c114
ebx 0xc07e0d20 kernel_lock
edx 0x80000000
ecx 0xd
eax 0x480400
eip 0xc0360e1d acquire+0x49
cs 0x8
eflags 0x246
esp 0xcc2d7fbc usb_all_tasks+0xbada364
ss 0xcc2d0010 usb_all_tasks+0xbad23b8
netbsd:acquire+0x49: incl %esi
db{3}> machine cpu 3
using CPU 3
db{3}> trace
cpu_Debugger(c07816b4,c06e6fd6,cdab5d9c,c03713f9,d) at netbsd:cpu_Debugger+0x4
simple_lock_switchcheck(cda7a214,0,cdab5d9c,c0362585,c07816b4) at netbsd:simple_lock_switchcheck+0x15
mi_switch(cda7a214,0,c037096c,cda7a214,0) at netbsd:mi_switch+0x4e
ltsleep(c07e1f64,10,c06e7fab,64,0) at netbsd:ltsleep+0x47b
pipelock(c24b64cc,0,c24b6268,1,0) at netbsd:pipelock+0x7f
pipe_direct_write(cdb4f4b8,c24b64cc,cdab5ec4,c2224000,c24b64e8) at netbsd:pipe_direct_write+0x24e
pipe_write(cdb4f4b8,cdb4f4e0,cdab5ec4,c2523200,1) at netbsd:pipe_write+0x197
dofilewrite(cda7ec88,6,cdb4f4b8,8057000,2000) at netbsd:dofilewrite+0x85
sys_write(cda7a214,cdab5f64,cdab5f5c,4,0) at netbsd:sys_write+0x6f
syscall_plain(cdab5fa8,1f,1f,1f,bfbf001f) at netbsd:syscall_plain+0x17e
db{3}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0xcda7ec88 usb_all_tasks+0xd281030
esi 0
ebp 0xcdab5d3c usb_all_tasks+0xd2b80e4
ebx 0xc078c800 cpu_info_primary
edx 0xc06e5f25 copyright+0x10a45
ecx 0xc0821ec0 usb_all_tasks+0x24268
eax 0x1
eip 0xc04129c0 cpu_Debugger+0x4
cs 0x8
eflags 0x202
esp 0xcdab5d3c usb_all_tasks+0xd2b80e4
ss 0x10
netbsd:cpu_Debugger+0x4: leave
db{3}> call simple_lock_dump
all simple locks:
0xc24b64cc CPU 3 /usr/src/sys/kern/sys_pipe.c:359
0xc07816b4 CPU 3 /usr/src/sys/kern/kern_synch.c:421
0xcdac1804 CPU 0 /usr/src/sys/miscfs/genfs/genfs_vnops.c:1180
0xc078c800
db{3}> reboot 4
>How-To-Repeat:
I'm not sure what triggers this.
>Fix:
I don't know how to fix this, but I think it is clear that the
sequence of first doing simple_lock() and then doing tsleep()
is what triggers this check with LOCKDEBUG.
>Release-Note:
>Audit-Trail:
>Unformatted: