NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/38707: scheduler related deadlock during build.sh



>Number:         38707
>Category:       kern
>Synopsis:       scheduler related deadlock during build.sh
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue May 20 17:40:00 +0000 2008
>Originator:     Andrew Doran
>Release:        4.99.63
>Organization:
The NetBSD Project
>Environment:
n/a
>Description:
db{0}> mach cpu 0
using CPU 0
db{0}> bt
breakpoint(0,3f8,5,c045b2fd,c0acd520,c4883f0c,c0acd520,c4886012,c4887000,7fa) at
 netbsd:breakpoint+0x4
comintr(c4883e00,d0ede9ec,0,0,0,0,0,0,0,0) at netbsd:comintr+0x575
DDB lost frame for netbsd:Xintr_ioapic_edge7+0xa9, trying 0xcf417f74
Xintr_ioapic_edge7() at netbsd:Xintr_ioapic_edge7+0xa9
--- interrupt ---
--- switch to interrupt stack ---
x86_pause(ce2c4b40,d0e1aa20,d0edeacc,c0450af6,d05c97a0,0,d0edeaac,c0452936,c0acc
dc0,d05c97a0) at netbsd:x86_pause+0x2
sleepq_remove(c0accdc0,d05c97a0,0,d81623e8,c0accdc0,c,d0edeacc,c043323b,c0accdc0
,d81623e8) at netbsd:sleepq_remove+0xce
sleepq_wake(c0accdc0,d81623e8,1,d0e1aa20,d0e1aa20,d7a83c40,d0edeccc,c0439f6d,d81
623e8,bfbffff0) at netbsd:sleepq_wake+0x46
cv_broadcast(d81623e8,bfbffff0,10,d0edecb0,ce368000,d0e18a5c,d0edeafc,c03fcb02,c
28c6c68,c28c6c68) at netbsd:cv_broadcast+0x5b
execve1(d7a83c40,8063f0c,8063f8c,bb905204,c0439040,c0a180cc,d0eded3c,c05185d0,d7
a83c40,d0eded00) at netbsd:execve1+0xbbd
sys_execve(d7a83c40,d0eded00,d0eded28,8063f0c,8063f8c,bb905204,b8,d7a83c40,d0ede
da0,0) at netbsd:sys_execve+0x31
syscall(d0eded48,b3,ab,bfbf001f,806001f,8063f0c,8063f8c,bfbfe338,8063f0c,6b7f6bf
f) at netbsd:syscall+0xa0

db{0}> mach cpu 1
using CPU 1
db{0}> bt
mutex_vector_enter(ce2c4f40,a18108,d1eed068,d1eadda0,d1eadd48,c0a181b0,d1eadd3c,
c05185d0,d205d160,d1eadd00) at netbsd:mutex_vector_enter+0x28a
sys_getpid_with_ppid(d205d160,d1eadd00,d1eadd28,bfbfe4e0,2,bbbc8b40,bfbfe4e8,0,0
,bbbf4715) at netbsd:sys_getpid_with_ppid+0x2a
syscall(d1eadd48,b3,ab,1f,1f,bfbfe5b4,80632e0,bfbfe558,80630fc,8054e03) at netbs
d:syscall+0xa0

db{0}> mach cpu 2
using CPU 2
db{0}> bt
x86_pause(ce2c4f40,0,d623ecb8,c0475f89,33,d07ee540,d623ec8c,d78538bc,1,d0b7fe40)
 at netbsd:x86_pause+0x2
exit1(d1eef3a0,0,d623ecec,0,0,c0a180cc,d623ed3c,c05185d0,d1eef3a0,d623ed00) at n
etbsd:exit1+0x237
sys_exit(d1eef3a0,d623ed00,d623ed28,0,bbbd522c,bfbfe040,bbba8dff,ffffffff,0,0) a
t netbsd:sys_exit+0x4d
syscall(d623ed48,b3,ab,1f,1f,bfbfe094,8446640,bfbfe040,bbbd522c,844a2d0) at netb
sd:syscall+0xa0

db{0}> mach cpu 3
using CPU 3
db{0}> bt
x86_pause(d7a83c40,d0a5ecb8,d0a5ec7c,c0448b74,ffffffff,ffffffff,2,d04dd938,0,0)
 at netbsd:x86_pause
mutex_vector_enter(ce2c4f40,900000,d0ae3870,d0a5eda0,d0a5ed48,c0a181b0,d0a5ed3c,
c05185d0,d9768d00,d0a5ed00) at netbsd:mutex_vector_enter+0x28f
sys_getpid_with_ppid(d9768d00,d0a5ed00,d0a5ed28,0,100000,3,14001002,ffffffff,0,0
) at netbsd:sys_getpid_with_ppid+0x2a
syscall(d0a5ed48,b3,ab,1f,1f,bfbff6ee,bb901040,bfbfe328,bbbd522c,fefefeff) at ne
tbsd:syscall+0xa0


db{0}> mach cpu 4
using CPU 4
db{0}> bt
x86_pause(1000001,8,3,ba4e1000,d12138b8,0,d1b42bf0,0,0,0) at netbsd:x86_pause
mutex_spin_retry(ce2c4900,2c6dee,4e4bde96,1000002,c4838848,cf5220c0,c4838848,1,0
,cf701860) at netbsd:mutex_spin_retry+0x55
sched_nextlwp(cf701860,1,0,0,0,0,0,0,1000,c4835000) at netbsd:sched_nextlwp+0xa7
mi_switch(cf701860,cf7018c0,0,cf701860,cf701860,1,d1b42d3c,c0518973,d78800f8,ba4
e4000) at netbsd:mi_switch+0x31b
preempt(d78800f8,ba4e4000,2,0,33,d0b75800,bb9e7000,1000,d0b75800,1) at netbsd:pr
eempt+0x55
trap() at netbsd:trap+0xf3


db{0}> mach cpu 5
using CPU 5
db{0}> bt
mutex_vector_enter(ce2c4f40,0,d07b0cb8,c0475f89,33,d24fd640,d07b0c8c,d071c034,1,
d1ec1500) at netbsd:mutex_vector_enter+0x28f
exit1(d0636360,0,d07b0cec,0,0,c0a180cc,d07b0d3c,c05185d0,d0636360,d07b0d00) at n
etbsd:exit1+0x237
sys_exit(d0636360,d07b0d00,d07b0d28,0,bbbd522c,bfbfe1e8,bbba8dff,0,bb978000,34)
 at netbsd:sys_exit+0x4d
syscall(d07b0d48,b3,ab,1f,1f,1,1,bfbfe1e8,bbbd522c,80ca8d0) at netbsd:syscall+0x

db{0}> mach cpu 6
using CPU 6
db{0}> bt
x86_pause(d7a83c40,d07d5c38,d07d5c0c,c03f8e0a,d9bca238,0,4,c05027f5,0,0) at netb
sd:x86_pause
mutex_vector_enter(ce2c4f40,0,d07d5cb8,c0475f89,33,d03e3900,d07d5c8c,d1b90560,1,
d0718840) at netbsd:mutex_vector_enter+0x28f
exit1(d05df0c0,0,d07d5cec,0,0,c0a180cc,d07d5d3c,c05185d0,d05df0c0,d07d5d00) at n
etbsd:exit1+0x237
sys_exit(d05df0c0,d07d5d00,d07d5d28,0,bbbd522c,bfbfe03c,bbba8dff,ffffffff,0,0) a
t netbsd:sys_exit+0x4d
syscall(d07d5d48,bab400b3,bab400ab,bfbf001f,1f,bfbfe090,8446640,bfbfe03c,bbbd522
c,844a2d0) at netbsd:syscall+0xa0

db{0}> mach cpu 7
using CPU 7
db{0}> bt
x86_pause(ce2c4900,d1f4ac74,0,0,0,0,0,0,1000,c4838800) at netbsd:x86_pause+0x2
mi_switch(d1b92cc0,d1b92d20,0,d1b92cc0,d1b92cc0,1,d1f4ad3c,c0518973,d0dad63c,b9d
86000) at netbsd:mi_switch+0x23f
preempt(d0dad63c,b9d86000,2,0,33,d07ee180,bb708000,1000,d07ee180,1) at netbsd:pr
eempt+0x55
trap() at netbsd:trap+0xf3


>How-To-Repeat:
Be unlucky? I haven't seen this before.

>Fix:
Not known, but the spc_unlock() in sched_catchlwp() looks suspicious.
It may be unsafe to unlock curcpu there. I think it would be better
to bail out if the trylock fails.



Home | Main Index | Thread Index | Old Index