NetBSD-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Substantial COMPAT_LINUX changes in netbsd-5?



> | Loopback seems to work, but then again, we had problems running that.
> | So the hanging df seems to be COMPAT_LINUX + NFS related.
> |
> | How can we help to find the cause so it can be fixed?
>
> Make a LOCKDEBUG+DEBUG+DIAGNOSTIC kernel and see if you can get to ddb
> when you get stuck.

Managed to get that.  When the backup process got stuck, I typed
"df" out of old habit, and it also got stuck:

mail-server% ps axl | egrep tstile\|PID
 UID   PID  PPID  CPU PRI NI   VSZ   RSS WCHAN   STAT TTY       TIME COMMAND
   0 24173     1  195 117  0 17860  4876 tstile  D    ?      0:00.03 bpbkar -r 
   0 19422 26923    0 117  0  2824   796 tstile  D+   ttyp5  0:00.00 df 
mail-server% 

Broke into DDB on the console, and here are the two stack backtraces
for these two processes:

db{0}> trace/t 0t24173
trace: pid 24173 lid 1 at 0xd40b58ac
sleepq_block(0,0,c0ab4ae2,c0b27c04,d453f5b4,e9a88840,c150bac0,e2f6dcb0,75,40) 
at netbsd:sleepq_block+0xeb
turnstile_block(0,1,d453f5b4,c0b27c04,2,2,d40b595c,c0534d58,d453f5cc,2) at 
netbsd:turnstile_block+0x277
rw_vector_enter(d453f5b4,1,4,c5137918,2,d453f514,d40b59ec,c0542e7d,d453f5b4,2) 
at netbsd:rw_vector_enter+0x2a1
vlockmgr(d453f5b4,2,d40b59fc,d453f514,d453f514,d453f514,d40b5a1c,c053cd98,d40b5a0c,d453f5cc)
 at netbsd:vlockmgr+0x126
genfs_lock(d40b5a0c,d453f5cc,d453f514,c04b4fea,8,c0534d75,c089d3c0,d453f514,2,20002)
 at netbsd:genfs_lock+0x3d
VOP_LOCK(d453f514,2,d45bd000,d453f514,d453f514,30002,d40b5a5c,c052cdc8,d453f514,30002)
 at netbsd:VOP_LOCK+0x38
vn_lock(d453f514,30002,d40b5a7c,c053cd3f,d453f514,d45bd000,d40b5a7c,c03d3eb1,d453f514,20002)
 at netbsd:vn_lock+0xd8
vget(d453f514,20002,c089d400,d45bd000,d45bd000,d45bd000,d40b5a9c,c05293f5,d45bd000,d40b5ae8)
 at netbsd:vget+0x128
nfs_root(d45bd000,d40b5ae8,c089d400,0,0,d45bd000,d40b5afc,c0526b97,d45bd000,d40b5ae8)
 at netbsd:nfs_root+0x21
VFS_ROOT(d45bd000,d40b5ae8,d40b5bd8,c053cdac,1,0,0,18,20,0) at 
netbsd:VFS_ROOT+0x25
lookup(d40b5bb0,20002,400,d40b5bcc,200,c5854068,d40b5b3c,c5137918,d40b5bcc,0) 
at netbsd:lookup+0x297
namei(d40b5bb0,0,ce3a4f80,c04b4fea,8,c051337d,0,bfbfa23b,0,0) at 
netbsd:namei+0x145
do_sys_stat(bfbfa23b,0,d40b5c1c,0,ffffffff,817b,0,16,54,bfbf9080) at 
netbsd:do_sys_stat+0x37
linux_do_stat64(0,d40b5d10,d40b5d28,c0739562,0,d4994340,c0b4b050,bfbfa23b,bfbf81d0,bb028ff4)
 at netbsd:linux_do_stat64+0x2f
linux_syscall(d40b5d48,2b,2b,2b,2b,bb02a360,823f898,bfbf8198,bfbfa23b,bb028ff4) 
at netbsd:linux_syscall+0x9b
db{0}>
db{0}>
db{0}> trace/t 0t19422
trace: pid 19422 lid 1 at 0xd47a295c
sleepq_block(0,0,c0ab4ae2,c0b27c04,d453f5b4,d4419ae0,c150bac0,e2f6dcb0,75,40) 
at netbsd:sleepq_block+0xeb
turnstile_block(e2f6dc98,1,d453f5b4,c0b27c04,2,2,d47a2a0c,c0534d58,d453f5cc,2) 
at netbsd:turnstile_block+0x277
rw_vector_enter(d453f5b4,1,4,c5137918,2,d453f514,d47a2a9c,c0542e7d,d453f5b4,2) 
at netbsd:rw_vector_enter+0x2a1
vlockmgr(d453f5b4,2,d47a2aac,d453f514,d453f514,d453f514,d47a2acc,c053cd98,d47a2abc,d453f5cc)
 at netbsd:vlockmgr+0x126
genfs_lock(d47a2abc,d453f5cc,d453f514,c04b4fea,8,c0534d75,c089d3c0,d453f514,2,20002)
 at netbsd:genfs_lock+0x3d
VOP_LOCK(d453f514,2,d45bd000,d453f514,d453f514,30002,d47a2b0c,c052cdc8,d453f514,30002)
 at netbsd:VOP_LOCK+0x38
vn_lock(d453f514,30002,d47a2b2c,c053cd3f,d453f514,d45bd000,d47a2b2c,c03d3eb1,d453f514,20002)
 at netbsd:vn_lock+0xd8
vget(d453f514,20002,c089d400,d45bd000,d45bd000,d45bd000,d47a2b4c,c05293f5,d45bd000,d47a2b98)
 at netbsd:vget+0x128
nfs_root(d45bd000,d47a2b98,c089d400,0,0,d45bd000,d47a2bac,c0526b97,d45bd000,d47a2b98)
 at netbsd:nfs_root+0x21
VFS_ROOT(d45bd000,d47a2b98,d47a2c88,c053cdc0,d47a2b7c,d4419ae0,0,18,20,0) at 
netbsd:VFS_ROOT+0x25
lookup(d47a2c60,20002,400,d47a2c7c,c04ecd0f,ffffffff,0,c626c2c0,d47a2c7c,c50d0878)
 at netbsd:lookup+0x297
namei(d47a2c60,c50d0800,d47a2c8c,c04ed2ae,c50d0800,8b5,4,bb907070,0,0) at 
netbsd:namei+0x145
do_sys_pstatvfs(d4419ae0,bb907070,1,de74a3c8,c01007b0,0,0,d4419ae0,0,c0b25e7c) 
at netbsd:do_sys_pstatvfs+0x35
sys_statvfs1(d4419ae0,d47a2d00,d47a2d28,bbb6f000,e4f3cc84,e4f3cc84,1,bb907070,bb906fbc,1)
 at netbsd:sys_statvfs1+0x45
syscall(d47a2d48,b3,ab,1f,1f,bb906fbc,bfbfe348,bfbfe308,bbbd5ab8,bb906c16) at 
netbsd:syscall+0xc8
db{0}>
db{0}>
db{0}>


I also did a backtrace of the other bpbkar processes which in "ps axl"
output had these wait channels:

   0  5177     1  302 117  0 17860  4876 nfsrcv  D    ?      0:00.03 bpbkar -r 
   0  7179     1  915  85  0 18048  7260 uvn_fp2 D    ?      2:52.22 bpbkar -r 

db{0}> trace/t 0t7179
trace: pid 7179 lid 1 at 0xdb7ae3cc
sleepq_block(0,0,c0aaba51,c0b27c80,0,c150add8,62,c3ede230,de64667c,0) at 
netbsd:sleepq_block+0xeb
mtsleep(c3ede230,204,c0aaba51,0,de64667c,de64667c,10,6,0,0) at 
netbsd:mtsleep+0x12d
uvn_findpage(db7ae5ac,0,db7ae4ac,c05343fa,0,0,2,0,994000,db7ae5cc) at 
netbsd:uvn_findpage+0x92
uvn_findpages(de64667c,97c40000,3,db7ae5ec,db7ae5ac,0,994000,20,2,0) at 
netbsd:uvn_findpages+0x73
genfs_getpages(db7ae6b0,0,0,0,0,97cb0000,0,0,2,db7ae65c) at 
netbsd:genfs_getpages+0x743
nfs_getpages(db7ae6b0,4,97c42000,3,0,10000,97cc0000,c089d600,de64667c,97c40000) 
at netbsd:nfs_getpages+0xbb
VOP_GETPAGES(de64667c,97c40000,3,db7ae750,db7ae7c8,0,1,0,1802,0) at 
netbsd:VOP_GETPAGES+0x65
uvn_get(de64667c,97c40000,3,db7ae750,db7ae7c8,0,1,0,1802,e41be780) at 
netbsd:uvn_get+0x117
ubc_fault(db7ae8e0,d3a75000,db7ae8a0,1,0,1,42,c085d206,cee38540,ce3a4d00) at 
netbsd:ubc_fault+0x170
uvm_fault_internal(c0bc21c0,d3a75000,1,0,c4ec6482,c0000,0,c05a6cfa,6,6) at 
netbsd:uvm_fault_internal+0x3a9
trap() at netbsd:trap+0x797
--- trap (number 6) ---
copyout(e390a0e4,d3a75000,8249400,2000,e390a0e4,0,d3a75000,97c40000,3,d3a75000) 
at netbsd:copyout+0x33
uiomove(d3a75000,2000,db7aec8c,db7aeadc,0,101,deaddead,0,1829b58,0) at 
netbsd:uiomove+0x62
ubc_uiomove(de64667c,db7aec8c,10000,0,101,eee4221c,db7aeb2c,c085d206,de615800,de64671c)
 at netbsd:ubc_uiomove+0xeb
nfs_bioread(de64667c,db7aec8c,0,ce3a6f00,0,de64667c,db7aec2c,c053d6f4,db7aec14,de64667c)
 at netbsd:nfs_bioread+0x312
nfs_read(db7aec14,de64667c,c089d3c0,de64667c,1,20001,db7aec2c,c0534d58,c089ce80,de64667c)
 at netbsd:nfs_read+0x43
VOP_READ(de64667c,db7aec8c,0,ce3a6f00,d4728580,0,7aec6c,16,10000,8249400) at 
netbsd:VOP_READ+0x44
vn_read(e4408600,e4408600,db7aec8c,ce3a6f00,1,0,0,0,e41be780,db7aed48) at 
netbsd:vn_read+0x93
dofileread(9,e4408600,8249400,10000,e4408600,1,db7aed28,db7aed48,db7aed48,e41be780)
 at netbsd:dofileread+0x75
sys_read(e41be780,db7aed10,db7aed28,7aed20,96,10,c0b4a744,9,8249400,10000) at 
netbsd:sys_read+0x6f
linux_syscall(db7aed48,2b,2b,2b,2b,610,8259300,bfbeec08,9,10000) at 
netbsd:linux_syscall+0x9b
db{0}>
db{0}>
db{0}>
db{0}> trace/t 0t5177
trace: pid 5177 lid 1 at 0xd450b8bc
sleepq_block(0,0,c0aa0195,c0b26610,c0bc3ac2,0,0,dbd1a5bc,d45bdb04,d45bd914) at 
netbsd:sleepq_block+0xeb
cv_timedwait(d45bd91c,d45bd914,0,c04df59e,d45bd91c,0,50b94c,0,c588e3c8,dbd1a5bc)
 at netbsd:cv_timedwait+0x12b
nfs_rcvlock(0,8280,1,1,5,0,0,0,c6444c00,20) at netbsd:nfs_rcvlock+0xad
nfs_request(d4729990,c6444c00,1,e9a88840,ce3a6f00,d450bab4,d450bab0,d450bab8,0,0)
 at netbsd:nfs_request+0x59d
nfs_getattr(d450bae8,d6652c00,4,c052cc65,d453fad4,c089ce00,d453f514,d450bb14,ce3a6f00,d450bc1c)
 at netbsd:nfs_getattr+0x140
VOP_GETATTR(d453f514,d450bb14,ce3a6f00,d450bbcc,200,c5854068,d450bb3c,c5137918,d450bbcc,0)
 at netbsd:VOP_GETATTR+0x3e
vn_stat(d453f514,d450bc1c,ce3a4f80,c04b4fea,8,c051337d,0,bfbfa22b,0,0) at 
netbsd:vn_stat+0x26
do_sys_stat(bfbfa22b,0,d450bc1c,0,ffffffff,817b,0,16,54,bfbf9070) at 
netbsd:do_sys_stat+0x4f
linux_do_stat64(0,d450bd10,d450bd28,c0739562,0,e9a88840,c0b4b050,bfbfa22b,bfbf81c0,bb028ff4)
 at netbsd:linux_do_stat64+0x2f
linux_syscall(d450bd48,2b,2b,2b,2b,bb02a360,823f828,bfbf8188,bfbfa22b,bb028ff4) 
at netbsd:linux_syscall+0x9b
db{0}>
db{0}>
db{0}>

Hope this helps in narrowing down the problem.

If not, please tell me what further information to collect.

Oh, yes, BTW, when I did a "reboot 4", it panic()ed with a LOCKDEBUG
panic:

db{0}> reboot 4
Mutex error: lockdebug_wantlock: acquiring sleep lock from interrupt context

lock address : 0x00000000c150bc0c type     :     sleep/adaptive
initialized  : 0x00000000c04d8066
shared holds :                  0 exclusive:                  0
shares wanted:                  0 exclusive:                  0
current cpu  :                  0 last held:                  2
current lwp  : 0x00000000ce3a7c80 last held: 000000000000000000
last locked  : 0x00000000c04d7f07 unlocked : 0x00000000c04d7f87
owner field  : 000000000000000000 wait/spin:                0/0

Turnstile chain at 0xc150b420.
=> No active turnstile for this lock.

panic: LOCKDEBUG
Begin traceback...
copyright(cee36d83,0,0,c0b27974,cee36d80,cee36d18,c150bc0c,c0b273d0,0,c04d7f07) 
at 0xc0ab6ce8
End traceback...

dumping to dev 4,1 offset 9971839
dump 332 331 330 329 328 327 326 325 324 323 322 321 320 319 318 317 316 
...

but at least the reboot succeeded after dumping core.

Regards,

- Havard


Home | Main Index | Thread Index | Old Index