Subject: Re: SMP enabled
To: None <port-sparc@netbsd.org>
From: Hauke Fath <hauke@Espresso.Rhein-Neckar.DE>
List: port-sparc
Date: 01/08/2003 07:41:04
At 13:55 Uhr +0100 7.1.2003, Paul Kranenburg wrote:
>I've now enabled SMP operation in MULTIPROCESSOR kernels.
>
>As noted before, there are still a number of loose ends to tie up but
>it's now usable on at least two different platforms: various 2-processor
>viking supersparc machines and a 4-processor 4/670, i.e.:

With Paul's changes, I have managed to build a kernel with make -j3 on my
dual-SM71 ss10 just fine.

Then, I got adventurous and let the SMP kernel (DIAGNOSTIC, DEBUG,
LOCKDEBUG) run for the nightly amanda backup. It stayed up for about half
an hour into the backup, spitting messages like

Jan  8 02:38:05 pizza /netbsd: xcall(cpu0,0xf000862c): couldn't ping cpus: cpu1
Jan  8 02:38:18 pizza /netbsd: xcall(cpu1,0xf000862c): couldn't ping cpus: cpu0
Jan  8 02:38:18 pizza /netbsd: xcall(cpu1,0xf01ce2ec): couldn't ping cpus: cpu0
Jan  8 02:38:18 pizza /netbsd: xcall(cpu0,0xf000862c): couldn't ping cpus: cpu1

then broke into the debugger with something like


simple_lock: locking against myself
lock: 0xf023ecfc, currently at: /usr/src/sys/arch/sparc/sparc/pmap.c:689
on cpu 0
last locked: /usr/src/sys/arch/sparc/sparc/pmap.c:689
last unlocked: /usr/src/sys/arch/sparc/sparc/pmap.c:708
0x0(0xf0adf000, 0x1d, 0xf0ae7000, 0xf023e554, 0x0, 0xfffffffe) at
pmap_kremove4m+0x16c
pmap_kremove4m(0xf0249910, 0xf0adf000, 0xf0ae7000, 0xf0232424, 0x100,
0xf023e554) at uvm_unmap_remove+0x16c
uvm_unmap_remove(0xf0249910, 0xf0adf000, 0xf0ae7000, 0x408000e5, 0x33, 0x18060)
at uvm_unmap+0x110
uvm_unmap(0xf0249910, 0xf0adf000, 0x8000, 0xf028280c, 0xf0899910, 0xfff) at
uvm_km_free+0x14
uvm_km_free(0xf0adf000, 0x68, 0x0, 0xf573e000, 0xf08e1a50, 0x3d461400) at
free+0x118
free(0xf22abab0, 0xf00e8aa8, 0xf22abab0, 0x8000, 0xf22abab0, 0xf2284000) at
softdep_disk_write_complete+0x254
softdep_disk_write_complete(0xf22abab0, 0x500, 0x0, 0xf01bf3f0, 0xf01b1a94,
0xc3) at biodone+0x74
biodone(0x0, 0x22009, 0x22009, 0xe0000, 0xf08f9498, 0x8) at
scsipi_complete+0x4dc
scsipi_complete(0xf08f8f00, 0x5d73b, 0xf01b1a80, 0xf023e554, 0x0,
0xfffffffe) at
 scsipi_done+0x168
scsipi_done(0xf08e3e00, 0xf08f9498, 0x14, 0xf01b1a94, 0x100, 0xf023e554) at
ncr53c9x_done+0x1c8
ncr53c9x_done(0x3, 0xf0087020, 0x500, 0x408000e5, 0x33, 0x18060) at
ncr53c9x_intr+0x12bc
ncr53c9x_intr(0x404000e6, 0xf02110c8, 0x2ff, 0xf5fb7000, 0xf089cf90, 0x4000) at
sparc_interrupt44c+0x150
sparc_interrupt44c(0xf01ce2ec, 0x1, 0x0, 0x0, 0x0, 0xffffffff) at xcall+0x3a0
xcall(0x0, 0xf02ee688, 0x0, 0x182e99a, 0x0, 0xf028bab4) at updatepte4m+0x50
updatepte4m(0xf74a2000, 0x182e900, 0x1, 0xf06fdb80, 0x2, 0xf7250a60) at
pmap_kenter_pa4m+0xcc
pmap_kenter_pa4m(0x4000, 0xf727fe28, 0xf09e0300, 0xf0205400, 0xf023b800,
0xf023d000) at sosend_loan+0x1e0
sosend_loan(0xf0aaef10, 0x0, 0xf727fe28, 0xf09e0300, 0x0, 0x0) at sosend+0x5c0
sosend(0xf718f158, 0xf718f180, 0xf727fe28, 0xf0ace700, 0x1, 0xf012edb0) at
soo_write+0x20
soo_write(0xf7250a60, 0x1, 0xf718f158, 0x4000, 0x4000, 0xf718f180) at
dofilewrite+0x8c
dofilewrite(0xf7250a60, 0xf727ff28, 0xf727ff20, 0xf012b274, 0xf727ff28,
0xf727ff20) at sys_write+0x70
sys_write(0x4, 0xf727ffb0, 0x0, 0x3991, 0x37e2c, 0xfffffffe) at syscall+0x1f4
syscall(0x1, 0x8d1d8, 0x4000, 0xf0002000, 0x3, 0xf06fdb80) at _syscall+0xcc
Stopped in pid 1677 (gzip) at   cpu_Debugger+0x8:       call
esigcode

db{0}> t
cpu_Debugger(0xf023ecfc, 0xf021ae18, 0x2b1, 0xdeadbeef, 0x0, 0xc3) at
_simple_lock+0x294
_simple_lock(0xf0adf000, 0xf02d3f7c, 0xffffffff, 0x0, 0x0, 0x2b) at
updatepte4m+0x24
updatepte4m(0xf0adf000, 0x1d, 0xf0ae7000, 0xf023e554, 0x0, 0xfffffffe) at
pmap_kremove4m+0x16c
pmap_kremove4m(0xf0249910, 0xf0adf000, 0xf0ae7000, 0xf0232424, 0x100,
0xf023e554) at uvm_unmap_remove+0x16c
uvm_unmap_remove(0xf0249910, 0xf0adf000, 0xf0ae7000, 0x408000e5, 0x33, 0x18060)
at uvm_unmap+0x110
uvm_unmap(0xf0249910, 0xf0adf000, 0x8000, 0xf028280c, 0xf0899910, 0xfff) at
uvm_km_free+0x14
uvm_km_free(0xf0adf000, 0x68, 0x0, 0xf573e000, 0xf08e1a50, 0x3d461400) at
free+0x118
free(0xf22abab0, 0xf00e8aa8, 0xf22abab0, 0x8000, 0xf22abab0, 0xf2284000) at
softdep_disk_write_complete+0x254
softdep_disk_write_complete(0xf22abab0, 0x500, 0x0, 0xf01bf3f0, 0xf01b1a94,
0xc3) at biodone+0x74
biodone(0x0, 0x22009, 0x22009, 0xe0000, 0xf08f9498, 0x8) at
scsipi_complete+0x4dc
scsipi_complete(0xf08f8f00, 0x5d73b, 0xf01b1a80, 0xf023e554, 0x0,
0xfffffffe) at
 scsipi_done+0x168
scsipi_done(0xf08e3e00, 0xf08f9498, 0x14, 0xf01b1a94, 0x100, 0xf023e554) at
ncr53c9x_done+0x1c8
ncr53c9x_done(0x3, 0xf0087020, 0x500, 0x408000e5, 0x33, 0x18060) at
ncr53c9x_intr+0x12bc
ncr53c9x_intr(0x404000e6, 0xf02110c8, 0x2ff, 0xf5fb7000, 0xf089cf90,
0x4000) at sparc_interrupt44c+0x150
sparc_interrupt44c(0xf01ce2ec, 0x1, 0x0, 0x0, 0x0, 0xffffffff) at xcall+0x3a0
xcall(0x0, 0xf02ee688, 0x0, 0x182e99a, 0x0, 0xf028bab4) at updatepte4m+0x50
updatepte4m(0xf74a2000, 0x182e900, 0x1, 0xf06fdb80, 0x2, 0xf7250a60) at
pmap_kenter_pa4m+0xcc
pmap_kenter_pa4m(0x4000, 0xf727fe28, 0xf09e0300, 0xf0205400, 0xf023b800,
0xf023d000) at sosend_loan+0x1e0
sosend_loan(0xf0aaef10, 0x0, 0xf727fe28, 0xf09e0300, 0x0, 0x0) at sosend+0x5c0
sosend(0xf718f158, 0xf718f180, 0xf727fe28, 0xf0ace700, 0x1, 0xf012edb0) at
soo_write+0x20
soo_write(0xf7250a60, 0x1, 0xf718f158, 0x4000, 0x4000, 0xf718f180) at
dofilewrite+0x8c
dofilewrite(0xf7250a60, 0xf727ff28, 0xf727ff20, 0xf012b274, 0xf727ff28,
0xf727ff20) at sys_write+0x70
sys_write(0x4, 0xf727ffb0, 0x0, 0x3991, 0x37e2c, 0xfffffffe) at syscall+0x1f4
syscall(0x1, 0x8d1d8, 0x4000, 0xf0002000, 0x3, 0xf06fdb80) at _syscall+0xcc
db{0}>


Typing 'c' a few times would get the box going again, but it returned to
the debugger a few minutes later, so I finally rebooted to a 1.6_STABLE
kernel.

Are these 'debugger' breaks really serious, or more of a diagnostics issue?

	hauke

--
/~\  The ASCII Ribbon Campaign
\ /    No HTML/RTF in email
 X     No Word docs in email
/ \  Respect for open standards