Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Stability problems on current running on acorn32



For the last week I've been trying to get an up-to-date -current kernel running on my acorn32 system, which had been running 5.99.16 stably for the last year or so.

I had to fix a couple of problems in the device drivers to get the kernel to boot at all, but once I got to that stage I kept seeing panics and hangs after about 5-10 minutes (DIAGNOSTIC disabled), or even before reaching full multi-user mode (DIAGNOSTIC enabled).

Here is a selection of the crashes from a non-DIAGNOSTIC kernel:

uvmfault (f0302204, f0319000, 2) -> e
Fatal kernel mode data abort: 'Translation Fault (P)'
trapframe 0xf301fd28
FSR=185050f7, FAR f1319f70, spsr a0000013
r0 =f1179000, r1 =00000002, r2 =00000035 r3 =f1319f6c
r4 =0000007f  r5 =f2764c04, r6 =f117904a r7 =f1179048
r8 =00010011, r9 =ffdc0000, r10=0000001b r11=f301fe20
r12=f1319fc6, ssp=f301fd74, slr=f00b7b1c pc =f00b6b64

in_pcbstate+0x134:      str     r0,[r3, #004]

Call stack:
in_pcbconnect
udb_usrreq
soconnect
do_sys_connect
sys_connect
syscall
swi_handler

uvmfault (f030fb80, e59ff000, 1) -> e
Fatal kernel mode data abort: 'Translation Fault (S)'
trapframe: 0xf1ee3e0c
FSR =18361005, FAR =e59ff12c, spsr=80000013

r0 =0000006f, r1 =00006f00, r2 =0000008a, r3 =e59ff114
r4 =f132535c, r5 =f1425300, r6 =00000025, r7 =f0316c7c
r8 =f1325348, r9 =e59ff114, r10=0000001c, r11=f1ee3efc
r12=f0316c54, ssp=f1ee3e58, slr=00000001, pc =f0215c74

Call stack:
udp_input+0x274 ldr r3, [r9,#0x018]
ip_input
ipintr
sleepq_lendpri

uvm_fault(f0309844,0,2) ->
Data abort Permission fault P

pool_get@a8: str r2,[r1]

r0 =00000005, r1 =00000000, r2 =00000000, r3 =f1179fe0
r4 =f02ffb98, r5 =f02ffc0c, r6 =f11790e8, r7 =00000000
r8 =f02ffc10, r9 =00000000, r10=000000ff, r11=f295cde0
r12=f295cdbc, ssp=f295cdbc. slr=f00e88d8, pc =f01d4968

pool_get+0xa8
in_pcballoc+0x2c
rip_userreg+0x2dc
socreate+0x13c
fsocreate+0x9c
sys___socket30+0x3c
syscall+x094
swi_handler+0xbc
swi_entry+0x5c

And one from a diagnostic kernel:

uvm_fault(0xf27e36a0, deadd000, 1) -> e
Fatal kernel mode data abort: 'Translation Fault (S)'
trapframe: 0xf2c4bd58
FSR=18529005,FAR=deaddec5 SPSR=a0000013

r0 =f037e128, r1 =0901a8c0, r2 =deaddead, r3 =0101a8c0
r4 =00003500, r5 =f1179658, r6 =0000faff, r7 =00000000
r8 =8150020a, r9 =c0a80109, r10=f1179658, r11=f2c4bde0
r12=00000000, ssp=f2c4bda4, slr=f02448a0, pc =f00be848

in_pcblookup_connect+??
udp_input+0x4cc
ip_input+0x608
ipintr+0xc8
softint_overlay+0x3bc
lwp_userret+x0238
userret+0xc4
irq_entry+2b0

Should I be digging through the networking code looking for some sort of race condition exposed by the slowness of this platform, or is there some machine-dependent bit of code that this part of the networking stack relies on to work correctly, and that needs fixing up for acorn32?

One thing of note: the last DIAGNOSTIC run was taken with a video frame buffer driver that forces a cache flush for every write to the framebuffer. With this change, the system made it further in a DIAGNOSTIC kernel than without it. It made no difference at all if DIAGNOSTIC was disabled.

I've attached the patches I've made to the MD code just in case I've done something stupid in one of those changes. I've also attached a dmesg taken before the system panics to give full details of the hardware in the system.

If there is any more information I can gather from the ddb prompt after one of these crashes to help track this down, please let me know.

Mike
Index: sys/arch/arm/iomd/iomd_clock.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/iomd/iomd_clock.c,v
retrieving revision 1.25
diff -c -r1.25 iomd_clock.c
*** sys/arch/arm/iomd/iomd_clock.c      17 Jan 2009 17:06:18 -0000      1.25
--- sys/arch/arm/iomd/iomd_clock.c      27 Nov 2011 18:56:09 -0000
***************
*** 73,78 ****
--- 73,80 ----
  #define TIMER_FREQUENCY 2000000               /* 2MHz clock */
  #define TICKS_PER_MICROSECOND (TIMER_FREQUENCY / 1000000)
  
+ #define       IOMD_READ(a)     *(volatile uint8_t *)(IOMD_ADDRESS(a))
+ #define       IOMD_WRITE(a,x) *(volatile uint8_t *)(IOMD_ADDRESS(a)) = (x)
  static void *clockirq;
  static void *statclockirq;
  static struct clock_softc *clock_sc;
***************
*** 90,95 ****
--- 92,98 ----
  static volatile uint32_t timer0_lastcount;
  static volatile uint32_t timer0_offset;
  static volatile int timer0_ticked;
+ 
  /* TODO: Get IRQ status */
  
  static struct simplelock tmr_lock = SIMPLELOCK_INITIALIZER;  /* protect TC 
timer variables */
***************
*** 251,256 ****
--- 254,273 ----
  }
  #endif
  
+ 
+ static void 
+ initclock(void)
+ {
+       timer0_count = TIMER_FREQUENCY / hz;
+ 
+       IOMD_WRITE(IOMD_T0LOW, (timer0_count >> 0) & 0xff);
+       IOMD_WRITE(IOMD_T0HIGH, (timer0_count >> 8) & 0xff);
+ 
+       /* reload the counter */
+ 
+       IOMD_WRITE( IOMD_T0GO, 0);
+ }
+ 
  /*
   * void cpu_initclocks(void)
   *
***************
*** 269,286 ****
         */
  
        printf("clock: hz=%d stathz = %d profhz = %d\n", hz, stathz, profhz);
! 
!       timer0_count = TIMER_FREQUENCY / hz;
! 
!       bus_space_write_1(clock_sc->sc_iot, clock_sc->sc_ioh,
!           IOMD_T0LOW, (timer0_count >> 0) & 0xff);
!       bus_space_write_1(clock_sc->sc_iot, clock_sc->sc_ioh,
!           IOMD_T0HIGH, (timer0_count >> 8) & 0xff);
! 
!       /* reload the counter */
! 
!       bus_space_write_1(clock_sc->sc_iot, clock_sc->sc_ioh,
!           IOMD_T0GO, 0);
  
        clockirq = intr_claim(IRQ_TIMER0, IPL_CLOCK, "tmr0 hard clk",
            clockhandler, 0);
--- 286,293 ----
         */
  
        printf("clock: hz=%d stathz = %d profhz = %d\n", hz, stathz, profhz);
!       if (!timer0_count) 
!               initclock();
  
        clockirq = intr_claim(IRQ_TIMER0, IPL_CLOCK, "tmr0 hard clk",
            clockhandler, 0);
***************
*** 297,330 ****
                        panic("%s: Cannot installer timer 1 IRQ handler",
                            clock_sc->sc_dev.dv_xname);
        }
  #ifdef DIAGNOSTIC
        checkdelay();
  #endif
-       tc_init(&iomd_timecounter);
  }
  
  
  
  static u_int iomd_timecounter0_get(struct timecounter *tc)
  {
-       int s;
        u_int tm;
! 
        /*
         * Latch the current value of the timer and then read it.
         * This garentees an atmoic reading of the time.
         */
        s = splhigh();
        bus_space_write_1(clock_sc->sc_iot, clock_sc->sc_ioh,
            IOMD_T0LATCH, 0);
   
        tm = bus_space_read_1(clock_sc->sc_iot, clock_sc->sc_ioh,
!           IOMD_T0LOW);
        tm += (bus_space_read_1(clock_sc->sc_iot, clock_sc->sc_ioh,
            IOMD_T0HIGH) << 8);
        splx(s);
        simple_lock(&tmr_lock);
- 
        tm = timer0_count - tm;
        
  
--- 304,348 ----
                        panic("%s: Cannot installer timer 1 IRQ handler",
                            clock_sc->sc_dev.dv_xname);
        }
+       tc_init(&iomd_timecounter);
  #ifdef DIAGNOSTIC
        checkdelay();
  #endif
  }
  
  
  
  static u_int iomd_timecounter0_get(struct timecounter *tc)
  {
        u_int tm;
! #ifdef SPL_LOCKING
!       int s;
! #else
!       u_int oldirqstate;
! #endif
        /*
         * Latch the current value of the timer and then read it.
         * This garentees an atmoic reading of the time.
         */
+ #ifdef SPL_LOCKING
        s = splhigh();
+ #else
+       oldirqstate = disable_interrupts(I32_bit);
+ #endif
        bus_space_write_1(clock_sc->sc_iot, clock_sc->sc_ioh,
            IOMD_T0LATCH, 0);
   
        tm = bus_space_read_1(clock_sc->sc_iot, clock_sc->sc_ioh,
!                           IOMD_T0LOW);
        tm += (bus_space_read_1(clock_sc->sc_iot, clock_sc->sc_ioh,
            IOMD_T0HIGH) << 8);
+ 
+ #ifdef SPL_LOCKING
        splx(s);
+ #else
+       restore_interrupts(oldirqstate);
+ #endif
        simple_lock(&tmr_lock);
        tm = timer0_count - tm;
        
  
***************
*** 356,371 ****
  void
  delay(u_int n)
  {
!       volatile u_int n2;
!       volatile u_int i;
  
!       if (n == 0) return;
!       n2 = n;
!       while (n2-- > 0) {
!               if (cputype == CPU_ID_SA110)    /* XXX - Seriously gross hack */
!                       for (i = delaycount; --i;);
                else
!                       for (i = 8; --i;);
        }
  }
  
--- 374,436 ----
  void
  delay(u_int n)
  {
!       u_int last;
!       u_int cur;
!       u_int acc = 0;
! #ifdef SPL_LOCKING
!       int s;
! #else
!       u_int oldirqstate;
! #endif
!       if (!timer0_count) 
!               initclock();
!       /* Adjust the delay count to a tick count */
!       n *= TICKS_PER_MICROSECOND; 
  
!       /* Read an initial value from the timer */
! #ifdef SPL_LOCKING
!       s = splhigh();
! #else
!       oldirqstate = disable_interrupts(I32_bit);
! #endif
!       IOMD_WRITE(IOMD_T0LATCH, 0);
!       
!       last = IOMD_READ(IOMD_T0LOW);
!       last += (IOMD_READ(IOMD_T0HIGH) << 8);
! #ifdef SPL_LOCKING
!       splx(s);
! #else
!       restore_interrupts(oldirqstate);
! #endif
!       /* IOMD timer is countdown. So normally last > cur &
!        * last - cur is our delta. If the timer wraps while we
!        * are polling result will be cur <= last in this case we
!        * need timer0_count - cur + last for the interval as the interval 
!        * is from last down to 0 and then from timer0_count down to cur. 
!        */
!       while (acc < n) {
! #ifdef SPL_LOCKING
!               s = splhigh();
! #else
!               oldirqstate = disable_interrupts(I32_bit);
! #endif
!               IOMD_WRITE(IOMD_T0LATCH, 0);
!       
!               cur = IOMD_READ(IOMD_T0LOW);
!               cur += (IOMD_READ(IOMD_T0HIGH) << 8);
! 
! #ifdef SPL_LOCKING
!               splx(s);
! #else
!               restore_interrupts(oldirqstate);
! #endif
!               
!               if (cur > last)
!                       acc += timer0_count - cur + last;
                else
!                       acc += last - cur;
!               /* Update our last time */
!               last = cur;
        }
  }
  
Index: sys/arch/arm/iomd/vidcvideo.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/iomd/vidcvideo.c,v
retrieving revision 1.39
diff -c -r1.39 vidcvideo.c
*** sys/arch/arm/iomd/vidcvideo.c       1 Jul 2011 20:26:35 -0000       1.39
--- sys/arch/arm/iomd/vidcvideo.c       27 Nov 2011 19:14:39 -0000
***************
*** 264,273 ****
        struct rasops_info *ri = &scr->scr_ri;
        struct fb_devconfig *dc = cookie;
  
        if ((scr == &dc->dc_console) && (dc->dc_vd.active != NULL))
                return;
  
-       ri->ri_flg    = 0; /* RI_CENTER | RI_FULLCLEAR; */
        ri->ri_depth  = dc->dc_depth;
        ri->ri_bits   = (void *) dc->dc_videobase;
        ri->ri_width  = dc->dc_width;
--- 264,283 ----
        struct rasops_info *ri = &scr->scr_ri;
        struct fb_devconfig *dc = cookie;
  
+ #if 0
        if ((scr == &dc->dc_console) && (dc->dc_vd.active != NULL))
                return;
+ #endif
+       
+       if (scr == &dc->dc_console) {
+               if (ri->ri_flg == 0) {
+                       /* First time set RI_NO_AUTO */
+                       ri->ri_flg    |= RI_NO_AUTO;
+               } else {
+                       ri->ri_flg    &= ~RI_NO_AUTO;
+               }
+       }
  
        ri->ri_depth  = dc->dc_depth;
        ri->ri_bits   = (void *) dc->dc_videobase;
        ri->ri_width  = dc->dc_width;
Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
    2006, 2007, 2008, 2009, 2010, 2011
    The NetBSD Foundation, Inc.  All rights reserved.
Copyright (c) 1982, 1986, 1989, 1991, 1993
    The Regents of the University of California.  All rights reserved.

NetBSD 5.99.57 (GENERIC) #33: Sat Nov 26 16:13:30 GMT 2011
        
mpumford%trigati.mudcovered.org.uk@localhost:/work/netbsd/current/obj.acorn32/sys/arch/acorn32/compile/GENERIC
total memory = 65536 KB
avail memory = 60144 KB
timecounter: Timecounters tick every 10.000 msec
cprng kernel: WARNING insufficient entropy at creation.
mainbus0 (root)
cpu0 at mainbus0: SA-110 step K (SA-1 core)
cpu0: DC enabled IC enabled WB enabled EABT
cpu0: 16KB/32B 32-way Instruction cache
cpu0: 16KB/32B 32-way write-back Data cache
cpu0: SA-110 with bugged STM^ instruction
pioc0 at mainbus0 base 0xf6210000-0xf6212fff
pioc0: SMC FDC37C665GT peripheral controller rev 1
fdc0 at pioc0 offset 0x3f0-0x3f7 irq 12 drq 0x00002000
iomd_irq: 12, level 5
fd0 at fdc0 drive 0: 1.44MB 80 cyl, 2 head, 18 sec
com0 at pioc0 offset 0x3f8-0x3ff irq 10: ns16550a, working fifo
iomd_irq: 10, level 7
lpt0 at pioc0 offset 0x278-0x27b irq 0
iomd_irq: 0, level 5
iomd0 at mainbus0: IOMD20 version 2
iomd0: DRAM refresh=16us
clock0 at iomd0
iomdkbc0 at iomd0
iomd_irq: 15, level 5
pckbd0 at iomdkbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
iomdiic0 at iomd0
iic0 at iomdiic0: I2C bus
pcfrtc0 at iic0 addr 0x50: PCF8583 Real-time Clock/NVRAM
pcfrtc0:  32.768 kHz clock
qms0 at iomd0
wsmouse0 at qms0 mux 0
vidc0 at mainbus0: VIDC20
vidcvideo0 at vidc0: refclk=24MHz 2048KB VRAM : mode 1024x768x60, 8bpp
iomd_irq: 3, level 5
wsdisplay0 at vidcvideo0 kbdmux 1: console (std, vt100 emulation), using wskbd0
wsmux1: connecting to wsdisplay0
vidcaudio0 at vidc0: 16-bit external DAC
iomd_irq: 20, level 6
audio0 at vidcaudio0: half duplex, playback, capture
podulebus0 (root)
podule0  at podulebus0 : Acorn Computers : Acorn SCSI interface : CDFS & SCSI 
Expansion Card
podule1  at podulebus0 : Simtec Electronics : 16 bit IDE interface : Simtec 16 
bit IDE Interface
netslot0 at podulebus0 : I-Cubed : AEH62/78/99 (EtherLan 602) : Acorn Risc 
PC/A7000 interface M-) i-cubed ltd, EtherLan 600A (00:
sec0 at podulebus0 slot 0: WD33C93A (8.0 MHz clock, BURST DMA, SCSI ID 7)
scsibus0 at sec0: 8 targets, 8 luns per target
iomd_irq: 13, level 5
simide0 at podulebus0 slot 1: card/cable fault (d8) - (addr)
iomd_irq: 13, level 5
atabus0 at simide0 channel 0
iomd_irq: 13, level 5
atabus1 at simide0 channel 1
ne0 at podulebus0 [ netslot 0 ]: EtherLan 600A ethernet
ne0: Ethernet address 00:c0:32:00:9d:9c
ne0: 10base2, 10baseT, auto, default auto
ne0: NE2000 chipset, 31 Kb memory
iomd_irq: 11, level 5
timecounter: Timecounter "clockinterrupt" frequency 100 Hz quality 0
clock: hz=100 stathz = 0 profhz = 0
iomd_irq: 5, level 6
timecounter: Timecounter "iomd_timer0" frequency 2000000 Hz quality 100
Setting statclock to 0Hz (-1 ticks)
scsibus0: waiting 2 seconds for devices to settle...
wd0 at atabus0 drive 0
wd0: <SAMSUNG HD400LD>
wd0: drive supports 16-sector PIO transfers, LBA48 addressing
wd0: 372 GB, 775221 cyl, 16 head, 63 sec, 512 bytes/sect x 781422768 sectors
wd0: drive supports PIO mode 4, DMA mode 2, Ultra-DMA mode 5 (Ultra/100)
Kernelized RAIDframe activated
cprng sysctl: WARNING insufficient entropy at creation.
boot device: wd0
root on wd0a dumps on wd0b
root file system type: ffs
wsdisplay0: screen 1 added (std, vt100 emulation)
wsdisplay0: screen 2 added (std, vt100 emulation)
wsdisplay0: screen 3 added (std, vt100 emulation)
wsdisplay0: screen 4 added (std, vt100 emulation)


Home | Main Index | Thread Index | Old Index