Subject: Re: -current does not like non-boot disk
To: Christos Zoulas <christos@zoulas.com>
From: Hauke Fath <hauke@Espresso.Rhein-Neckar.DE>
List: port-mac68k
Date: 12/14/1998 19:56:00
At 8:50 Uhr +0100 14.12.1998, Christos Zoulas wrote:
>In article <l03102803b299e3fb6401@[192.168.1.4]>
>hauke@Espresso.Rhein-Neckar.DE (Hauke Fath) writes:

>># disklabel sd1
>>panic: fstat
>>Stopped in disklabel at _Debugger+0x6:  unlk    a6
>>db> t
>>_Debugger(128,7b4f88,7b4f44,20718,206a6) + 6
>>_panic(206a6,7a04d0,1a1ec,0,0) + 50
>>_sys___fstat13(7a04d0,7b4f88,7b4f80) + 66
>>_syscall(117) + 146
>>_trap0() + e
>
>The only panic in fstat is when the file is not a vnode or a socket...
>What the hell is it then?

"We wonders, aye, we wonders."

The code in question (from sys/kern/kern_descrip.c) has

[...]
        if ((u_int)fd >= fdp->fd_nfiles ||
            (fp = fdp->fd_ofiles[fd]) == NULL)
                return (EBADF);
        switch (fp->f_type) {

        case DTYPE_VNODE:
                error = vn_stat((struct vnode *)fp->f_data, &ub, p);
                break;

        case DTYPE_SOCKET:
                error = soo_stat((struct socket *)fp->f_data, &ub);
                break;

        default:
                panic("fstat");
                /*NOTREACHED*/
        }
[...]

which disassembles to

db> x /i _sys___fstat13
_sys___fstat13: linkw   a6,#-0x60
_sys___fstat13+0x4:     movl    a2,sp@-
_sys___fstat13+0x6:     moval   a6@(0x8),a1
_sys___fstat13+0xa:     moval   a6@(0xc),a2
_sys___fstat13+0xe:     movl    a2@,d0
_sys___fstat13+0x10:    moval   a1@(0x14),a0
_sys___fstat13+0x14:    cmpl    a0@(0x10),d0
_sys___fstat13+0x18:    bccb    <_sys___fstat13+0x24>   [addr:0x1eef4 ]
_sys___fstat13+0x1a:    moval   a0@,a0
_sys___fstat13+0x1c:    moval   a0@(0,d0:l:4),a0
_sys___fstat13+0x20:    tstl    a0
_sys___fstat13+0x22:    bneb    <_sys___fstat13+0x28>   [addr:0x1eef8 ]
_sys___fstat13+0x24:    movq    #0x9,d0
_sys___fstat13+0x26:    brab    <_sys___fstat13+0x82>   [addr:0x1ef52 ]
_sys___fstat13+0x28:    movw    a0@(0xa),d0
_sys___fstat13+0x2c:    cmpiw   #0x1,d0
_sys___fstat13+0x30:    beqb    <_sys___fstat13+0x3a>   [addr:0x1ef0a ]
_sys___fstat13+0x32:    cmpiw   #0x2,d0
_sys___fstat13+0x36:    beqb    <_sys___fstat13+0x50>   [addr:0x1ef20 ]
_sys___fstat13+0x38:    brab    <_sys___fstat13+0x62>   [addr:0x1ef32 ]
_sys___fstat13+0x3a:    movl    a1,sp@-
_sys___fstat13+0x3c:    pea     a6@(-0x60)
_sys___fstat13+0x40:    movl    a0@(0x20),sp@-
_sys___fstat13+0x44:    bsrl    _vn_stat        [addr:0x4870e ]
_sys___fstat13+0x4a:    addqw   #0x8,sp
_sys___fstat13+0x4c:    addqw   #0x4,sp
_sys___fstat13+0x4e:    brab    <_sys___fstat13+0x6c>   [addr:0x1ef3c ]
_sys___fstat13+0x50:    pea     a6@(-0x60)
_sys___fstat13+0x54:    movl    a0@(0x20),sp@-
_sys___fstat13+0x58:    bsrl    _soo_stat       [addr:0x308f6 ]
_sys___fstat13+0x5e:    addqw   #0x8,sp
_sys___fstat13+0x60:    brab    <_sys___fstat13+0x6c>   [addr:0x1ef3c ]
_sys___fstat13+0x62:    pea     pc@(-0x6a)      [disp:_sys_close+0x28 ]
_sys___fstat13+0x66:    bsrl    _panic  [addr:0x2d91a ]

and the register dump is

db> show registers
d0              0x2704  opaop_mask+0x684
d1                0x5c  VER_41+0x1b
d2               0x100  inex1_mask
d3               0x117  inx1a_mask+0xf
d4                   0
d5                 0x2  nmc_operr_bit
d6             0x15972  _db_putchar+0x86
d7                 0x2  nmc_operr_bit
a0             0x2d919  _tablefull+0x21
a1                   0
a2            0x779ee0  _end+0x651200
a3                 0x8  ainex_mask
a4                   0
a5             0xfbc38  _sysent+0x8b8
a6            0x779ec4  _end+0x6511e4
sp            0x779ec4  _end+0x6511e4
pc             0xbb9b8  _Debugger+0x6
sr              0x2000  operr_mask
_Debugger+0x6:  unlk    a6
db>

so "fp->f_type" is 0x2704 if I read things right. I get this panic in
single-user mode, btw; when I access the disk after booting to multi-user, the
box simply locks up. I have seen a panic from corrupted ffs data
structures, too; unfortunately I have not kept the stack trace from db.

Like others in this thread, I strongly suspect corruption from an interrupt
routine.

	hauke


--
"It's never straight up and down"     (DEVO)