Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Problems updating to new libc on pmax



2010/5/9 Erik Bertelsen <bertelsen.erik%gmail.com@localhost>:
> 2010/4/28 Eric Haszlakiewicz <erh%nimenees.com@localhost>:
>> On Tue, Apr 27, 2010 at 09:26:49PM +0200, Erik Bertelsen wrote:
>>> 2010/4/27 Matthias Drochner <M.Drochner%fz-juelich.de@localhost>:
>>> > bertelsen.erik%gmail.com@localhost said:
>>> >> [1] ? Segmentation fault (core dumped) cat >&2 <<...
>>> >
>>> > can you make sense of the core dumps?
>>>
>
> Hello again,
>
> I've done some further experiments in order to try to pin-point the
> problem that I have with recent libc on pmax.
>
> I tried the following patch as a poor man's debugger:
>
>
> ===================================================================
> RCS file: /cvsroot/src/lib/libc/stdlib/jemalloc.c,v
> retrieving revision 1.21
> diff -c -r1.21 jemalloc.c
> *** jemalloc.c  4 Mar 2010 22:48:31 -0000       1.21
> --- jemalloc.c  9 May 2010 07:35:02 -0000
> ***************
> *** 3574,3581 ****
> --- 3574,3584 ----
>        malloc_mutex_init(&chunks_mtx);
>        RB_INIT(&huge);
>  #ifdef USE_BRK
> +                       readlink("/etc/brk.conf", buf, sizeof(buf) - 1);
>        malloc_mutex_init(&brk_mtx);
> +                       readlink("/etc/brk1.conf", buf, sizeof(buf) - 1);
>        brk_base = sbrk(0);
> +                       readlink("/etc/brk2.conf", buf, sizeof(buf) - 1);
>        brk_prev = brk_base;
>        brk_max = brk_base;
>  #endif
>
>
> With this patch installed, the complete ktruss output of a plain ls command is:
>
> # /rescue/mv /tmp/libc.so.12.172 .
> # ls
> Memory fault (core dumped)
> # ktruss ls
>  8077      1 ktruss   fktrace                     = 0, 2112355200
>  8077      1 ktruss   emul(netbsd)
>  8077      1 ktruss   fcntl(0x4, 0x3, 0)          = 1, 2112355200
>  8077      1 ktruss   fcntl(0x4, 0x4, 0x1)        = 0, 2112355200
>  8077      1 ls       execve("/bin/ls", 0x7fffdd08, 0x7fffdd10) JUSTRETURN
>  8077      1 ls       emul(netbsd)
>  8077      1 ls       mmap(0, 0x8000, 0x3, 0x1002, 0xffffffff, 0, 0,
> 0) = 0x7dff7000
>  8077      1 ls       open("/etc/ld.so.conf", 0, 0x7dff0cc0) Err#2 ENOENT
>  8077      1 ls       open("/lib/libutil.so.7", 0, 0) = 3, 2147472849
>  8077      1 ls       __fstat50(0x3, 0x7fffd500)  = 0, 2147472849
>  8077      1 ls       mmap(0, 0x1000, 0x1, 0x1, 0x3, 0, 0, 0) = 0x7dff6000
>  8077      1 ls       munmap(0x7dff6000, 0x1000)  = 0
>  8077      1 ls       mmap(0, 0x28000, 0x5, 0x10000002, 0x3, 0, 0, 0)
> = 0x7dfb0000
>  8077      1 ls       mmap(0x7dfd5000, 0x2000, 0x3, 0x12, 0x3, 0,
> 0x15000, 0) = 0x7dfd5000
>  8077      1 ls       mmap(0x7dfd7000, 0x1000, 0x3, 0x1012,
> 0xffffffff, 0, 0, 0) = 0x7dfd7000
>  8077      1 ls       mprotect(0x7dfc5000, 0x10000, 0) = 0, -4096
>  8077      1 ls       close(0x3)                  = 0
>  8077      1 ls       open("/lib/libc.so.12", 0, 0) = 3, 2147472849
>  8077      1 ls       __fstat50(0x3, 0x7fffd500)  = 0, 2147472849
>  8077      1 ls       mmap(0, 0x1000, 0x1, 0x1, 0x3, 0, 0, 0) = 0x7dff6000
>  8077      1 ls       munmap(0x7dff6000, 0x1000)  = 0
>  8077      1 ls       mmap(0, 0x14c000, 0x5, 0x10000002, 0x3, 0, 0,
> 0) = 0x7de60000
>  8077      1 ls       mmap(0x7df95000, 0x8000, 0x3, 0x12, 0x3, 0,
> 0x125000, 0) = 0x7df95000
>  8077      1 ls       mmap(0x7df9d000, 0xf000, 0x3, 0x1012,
> 0xffffffff, 0, 0, 0) = 0x7df9d000
>  8077      1 ls       mprotect(0x7df86000, 0xf000, 0) = 0, -4096
>  8077      1 ls       close(0x3)                  = 0
>  8077      1 ls       __sysctl(0x7fffdbec, 0x2, 0x7dfa9fd0,
> 0x7fffdbe8, 0, 0) = 0, 81
>  8077      1 ls       __sysctl(0x7fffdbf8, 0x2, 0x7fffdbf0,
> 0x7fffdbf4, 0, 0) = 0, 6
>  8077      1 ls       rasctl(0x7defeb90, 0x14, 0) = 0, -1
>  8077      1 ls       issetugid()                 = 0, 2113498716
>  8077      1 ls       __sysctl(0x7fffc640, 0x2, 0x7dfa48a0,
> 0x7fffc63c, 0, 0) = 0, 6
>  8077      1 ls       __sysctl(0x7fffc564, 0x2, 0x7dfab280,
> 0x7fffc560, 0, 0) = 0, 6
>  8077      1 ls       readlink("/etc/malloc.conf", 0x7fffc654, 0x400) = 1, 1
>  8077      1 ls       readlink("/etc/brk.conf", 0x7fffc654, 0x400) Err#2 
> ENOENT
>  8077      1 ls       readlink("/etc/brk1.conf", 0x7fffc654, 0x400)
> Err#2 ENOENT
>  8077      1 ls       break(0x4167d0)             = 0, 4286416
>  8077      1 ls       SIGSEGV SIG_DFL
> #
>
> This seems to indicate that it is the 'brk_base = sbrk(0);' statement
> that fails.
>
> Another observation: not all programs fail in this way, e.g. with the
> faulty libc install, the ftp command can still be used. Actually
> random tests indicate that (many or all?) applications in /bin fail
> like ls above, while (many or all?) applications in /usr/bin can still
> be used. I can't really believe that the /bin/ vs. /usr/bin/ path is
> significant, however...
>
> Another observation: reverting the latest update to jemalloc.c by
> reverting to version 1.20 does not make any difference.
>
> kind regards
> Erik
>

Based on a hint, I added options DEBUG and DIAGNOSTIC to my pmax
kernel. With this, I don't see any difference in the ktruss output,
but /var/log/messages received the lines shown below the ktruss
output:

# ktruss ls
    38      1 ktruss   fktrace                     = 0, 2112355200
    38      1 ktruss   emul(netbsd)
    38      1 ktruss   fcntl(0x4, 0x3, 0)          = 1, 2112355200
    38      1 ktruss   fcntl(0x4, 0x4, 0x1)        = 0, 2112355200
    38      1 ls       execve("/bin/ls", 0x7fffdd18, 0x7fffdd20) JUSTRETURN
    38      1 ls       emul(netbsd)
    38      1 ls       mmap(0, 0x8000, 0x3, 0x1002, 0xffffffff, 0, 0,
0) = 0x7dff7000
    38      1 ls       open("/etc/ld.so.conf", 0, 0x7dff0cc0) Err#2 ENOENT
    38      1 ls       open("/lib/libutil.so.7", 0, 0) = 3, 2147472857
    38      1 ls       __fstat50(0x3, 0x7fffd508)  = 0, 2147472857
    38      1 ls       mmap(0, 0x1000, 0x1, 0x1, 0x3, 0, 0, 0) = 0x7dff6000
    38      1 ls       munmap(0x7dff6000, 0x1000)  = 0
    38      1 ls       mmap(0, 0x28000, 0x5, 0x10000002, 0x3, 0, 0, 0)
= 0x7dfb0000
    38      1 ls       mmap(0x7dfd5000, 0x2000, 0x3, 0x12, 0x3, 0,
0x15000, 0) = 0x7dfd5000
    38      1 ls       mmap(0x7dfd7000, 0x1000, 0x3, 0x1012,
0xffffffff, 0, 0, 0) = 0x7dfd7000
    38      1 ls       mprotect(0x7dfc5000, 0x10000, 0) = 0, -4096
    38      1 ls       close(0x3)                  = 0
    38      1 ls       open("/lib/libc.so.12", 0, 0) = 3, 2147472857
    38      1 ls       __fstat50(0x3, 0x7fffd508)  = 0, 2147472857
    38      1 ls       mmap(0, 0x1000, 0x1, 0x1, 0x3, 0, 0, 0) = 0x7dff6000
    38      1 ls       munmap(0x7dff6000, 0x1000)  = 0
    38      1 ls       mmap(0, 0x14c000, 0x5, 0x10000002, 0x3, 0, 0,
0) = 0x7de60000
    38      1 ls       mmap(0x7df95000, 0x8000, 0x3, 0x12, 0x3, 0,
0x125000, 0) = 0x7df95000
    38      1 ls       mmap(0x7df9d000, 0xf000, 0x3, 0x1012,
0xffffffff, 0, 0, 0) = 0x7df9d000
    38      1 ls       mprotect(0x7df86000, 0xf000, 0) = 0, -4096
    38      1 ls       close(0x3)                  = 0
    38      1 ls       __sysctl(0x7fffdbf4, 0x2, 0x7dfa9fd0,
0x7fffdbf0, 0, 0) = 0, 81
    38      1 ls       __sysctl(0x7fffdc00, 0x2, 0x7fffdbf8,
0x7fffdbfc, 0, 0) = 0, 6
    38      1 ls       rasctl(0x7defeb90, 0x14, 0) = 0, -1
    38      1 ls       issetugid()                 = 0, 2113498716
    38      1 ls       __sysctl(0x7fffc648, 0x2, 0x7dfa48a0,
0x7fffc644, 0, 0) = 0, 6
    38      1 ls       __sysctl(0x7fffc56c, 0x2, 0x7dfab280,
0x7fffc568, 0, 0) = 0, 6
    38      1 ls       readlink("/etc/malloc.conf", 0x7fffc65c, 0x400) = 1, 1
    38      1 ls       readlink("/etc/brk.conf", 0x7fffc65c, 0x400) Err#2 ENOENT
    38      1 ls       readlink("/etc/brk1.conf", 0x7fffc65c, 0x400)
Err#2 ENOENT
    38      1 ls       break(0x4167d0)             = 0, 4286416
    38      1 ls       SIGSEGV SIG_DFL
#

May 10 19:52:53 sockdev /netbsd: trap: pid 38(ls): sig 11: cause=0xb000000c epc=0x7df0f8a4 va=0x50
May 10 19:52:53 sockdev /netbsd: registers:
May 10 19:52:53 sockdev /netbsd: [ 0]=00000000 [ 1]=00000000 [ 2]=004167d0 [ 3]=004167d0
May 10 19:52:53 sockdev /netbsd: [ 4]=004167d0 [ 5]=7fffc65c [ 6]=00000400 [ 7]=00000000
May 10 19:52:53 sockdev /netbsd: [ 8]=00000050 [ 9]=00000064 [10]=7df9ae40 [11]=00000000
May 10 19:52:53 sockdev /netbsd: [12]=00000001 [13]=00000003 [14]=00000001 [15]=004029d8
May 10 19:52:53 sockdev /netbsd: [16]=7fffc65d [17]=7dfa0000 [18]=00000001 [19]=7fffc65c
May 10 19:52:53 sockdev /netbsd: [20]=7dfa0000 [21]=7dfa0000 [22]=7dfa0000 [23]=7dfa0000
May 10 19:52:53 sockdev /netbsd: [24]=00000043 [25]=7de7e3a0 [26]=00000000 [27]=00000000
May 10 19:52:53 sockdev /netbsd: [28]=7dfa2e30 [29]=7fffc620 [30]=7dfa0000 [31]=7df0d6ac
May 10 19:52:53 sockdev /netbsd: pid 38 (ls), uid 0: exited on signal 11 (core dumped)

I hope that this may help someone to get a better idea of what is going on...

best regards
- Erik


Home | Main Index | Thread Index | Old Index