Subject: lib/6470: libc perror.c crashes with bus error
To: None <gnats-bugs@gnats.netbsd.org>
From: None <Havard.Eidnes@runit.sintef.no>
List: netbsd-bugs
Date: 11/20/1998 16:52:55
>Number:         6470
>Category:       lib
>Synopsis:       libc perror.c crashes with bus error
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    lib-bug-people (Library Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Nov 20 08:05:00 1998
>Last-Modified:
>Originator:     Havard Eidnes
>Organization:
	RUNIT AS
>Release:        NetBSD-current 19981117 (snapshot source)
>Environment:
	
System: NetBSD klodrik.uninett.no 1.3H NetBSD 1.3H (KLODRIK) #0: Wed Nov 18 22:41:58 MET 1998     he@klodrik.uninett.no:/usr/src/sys/arch/i386/compile/KLODRIK i386

>Description:
	Telnetting to the local host when it is not running an SMTP
	server causes telnet to exit with a bus error instead of
	printing an error message.

	klodrik% telnet localhost smtp
	Trying 127.0.0.1...
	Bus error
	klodrik%

>How-To-Repeat:
	See above.

	Some preliminary debugging reveals that this is happening
	inside perror.c, probably in the strlen() call inside the if()
	test on line 80 of perror.c version 1.18.

	However, if I single-step through the preceding instructions,
	the error does not appear to occur (!).

	Putting a single breakpoint at the repnz instruction does
	reveal that %edi is -1, which appears to cause the subsequent
	bus error in the scasb instruction.

klodrik# gdb telnet
GDB is free software and you are welcome to distribute copies of it
 under certain conditions; type "show copying" to see the conditions.
There is absolutely no warranty for GDB; type "show warranty" for details.
GDB 4.16 (i386--netbsd), Copyright 1996 Free Software Foundation, Inc...
(gdb) run localhost smtp
Starting program: /usr/src/usr.bin/telnet/telnet localhost smtp
Trying 127.0.0.1...

Program received signal SIGBUS, Bus error.
0x400666c9 in perror ()
(gdb) where
#0  0x400666c9 in perror ()
#1  0x6a3a in tn (argc=0, argv=0xefbfda70) at commands.c:2340
#2  0x7d9d in main (argc=2, argv=0xefbfdac8) at main.c:313
(gdb) up
#1  0x6a3a in tn (argc=0, argv=0xefbfda70) at commands.c:2340
2340                perror("telnet: Unable to connect to remote host");
(gdb) down
#0  0x400666c9 in perror ()
(gdb) x/i perror
0x40066670 <perror>:    pushl  %ebp
(gdb) x/i
0x40066671 <perror+1>:  movl   %esp,%ebp
(gdb) 
0x40066673 <perror+3>:  subl   $0x124,%esp
(gdb) 
0x40066679 <perror+9>:  pushl  %edi
(gdb) 
0x4006667a <perror+10>: pushl  %esi
(gdb) 
0x4006667b <perror+11>: pushl  %ebx
(gdb) 
0x4006667c <perror+12>: call   0x40066681 <perror+17>
(gdb) 
0x40066681 <perror+17>: popl   %ebx
(gdb) 
0x40066682 <perror+18>: addl   $0x2a9df,%ebx
(gdb) 
0x40066688 <perror+24>: movl   0x8(%ebp),%edx
(gdb) 
0x4006668b <perror+27>: leal   0xffffffe0(%ebp),%esi
(gdb) 
0x4006668e <perror+30>: testl  %edx,%edx
(gdb) 
0x40066690 <perror+32>: je     0x400666bf <perror+79>
(gdb) 
0x40066692 <perror+34>: cmpb   $0x0,(%edx)
(gdb) 
0x40066695 <perror+37>: je     0x400666bf <perror+79>
(gdb) 
0x40066697 <perror+39>: movl   %edx,0xffffffe0(%ebp)
(gdb) 
0x4006669a <perror+42>: movl   %edx,%edi
(gdb) 
0x4006669c <perror+44>: xorl   %eax,%eax
(gdb) 
0x4006669e <perror+46>: cld    
(gdb) 
0x4006669f <perror+47>: movl   $0xffffffff,%ecx
(gdb) 
0x400666a4 <perror+52>: repnz scasb %es:(%edi),%al
(gdb) 
0x400666a6 <perror+54>: notl   %ecx
(gdb) 
0x400666a8 <perror+56>: decl   %ecx
(gdb) 
0x400666a9 <perror+57>: movl   %ecx,0x4(%esi)
(gdb) 
0x400666ac <perror+60>: leal   0xfffd5608(%ebx),%eax
(gdb) 
0x400666b2 <perror+66>: movl   %eax,0xffffffe8(%ebp)
(gdb) 
0x400666b5 <perror+69>: movl   $0x2,0xffffffec(%ebp)
(gdb) 
0x400666bc <perror+76>: leal   0xfffffff0(%ebp),%esi
(gdb) 
0x400666bf <perror+79>: movl   (%esi),%edi
(gdb) 
0x400666c1 <perror+81>: xorl   %eax,%eax
(gdb) 
0x400666c3 <perror+83>: cld    
(gdb) 
0x400666c4 <perror+84>: movl   $0xffffffff,%ecx
(gdb) 
0x400666c9 <perror+89>: repnz scasb %es:(%edi),%al
(gdb) 
0x400666cb <perror+91>: notl   %ecx
(gdb) 
0x400666cd <perror+93>: decl   %ecx
(gdb) 
0x400666ce <perror+94>: movl   %ecx,0x4(%esi)
(gdb) i reg
eax            0x0      0
ecx            0xffffffff       -1
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9f4       -272639500
edi            0xffffffff       -1
eip            0x400666c9       0x400666c9
eflags         0x10246  66118
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) 
(gdb) break *0x400666c4
Breakpoint 1 at 0x400666c4
(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/src/usr.bin/telnet/telnet localhost smtp
Trying 127.0.0.1...

Breakpoint 1, 0x400666c4 in perror ()
(gdb) i reg
eax            0x0      0
ecx            0x28     40
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9f4       -272639500
edi            0xffffffff       -1
eip            0x400666c4       0x400666c4
eflags         0x246    582
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/s 0x6304
0x6304 <ayt_status+424>:         "telnet: Unable to connect to remote host"
(gdb) ni
0x400666c9 in perror ()
(gdb) i reg
eax            0x0      0
ecx            0xffffffff       -1
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9f4       -272639500
edi            0xffffffff       -1
eip            0x400666c9       0x400666c9
eflags         0x346    838
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/s 0x6304
0x6304 <ayt_status+424>:         "telnet: Unable to connect to remote host"
(gdb) ni

Program received signal SIGBUS, Bus error.
0x400666c9 in perror ()
(gdb) i reg
eax            0x0      0
ecx            0xffffffff       -1
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9f4       -272639500
edi            0xffffffff       -1
eip            0x400666c9       0x400666c9
eflags         0x10346  66374
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) 
(gdb) b *0x4006669a
Breakpoint 2 at 0x4006669a
(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/src/usr.bin/telnet/telnet localhost smtp
Trying 127.0.0.1...

Breakpoint 2, 0x4006669a in perror ()
(gdb) i reg
eax            0x40094790       1074349968
ecx            0x40094824       1074350116
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9e4       -272639516
edi            0xefbfdac4       -272639292
eip            0x4006669a       0x4006669a
eflags         0x206    518
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/i 0x4006669a
0x4006669a <perror+42>: movl   %edx,%edi
(gdb) ni
0x4006669c in perror ()
(gdb) i reg
eax            0x40094790       1074349968
ecx            0x40094824       1074350116
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9e4       -272639516
edi            0x6304   25348
eip            0x4006669c       0x4006669c
eflags         0x306    774
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/i 0x4006669c
0x4006669c <perror+44>: xorl   %eax,%eax
(gdb) ni
0x4006669e in perror ()
(gdb) i reg
eax            0x0      0
ecx            0x40094824       1074350116
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9e4       -272639516
edi            0x6304   25348
eip            0x4006669e       0x4006669e
eflags         0x346    838
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/i 0x4006669e
0x4006669e <perror+46>: cld    
(gdb) ni
0x4006669f in perror ()
(gdb) i reg
eax            0x0      0
ecx            0x40094824       1074350116
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9e4       -272639516
edi            0x6304   25348
eip            0x4006669f       0x4006669f
eflags         0x346    838
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/i 0x4006669f
0x4006669f <perror+47>: movl   $0xffffffff,%ecx
(gdb) ni
0x400666a4 in perror ()
(gdb) i reg
eax            0x0      0
ecx            0xffffffff       -1
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9e4       -272639516
edi            0x6304   25348
eip            0x400666a4       0x400666a4
eflags         0x346    838
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) x/i 0x400666a4
0x400666a4 <perror+52>: repnz scasb %es:(%edi),%al
(gdb) ni
0x400666a4 in perror ()
(gdb) i b
Num Type           Disp Enb Address    What
1   breakpoint     keep y   0x400666c4  <perror+84>
2   breakpoint     keep y   0x4006669a  <perror+42>
        breakpoint already hit 1 time
(gdb) d 
Delete all breakpoints? (y or n) y
(gdb) b *0x400666c9
Breakpoint 3 at 0x400666c9
(gdb) i b
Num Type           Disp Enb Address    What
3   breakpoint     keep y   0x400666c9  <perror+89>
(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/src/usr.bin/telnet/telnet localhost smtp
Trying 127.0.0.1...

Breakpoint 3, 0x400666c9 in perror ()
(gdb) i reg
eax            0x0      0
ecx            0xffffffff       -1
edx            0x6304   25348
ebx            0x40091060       1074335840
esp            0xefbfd8d4       0xefbfd8d4
ebp            0xefbfda04       0xefbfda04
esi            0xefbfd9f4       -272639500
edi            0xffffffff       -1
eip            0x400666c9       0x400666c9
eflags         0x246    582
cs             0x17     23
ss             0x1f     31
ds             0x1f     31
es             0x1f     31
fs             0x1f     31
gs             0x1f     31
(gdb) 

	This is reproducible with a small test program:

klodrik# more t.c
#include <stdio.h>

extern int errno;

/*
 * Minimal reproducer for the perror() crash: sets errno to a known
 * value, calls perror() with a fixed prefix string, and exits with
 * status 0.
 *
 * argc, argv: declared in old-style (K&R) form and never read.
 * NOTE(review): argv is declared as `char *`, not the conventional
 * `char **`; it is unused here -- confirm before reusing this code.
 */
int
main(argc, argv)
        int argc;
        char *argv;
{
        /* Force a known error code so perror() has a message to print;
         * presumably 1 is EPERM ("Operation not permitted") -- confirm. */
        errno=1;
        perror("test of perror");       /* the call under test */
        /* NOTE(review): only <stdio.h> is included by this program, so
         * exit() is used here without a visible declaration. */
        exit(0);
}
klodrik# cc -g -o t t.c
klodrik# ./t
Segmentation fault (core dumped)
klodrik#

	However, if perror.so or perror.o is extracted and statically
	linked, it "almost" works (the message is printed, but truncated):

klodrik# ar x /usr/lib/libc_pic.a perror.so
klodrik# cc -g -c t.c
klodrik# cc -o t t.o perror.so
klodrik# ./t
test of perror: Operation not
klodrik# ar x /usr/lib/libc.a perror.o
klodrik# cc -o t t.o perror.o
klodrik# ./t
test of perror: Operation not
klodrik#


>Fix:
	Don't know.

	We have not totally ruled out a CPU bug; this is on a 333MHz
	Pentium II.

	However, this is also the latest snapshot, compiled with egcs
	and gas.new...
>Audit-Trail:
>Unformatted: