Subject: lib/2509: signed char problem of strto{d,l,q,ul,uq}()
To: None <gnats-bugs@NetBSD.ORG>
From: None <soda@sra.co.jp>
List: netbsd-bugs
Date: 06/04/1996 05:44:19
>Number:         2509
>Category:       lib
>Synopsis:       signed char problem of strto{d,l,q,ul,uq}()
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    lib-bug-people (Library Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Mon Jun  3 16:50:06 1996
>Last-Modified:
>Originator:     Noriyuki Soda
>Organization:
	Software Research Associates, Inc., Japan
	software tools and technology group
>Release:        1.2_ALPHA (May 30 1996)
>Environment:
System: NetBSD james 1.2_ALPHA NetBSD 1.2_ALPHA (PALM) #0: Sat Jun 1 05:27:33 JST 1996 soda@james:/usr/src/sys/arch/i386/compile/PALM i386

>Description:

	strtod(), strtol(), strtoq(), strtoul() and strtouq() may index
	outside the bounds of the array "_ctype_[]" when the string
	argument of these functions includes international characters
	such as those of ISO-8859-1 (Latin-1).
	This problem causes these functions to return unexpected values.

	This problem is caused by passing a "signed char" argument to
	<ctype.h> functions such as isspace(). Because section 7.3 of
	the C Language Standard says that the argument value of the
	functions defined in <ctype.h> should be an "unsigned char"
	value or "EOF", the code of these strtoXX() functions is wrong.

	Possible fixes for this problem are the following:
		1. first check isascii(c), then isspace(c)
	or
		2. use "unsigned char" instead of "char"
	I chose 2, because
		- It doesn't depend on non-ANSI function isascii().
		- It is better in internationalization.
		  For example,
			isprint((unsigned char)c)
		  is better than
			isascii(c) && isprint(c)
		  where c == `\300' in ISO Latin-1 locale.
		- It may be faster.

	IMHO, the strtoXX() functions are important enough to fix before
	the NetBSD-1.2 release, even though there are many other sources
	which have the same problem as this report and which probably
	cannot be fixed before 1.2.

	BTW, is there a plan to add better locale support to NetBSD?
	Or can I volunteer?

OTHER NOTE:
	The strtol() and strtoul() manual pages should include the
	following description, as the C Language Standard says (and
	as the strtod() manual page says).

	------------------------------------------------------------
	RETURN VALUES
	     If no conversion is performed, zero is returned....
	------------------------------------------------------------

>How-To-Repeat:

	% ./test-program-attached-below | grep fail | wc
	40

	in my environment (but it may vary).

------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

/*
 * Build the one-character string whose only byte is `c', hand it to
 * strtod(), strtouq(), strtoq(), strtoul() and strtol() in turn, and
 * print one line: the value of `c' followed by "pass" or "fail" for
 * each function, in that order.
 *
 * A function passes when it returns zero and leaves the end pointer at
 * the start of the string, i.e. when it reports that no conversion was
 * performed.
 *
 * Every check is evaluated in its own statement.  The checks share the
 * end pointer `p', and the evaluations of the arguments of a single
 * call are not sequenced relative to each other, so performing all
 * five conversions inside the printf() argument list would allow one
 * conversion to overwrite `p' before another conversion's `p == s'
 * comparison has read it.
 */
void
test(int c)
{
	static const char *msg[] = { "fail", "pass" };
	char *p, s[2];
	int d_ok, uq_ok, q_ok, ul_ok, l_ok;

	s[0] = c; s[1] = '\0';

	/* Each result is 0 or 1 and is used below as an index into msg[]. */
	d_ok = strtod(s, &p) == 0.0 && p == s;
	uq_ok = strtouq(s, &p, 0) == 0 && p == s;
	q_ok = strtoq(s, &p, 0) == 0 && p == s;
	ul_ok = strtoul(s, &p, 0) == 0 && p == s;
	l_ok = strtol(s, &p, 0) == 0 && p == s;

	printf("%d:\t%s\t%s\t%s\t%s\t%s\n", c,
	       msg[d_ok], msg[uq_ok], msg[q_ok], msg[ul_ok], msg[l_ok]);
}

/*
 * Run test() on every negative value a signed char can hold, from
 * SCHAR_MIN up to -1.
 *
 * Declared with an explicit `int' return type and an explicit return
 * value: implicit `int' is not valid since C99, and without a return
 * statement the exit status was not reliably defined.
 */
int
main(void)
{
	int i;

	for (i = SCHAR_MIN; i < 0; i++)
		test(i);

	return 0;
}
------------------------------------------------------------------------

>Fix:

------------------------------------------------------------------------
diff -u /usr/src-org/lib/libc/stdlib/strtod.c ./strtod.c
--- /usr/src-org/lib/libc/stdlib/strtod.c	Sat Feb 17 21:25:14 1996
+++ ./strtod.c	Tue Jun  4 04:11:53 1996
@@ -1237,7 +1237,7 @@
 	rv = 0.;
 
 
-	for(s = s00; isspace(*s); s++)
+	for(s = s00; isspace(*(unsigned char *)s); s++)
 		;
 
 	if (*s == '-') {
diff -u /usr/src-org/lib/libc/stdlib/strtol.c ./strtol.c
--- /usr/src-org/lib/libc/stdlib/strtol.c	Thu Dec 28 21:17:08 1995
+++ ./strtol.c	Tue Jun  4 04:14:49 1996
@@ -59,7 +59,7 @@
 	char **endptr;
 	register int base;
 {
-	register const char *s;
+	register const unsigned char *s;
 	register long acc, cutoff;
 	register int c;
 	register int neg, any, cutlim;
@@ -69,7 +69,7 @@
 	 * If base is 0, allow 0x for hex and 0 for octal, else
 	 * assume decimal; if base is already 16, allow 0x.
 	 */
-	s = nptr;
+	s = (const unsigned char *)nptr;
 	do {
 		c = *s++;
 	} while (isspace(c));
@@ -151,6 +151,6 @@
 		}
 	}
 	if (endptr != 0)
-		*endptr = (char *) (any ? s - 1 : nptr);
+		*endptr = any ? (char *)s - 1 : (char *)nptr;
 	return (acc);
 }
diff -u /usr/src-org/lib/libc/stdlib/strtoq.c ./strtoq.c
--- /usr/src-org/lib/libc/stdlib/strtoq.c	Thu Dec 21 21:19:29 1995
+++ ./strtoq.c	Tue Jun  4 04:15:54 1996
@@ -54,7 +54,7 @@
 	char **endptr;
 	register int base;
 {
-	register const char *s;
+	register const unsigned char *s;
 	register quad_t acc, cutoff;
 	register int c;
 	register int neg, any, cutlim;
@@ -64,7 +64,7 @@
 	 * If base is 0, allow 0x for hex and 0 for octal, else
 	 * assume decimal; if base is already 16, allow 0x.
 	 */
-	s = nptr;
+	s = (const unsigned char *)nptr;
 	do {
 		c = *s++;
 	} while (isspace(c));
@@ -147,6 +147,6 @@
 		}
 	}
 	if (endptr != 0)
-		*endptr = (char *) (any ? s - 1 : nptr);
+		*endptr = any ? (char *)s - 1 : (char *)nptr;
 	return (acc);
 }
diff -u /usr/src-org/lib/libc/stdlib/strtoul.c ./strtoul.c
--- /usr/src-org/lib/libc/stdlib/strtoul.c	Thu Dec 28 21:17:09 1995
+++ ./strtoul.c	Tue Jun  4 04:16:37 1996
@@ -58,7 +58,7 @@
 	char **endptr;
 	register int base;
 {
-	register const char *s;
+	register const unsigned char *s;
 	register unsigned long acc, cutoff;
 	register int c;
 	register int neg, any, cutlim;
@@ -66,7 +66,7 @@
 	/*
 	 * See strtol for comments as to the logic used.
 	 */
-	s = nptr;
+	s = (const unsigned char *)nptr;
 	do {
 		c = *s++;
 	} while (isspace(c));
@@ -113,6 +113,6 @@
 	if (neg && any > 0)
 		acc = -acc;
 	if (endptr != 0)
-		*endptr = (char *) (any ? s - 1 : nptr);
+		*endptr = any ? (char *)s - 1 : (char *)nptr;
 	return (acc);
 }
diff -u /usr/src-org/lib/libc/stdlib/strtouq.c ./strtouq.c
--- /usr/src-org/lib/libc/stdlib/strtouq.c	Thu Dec 21 21:19:30 1995
+++ ./strtouq.c	Tue Jun  4 04:17:14 1996
@@ -54,7 +54,7 @@
 	char **endptr;
 	register int base;
 {
-	register const char *s;
+	register const unsigned char *s;
 	register u_quad_t acc, cutoff;
 	register int c;
 	register int neg, any, cutlim;
@@ -62,7 +62,7 @@
 	/*
 	 * See strtoq for comments as to the logic used.
 	 */
-	s = nptr;
+	s = (const unsigned char *)nptr;
 	do {
 		c = *s++;
 	} while (isspace(c));
@@ -109,6 +109,6 @@
 	if (neg && any > 0)
 		acc = -acc;
 	if (endptr != 0)
-		*endptr = (char *) (any ? s - 1 : nptr);
+		*endptr = any ? (char *)s - 1 : (char *)nptr;
 	return (acc);
 }
------------------------------------------------------------------------
>Audit-Trail:
>Unformatted: