tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Suggested change of ctype.h



Hello,
Several times I stumbled over gcc's warnings regarding the use of a
signed character as an index in the ctype.h functions. And indeed, when you
look at the results of e.g. toupper(-128) to toupper(-1), they are
'interesting', to say the least. You get whatever bytes happen to lie in
memory before the array. The values are not quite random, but very wrong.

Other OSes return the original value for {toupper|tolower}(char<0) and
to me it seems to make sense to clean up this little annoyance.

I changed /usr/include/ctype.h to the following:

[snip]

__BEGIN_DECLS
/*
 * File-local prototypes for the table-driven ctype routines.
 * Each one takes the character value as an int and returns an int.
 */
static int      isalnum(int c);
static int      isalpha(int c);
static int      iscntrl(int c);
static int      isdigit(int c);
static int      isgraph(int c);
static int      islower(int c);
static int      isprint(int c);
static int      ispunct(int c);
static int      isspace(int c);
static int      isupper(int c);
static int      isxdigit(int c);
/* Case-mapping lookups. */
static int      tolower(int c);
static int      toupper(int c);

[snap]

/*
 * isdigit: test the _N flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int isdigit(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _N);
}
/*
 * islower: test the _L flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int islower(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _L);
}
/*
 * isspace: test the _S flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int isspace(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _S);
}
/*
 * ispunct: test the _P flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int ispunct(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _P);
}
/*
 * isupper: test the _U flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int isupper(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _U);
}
/*
 * isalpha: nonzero when the _ctype_ entry for c has _U or _L set.
 * A negative c yields 0 without reading the table.
 */
static __inline int isalpha(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & (_U|_L));
}
/*
 * isxdigit: nonzero when the _ctype_ entry for c has _N or _X set.
 * A negative c yields 0 without reading the table.
 */
static __inline int isxdigit(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & (_N|_X));
}
/*
 * isalnum: nonzero when the _ctype_ entry for c has any of
 * _U, _L or _N set. A negative c yields 0 without reading the table.
 */
static __inline int isalnum(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & (_U|_L|_N));
}
/*
 * isprint: nonzero when the _ctype_ entry for c has any of
 * _P, _U, _L, _N or _B set. A negative c yields 0 without reading
 * the table.
 */
static __inline int isprint(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & (_P|_U|_L|_N|_B));
}
/*
 * isgraph: nonzero when the _ctype_ entry for c has any of
 * _P, _U, _L or _N set. A negative c yields 0 without reading the table.
 */
static __inline int isgraph(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & (_P|_U|_L|_N));
}
/*
 * iscntrl: test the _C flag in the _ctype_ entry for c.
 * A negative c yields 0 without reading the table.
 */
static __inline int iscntrl(int c) {
        if (c < 0)
                return 0;
        return (int)((_ctype_ + 1)[c] & _C);
}
/*
 * tolower: look c up in _tolower_tab_ and return the entry as an int.
 * A negative c is handed back unchanged without reading the table.
 */
static __inline int tolower(int c) {
        if (c < 0)
                return c;
        return (int)((_tolower_tab_ + 1)[c]);
}
/*
 * toupper: look c up in _toupper_tab_ and return the entry as an int.
 * A negative c is handed back unchanged without reading the table.
 */
static __inline int toupper(int c) {
        if (c < 0)
                return c;
        return (int)((_toupper_tab_ + 1)[c]);
}

This turns the former macros into inline functions. That is necessary
because the argument (c) is now used twice (once in the range check and
once as the index) and must be evaluated only once; with a macro,
unwanted side effects would occur, such as toupper(*src++) incrementing
src 2 or 3 times.

Another way to solve the issue would be to increase the size of the
_ctype_, _tolower_tab_ and _toupper_tab_ to 256 elements, where the
first half contained all 0s for _ctype_ and -128 to -1 for the other
two. With '+ 129' instead of '+ 1' the result should then become
predictable.

I hope I wasn't talking complete BS and you can make any use of this
suggestion :)

Juergen


Home | Main Index | Thread Index | Old Index