tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Changing fgetstr/fgetln to use getdelim



Hi List!

Attached is a patch that changes libc fgetstr/fgetln to use getdelim(3).
This saves over 200 bytes on amd64 (although adding getdelim in the process probably added more still - heh).

One thing I did notice though is that __slbexpand expands the buffer on upto a size_t, but the place holder on the struct is only an int and doesn't have any bounds checking. Surely this is a potential overflow?

Comments? OK to commit?

Thanks

Roy
Index: stdio/fgetln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetln.c,v
retrieving revision 1.14
diff -u -p -r1.14 fgetln.c
--- stdio/fgetln.c      10 May 2004 16:47:11 -0000      1.14
+++ stdio/fgetln.c      20 Sep 2009 13:02:13 -0000
@@ -64,10 +64,6 @@ fgetln(fp, lenp)
        FILE *fp;
        size_t *lenp;
 {
-       char *cp;
-
-       FLOCKFILE(fp);
-       cp = __fgetstr(fp, lenp, '\n');
-       FUNLOCKFILE(fp);
-       return cp;
+       
+       return __fgetstr(fp, lenp, '\n');
 }
Index: stdio/fgetstr.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetstr.c,v
retrieving revision 1.5
diff -u -p -r1.5 fgetstr.c
--- stdio/fgetstr.c     31 Jan 2009 06:14:13 -0000      1.5
+++ stdio/fgetstr.c     20 Sep 2009 13:02:14 -0000
@@ -1,11 +1,10 @@
-/*     $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $        */
+/* $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $    */
 
-/*-
- * Copyright (c) 1990, 1993
- *     The Regents of the University of California.  All rights reserved.
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
  *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Roy Marples.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -15,160 +14,56 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)fgetline.c 8.1 (Berkeley) 6/4/93";
-#else
-__RCSID("$NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $");
-#endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: fgetstr.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
 
 #include "namespace.h"
 
-#include <assert.h>
+#include <errno.h>
+#include <limits.h>
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+
 #include "reentrant.h"
 #include "local.h"
 
 /*
- * Expand the line buffer.  Return -1 on error.
-#ifdef notdef
- * The `new size' does not account for a terminating '\0',
- * so we add 1 here.
-#endif
- */
-int
-__slbexpand(fp, newsize)
-       FILE *fp;
-       size_t newsize;
-{
-       void *p;
-
-#ifdef notdef
-       ++newsize;
-#endif
-       _DIAGASSERT(fp != NULL);
-
-       if ((size_t)fp->_lb._size >= newsize)
-               return (0);
-       if ((p = realloc(fp->_lb._base, newsize)) == NULL)
-               return (-1);
-       fp->_lb._base = p;
-       fp->_lb._size = newsize;
-       return (0);
-}
-
-/*
- * Get an input line.  The returned pointer often (but not always)
- * points into a stdio buffer.  Fgetline does not alter the text of
- * the returned line (which is thus not a C string because it will
- * not necessarily end with '\0'), but does allow callers to modify
- * it if they wish.  Thus, we set __SMOD in case the caller does.
+ * Get an input line.
+ * This now uses getdelim(3) for a code reduction.
+ * The upside is that strings are now always NULL terminated, but relying
+ * on this is non portable - better to use the POSIX getdelim(3) function.
  */
 char *
-__fgetstr(fp, lenp, sep)
-       FILE *fp;
-       size_t *lenp;
-       int sep;
+__fgetstr(FILE *__restrict fp, size_t *__restrict lenp, int sep)
 {
-       unsigned char *p;
-       size_t len;
-       size_t off;
-
-       _DIAGASSERT(fp != NULL);
-       _DIAGASSERT(lenp != NULL);
-
-       /* make sure there is input */
-       if (fp->_r <= 0 && __srefill(fp)) {
-               *lenp = 0;
-               return (NULL);
-       }
+       char *p;
+       size_t size;
 
-       /* look for a newline in the input */
-       if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) != NULL) {
-               char *ret;
-
-               /*
-                * Found one.  Flag buffer as modified to keep fseek from
-                * `optimising' a backward seek, in case the user stomps on
-                * the text.
-                */
-               p++;            /* advance over it */
-               ret = (char *)fp->_p;
-               *lenp = len = p - fp->_p;
-               fp->_flags |= __SMOD;
-               fp->_r -= len;
-               fp->_p = p;
-               return (ret);
+       p = (char *)fp->_lb._base;
+       size = fp->_lb._size;
+       p = __getdelim(&p, &size, sep, fp, lenp);
+       /* The struct size variable is only an int ..... */
+       if (size > INT_MAX) {
+               fp->_lb._size = INT_MAX;
+               errno = EOVERFLOW;
+               goto error;
        }
-
-       /*
-        * We have to copy the current buffered data to the line buffer.
-        * As a bonus, though, we can leave off the __SMOD.
-        *
-        * OPTIMISTIC is length that we (optimistically) expect will
-        * accommodate the `rest' of the string, on each trip through the
-        * loop below.
-        */
-#define OPTIMISTIC 80
-
-       for (len = fp->_r, off = 0;; len += fp->_r) {
-               size_t diff;
-
-               /*
-                * Make sure there is room for more bytes.  Copy data from
-                * file buffer to line buffer, refill file and look for
-                * newline.  The loop stops only when we find a newline.
-                */
-               if (__slbexpand(fp, len + OPTIMISTIC))
-                       goto error;
-               (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
-                   len - off);
-               off = len;
-               if (__srefill(fp))
-                       break;  /* EOF or error: return partial line */
-               if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) == NULL)
-                       continue;
-
-               /* got it: finish up the line (like code above) */
-               p++;
-               diff = p - fp->_p;
-               len += diff;
-               if (__slbexpand(fp, len))
-                       goto error;
-               (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
-                   diff);
-               fp->_r -= diff;
-               fp->_p = p;
-               break;
-       }
-       *lenp = len;
-#ifdef notdef
-       fp->_lb._base[len] = 0;
-#endif
-       return ((char *)fp->_lb._base);
-
+       fp->_lb._size = (int)size;
+       if (p != NULL)
+               return p;
 error:
-       *lenp = 0;              /* ??? */
-       return (NULL);          /* ??? */
+       *lenp = 0;
+       return NULL;
 }
Index: stdio/fgetwln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetwln.c,v
retrieving revision 1.2
diff -u -p -r1.2 fgetwln.c
--- stdio/fgetwln.c     31 Jan 2009 06:08:28 -0000      1.2
+++ stdio/fgetwln.c     20 Sep 2009 13:02:14 -0000
@@ -36,7 +36,11 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
 #endif /* LIBC_SCCS and not lint */
 
 #include "namespace.h"
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <wchar.h>
 #include "reentrant.h"
 #include "local.h"
@@ -45,6 +49,37 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
 __weak_alias(fgetwln,_fgetwln)
 #endif
 
+/*
+ * Expand the line buffer.  Return -1 on error.
+#ifdef notdef
+ * The `new size' does not account for a terminating '\0',
+ * so we add 1 here.
+#endif
+ */
+static int
+__slbexpand(FILE *fp, size_t newsize)
+{
+       void *p;
+
+#ifdef notdef
+       ++newsize;
+#endif
+       _DIAGASSERT(fp != NULL);
+
+       /* fp->_lb._size is an int ..... */
+       if (newsize > INT_MAX) {
+               errno = EOVERFLOW;
+               return (-1);
+       }
+       if ((size_t)fp->_lb._size >= newsize)
+               return (0);
+       if ((p = realloc(fp->_lb._base, newsize)) == NULL)
+               return (-1);
+       fp->_lb._base = p;
+       fp->_lb._size = newsize;
+       return (0);
+}
+
 wchar_t *
 fgetwln(FILE * __restrict fp, size_t *lenp)
 {
Index: stdio/getdelim.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/getdelim.c,v
retrieving revision 1.3
diff -u -p -r1.3 getdelim.c
--- stdio/getdelim.c    14 Jul 2009 18:29:41 -0000      1.3
+++ stdio/getdelim.c    20 Sep 2009 13:02:14 -0000
@@ -30,6 +30,8 @@
 #include <sys/cdefs.h>
 __RCSID("$NetBSD: getdelim.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
 
+#include "namespace.h"
+
 #include <sys/param.h>
 
 #include <assert.h>
@@ -47,19 +49,21 @@ __RCSID("$NetBSD: getdelim.c,v 1.3 2009/
  * without the need for a realloc. */
 #define MINBUF 128
 
-ssize_t
-getdelim(char **__restrict buf, size_t *__restrict buflen,
-    int sep, FILE *__restrict fp)
+/* POSIX getdelim allows buffers of size_t, but only return lengths of ssize_t.
+ * We should be able to handle returns of size_t also. */
+char *
+__getdelim(char **__restrict buf, size_t *__restrict buflen,
+    int sep, FILE *__restrict fp, size_t *__restrict off)
 {
        unsigned char *p;
-       size_t len, off, newlen;
+       size_t len, newlen;
        char *newb;
 
        _DIAGASSERT(fp != NULL);
 
        if (buf == NULL || buflen == NULL) {
                errno = EINVAL;
-               return -1;
+               return NULL;
        }
 
        /* If buf is NULL, we have to assume a size of zero */
@@ -68,12 +72,12 @@ getdelim(char **__restrict buf, size_t *
 
        FLOCKFILE(fp);
        _SET_ORIENTATION(fp, -1);
-       off = 0;
+       *off = 0;
        for (;;) {
                /* If the input buffer is empty, refill it */
                if (fp->_r <= 0 && __srefill(fp)) {
                        /* POSIX requires we return -1 on EOF */
-                       if (off == 0 || __sferror(fp))
+                       if (*off == 0 || __sferror(fp))
                                goto error;
                        break;
                }
@@ -85,13 +89,12 @@ getdelim(char **__restrict buf, size_t *
                else
                        len = (p - fp->_p) + 1;
 
-               newlen = off + len;
-               /* Ensure that the resultant buffer length fits in ssize_t */
-               if (newlen > (size_t)SSIZE_MAX) {
+               newlen = *off + len;
+               /* Ensure we can handle it */
+               if (++newlen < *off) {
                        errno = EOVERFLOW;
                        goto error;
                }
-               newlen++; /* reserve space for the NULL terminator */
                if (newlen > *buflen) {
                        if (newlen < MINBUF)
                                newlen = MINBUF;
@@ -116,20 +119,37 @@ getdelim(char **__restrict buf, size_t *
                        *buflen = newlen;
                }
 
-               (void)memcpy((void *)(*buf + off), (void *)fp->_p, len);
+               (void)memcpy((void *)(*buf + *off), (void *)fp->_p, len);
                /* Safe, len is never greater than what fp->_r can fit. */
                fp->_r -= (int)len;
                fp->_p += (int)len;
-               off += len;
+               *off += len;
                if (p != NULL)
                        break;
        }
        FUNLOCKFILE(fp);
        if (*buf != NULL)
-               *(*buf + off) = '\0';
-       return off;
+               *(*buf + *off) = '\0';
+       return *buf;
 
 error:
        FUNLOCKFILE(fp);
-       return -1;
+       return NULL;
+}
+
+ssize_t
+getdelim(char **__restrict buf, size_t *__restrict buflen,
+    int sep, FILE *__restrict fp)
+{
+       size_t len;
+       char *p;
+
+       p = __getdelim(buf, buflen, sep, fp, &len);
+       if (p == NULL)
+               return -1;
+       if (len > (size_t)SSIZE_MAX) {
+               errno = EOVERFLOW;
+               return -1;
+       }
+       return (ssize_t)len;
 }
Index: stdio/local.h
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/local.h,v
retrieving revision 1.20
diff -u -p -r1.20 local.h
--- stdio/local.h       14 May 2005 23:51:02 -0000      1.20
+++ stdio/local.h       20 Sep 2009 13:02:14 -0000
@@ -75,8 +75,9 @@ extern int    __gettemp __P((char *, int *,
 extern wint_t  __fgetwc_unlock __P((FILE *));
 extern wint_t  __fputwc_unlock __P((wchar_t, FILE *));
 
+extern char    *__getdelim __P((char ** __restrict, size_t * __restrict, int,
+    FILE * __restrict, size_t *__restrict));
 extern char    *__fgetstr __P((FILE * __restrict, size_t * __restrict, int));
-extern int      __slbexpand __P((FILE *, size_t));
 extern int      __vfwprintf_unlocked __P((FILE *, const wchar_t *,
     _BSD_VA_LIST_));
 extern int      __vfwscanf_unlocked __P((FILE * __restrict,


Home | Main Index | Thread Index | Old Index