tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Changing fgetstr/fgetln to use getdelim



Roy Marples wrote:
Attached is a patch that changes libc fgetstr/fgetln to use getdelim(3).
This saves over 200 bytes on amd64 (although adding getdelim in the process probably added more still - heh).

This new version makes __getdelim use the same interface as getdelim, but it returns 0 on EOF and works with strings of up to SIZE_MAX - 2 bytes, leaving room for a NUL terminator and the error code -1. Maybe this should be proposed to the OpenGroup as fgetdelim, to fix all 3 issues a few people have with getdelim?

Thanks

Roy
Index: stdio/fgetln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetln.c,v
retrieving revision 1.14
diff -u -p -r1.14 fgetln.c
--- stdio/fgetln.c      10 May 2004 16:47:11 -0000      1.14
+++ stdio/fgetln.c      21 Sep 2009 07:25:21 -0000
@@ -64,10 +64,6 @@ fgetln(fp, lenp)
        FILE *fp;
        size_t *lenp;
 {
-       char *cp;
-
-       FLOCKFILE(fp);
-       cp = __fgetstr(fp, lenp, '\n');
-       FUNLOCKFILE(fp);
-       return cp;
+       
+       return __fgetstr(fp, lenp, '\n');
 }
Index: stdio/fgetstr.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetstr.c,v
retrieving revision 1.5
diff -u -p -r1.5 fgetstr.c
--- stdio/fgetstr.c     31 Jan 2009 06:14:13 -0000      1.5
+++ stdio/fgetstr.c     21 Sep 2009 07:25:21 -0000
@@ -1,11 +1,10 @@
-/*     $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $        */
+/* $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $    */
 
-/*-
- * Copyright (c) 1990, 1993
- *     The Regents of the University of California.  All rights reserved.
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
  *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Roy Marples.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -15,160 +14,60 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)fgetline.c 8.1 (Berkeley) 6/4/93";
-#else
-__RCSID("$NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $");
-#endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: fgetstr.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
 
 #include "namespace.h"
 
 #include <assert.h>
+#include <errno.h>
+#include <limits.h>
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+
 #include "reentrant.h"
 #include "local.h"
 
 /*
- * Expand the line buffer.  Return -1 on error.
-#ifdef notdef
- * The `new size' does not account for a terminating '\0',
- * so we add 1 here.
-#endif
- */
-int
-__slbexpand(fp, newsize)
-       FILE *fp;
-       size_t newsize;
-{
-       void *p;
-
-#ifdef notdef
-       ++newsize;
-#endif
-       _DIAGASSERT(fp != NULL);
-
-       if ((size_t)fp->_lb._size >= newsize)
-               return (0);
-       if ((p = realloc(fp->_lb._base, newsize)) == NULL)
-               return (-1);
-       fp->_lb._base = p;
-       fp->_lb._size = newsize;
-       return (0);
-}
-
-/*
- * Get an input line.  The returned pointer often (but not always)
- * points into a stdio buffer.  Fgetline does not alter the text of
- * the returned line (which is thus not a C string because it will
- * not necessarily end with '\0'), but does allow callers to modify
- * it if they wish.  Thus, we set __SMOD in case the caller does.
+ * Get an input line.
+ * This now uses getdelim(3) for a code reduction.
+ * The upside is that strings are now always NULL terminated, but relying
+ * on this is non portable - better to use the POSIX getdelim(3) function.
  */
 char *
-__fgetstr(fp, lenp, sep)
-       FILE *fp;
-       size_t *lenp;
-       int sep;
+__fgetstr(FILE *__restrict fp, size_t *__restrict lenp, int sep)
 {
-       unsigned char *p;
-       size_t len;
-       size_t off;
+       char *p;
+       size_t size;
 
        _DIAGASSERT(fp != NULL);
        _DIAGASSERT(lenp != NULL);
 
-       /* make sure there is input */
-       if (fp->_r <= 0 && __srefill(fp)) {
-               *lenp = 0;
-               return (NULL);
-       }
-
-       /* look for a newline in the input */
-       if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) != NULL) {
-               char *ret;
-
-               /*
-                * Found one.  Flag buffer as modified to keep fseek from
-                * `optimising' a backward seek, in case the user stomps on
-                * the text.
-                */
-               p++;            /* advance over it */
-               ret = (char *)fp->_p;
-               *lenp = len = p - fp->_p;
-               fp->_flags |= __SMOD;
-               fp->_r -= len;
-               fp->_p = p;
-               return (ret);
+       p = (char *)fp->_lb._base;
+       size = fp->_lb._size;
+       *lenp = __getdelim(&p, &size, sep, fp);
+       /* The struct size variable is only an int ..... */
+       if (size > INT_MAX) {
+               fp->_lb._size = INT_MAX;
+               errno = EOVERFLOW;
+               goto error;
        }
-
-       /*
-        * We have to copy the current buffered data to the line buffer.
-        * As a bonus, though, we can leave off the __SMOD.
-        *
-        * OPTIMISTIC is length that we (optimistically) expect will
-        * accommodate the `rest' of the string, on each trip through the
-        * loop below.
-        */
-#define OPTIMISTIC 80
-
-       for (len = fp->_r, off = 0;; len += fp->_r) {
-               size_t diff;
-
-               /*
-                * Make sure there is room for more bytes.  Copy data from
-                * file buffer to line buffer, refill file and look for
-                * newline.  The loop stops only when we find a newline.
-                */
-               if (__slbexpand(fp, len + OPTIMISTIC))
-                       goto error;
-               (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
-                   len - off);
-               off = len;
-               if (__srefill(fp))
-                       break;  /* EOF or error: return partial line */
-               if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) == NULL)
-                       continue;
-
-               /* got it: finish up the line (like code above) */
-               p++;
-               diff = p - fp->_p;
-               len += diff;
-               if (__slbexpand(fp, len))
-                       goto error;
-               (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
-                   diff);
-               fp->_r -= diff;
-               fp->_p = p;
-               break;
-       }
-       *lenp = len;
-#ifdef notdef
-       fp->_lb._base[len] = 0;
-#endif
-       return ((char *)fp->_lb._base);
-
+       fp->_lb._size = (int)size;
+       if (*lenp < SIZE_MAX)
+               return p;
 error:
-       *lenp = 0;              /* ??? */
-       return (NULL);          /* ??? */
+       *lenp = 0;
+       return NULL;
 }
Index: stdio/fgetwln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetwln.c,v
retrieving revision 1.2
diff -u -p -r1.2 fgetwln.c
--- stdio/fgetwln.c     31 Jan 2009 06:08:28 -0000      1.2
+++ stdio/fgetwln.c     21 Sep 2009 07:25:21 -0000
@@ -36,7 +36,11 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
 #endif /* LIBC_SCCS and not lint */
 
 #include "namespace.h"
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <wchar.h>
 #include "reentrant.h"
 #include "local.h"
@@ -45,6 +49,37 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
 __weak_alias(fgetwln,_fgetwln)
 #endif
 
+/*
+ * Expand the line buffer.  Return -1 on error.
+#ifdef notdef
+ * The `new size' does not account for a terminating '\0',
+ * so we add 1 here.
+#endif
+ */
+static int
+__slbexpand(FILE *fp, size_t newsize)
+{
+       void *p;
+
+#ifdef notdef
+       ++newsize;
+#endif
+       _DIAGASSERT(fp != NULL);
+
+       /* fp->_lb._size is an int ..... */
+       if (newsize > INT_MAX) {
+               errno = EOVERFLOW;
+               return (-1);
+       }
+       if ((size_t)fp->_lb._size >= newsize)
+               return (0);
+       if ((p = realloc(fp->_lb._base, newsize)) == NULL)
+               return (-1);
+       fp->_lb._base = p;
+       fp->_lb._size = newsize;
+       return (0);
+}
+
 wchar_t *
 fgetwln(FILE * __restrict fp, size_t *lenp)
 {
Index: stdio/getdelim.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/getdelim.c,v
retrieving revision 1.3
diff -u -p -r1.3 getdelim.c
--- stdio/getdelim.c    14 Jul 2009 18:29:41 -0000      1.3
+++ stdio/getdelim.c    21 Sep 2009 07:25:21 -0000
@@ -30,6 +30,8 @@
 #include <sys/cdefs.h>
 __RCSID("$NetBSD: getdelim.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
 
+#include "namespace.h"
+
 #include <sys/param.h>
 
 #include <assert.h>
@@ -47,12 +49,15 @@ __RCSID("$NetBSD: getdelim.c,v 1.3 2009/
  * without the need for a realloc. */
 #define MINBUF 128
 
+/* This private function allows strings of up to SIZE_MAX - 2
+ * and returns 0 on EOF, both of which are disallowed by POSIX.
+ * Maybe this should be named fgetdelim and proposed to the OpenGroup....*/
 ssize_t
-getdelim(char **__restrict buf, size_t *__restrict buflen,
+__getdelim(char **__restrict buf, size_t *__restrict buflen,
     int sep, FILE *__restrict fp)
 {
        unsigned char *p;
-       size_t len, off, newlen;
+       size_t len, newlen, off;
        char *newb;
 
        _DIAGASSERT(fp != NULL);
@@ -72,8 +77,7 @@ getdelim(char **__restrict buf, size_t *
        for (;;) {
                /* If the input buffer is empty, refill it */
                if (fp->_r <= 0 && __srefill(fp)) {
-                       /* POSIX requires we return -1 on EOF */
-                       if (off == 0 || __sferror(fp))
+                       if (__sferror(fp))
                                goto error;
                        break;
                }
@@ -85,13 +89,12 @@ getdelim(char **__restrict buf, size_t *
                else
                        len = (p - fp->_p) + 1;
 
-               newlen = off + len;
-               /* Ensure that the resultant buffer length fits in ssize_t */
-               if (newlen > (size_t)SSIZE_MAX) {
+               newlen = off + len + 1;
+               /* Ensure we can handle it */
+               if (newlen < off || newlen > SIZE_MAX - 2) {
                        errno = EOVERFLOW;
                        goto error;
                }
-               newlen++; /* reserve space for the NULL terminator */
                if (newlen > *buflen) {
                        if (newlen < MINBUF)
                                newlen = MINBUF;
@@ -133,3 +136,21 @@ error:
        FUNLOCKFILE(fp);
        return -1;
 }
+
+ssize_t
+getdelim(char **__restrict buf, size_t *__restrict buflen,
+    int sep, FILE *__restrict fp)
+{
+       ssize_t len;
+
+       len = __getdelim(buf, buflen, sep, fp);
+       if (len == 0) {
+               /* POSIX requires that we return -1 on EOF */
+               return -1;
+       } else if (len < -1) {
+               /* POSIX requires no string larger than SSIZE_MAX */
+               errno = EOVERFLOW;
+               return -1;
+       }
+       return len;
+}
Index: stdio/local.h
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/local.h,v
retrieving revision 1.20
diff -u -p -r1.20 local.h
--- stdio/local.h       14 May 2005 23:51:02 -0000      1.20
+++ stdio/local.h       21 Sep 2009 07:25:21 -0000
@@ -75,8 +75,9 @@ extern int    __gettemp __P((char *, int *,
 extern wint_t  __fgetwc_unlock __P((FILE *));
 extern wint_t  __fputwc_unlock __P((wchar_t, FILE *));
 
+extern ssize_t __getdelim __P((char ** __restrict, size_t * __restrict, int,
+    FILE * __restrict));
 extern char    *__fgetstr __P((FILE * __restrict, size_t * __restrict, int));
-extern int      __slbexpand __P((FILE *, size_t));
 extern int      __vfwprintf_unlocked __P((FILE *, const wchar_t *,
     _BSD_VA_LIST_));
 extern int      __vfwscanf_unlocked __P((FILE * __restrict,


Home | Main Index | Thread Index | Old Index