Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/printf Changes for POSIX conformance.



details:   https://anonhg.NetBSD.org/src/rev/fedb523a634d
branches:  trunk
changeset: 983402:fedb523a634d
user:      kre <kre%NetBSD.org@localhost>
date:      Wed May 19 22:41:19 2021 +0000

description:
Changes for POSIX conformance.

1.  exit(1) with an error message on stderr if an I/O error occurs.
1a. To work properly when built into /bin/sh sprinkle clearerr() at
    appropriate places.

2.  Verify that when a 'X data value is used with one of the numeric
    conversions, nothing follows the 'X'.   It used to be unclear
    in the standard whether this was required or not, it is clear that
    with numeric conversions the entire data value must be used, or an
    error must result.   But with string conversions, that isn't the case
    and unused parts are simply ignored.   This one is a numeric conversion
    with a string value, so which applies?   The standard used to contain
    an example of '+3 being converted, producing the same as '+ ignoring
    the '3' with no mention of any error, so that's the approach we adopted.
    The forthcoming version now explicitly states that an error would also
    be generated from that case, as the '3' was not used by the numeric
    conversion.

2a. We support those conversions with floating as well as integer conversions,
    as the standard used to suggest that was required (but it makes no sense,
    the values are always integers, printing them in a floating format is
    dumb).  The standard has been revised to make it clear that only the
    integer numeric conversions %d %u %x (etc) are supposed to handle the 'X
    form of data value.   We still allow it with the floating formats as an
    extension, for backward compat, just in case someone (other than the ATF
    tests) is using it.   It might go away.

2b. These formats are supposed to convert 'X where 'X' is a character
    (perhaps multibyte encoded) in the current LC_CTYPE locale category.
    We don't handle that, only 1 byte characters are handled currently.
    However the framework is now there to allow code to (one hopes, easily)
    be added to handle multi-byte locales.   (Note that for the purposes of
    #2 above, 'X' must be a single character, not a single byte.)

diffstat:

 usr.bin/printf/printf.c |  48 +++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 41 insertions(+), 7 deletions(-)

diffs (115 lines):

diff -r 342ffcfcb99f -r fedb523a634d usr.bin/printf/printf.c
--- a/usr.bin/printf/printf.c   Wed May 19 22:29:18 2021 +0000
+++ b/usr.bin/printf/printf.c   Wed May 19 22:41:19 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: printf.c,v 1.52 2021/04/16 18:31:28 christos Exp $     */
+/*     $NetBSD: printf.c,v 1.53 2021/05/19 22:41:19 kre Exp $  */
 
 /*
  * Copyright (c) 1989, 1993
@@ -41,7 +41,7 @@
 #if 0
 static char sccsid[] = "@(#)printf.c   8.2 (Berkeley) 3/22/95";
 #else
-__RCSID("$NetBSD: printf.c,v 1.52 2021/04/16 18:31:28 christos Exp $");
+__RCSID("$NetBSD: printf.c,v 1.53 2021/05/19 22:41:19 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -74,6 +74,7 @@
 static intmax_t         getintmax(void);
 static char    *getstr(void);
 static char    *mklong(const char *, char);
+static intmax_t         wide_char(const char *);
 static void      check_conversion(const char *, const char *);
 static void     usage(void);
 
@@ -141,6 +142,7 @@
 #endif
 
        rval = 0;       /* clear for builtin versions (avoid holdover) */
+       clearerr(stdout);       /* for the builtin version */
 
        /*
         * printf does not comply with Posix XBD 12.2 - there are no opts,
@@ -372,10 +374,16 @@
                        *fmt = nextch;
                        /* escape if a \c was encountered */
                        if (rval & 0x100)
-                               return rval & ~0x100;
+                               goto done;
                }
        } while (gargv != argv && *gargv);
 
+  done:
+       (void)fflush(stdout);
+       if (ferror(stdout)) {
+               clearerr(stdout);
+               err(1, "write error");
+       }
        return rval & ~0x100;
   out:
        warn("print failed");
@@ -628,8 +636,9 @@
 
        len = strlen(str) + 2;
        if (len > sizeof copy) {
-               warnx("format %s too complex", str);
+               warnx("format \"%s\" too complex", str);
                len = 4;
+               rval = 1;
        }
        (void)memmove(copy, str, len - 3);
        copy[len - 3] = 'j';
@@ -691,7 +700,7 @@
        gargv++;
 
        if (*cp == '\"' || *cp == '\'')
-               return *(cp + 1);
+               return wide_char(cp);
 
        errno = 0;
        val = strtoimax(cp, &ep, 0);
@@ -708,8 +717,9 @@
        if (!*gargv)
                return 0.0;
 
-       if (**gargv == '\"' || **gargv == '\'')
-               return (double) *((*gargv++)+1);
+       /* This is a NetBSD extension, not required by POSIX (it is useless) */
+       if (*(ep = *gargv) == '\"' || *ep == '\'')
+               return (double)wide_char(ep);
 
        errno = 0;
        val = strtod(*gargv, &ep);
@@ -717,6 +727,30 @@
        return val;
 }
 
+/*
+ * Return the value of the character following the leading quote in p
+ * (p[0] is the ' or " that the callers have already checked for).
+ * If anything follows that character, warn and set rval: the whole
+ * argument must be consumed.  A lone quote yields 0 without complaint.
+ *
+ * XXX This is just a placeholder for a later version which will do
+ *     mbtowc() on p+1, as POSIX requires.  What follows is correct
+ *     only if we assume that LC_CTYPE=C (or something else similar
+ *     that is a single byte charset).
+ */
+static intmax_t
+wide_char(const char *p)
+{
+       intmax_t ch = (intmax_t)(unsigned char)p[1];
+
+       if (ch != 0 && p[2] != '\0') {
+               warnx("%s: not completely converted", p);
+               rval = 1;
+       }
+
+       return ch;
+}
+
 static void
 check_conversion(const char *s, const char *ep)
 {



Home | Main Index | Thread Index | Old Index