Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src More fixes from J.R. Oldroyd. We introduce a new function th...



details:   https://anonhg.NetBSD.org/src/rev/c473031e2079
branches:  trunk
changeset: 785066:c473031e2079
user:      christos <christos%NetBSD.org@localhost>
date:      Wed Feb 20 17:01:15 2013 +0000

description:
More fixes from J.R. Oldroyd. We introduce a new function that takes
and returns a flag indicating whether a multibyte conversion error has
occurred, so that a caller processing its input one character at a
time can carry that state from one call to the next. Also some more
code cleanups in the "extra" processing.

diffstat:

 include/vis.h      |    5 +-
 lib/libc/gen/vis.3 |   55 ++++++--
 lib/libc/gen/vis.c |  320 ++++++++++++++++++++++++++++++++++------------------
 3 files changed, 252 insertions(+), 128 deletions(-)

diffs (truncated from 765 to 300 lines):

diff -r 062755f7309d -r c473031e2079 include/vis.h
--- a/include/vis.h     Wed Feb 20 16:41:48 2013 +0000
+++ b/include/vis.h     Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vis.h,v 1.20 2012/12/14 21:36:59 christos Exp $        */
+/*     $NetBSD: vis.h,v 1.21 2013/02/20 17:01:15 christos Exp $        */
 
 /*-
  * Copyright (c) 1990, 1993
@@ -95,9 +95,12 @@
 
 int    strvisx(char *, const char *, size_t, int);
 int    strnvisx(char *, size_t, const char *, size_t, int);
+int    strenvisx(char *, size_t, const char *, size_t, int, int *);
 
 int    strsvisx(char *, const char *, size_t, int, const char *);
 int    strsnvisx(char *, size_t, const char *, size_t, int, const char *);
+int    strsenvisx(char *, size_t, const char *, size_t , int, const char *,
+    int *);
 
 int    strunvis(char *, const char *);
 int    strnunvis(char *, size_t, const char *);
diff -r 062755f7309d -r c473031e2079 lib/libc/gen/vis.3
--- a/lib/libc/gen/vis.3        Wed Feb 20 16:41:48 2013 +0000
+++ b/lib/libc/gen/vis.3        Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-.\"    $NetBSD: vis.3,v 1.36 2013/02/13 22:19:48 wiz Exp $
+.\"    $NetBSD: vis.3,v 1.37 2013/02/20 17:01:15 christos Exp $
 .\"
 .\" Copyright (c) 1989, 1991, 1993
 .\"    The Regents of the University of California.  All rights reserved.
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)vis.3      8.1 (Berkeley) 6/9/93
 .\"
-.Dd February 13, 2013
+.Dd February 19, 2013
 .Dt VIS 3
 .Os
 .Sh NAME
@@ -39,12 +39,14 @@
 .Nm strnvis ,
 .Nm strvisx ,
 .Nm strnvisx ,
+.Nm strenvisx ,
 .Nm svis ,
 .Nm snvis ,
 .Nm strsvis ,
 .Nm strsnvis ,
-.Nm strsvisx
-.Nm strsnvisx
+.Nm strsvisx ,
+.Nm strsnvisx ,
+.Nm strsenvisx
 .Nd visually encode characters
 .Sh LIBRARY
 .Lb libc
@@ -62,6 +64,8 @@
 .Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
 .Ft int
 .Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
+.Ft int
+.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
 .Ft char *
 .Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
 .Ft char *
@@ -74,6 +78,8 @@
 .Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
 .Ft int
 .Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
+.Ft int
+.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
 .Sh DESCRIPTION
 The
 .Fn vis
@@ -88,11 +94,11 @@
 The string is null terminated, and a pointer to the end of the string is
 returned.
 The maximum length of any encoding is four
-characters (not including the trailing
+bytes (not including the trailing
 .Dv NUL ) ;
 thus, when
 encoding a set of characters into a buffer, the size of the buffer should
-be four times the number of characters encoded, plus one for the trailing
+be four times the number of bytes encoded, plus one for the trailing
 .Dv NUL .
 The flag parameter is used for altering the default range of
 characters considered for encoding and for altering the visual
@@ -141,7 +147,7 @@
 The size of
 .Fa dst
 must be four times the number
-of characters encoded from
+of bytes encoded from
 .Fa src
 (plus one for the
 .Dv NUL ) .
@@ -167,6 +173,14 @@
 .Va errno
 to
 .Dv ENOSPC .
+The
+.Fn strenvisx
+function takes an additional argument,
+.Fa cerr_ptr ,
+that is used to pass in and out a multibyte conversion error flag.
+This is useful when processing single characters at a time when
+it is possible that the locale may be set to something other
+than the locale of the characters in the input data.
 .Pp
 The functions
 .Fn svis ,
@@ -174,16 +188,18 @@
 .Fn strsvis ,
 .Fn strsnvis ,
 .Fn strsvisx ,
+.Fn strsnvisx ,
 and
-.Fn strsnvisx
+.Fn strsenvisx
 correspond to
 .Fn vis ,
 .Fn nvis ,
 .Fn strvis ,
 .Fn strnvis ,
 .Fn strvisx ,
+.Fn strnvisx ,
 and
-.Fn strnvisx
+.Fn strenvisx
 but have an additional argument
 .Fa extra ,
 pointing to a
@@ -407,15 +423,27 @@
 If the locales of the data and the conversion are mismatched,
 multibyte character recognition may fail and encoding will be performed
 byte-by-byte instead.
-The result of encoding using one of these functions followed by
-decoding using the corresponding
-.Xr unvis 3
-function is unlikely to return the same input data in this case.
+.Pp
+As noted above,
+.Fa dst
+must be four times the number of bytes processed from
+.Fa src .
+But note that each multibyte character can be up to
+.Dv MB_LEN_MAX
+bytes (see
+.Xr multibyte 3 )
+so in terms of multibyte characters,
+.Fa dst
+must be four times
+.Dv MB_LEN_MAX
+times the number of characters processed from
+.Fa src .
 .Sh ENVIRONMENT
 .Bl -tag -width ".Ev LC_CTYPE"
 .It Ev LC_CTYPE
 Specify the locale of the input data.
 Set to C if the input data locale is unknown.
+.El
 .Sh ERRORS
 The functions
 .Fn nvis
@@ -442,6 +470,7 @@
 .Sh SEE ALSO
 .Xr unvis 1 ,
 .Xr vis 1 ,
+.Xr multibyte 3 ,
 .Xr glob 3 ,
 .Xr unvis 3
 .Rs
diff -r 062755f7309d -r c473031e2079 lib/libc/gen/vis.c
--- a/lib/libc/gen/vis.c        Wed Feb 20 16:41:48 2013 +0000
+++ b/lib/libc/gen/vis.c        Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $        */
+/*     $NetBSD: vis.c,v 1.54 2013/02/20 17:01:15 christos Exp $        */
 
 /*-
  * Copyright (c) 1989, 1993
@@ -57,7 +57,7 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.54 2013/02/20 17:01:15 christos Exp $");
 #endif /* LIBC_SCCS and not lint */
 #ifdef __FBSDID
 __FBSDID("$FreeBSD$");
@@ -103,38 +103,13 @@
 #define xtoa(c)                L"0123456789abcdef"[c]
 #define XTOA(c)                L"0123456789ABCDEF"[c]
 
-#define MAXEXTRAS      9
-
-#define MAKEEXTRALIST(flag, extra, orig_str)                                 \
-do {                                                                         \
-       const wchar_t *orig = orig_str;                                       \
-       const wchar_t *o = orig;                                              \
-       wchar_t *e;                                                           \
-       while (*o++)                                                          \
-               continue;                                                     \
-       extra = calloc((size_t)((o - orig) + MAXEXTRAS), sizeof(*extra));    \
-       if (!extra) break;                                                    \
-       for (o = orig, e = extra; (*e++ = *o++) != L'\0';)                    \
-               continue;                                                     \
-       e--;                                                                  \
-       if (flag & VIS_GLOB) {                                                \
-               *e++ = L'*';                                                  \
-               *e++ = L'?';                                                  \
-               *e++ = L'[';                                                  \
-               *e++ = L'#';                                                  \
-       }                                                                     \
-       if (flag & VIS_SP) *e++ = L' ';                                       \
-       if (flag & VIS_TAB) *e++ = L'\t';                                     \
-       if (flag & VIS_NL) *e++ = L'\n';                                      \
-       if ((flag & VIS_NOSLASH) == 0) *e++ = L'\\';                          \
-       *e = L'\0';                                                           \
-} while (/*CONSTCOND*/0)
+#define MAXEXTRAS      10
 
 /*
  * This is do_hvis, for HTTP style (RFC 1808)
  */
 static wchar_t *
-do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
        if (iswalnum(c)
            /* safe */
@@ -142,7 +117,7 @@
            /* extra */
            || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
            || c == L',')
-               dst = do_svis(dst, c, flag, nextc, extra);
+               dst = do_svis(dst, c, flags, nextc, extra);
        else {
                *dst++ = L'%';
                *dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
@@ -157,7 +132,7 @@
  * NB: No handling of long lines or CRLF.
  */
 static wchar_t *
-do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
        if ((c != L'\n') &&
            /* Space at the end of the line */
@@ -170,31 +145,17 @@
                *dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
                *dst++ = XTOA((unsigned int)c & 0xf);
        } else
-               dst = do_svis(dst, c, flag, nextc, extra);
+               dst = do_svis(dst, c, flags, nextc, extra);
        return dst;
 }
 
 /*
- * This is do_vis, the central code of vis.
- * dst:              Pointer to the destination buffer
- * c:        Character to encode
- * flag:      Flag word
- * nextc:     The character following 'c'
- * extra:     Pointer to the list of extra characters to be
- *           backslash-protected.
+ * Output single byte of multibyte character.
  */
 static wchar_t *
-do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
 {
-       int iswextra;
-
-       iswextra = wcschr(extra, c) != NULL;
-       if (!iswextra && (iswgraph(c) || iswwhite(c) ||
-           ((flag & VIS_SAFE) && iswsafe(c)))) {
-               *dst++ = c;
-               return dst;
-       }
-       if (flag & VIS_CSTYLE) {
+       if (flags & VIS_CSTYLE) {
                switch (c) {
                case L'\n':
                        *dst++ = L'\\'; *dst++ = L'n';
@@ -235,20 +196,20 @@
                        }
                }
        }
-       if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
+       if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
                *dst++ = L'\\';
                *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
                *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
                *dst++ =                             (c       & 07) + L'0';
        } else {
-               if ((flag & VIS_NOSLASH) == 0)
+               if ((flags & VIS_NOSLASH) == 0)



Home | Main Index | Thread Index | Old Index