Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src More fixes from J.R. Oldroyd. We introduce a new function th...
details: https://anonhg.NetBSD.org/src/rev/c473031e2079
branches: trunk
changeset: 785066:c473031e2079
user: christos <christos%NetBSD.org@localhost>
date: Wed Feb 20 17:01:15 2013 +0000
description:
More fixes from J.R. Oldroyd. We introduce a new function that can
retrieve and return whether there has been a conversion error, so that
it can keep performing char-by-char processing if a multi-byte
conversion error occurred. Also some more code cleanups in the "extra"
processing.
diffstat:
include/vis.h | 5 +-
lib/libc/gen/vis.3 | 55 ++++++--
lib/libc/gen/vis.c | 320 ++++++++++++++++++++++++++++++++++------------------
3 files changed, 252 insertions(+), 128 deletions(-)
diffs (truncated from 765 to 300 lines):
diff -r 062755f7309d -r c473031e2079 include/vis.h
--- a/include/vis.h Wed Feb 20 16:41:48 2013 +0000
+++ b/include/vis.h Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vis.h,v 1.20 2012/12/14 21:36:59 christos Exp $ */
+/* $NetBSD: vis.h,v 1.21 2013/02/20 17:01:15 christos Exp $ */
/*-
* Copyright (c) 1990, 1993
@@ -95,9 +95,12 @@
int strvisx(char *, const char *, size_t, int);
int strnvisx(char *, size_t, const char *, size_t, int);
+int strenvisx(char *, size_t, const char *, size_t, int, int *);
int strsvisx(char *, const char *, size_t, int, const char *);
int strsnvisx(char *, size_t, const char *, size_t, int, const char *);
+int strsenvisx(char *, size_t, const char *, size_t , int, const char *,
+ int *);
int strunvis(char *, const char *);
int strnunvis(char *, size_t, const char *);
diff -r 062755f7309d -r c473031e2079 lib/libc/gen/vis.3
--- a/lib/libc/gen/vis.3 Wed Feb 20 16:41:48 2013 +0000
+++ b/lib/libc/gen/vis.3 Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-.\" $NetBSD: vis.3,v 1.36 2013/02/13 22:19:48 wiz Exp $
+.\" $NetBSD: vis.3,v 1.37 2013/02/20 17:01:15 christos Exp $
.\"
.\" Copyright (c) 1989, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@@ -29,7 +29,7 @@
.\"
.\" @(#)vis.3 8.1 (Berkeley) 6/9/93
.\"
-.Dd February 13, 2013
+.Dd February 19, 2013
.Dt VIS 3
.Os
.Sh NAME
@@ -39,12 +39,14 @@
.Nm strnvis ,
.Nm strvisx ,
.Nm strnvisx ,
+.Nm strenvisx ,
.Nm svis ,
.Nm snvis ,
.Nm strsvis ,
.Nm strsnvis ,
-.Nm strsvisx
-.Nm strsnvisx
+.Nm strsvisx ,
+.Nm strsnvisx ,
+.Nm strsenvisx
.Nd visually encode characters
.Sh LIBRARY
.Lb libc
@@ -62,6 +64,8 @@
.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
.Ft int
.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
+.Ft int
+.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
.Ft char *
.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
.Ft char *
@@ -74,6 +78,8 @@
.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
.Ft int
.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
+.Ft int
+.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
.Sh DESCRIPTION
The
.Fn vis
@@ -88,11 +94,11 @@
The string is null terminated, and a pointer to the end of the string is
returned.
The maximum length of any encoding is four
-characters (not including the trailing
+bytes (not including the trailing
.Dv NUL ) ;
thus, when
encoding a set of characters into a buffer, the size of the buffer should
-be four times the number of characters encoded, plus one for the trailing
+be four times the number of bytes encoded, plus one for the trailing
.Dv NUL .
The flag parameter is used for altering the default range of
characters considered for encoding and for altering the visual
@@ -141,7 +147,7 @@
The size of
.Fa dst
must be four times the number
-of characters encoded from
+of bytes encoded from
.Fa src
(plus one for the
.Dv NUL ) .
@@ -167,6 +173,14 @@
.Va errno
to
.Dv ENOSPC .
+The
+.Fn strenvisx
+function takes an additional argument,
+.Fa cerr_ptr ,
+that is used to pass in and out a multibyte conversion error flag.
+This is useful when processing single characters at a time when
+it is possible that the locale may be set to something other
+than the locale of the characters in the input data.
.Pp
The functions
.Fn svis ,
@@ -174,16 +188,18 @@
.Fn strsvis ,
.Fn strsnvis ,
.Fn strsvisx ,
+.Fn strsnvisx ,
and
-.Fn strsnvisx
+.Fn strsenvisx
correspond to
.Fn vis ,
.Fn nvis ,
.Fn strvis ,
.Fn strnvis ,
.Fn strvisx ,
+.Fn strnvisx ,
and
-.Fn strnvisx
+.Fn strenvisx
but have an additional argument
.Fa extra ,
pointing to a
@@ -407,15 +423,27 @@
If the locales of the data and the conversion are mismatched,
multibyte character recognition may fail and encoding will be performed
byte-by-byte instead.
-The result of encoding using one of these functions followed by
-decoding using the corresponding
-.Xr unvis 3
-function is unlikely to return the same input data in this case.
+.Pp
+As noted above,
+.Fa dst
+must be four times the number of bytes processed from
+.Fa src .
+But note that each multibyte character can be up to
+.Dv MB_LEN_MAX
+bytes (see
+.Xr multibyte 3 )
+so in terms of multibyte characters,
+.Fa dst
+must be four times
+.Dv MB_LEN_MAX
+times the number of characters processed from
+.Fa src .
.Sh ENVIRONMENT
.Bl -tag -width ".Ev LC_CTYPE"
.It Ev LC_CTYPE
Specify the locale of the input data.
Set to C if the input data locale is unknown.
+.El
.Sh ERRORS
The functions
.Fn nvis
@@ -442,6 +470,7 @@
.Sh SEE ALSO
.Xr unvis 1 ,
.Xr vis 1 ,
+.Xr multibyte 3 ,
.Xr glob 3 ,
.Xr unvis 3
.Rs
diff -r 062755f7309d -r c473031e2079 lib/libc/gen/vis.c
--- a/lib/libc/gen/vis.c Wed Feb 20 16:41:48 2013 +0000
+++ b/lib/libc/gen/vis.c Wed Feb 20 17:01:15 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $ */
+/* $NetBSD: vis.c,v 1.54 2013/02/20 17:01:15 christos Exp $ */
/*-
* Copyright (c) 1989, 1993
@@ -57,7 +57,7 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.54 2013/02/20 17:01:15 christos Exp $");
#endif /* LIBC_SCCS and not lint */
#ifdef __FBSDID
__FBSDID("$FreeBSD$");
@@ -103,38 +103,13 @@
#define xtoa(c) L"0123456789abcdef"[c]
#define XTOA(c) L"0123456789ABCDEF"[c]
-#define MAXEXTRAS 9
-
-#define MAKEEXTRALIST(flag, extra, orig_str) \
-do { \
- const wchar_t *orig = orig_str; \
- const wchar_t *o = orig; \
- wchar_t *e; \
- while (*o++) \
- continue; \
- extra = calloc((size_t)((o - orig) + MAXEXTRAS), sizeof(*extra)); \
- if (!extra) break; \
- for (o = orig, e = extra; (*e++ = *o++) != L'\0';) \
- continue; \
- e--; \
- if (flag & VIS_GLOB) { \
- *e++ = L'*'; \
- *e++ = L'?'; \
- *e++ = L'['; \
- *e++ = L'#'; \
- } \
- if (flag & VIS_SP) *e++ = L' '; \
- if (flag & VIS_TAB) *e++ = L'\t'; \
- if (flag & VIS_NL) *e++ = L'\n'; \
- if ((flag & VIS_NOSLASH) == 0) *e++ = L'\\'; \
- *e = L'\0'; \
-} while (/*CONSTCOND*/0)
+#define MAXEXTRAS 10
/*
* This is do_hvis, for HTTP style (RFC 1808)
*/
static wchar_t *
-do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
if (iswalnum(c)
/* safe */
@@ -142,7 +117,7 @@
/* extra */
|| c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
|| c == L',')
- dst = do_svis(dst, c, flag, nextc, extra);
+ dst = do_svis(dst, c, flags, nextc, extra);
else {
*dst++ = L'%';
*dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
@@ -157,7 +132,7 @@
* NB: No handling of long lines or CRLF.
*/
static wchar_t *
-do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
if ((c != L'\n') &&
/* Space at the end of the line */
@@ -170,31 +145,17 @@
*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
*dst++ = XTOA((unsigned int)c & 0xf);
} else
- dst = do_svis(dst, c, flag, nextc, extra);
+ dst = do_svis(dst, c, flags, nextc, extra);
return dst;
}
/*
- * This is do_vis, the central code of vis.
- * dst: Pointer to the destination buffer
- * c: Character to encode
- * flag: Flag word
- * nextc: The character following 'c'
- * extra: Pointer to the list of extra characters to be
- * backslash-protected.
+ * Output single byte of multibyte character.
*/
static wchar_t *
-do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
+do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
{
- int iswextra;
-
- iswextra = wcschr(extra, c) != NULL;
- if (!iswextra && (iswgraph(c) || iswwhite(c) ||
- ((flag & VIS_SAFE) && iswsafe(c)))) {
- *dst++ = c;
- return dst;
- }
- if (flag & VIS_CSTYLE) {
+ if (flags & VIS_CSTYLE) {
switch (c) {
case L'\n':
*dst++ = L'\\'; *dst++ = L'n';
@@ -235,20 +196,20 @@
}
}
}
- if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
+ if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
*dst++ = L'\\';
*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
*dst++ = (c & 07) + L'0';
} else {
- if ((flag & VIS_NOSLASH) == 0)
+ if ((flags & VIS_NOSLASH) == 0)
Home |
Main Index |
Thread Index |
Old Index