Subject: bin/34244: '/usr/bin/cut -c' is not utf-8 ready
To: None <gnats-admin@netbsd.org, netbsd-bugs@netbsd.org>
From: Aleksey Cheusov <cheusov@tut.by>
List: netbsd-bugs
Date: 08/20/2006 17:40:01
>Number:         34244
>Category:       bin
>Synopsis:       '/usr/bin/cut -c' is not utf-8 ready
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    bin-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sun Aug 20 17:40:00 +0000 2006
>Originator:     Aleksey Cheusov <cheusov@tut.by>
>Release:        NetBSD 3.0_STABLE
>Organization:
Best regards, Aleksey Cheusov.
>Environment:
System: NetBSD chen.chizhovka.net 3.0_STABLE NetBSD 3.0_STABLE (GENERIC) #2: Sun Mar 12 12:49:58 GMT 2006 cheusov@chen:/usr/src/sys/arch/i386/compile/GENERIC i386
Architecture: i386
Machine: i386
>Description:
'cut -c' currently behaves exactly like 'cut -b': it selects bytes rather
than characters, and therefore does not handle multibyte character
encodings (including UTF-8) correctly.  A patch follows.

>Fix:

--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=cut-c.utf8.patch
Content-Description: patch for cut -c

Index: cut.c
===================================================================
RCS file: /cvsroot/src/usr.bin/cut/cut.c,v
retrieving revision 1.21
diff -u -r1.21 cut.c
--- cut.c	29 Jul 2006 02:01:24 -0000	1.21
+++ cut.c	20 Aug 2006 16:32:32 -0000
@@ -54,13 +54,16 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 
+int bflag;
 int	cflag;
 char	dchar;
 int	dflag;
 int	fflag;
 int	sflag;
 
+void	b_cut(FILE *, const char *);
 void	c_cut(FILE *, const char *);
 void	f_cut(FILE *, const char *);
 void	get_list(char *);
@@ -83,6 +86,10 @@
 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
 		switch(ch) {
 		case 'b':
+			fcn = b_cut;
+			get_list(optarg);
+			bflag = 1;
+			break;
 		case 'c':
 			fcn = c_cut;
 			get_list(optarg);
@@ -110,9 +117,11 @@
 	argv += optind;
 
 	if (fflag) {
-		if (cflag)
+		if (cflag || bflag)
 			usage();
-	} else if (!cflag || dflag || sflag)
+	} else if ((!cflag && !bflag) || dflag || sflag)
+		usage();
+	else if (bflag && cflag)
 		usage();
 
 	if (*argv)
@@ -193,7 +202,7 @@
 
 /* ARGSUSED */
 void
-c_cut(FILE *fp, const char *fname)
+b_cut(FILE *fp, const char *fname)
 {
 	int ch, col;
 	char *pos;
@@ -221,6 +230,35 @@
 }
 
 void
+c_cut(FILE *fp, const char *fname)	/* -c: cut fp by wide character rather than by byte; fname is not referenced in this body */
+{
+	wint_t ch;	/* wint_t so that the WEOF result of getwc() is representable */
+	int col;
+	char *pos;
+
+	ch = 0;	/* gives the post-loop '\n' test a defined value if the inner loop never runs */
+	for (;;) {	/* one iteration per input line */
+		pos = positions + 1;	/* restart at positions[1] for every line; presumably the table is indexed by 1-based column -- confirm against get_list() */
+		for (col = maxval; col; --col) {	/* examine at most maxval characters of this line */
+			if ((ch = getwc(fp)) == WEOF)	/* WEOF: end of input, or a read/conversion error */
+				return;
+			if (ch == '\n')
+				break;
+			if (*pos++)	/* nonzero table entry: this character is written out */
+				(void)putwc(ch, stdout);
+		}
+		if (ch != '\n') {	/* inner loop ended without reading a newline: the rest of the line is still unread */
+			if (autostop)	/* autostop set: copy the remainder of the line through (presumably an open-ended "N-" range -- confirm) */
+				while ((ch = getwc(fp)) != WEOF && ch != '\n')
+					(void)putwc(ch, stdout);
+			else	/* otherwise read and discard the remainder of the line */
+				while ((ch = getwc(fp)) != WEOF && ch != '\n');
+		}
+		(void)putwc('\n', stdout);	/* terminate the output line */
+	}
+}
+
+void
 f_cut(FILE *fp, const char *fname)
 {
 	int ch, field, isdelim;

--=-=-=--

>Unformatted:
 --=-=-=