Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/grep Replace usr.bin/grep with the BSD grep implementation from FreeBSD.



details:   https://anonhg.NetBSD.org/src/rev/da6240cbf8d7
branches:  trunk
changeset: 762111:da6240cbf8d7
user:      joerg <joerg%NetBSD.org@localhost>
date:      Wed Feb 16 01:31:33 2011 +0000

description:
Replace usr.bin/grep with the BSD grep implementation from FreeBSD.

diffstat:

 usr.bin/grep/Makefile                |   27 +-
 usr.bin/grep/TODO                    |   33 -
 usr.bin/grep/binary.c                |   99 ---
 usr.bin/grep/fastgrep.c              |  333 +++++++++++++
 usr.bin/grep/file.c                  |  374 +++++++------
 usr.bin/grep/grep.1                  |  552 ++++++++++++++-------
 usr.bin/grep/grep.c                  |  889 ++++++++++++++++++++--------------
 usr.bin/grep/grep.h                  |  190 ++++---
 usr.bin/grep/mmfile.c                |  110 ----
 usr.bin/grep/nls/C.msg               |   13 +
 usr.bin/grep/nls/es_ES.ISO8859-1.msg |   13 +
 usr.bin/grep/nls/gl_ES.ISO8859-1.msg |   13 +
 usr.bin/grep/nls/hu_HU.ISO8859-2.msg |   13 +
 usr.bin/grep/nls/ja_JP.SJIS.msg      |   13 +
 usr.bin/grep/nls/ja_JP.UTF-8.msg     |   13 +
 usr.bin/grep/nls/ja_JP.eucJP.msg     |   13 +
 usr.bin/grep/nls/pt_BR.ISO8859-1.msg |   13 +
 usr.bin/grep/nls/ru_RU.KOI8-R.msg    |   13 +
 usr.bin/grep/nls/uk_UA.UTF-8.msg     |   12 +
 usr.bin/grep/nls/zh_CN.UTF-8.msg     |   13 +
 usr.bin/grep/queue.c                 |   92 +--
 usr.bin/grep/util.c                  |  505 ++++++++++++-------
 22 files changed, 2040 insertions(+), 1306 deletions(-)

diffs (truncated from 4019 to 300 lines):

diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/Makefile
--- a/usr.bin/grep/Makefile     Tue Feb 15 23:17:02 2011 +0000
+++ b/usr.bin/grep/Makefile     Wed Feb 16 01:31:33 2011 +0000
@@ -1,20 +1,37 @@
-# $NetBSD: Makefile,v 1.3 2009/04/14 22:15:20 lukem Exp $
+#      $NetBSD: Makefile,v 1.4 2011/02/16 01:31:33 joerg Exp $
+#      $FreeBSD: head/usr.bin/grep/Makefile 210389 2010-07-22 19:11:57Z gabor $
+#      $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
 
 PROG=  grep
-SRCS=  binary.c file.c grep.c mmfile.c queue.c util.c
+SRCS=  fastgrep.c file.c grep.c queue.c util.c
 
-LINKS=  ${BINDIR}/grep ${BINDIR}/egrep \
+LINKS= ${BINDIR}/grep ${BINDIR}/egrep  \
        ${BINDIR}/grep ${BINDIR}/fgrep  \
        ${BINDIR}/grep ${BINDIR}/zgrep  \
        ${BINDIR}/grep ${BINDIR}/zegrep \
        ${BINDIR}/grep ${BINDIR}/zfgrep
 
-MLINKS= grep.1 egrep.1 \
+MLINKS=        grep.1 egrep.1  \
        grep.1 fgrep.1  \
        grep.1 zgrep.1  \
        grep.1 zegrep.1 \
        grep.1 zfgrep.1
 
-LDADD=  -lz
+LDADD= -lz -lbz2
+DPADD= ${LIBZ} ${LIBBZ2}
+
+.PATH: ${.CURDIR}/nls
+
+NLS=   C.msg \
+       es_ES.ISO8859-1.msg \
+       gl_ES.ISO8859-1.msg \
+       hu_HU.ISO8859-2.msg \
+       ja_JP.eucJP.msg \
+       ja_JP.SJIS.msg \
+       ja_JP.UTF-8.msg \
+       pt_BR.ISO8859-1.msg \
+       ru_RU.KOI8-R.msg \
+       uk_UA.UTF-8.msg \
+       zh_CN.UTF-8.msg
 
 .include <bsd.prog.mk>
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/TODO
--- a/usr.bin/grep/TODO Tue Feb 15 23:17:02 2011 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-
-$NetBSD: TODO,v 1.4 2006/04/08 23:56:39 wiz Exp $
-
-Hopefully this program can become a full drop-in replacement for 
-GNU grep. If you want to help out, please let me (cjep@) know so that 
-we can organise our efforts efficiently.
-
-1. Add functionality and maybe change flags to match GNU grep. 
-  * --include,--exclude
-       possibly use code from pax for this.
-
-2. Binary file detection needs to be better (as currently this grep thinks
-its own source code is binary...). This implementation looks at the 
-first few bytes to determine whether a file is binary. GNU grep seems 
-to search for a byte worth 0 or 128 (depending on -z). 
-
-(3. Merge in improvements from OpenBSD. Mostly done. Main improvement left
-    is the speed up for simple regex's.)
-
-(4. Make code style more consistent with the NetBSD source tree. Have done
-    a few fixes. Could probably do with more.)
-
-5. Maybe revisit symbolic link handling and -S, -P.
-
-6. Sort out any performance issues, e.g. 
-    i) this is slower than GNU grep;
-   ii) we probably stat wastefully.
-
-7. Fix the manual page.
-
-8. Possible look at regex libc speedups from FreeBSD.
-
-9. The -v option is currently broken (i.e. does not invert the logic).
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/binary.c
--- a/usr.bin/grep/binary.c     Tue Feb 15 23:17:02 2011 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-/*     $NetBSD: binary.c,v 1.3 2005/04/22 21:02:42 christos Exp $      */
-
-/*-
- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#include <sys/cdefs.h>
-#ifndef lint
-__RCSID("$NetBSD: binary.c,v 1.3 2005/04/22 21:02:42 christos Exp $");
-#endif /* not lint */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <zlib.h>
-
-#include "grep.h"
-
-#define BUFFER_SIZE 128
-
-static inline int
-okchar(unsigned char c)
-{
-       return isprint(c) || isspace(c) || c == line_endchar;
-}
-
-int
-bin_file(FILE *f)
-{
-       unsigned char buf[BUFFER_SIZE];
-       size_t i, m;
-
-       if (fseek(f, 0L, SEEK_SET) == -1)
-               return 0;
-
-       if ((m = fread(buf, 1, BUFFER_SIZE, f)) == 0)
-               return 0;
-
-       for (i = 0; i < m; i++)
-               if (!okchar(buf[i]))
-                       return 1; 
-       
-       rewind(f);
-       return 0;
-}
-
-int
-gzbin_file(gzFile *f)
-{
-       unsigned char buf[BUFFER_SIZE];
-       int i, m;
-
-       if (gzseek(f, 0L, SEEK_SET) == -1)
-               return 0;
-
-       if ((m = gzread(f, buf, BUFFER_SIZE)) <= 0)
-               return 0;
-
-       for (i = 0; i < m; i++)
-               if (!okchar(buf[i]))
-                       return 1;
-
-       gzrewind(f);
-       return 0;
-}
-
-int
-mmbin_file(mmf_t *f)
-{
-       size_t i;
-       /* XXX knows too much about mmf internals */
-       for (i = 0; i < BUFFER_SIZE && i < f->len; i++)
-               if (!okchar(f->base[i]))
-                       return 1;
-       mmrewind(f);
-       return 0;
-}
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/fastgrep.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.bin/grep/fastgrep.c   Wed Feb 16 01:31:33 2011 +0000
@@ -0,0 +1,333 @@
+/*     $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $  */
+/*     $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */
+
+/*-
+ * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
+ * Copyright (C) 2008 Gabor Kovesdan <gabor%FreeBSD.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * XXX: This file is a speed up for grep to cover the defects of the
+ * regex library.  These optimizations should practically be implemented
+ * there keeping this code clean.  This is a future TODO, but for the
+ * meantime, we need to use this workaround.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: fastgrep.c,v 1.1 2011/02/16 01:31:33 joerg Exp $");
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "grep.h"
+
+static inline int      grep_cmp(const unsigned char *, const unsigned char *, size_t);
+static inline void     grep_revstr(unsigned char *, int);
+
+void
+fgrepcomp(fastgrep_t *fg, const char *pat)
+{
+       unsigned int i;
+
+       /* Initialize. */
+       fg->len = strlen(pat);
+       fg->bol = false;
+       fg->eol = false;
+       fg->reversed = false;
+
+       fg->pattern = grep_malloc(strlen(pat) + 1);
+       strcpy(fg->pattern, pat);
+
+       /* Preprocess pattern. */
+       for (i = 0; i <= UCHAR_MAX; i++)
+               fg->qsBc[i] = fg->len;
+       for (i = 1; i < fg->len; i++)
+               fg->qsBc[fg->pattern[i]] = fg->len - i;
+}
+
+/*
+ * Returns: -1 on failure, 0 on success
+ */
+int
+fastcomp(fastgrep_t *fg, const char *pat)
+{
+       unsigned int i;
+       int firstHalfDot = -1;
+       int firstLastHalfDot = -1;
+       int hasDot = 0;
+       int lastHalfDot = 0;
+       int shiftPatternLen;
+       bool bol = false;
+       bool eol = false;
+
+       /* Initialize. */
+       fg->len = strlen(pat);
+       fg->bol = false;
+       fg->eol = false;
+       fg->reversed = false;
+
+       /* Remove end-of-line character ('$'). */
+       if (fg->len > 0 && pat[fg->len - 1] == '$') {
+               eol = true;
+               fg->eol = true;
+               fg->len--;
+       }
+
+       /* Remove beginning-of-line character ('^'). */
+       if (pat[0] == '^') {
+               bol = true;
+               fg->bol = true;
+               fg->len--;
+       }
+
+       if (fg->len >= 14 &&
+           strncmp(pat + (fg->bol ? 1 : 0), "[[:<:]]", 7) == 0 &&



Home | Main Index | Thread Index | Old Index