Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/less/less Nuke unused V8 regexp.



details:   https://anonhg.NetBSD.org/src/rev/e8742d117bad
branches:  trunk
changeset: 475960:e8742d117bad
user:      kleink <kleink%NetBSD.org@localhost>
date:      Tue Aug 31 21:05:07 1999 +0000

description:
Nuke unused V8 regexp.

diffstat:

 usr.bin/less/less/regexp.c |  1237 --------------------------------------------
 usr.bin/less/less/regexp.h |    35 -
 2 files changed, 0 insertions(+), 1272 deletions(-)

diffs (truncated from 1280 to 300 lines):

diff -r 04d341739413 -r e8742d117bad usr.bin/less/less/regexp.c
--- a/usr.bin/less/less/regexp.c        Tue Aug 31 13:58:58 1999 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1237 +0,0 @@
-/*     $NetBSD: regexp.c,v 1.1.1.4 1999/04/06 05:30:37 mrg Exp $       */
-
-/*
- * regcomp and regexec -- regsub and regerror are elsewhere
- *
- *     Copyright (c) 1986 by University of Toronto.
- *     Written by Henry Spencer.  Not derived from licensed software.
- *
- *     Permission is granted to anyone to use this software for any
- *     purpose on any computer system, and to redistribute it freely,
- *     subject to the following restrictions:
- *
- *     1. The author is not responsible for the consequences of use of
- *             this software, no matter how awful, even if they arise
- *             from defects in it.
- *
- *     2. The origin of this software must not be misrepresented, either
- *             by explicit claim or by omission.
- *
- *     3. Altered versions must be plainly marked as such, and must not
- *             be misrepresented as being the original software.
- *
- * Beware that some of this code is subtly aware of the way operator
- * precedence is structured in regular expressions.  Serious changes in
- * regular-expression syntax might require a total rethink.
- *
- * *** NOTE: this code has been altered slightly for use in Tcl. ***
- * Slightly modified by David MacKenzie to undo most of the changes for TCL.
- */
-
-#include "less.h"
-#if HAVE_STDIO_H
-#include <stdio.h>
-#endif
-#if STDC_HEADERS
-#include <stdlib.h>
-#include <string.h>
-#endif
-#include "regexp.h"
-
-/*
- * The "internal use only" fields in regexp.h are present to pass info from
- * compile to execute that permits the execute phase to run lots faster on
- * simple cases.  They are:
- *
- * regstart    char that must begin a match; '\0' if none obvious
- * reganch     is the match anchored (at beginning-of-line only)?
- * regmust     string (pointer into program) that match must include, or NULL
- * regmlen     length of regmust string
- *
- * Regstart and reganch permit very fast decisions on suitable starting points
- * for a match, cutting down the work a lot.  Regmust permits fast rejection
- * of lines that cannot possibly match.  The regmust tests are costly enough
- * that regcomp() supplies a regmust only if the r.e. contains something
- * potentially expensive (at present, the only such thing detected is * or +
- * at the start of the r.e., which can involve a lot of backup).  Regmlen is
- * supplied because the test in regexec() needs it and regcomp() is
- * computing it anyway.
- */
-
-/*
- * Structure for regexp "program".  This is essentially a linear encoding
- * of a nondeterministic finite-state machine (aka syntax charts or
- * "railroad normal form" in parsing technology).  Each node is an opcode
- * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
- * all nodes except BRANCH implement concatenation; a "next" pointer with
- * a BRANCH on both ends of it is connecting two alternatives.  (Here we
- * have one of the subtle syntax dependencies:  an individual BRANCH (as
- * opposed to a collection of them) is never concatenated with anything
- * because of operator precedence.)  The operand of some types of node is
- * a literal string; for others, it is a node leading into a sub-FSM.  In
- * particular, the operand of a BRANCH node is the first node of the branch.
- * (NB this is *not* a tree structure:  the tail of the branch connects
- * to the thing following the set of BRANCHes.)  The opcodes are:
- */
-
-/* definition  number  opnd?   meaning */
-#undef EOL
-#define        END     0       /* no   End of program. */
-#define        BOL     1       /* no   Match "" at beginning of line. */
-#define        EOL     2       /* no   Match "" at end of line. */
-#define        ANY     3       /* no   Match any one character. */
-#define        ANYOF   4       /* str  Match any character in this string. */
-#define        ANYBUT  5       /* str  Match any character not in this string. */
-#define        BRANCH  6       /* node Match this alternative, or the next... */
-#define        BACK    7       /* no   Match "", "next" ptr points backward. */
-#define        EXACTLY 8       /* str  Match this string. */
-#define        NOTHING 9       /* no   Match empty string. */
-#define        STAR    10      /* node Match this (simple) thing 0 or more times. */
-#define        PLUS    11      /* node Match this (simple) thing 1 or more times. */
-#define        OPEN    20      /* no   Mark this point in input as start of #n. */
-                       /*      OPEN+1 is number 1, etc. */
-#define        CLOSE   30      /* no   Analogous to OPEN. */
-
-/*
- * Opcode notes:
- *
- * BRANCH      The set of branches constituting a single choice are hooked
- *             together with their "next" pointers, since precedence prevents
- *             anything being concatenated to any individual branch.  The
- *             "next" pointer of the last BRANCH in a choice points to the
- *             thing following the whole choice.  This is also where the
- *             final "next" pointer of each individual branch points; each
- *             branch starts with the operand node of a BRANCH node.
- *
- * BACK                Normal "next" pointers all implicitly point forward; BACK
- *             exists to make loop structures possible.
- *
- * STAR,PLUS   '?', and complex '*' and '+', are implemented as circular
- *             BRANCH structures using BACK.  Simple cases (one character
- *             per match) are implemented with STAR and PLUS for speed
- *             and to minimize recursive plunges.
- *
- * OPEN,CLOSE  ...are numbered at compile time.
- */
-
-/*
- * A node is one char of opcode followed by two chars of "next" pointer.
- * "Next" pointers are stored as two 8-bit pieces, high order first.  The
- * value is a positive offset from the opcode of the node containing it.
- * An operand, if any, simply follows the node.  (Note that much of the
- * code generation knows about this implicit relationship.)
- *
- * Using two bytes for the "next" pointer is vast overkill for most things,
- * but allows patterns to get big without disasters.
- */
-#define        OP(p)   (*(p))
-#define        NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
-#define        OPERAND(p)      ((p) + 3)
-
-/*
- * See regmagic.h for one further detail of program structure.
- */
-
-
-/*
- * Utility definitions.
- */
-#ifndef CHARBITS
-#define        UCHARAT(p)      ((int)*(unsigned char *)(p))
-#else
-#define        UCHARAT(p)      ((int)*(p)&CHARBITS)
-#endif
-
-#define        FAIL(m) { regerror(m); return(NULL); }
-#define        ISMULT(c)       ((c) == '*' || (c) == '+' || (c) == '?')
-#define        META    "^$.[()|?+*\\"
-
-/*
- * Flags to be passed up and down.
- */
-#define        HASWIDTH        01      /* Known never to match null string. */
-#define        SIMPLE          02      /* Simple enough to be STAR/PLUS operand. */
-#define        SPSTART         04      /* Starts with * or +. */
-#define        WORST           0       /* Worst case. */
-
-/*
- * Global work variables for regcomp().
- */
-static char *regparse;         /* Input-scan pointer. */
-static int regnpar;            /* () count. */
-static char regdummy;
-static char *regcode;          /* Code-emit pointer; &regdummy = don't. */
-static long regsize;           /* Code size. */
-
-/*
- * The first byte of the regexp internal "program" is actually this magic
- * number; the start node begins in the second byte.
- */
-#define        MAGIC   0234
-
-
-/*
- * Forward declarations for regcomp()'s friends.
- */
-#ifndef STATIC
-#define        STATIC  static
-#endif
-STATIC char *reg();
-STATIC char *regbranch();
-STATIC char *regpiece();
-STATIC char *regatom();
-STATIC char *regnode();
-STATIC char *regnext();
-STATIC void regc();
-STATIC void reginsert();
-STATIC void regtail();
-STATIC void regoptail();
-#ifdef STRCSPN
-STATIC int strcspn();
-#endif
-
-/*
- - regcomp - compile a regular expression into internal code
- *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code.  So we cheat:  we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it.  (Note that it has to be in
- * one piece because free() must be able to free it all.)
- *
- * Beware that the optimization-preparation code in here knows about some
- * of the structure of the compiled regexp.
- */
-regexp *
-regcomp(exp)
-char *exp;
-{
-       register regexp *r;
-       register char *scan;
-       register char *longest;
-       register int len;
-       int flags;
-
-       if (exp == NULL)
-               FAIL("NULL argument");
-
-       /* First pass: determine size, legality. */
-       regparse = exp;
-       regnpar = 1;
-       regsize = 0L;
-       regcode = &regdummy;
-       regc(MAGIC);
-       if (reg(0, &flags) == NULL)
-               return(NULL);
-
-       /* Small enough for pointer-storage convention? */
-       if (regsize >= 32767L)          /* Probably could be 65535L. */
-               FAIL("regexp too big");
-
-       /* Allocate space. */
-       r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
-       if (r == NULL)
-               FAIL("out of space");
-
-       /* Second pass: emit code. */
-       regparse = exp;
-       regnpar = 1;
-       regcode = r->program;
-       regc(MAGIC);
-       if (reg(0, &flags) == NULL)
-               return(NULL);
-
-       /* Dig out information for optimizations. */
-       r->regstart = '\0';     /* Worst-case defaults. */
-       r->reganch = 0;
-       r->regmust = NULL;
-       r->regmlen = 0;
-       scan = r->program+1;                    /* First BRANCH. */
-       if (OP(regnext(scan)) == END) {         /* Only one top-level choice. */
-               scan = OPERAND(scan);
-
-               /* Starting-point info. */
-               if (OP(scan) == EXACTLY)
-                       r->regstart = *OPERAND(scan);
-               else if (OP(scan) == BOL)
-                       r->reganch++;
-
-               /*
-                * If there's something expensive in the r.e., find the
-                * longest literal string that must appear and make it the
-                * regmust.  Resolve ties in favor of later strings, since
-                * the regstart check works with the beginning of the r.e.
-                * and avoiding duplication strengthens checking.  Not a
-                * strong reason, but sufficient in the absence of others.
-                */
-               if (flags&SPSTART) {
-                       longest = NULL;
-                       len = 0;
-                       for (; scan != NULL; scan = regnext(scan))
-                               if (OP(scan) == EXACTLY && ((int) strlen(OPERAND(scan))) >= len) {
-                                       longest = OPERAND(scan);
-                                       len = strlen(OPERAND(scan));
-                               }
-                       r->regmust = longest;
-                       r->regmlen = len;
-               }
-       }
-
-       return(r);
-}
-
-/*
- - reg - regular expression, i.e. main body or parenthesized thing
- *
- * Caller must absorb opening parenthesis.
- *
- * Combining parenthesis handling with the base level of regular expression
- * is a trifle forced, but the need to tie the tails of the branches to what
- * follows makes it hard to avoid.
- */
-static char *
-reg(paren, flagp)



Home | Main Index | Thread Index | Old Index