NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: bin/28126 (sed fails to match empty back-reference)



The following reply was made to PR bin/28126; it has been noted by GNATS.

From: dieter roelants <dieter.NetBSD%pandora.be@localhost>
To: gnats-bugs%NetBSD.org@localhost
Cc: 
Subject: Re: bin/28126 (sed fails to match empty back-reference)
Date: Sat, 17 May 2008 17:00:41 +0200

 I've been looking into this. The change that caused it is:
 
 
http://cvsweb.be.netbsd.org/cgi-bin/cvsweb.cgi/src/lib/libc/regex/engine.c.diff?r2=1.18&r1=1.17&f=H
 "Avoid infinite recursion on:
 
     echo "foo foo bar bar bar baz" | sed 's/\([^ ]*\)\( *\1\)*/\1/g'
 
 From OpenBSD."
 
 Wondering whether OpenBSD people had perhaps detected and fixed the
 problem, I noticed that they did, in response to this very same PR.
 
 The patch, adapted to NetBSD, follows. I have verified with a small C
 program (included after the patch) that it works (and returns the same
 results as PCRE).
 
 I should also note that the foo bar baz sed thing above returns the
 wrong result with the current libc:
 
 echo "foo foo bar bar bar baz" | sed 's/\([^ ]*\)\( *\1\)*/\1/g'
 foo bar baz
 echo "foo foo bar bar bar baz" | LD_PRELOAD=/usr/src/lib/libc/libc.so.12 sed 
's/\([^ ]*\)\( *\1\)*/\1/g'
 foobarbaz
 
 Index: regex/engine.c
 ===================================================================
 RCS file: /cvsroot/src/lib/libc/regex/engine.c,v
 retrieving revision 1.21
 diff -u -r1.21 engine.c
 --- regex/engine.c     8 Feb 2007 05:44:18 -0000       1.21
 +++ regex/engine.c     17 May 2008 14:47:45 -0000
 @@ -128,10 +128,11 @@
  /* === engine.c === */
  static int matcher(struct re_guts *g, const char *string, size_t nmatch, 
regmatch_t pmatch[], int eflags);
  static const char *dissect(struct match *m, const char *start, const char 
*stop, sopno startst, sopno stopst);
 -static const char *backref(struct match *m, const char *start, const char 
*stop, sopno startst, sopno stopst, sopno lev);
 +static const char *backref(struct match *m, const char *start, const char 
*stop, sopno startst, sopno stopst, sopno lev, int rec);
  static const char *fast(struct match *m, const char *start, const char *stop, 
sopno startst, sopno stopst);
  static const char *slow(struct match *m, const char *start, const char *stop, 
sopno startst, sopno stopst);
  static states step(struct re_guts *g, sopno start, sopno stop, states bef, 
int ch, states aft);
 +#define MAX_RECURSION 100
  #define       BOL     (OUT+1)
  #define       EOL     (BOL+1)
  #define       BOLEOL  (BOL+2)
 @@ -279,7 +280,7 @@
                                goto done;
                        }
                        NOTE("backref dissect");
 -                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
 +                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
                }
                if (dp != NULL)
                        break;
 @@ -302,7 +303,7 @@
                        }
  #endif
                        NOTE("backoff dissect");
 -                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
 +                      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
                }
                assert(dp == NULL || dp == endp);
                if (dp != NULL)         /* found a shorter one */
 @@ -565,7 +566,8 @@
      const char *stop,
      sopno startst,
      sopno stopst,
 -    sopno lev)                        /* PLUS nesting level */
 +    sopno lev,                        /* PLUS nesting level */
 +    int rec)
  {
        int i;
        sopno ss;       /* start sop of current subRE */
 @@ -675,7 +677,7 @@
                        return(NULL);
                assert(m->pmatch[i].rm_so != (regoff_t)-1);
                len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
 -              if (len == 0)
 +              if (len == 0 && rec++ > MAX_RECURSION)
                        return(NULL);
                assert(stop - m->beginp >= len);
                if (sp > stop - len)
 @@ -685,28 +687,28 @@
                        return(NULL);
                while (m->g->strip[ss] != SOP(O_BACK, i))
                        ss++;
 -              return(backref(m, sp+len, stop, ss+1, stopst, lev));
 +              return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
  
        case OQUEST_:           /* to null or not */
 -              dp = backref(m, sp, stop, ss+1, stopst, lev);
 +              dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);     /* not */
 -              return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
 +              return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
  
        case OPLUS_:
                assert(m->lastpos != NULL);
                assert(lev+1 <= m->g->nplus);
                m->lastpos[lev+1] = sp;
 -              return(backref(m, sp, stop, ss+1, stopst, lev+1));
 +              return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
  
        case O_PLUS:
                if (sp == m->lastpos[lev])      /* last pass matched null */
 -                      return(backref(m, sp, stop, ss+1, stopst, lev-1));
 +                      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
                /* try another pass */
                m->lastpos[lev] = sp;
 -              dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
 +              dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
                if (dp == NULL)
 -                      dp = backref(m, sp, stop, ss+1, stopst, lev-1);
 +                      dp = backref(m, sp, stop, ss+1, stopst, lev-1, rec);
                return(dp);
  
        case OCH_:              /* find the right one, if any */
 @@ -714,7 +716,7 @@
                esub = ss + OPND(s) - 1;
                assert(OP(m->g->strip[esub]) == OOR1);
                for (;;) {      /* find first matching branch */
 -                      dp = backref(m, sp, stop, ssub, esub, lev);
 +                      dp = backref(m, sp, stop, ssub, esub, lev, rec);
                        if (dp != NULL)
                                return(dp);
                        /* that one missed, try next one */
 @@ -735,7 +737,7 @@
                assert(0 < i && i <= m->g->nsub);
                offsave = m->pmatch[i].rm_so;
                m->pmatch[i].rm_so = sp - m->offp;
 -              dp = backref(m, sp, stop, ss+1, stopst, lev);
 +              dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);
                m->pmatch[i].rm_so = offsave;
 @@ -746,7 +748,7 @@
                assert(0 < i && i <= m->g->nsub);
                offsave = m->pmatch[i].rm_eo;
                m->pmatch[i].rm_eo = sp - m->offp;
 -              dp = backref(m, sp, stop, ss+1, stopst, lev);
 +              dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);
                m->pmatch[i].rm_eo = offsave;
 
 
 ======================================
 #include <inttypes.h>
 #ifdef _USE_PCRE
 #include <pcreposix.h>
 #else
 #include <regex.h>
 #endif
 #include <stdio.h>
 
 void
 testreg(regex_t *r, const char *s)
 {
        regmatch_t m[2];
        printf("%s: %d\t", s, regexec(r, s, 2, m, 0));
        printf("%jd - %jd\t", (intmax_t)(m[0].rm_so), (intmax_t)(m[0].rm_eo));
        printf("%jd - %jd\n", (intmax_t)(m[1].rm_so), (intmax_t)(m[1].rm_eo));
 }
 
 int
 main()
 {
        regex_t r;
        int ret;
        char err[1024];
 #ifdef _USE_PCRE
 #define TESTRE "foo(.*)bar\\1"
 #else
 #define TESTRE "foo\\(.*\\)bar\\1"
 #endif
 
        if ((ret = regcomp(&r, TESTRE, 0)) != 0) {
                regerror(ret, &r, err, 1024);
                fprintf(stderr, "RE error: %s\n", err);
                return 1;
        }
        testreg(&r, "foobar");
        testreg(&r, "foo_bar_");
        testreg(&r, "foobar_");
        testreg(&r, "foolbar_");
        return 0;
 }
 
 /*
 output:
 
 current libc:
 ./t_libc
 foobar: 1      0 - 0   0 - 577757282805521148
 foo_bar_: 0    0 - 8   3 - 4
 foobar_: 1     0 - 8   3 - 4
 foolbar_: 1    0 - 8   3 - 4
 
 patched libc:
 LD_PRELOAD=/usr/src/lib/libc/libc.so.12 ./t_libc
 foobar: 0      0 - 6   3 - 3
 foo_bar_: 0    0 - 8   3 - 4
 foobar_: 0     0 - 6   3 - 3
 foolbar_: 1    0 - 6   3 - 3
 
 pcre:
 LD_PRELOAD=/usr/pkg/lib/libpcreposix.so ./t_pcre
 foobar: 0      0 - 6   3 - 3
 foo_bar_: 0    0 - 8   3 - 4
 foobar_: 0     0 - 6   3 - 3
 foolbar_: 17   0 - 6   3 - 3
 */
 
 dieter
 


Home | Main Index | Thread Index | Old Index