Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/libc/regex Prevent regcomp/regexec DoS attacks by limiti...



details:   https://anonhg.NetBSD.org/src/rev/6234e6f056fe
branches:  trunk
changeset: 770252:6234e6f056fe
user:      christos <christos%NetBSD.org@localhost>
date:      Sun Oct 09 18:23:00 2011 +0000

description:
Prevent regcomp/regexec DoS attacks by limiting the amount of memory used
and the level of recursion. Thanks to Maksymilian Arciemowicz for discovery
and help with the implementation.

diffstat:

 lib/libc/regex/engine.c  |    6 +-
 lib/libc/regex/regcomp.c |  125 ++++++++++++++++++++++++++++++----------------
 lib/libc/regex/regex2.h  |   16 +++---
 3 files changed, 92 insertions(+), 55 deletions(-)

diffs (truncated from 457 to 300 lines):

diff -r b8eb09dd922d -r 6234e6f056fe lib/libc/regex/engine.c
--- a/lib/libc/regex/engine.c   Sun Oct 09 18:21:08 2011 +0000
+++ b/lib/libc/regex/engine.c   Sun Oct 09 18:23:00 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: engine.c,v 1.22 2009/02/12 05:06:54 lukem Exp $        */
+/*     $NetBSD: engine.c,v 1.23 2011/10/09 18:23:00 christos Exp $     */
 
 /*-
  * Copyright (c) 1992, 1993, 1994
@@ -212,8 +212,8 @@
        /* prescreening; this does wonders for this rather slow code */
        if (g->must != NULL) {
                for (dp = start; dp < stop; dp++)
-                       if (*dp == g->must[0] && stop - dp >= g->mlen &&
-                               memcmp(dp, g->must, (size_t)g->mlen) == 0)
+                       if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
+                               memcmp(dp, g->must, g->mlen) == 0)
                                break;
                if (dp == stop)         /* we didn't find g->must */
                        return(REG_NOMATCH);
diff -r b8eb09dd922d -r 6234e6f056fe lib/libc/regex/regcomp.c
--- a/lib/libc/regex/regcomp.c  Sun Oct 09 18:21:08 2011 +0000
+++ b/lib/libc/regex/regcomp.c  Sun Oct 09 18:23:00 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $       */
+/*     $NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $    */
 
 /*-
  * Copyright (c) 1992, 1993, 1994
@@ -76,7 +76,7 @@
 #if 0
 static char sccsid[] = "@(#)regcomp.c  8.5 (Berkeley) 3/20/94";
 #else
-__RCSID("$NetBSD: regcomp.c,v 1.29 2009/02/12 05:06:54 lukem Exp $");
+__RCSID("$NetBSD: regcomp.c,v 1.30 2011/10/09 18:23:00 christos Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 
@@ -125,11 +125,11 @@
 #endif
 
 /* === regcomp.c === */
-static void p_ere(struct parse *p, int stop);
-static void p_ere_exp(struct parse *p);
+static void p_ere(struct parse *p, int stop, size_t reclimit);
+static void p_ere_exp(struct parse *p, size_t reclimit);
 static void p_str(struct parse *p);
-static void p_bre(struct parse *p, int end1, int end2);
-static int p_simp_re(struct parse *p, int starordinary);
+static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
+static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
 static int p_count(struct parse *p);
 static void p_bracket(struct parse *p);
 static void p_b_term(struct parse *p, cset *cs);
@@ -141,7 +141,7 @@
 static void bothcases(struct parse *p, int ch);
 static void ordinary(struct parse *p, int ch);
 static void nonnewline(struct parse *p);
-static void repeat(struct parse *p, sopno start, int from, int to);
+static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
 static int seterr(struct parse *p, int e);
 static cset *allocset(struct parse *p);
 static void freeset(struct parse *p, cset *cs);
@@ -163,7 +163,7 @@
 static void doemit(struct parse *p, sop op, sopno opnd);
 static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
 static void dofwd(struct parse *p, sopno pos, sopno value);
-static void enlarge(struct parse *p, sopno size);
+static int enlarge(struct parse *p, sopno size);
 static void stripsnug(struct parse *p, struct re_guts *g);
 static void findmust(struct parse *p, struct re_guts *g);
 static sopno pluscount(struct parse *p, struct re_guts *g);
@@ -211,6 +211,13 @@
 #define        never   0               /* some <assert.h>s have bugs too */
 #endif
 
+#define        MEMLIMIT        0x8000000
+#define MEMSIZE(p) \
+       ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
+       (p)->ncsalloc * sizeof(cset) + \
+       (p)->ssize * sizeof(sop))
+#define        RECLIMIT        256
+
 /*
  - regcomp - interface for parser and compilation
  = extern int regcomp(regex_t *, const char *, int);
@@ -260,7 +267,7 @@
        if (g == NULL)
                return(REG_ESPACE);
        p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
-       p->strip = (sop *)malloc(p->ssize * sizeof(sop));
+       p->strip = malloc(p->ssize * sizeof(sop));
        p->slen = 0;
        if (p->strip == NULL) {
                free(g);
@@ -297,11 +304,11 @@
        EMIT(OEND, 0);
        g->firststate = THERE();
        if (cflags&REG_EXTENDED)
-               p_ere(p, OUT);
+               p_ere(p, OUT, 0);
        else if (cflags&REG_NOSPEC)
                p_str(p);
        else
-               p_bre(p, OUT, OUT);
+               p_bre(p, OUT, OUT, 0);
        EMIT(OEND, 0);
        g->laststate = THERE();
 
@@ -328,12 +335,13 @@
 
 /*
  - p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(struct parse *p, int stop);
+ == static void p_ere(struct parse *p, int stop, size_t reclimit);
  */
 static void
 p_ere(
     struct parse *p,
-    int stop)                  /* character this ERE should end at */
+    int stop,                  /* character this ERE should end at */
+    size_t reclimit)
 {
        char c;
        sopno prevback = 0;     /* pacify gcc */
@@ -343,11 +351,16 @@
 
        _DIAGASSERT(p != NULL);
 
+       if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+               p->error = REG_ESPACE;
+               return;
+       }
+
        for (;;) {
                /* do a bunch of concatenated expressions */
                conc = HERE();
                while (MORE() && (c = PEEK()) != '|' && c != stop)
-                       p_ere_exp(p);
+                       p_ere_exp(p, reclimit);
                REQUIRE(HERE() != conc, REG_EMPTY);     /* require nonempty */
 
                if (!EAT('|'))
@@ -376,11 +389,12 @@
 
 /*
  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- == static void p_ere_exp(struct parse *p);
+ == static void p_ere_exp(struct parse *p, size_t reclimit);
  */
 static void
 p_ere_exp(
-    struct parse *p)
+    struct parse *p,
+    size_t reclimit)
 {
        char c;
        sopno pos;
@@ -404,7 +418,7 @@
                        p->pbegin[subno] = HERE();
                EMIT(OLPAREN, subno);
                if (!SEE(')'))
-                       p_ere(p, ')');
+                       p_ere(p, ')', reclimit);
                if (subno < NPAREN) {
                        p->pend[subno] = HERE();
                        assert(p->pend[subno] != 0);
@@ -506,7 +520,7 @@
                                count2 = INFINITY;
                } else          /* just a single number */
                        count2 = count;
-               repeat(p, pos, count, count2);
+               repeat(p, pos, count, count2, 0);
                if (!EAT('}')) {        /* error heuristics */
                        while (MORE() && PEEK() != '}')
                                NEXT();
@@ -544,7 +558,7 @@
 /*
  - p_bre - BRE parser top level, anchoring and concatenation
  == static void p_bre(struct parse *p, int end1, \
- ==    int end2);
+ ==    int end2, size_t reclimit);
  * Giving end1 as OUT essentially eliminates the end1/end2 check.
  *
  * This implementation is a bit of a kludge, in that a trailing $ is first
@@ -557,7 +571,8 @@
 p_bre(
     struct parse *p,
     int end1,          /* first terminating character */
-    int end2)          /* second terminating character */
+    int end2,          /* second terminating character */
+    size_t reclimit)
 {
        sopno start;
        int first = 1;                  /* first subexpression? */
@@ -565,6 +580,11 @@
 
        _DIAGASSERT(p != NULL);
 
+       if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+               p->error = REG_ESPACE;
+               return;
+       }
+
        start = HERE();
 
        if (EAT('^')) {
@@ -573,7 +593,7 @@
                p->g->nbol++;
        }
        while (MORE() && !SEETWO(end1, end2)) {
-               wasdollar = p_simp_re(p, first);
+               wasdollar = p_simp_re(p, first, reclimit);
                first = 0;
        }
        if (wasdollar) {        /* oops, that was a trailing anchor */
@@ -588,12 +608,13 @@
 
 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(struct parse *p, int starordinary);
+ == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
  */
 static int                     /* was the simple RE an unbackslashed $? */
 p_simp_re(
     struct parse *p,
-    int starordinary)          /* is a leading * an ordinary character? */
+    int starordinary,          /* is a leading * an ordinary character? */
+    size_t reclimit)
 {
        int c;
        int count;
@@ -634,7 +655,7 @@
                EMIT(OLPAREN, subno);
                /* the MORE here is an error heuristic */
                if (MORE() && !SEETWO('\\', ')'))
-                       p_bre(p, '\\', ')');
+                       p_bre(p, '\\', ')', reclimit);
                if (subno < NPAREN) {
                        p->pend[subno] = HERE();
                        assert(p->pend[subno] != 0);
@@ -693,7 +714,7 @@
                                count2 = INFINITY;
                } else          /* just a single number */
                        count2 = count;
-               repeat(p, pos, count, count2);
+               repeat(p, pos, count, count2, 0);
                if (!EATTWO('\\', '}')) {       /* error heuristics */
                        while (MORE() && !SEETWO('\\', '}'))
                                NEXT();
@@ -745,6 +766,8 @@
        _DIAGASSERT(p != NULL);
 
        cs = allocset(p);
+       if (cs == NULL)
+               return;
 
        /* Dept of Truly Sickening Special-Case Kludges */
        if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
@@ -1101,14 +1124,16 @@
 
 /*
  - repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(struct parse *p, sopno start, int from, int to);
+ == static void repeat(struct parse *p, sopno start, int from, int to,
+ == size_t reclimit);
  */
 static void
 repeat(
     struct parse *p,
     sopno start,               /* operand from here to end of strip */
     int from,                  /* repeated from this number */
-    int to)                    /* to this number of times (maybe INFINITY) */
+    int to,                    /* to this number of times (maybe INFINITY) */
+    size_t reclimit)
 {
        sopno finish;
 #      define  N       2
@@ -1119,10 +1144,12 @@
 
        _DIAGASSERT(p != NULL);
 
-       finish = HERE();
+       if (reclimit++ > RECLIMIT) 
+               p->error = REG_ESPACE;
+       if (p->error)
+               return;
 
-       if (p->error != 0)      /* head off possible runaway recursion */
-               return;
+       finish = HERE();
 
        assert(from <= to);
 
@@ -1135,7 +1162,7 @@
        case REP(0, INF):               /* as x{1,}? */
                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
                INSERT(OCH_, start);            /* offset is wrong... */
-               repeat(p, start+1, 1, to);
+               repeat(p, start+1, 1, to, reclimit);



Home | Main Index | Thread Index | Old Index