Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src add reg{,a}sub



details:   https://anonhg.NetBSD.org/src/rev/e67a8848c8f6
branches:  trunk
changeset: 342928:e67a8848c8f6
user:      christos <christos%NetBSD.org@localhost>
date:      Thu Jan 14 20:41:47 2016 +0000

description:
add reg{,a}sub

diffstat:

 include/regex.h             |    6 +-
 lib/libc/regex/Makefile.inc |    6 +-
 lib/libc/regex/regex.3      |   59 +++++++++++++++-
 lib/libc/regex/regsub.c     |  162 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 7 deletions(-)

diffs (truncated from 308 to 300 lines):

diff -r c4c99e76eea5 -r e67a8848c8f6 include/regex.h
--- a/include/regex.h   Thu Jan 14 20:41:23 2016 +0000
+++ b/include/regex.h   Thu Jan 14 20:41:47 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: regex.h,v 1.13 2005/09/13 01:44:32 christos Exp $      */
+/*     $NetBSD: regex.h,v 1.14 2016/01/14 20:42:03 christos Exp $      */
 
 /*-
  * Copyright (c) 1992, 1993
@@ -137,6 +137,10 @@
 int    regexec(const regex_t * __restrict,
            const char * __restrict, size_t, regmatch_t [], int);
 void   regfree(regex_t *);
+#ifdef _NETBSD_SOURCE
+ssize_t regsub(char *, size_t, const char *, const regmatch_t *, const char *);
+ssize_t regasub(char **buf, const char *, const regmatch_t *, const char *);
+#endif
 __END_DECLS
 
 #endif /* !_REGEX_H_ */
diff -r c4c99e76eea5 -r e67a8848c8f6 lib/libc/regex/Makefile.inc
--- a/lib/libc/regex/Makefile.inc       Thu Jan 14 20:41:23 2016 +0000
+++ b/lib/libc/regex/Makefile.inc       Thu Jan 14 20:41:47 2016 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile.inc,v 1.7 1997/11/14 02:04:46 mrg Exp $
+#      $NetBSD: Makefile.inc,v 1.8 2016/01/14 20:41:47 christos Exp $
 #      @(#)Makefile.inc        8.1 (Berkeley) 6/4/93
 
 # regex sources
@@ -6,9 +6,9 @@
 
 CPPFLAGS+=-DPOSIX_MISTAKE
 
-SRCS+= regcomp.c regerror.c regexec.c regfree.c
+SRCS+= regcomp.c regerror.c regexec.c regfree.c regsub.c
 
 MAN+=  regex.3 re_format.7
 
 MLINKS+=regex.3 regcomp.3 regex.3 regexec.3 regex.3 regerror.3 \
-       regex.3 regfree.3
+       regex.3 regfree.3 regex.3 regsub.3 regex.3 regasub.3
diff -r c4c99e76eea5 -r e67a8848c8f6 lib/libc/regex/regex.3
--- a/lib/libc/regex/regex.3    Thu Jan 14 20:41:23 2016 +0000
+++ b/lib/libc/regex/regex.3    Thu Jan 14 20:41:47 2016 +0000
@@ -1,4 +1,4 @@
-.\"    $NetBSD: regex.3,v 1.22 2011/05/17 03:35:38 enami Exp $
+.\"    $NetBSD: regex.3,v 1.23 2016/01/14 20:41:47 christos Exp $
 .\"
 .\" Copyright (c) 1992, 1993, 1994
 .\"    The Regents of the University of California.  All rights reserved.
@@ -65,7 +65,7 @@
 .\"
 .\"    @(#)regex.3     8.4 (Berkeley) 3/20/94
 .\"
-.Dd December 29, 2003
+.Dd January 8, 2016
 .Dt REGEX 3
 .Os
 .Sh NAME
@@ -73,7 +73,9 @@
 .Nm regcomp ,
 .Nm regexec ,
 .Nm regerror ,
-.Nm regfree
+.Nm regfree ,
+.Nm regasub ,
+.Nm regsub
 .Nd regular-expression library
 .Sh LIBRARY
 .Lb libc
@@ -87,6 +89,10 @@
 .Fn regerror "int errcode" "const regex_t * restrict preg" "char * restrict errbuf" "size_t errbuf_size"
 .Ft void
 .Fn regfree "regex_t *preg"
+.Ft ssize_t
+.Fn regsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str"
+.Ft ssize_t
+.Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *str"
 .Sh DESCRIPTION
 These routines implement
 .St -p1003.2-92
@@ -466,6 +472,46 @@
 None of these functions references global variables except for tables
 of constants;
 all are safe for use from multiple threads if the arguments are safe.
+.Pp
+The
+.Fn regsub
+and
+.Fn regasub
+functions perform substitutions using
+.Xr sed 1 Ns -like
+syntax.
+They return the length of the string that would have been created
+if there were enough space, or
+.Dv \-1
+on error, setting
+.Va errno .
+The result
+is placed in
+.Fa buf
+which is user-supplied in
+.Fn regsub
+and dynamically allocated in
+.Fn regasub .
+The
+.Fa sub
+argument contains a substitution string which may refer to the first
+9 matched subexpressions using
+.Dq \e<n>
+to refer to the nth matched
+item, or
+.Dq &
+(which is equivalent to
+.Dq \e0 )
+to refer to the full match.
+The
+.Fa rm
+array must be at least 10 elements long, and should contain the result
+of the matches from a previous
+.Fn regexec
+call.
+The
+.Fa str
+argument contains the source string to apply the transformation to.
 .Sh IMPLEMENTATION CHOICES
 There are a number of decisions that
 .St -p1003.2-92
@@ -576,6 +622,13 @@
 Altered for inclusion in the
 .Bx 4.4
 distribution.
+.Pp
+The
+.Fn regsub
+and
+.Fn regasub
+functions appeared in
+.Nx 8 .
 .Sh BUGS
 There is one known functionality bug.
 The implementation of internationalization is incomplete:
diff -r c4c99e76eea5 -r e67a8848c8f6 lib/libc/regex/regsub.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/regex/regsub.c   Thu Jan 14 20:41:47 2016 +0000
@@ -0,0 +1,162 @@
+/*     $NetBSD: regsub.c,v 1.1 2016/01/14 20:41:47 christos Exp $      */
+
+/*-
+ * Copyright (c) 2015 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: regsub.c,v 1.1 2016/01/14 20:41:47 christos Exp $");
+
+#include <sys/param.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+
+struct str {           /* output accumulator shared by the helpers below */
+       char *s_ptr;    /* start of the output buffer */
+       size_t s_max;   /* capacity of s_ptr, in bytes */
+       size_t s_len;   /* bytes produced so far; still advanced when output does not fit */
+       int s_fixed;    /* non-zero: caller-supplied buffer, never reallocated */
+};
+
+#define        REINCR  64      /* extra slack added on top of each growth request */
+
+/*
+ * Make sure more than len bytes are free past s->s_len, growing the
+ * buffer when it is dynamically allocated.
+ *
+ * Returns 0 when the room is available.  Returns -1 when it is not:
+ * the buffer is fixed and too small, an earlier append already did not
+ * fit, or realloc() failed.  On failure neither s->s_ptr nor s->s_max
+ * is modified, so s->s_max always describes the real allocation.
+ */
+static int
+addspace(struct str *s, size_t len)
+{
+       void *v;
+       size_t nmax;
+
+       /*
+        * A successful append always leaves s_len < s_max, so non-empty
+        * output that has reached or passed s_max means an earlier append
+        * was only counted, not stored.  Stay failed from then on: the
+        * unsigned subtraction below would otherwise wrap around and
+        * report plenty of room, letting callers write out of bounds.
+        */
+       if (s->s_len != 0 && s->s_len >= s->s_max)
+               return -1;
+
+       if (s->s_max - s->s_len > len)
+               return 0;
+
+       if (s->s_fixed)
+               return -1;
+
+       nmax = s->s_max + len + REINCR;
+       if (nmax < s->s_max)            /* size_t overflow */
+               return -1;
+
+       /* Commit the new capacity only once the buffer really is that big. */
+       v = realloc(s->s_ptr, nmax);
+       if (v == NULL)
+               return -1;
+       s->s_ptr = v;
+       s->s_max = nmax;
+
+       return 0;
+}
+
+/*
+ * Append the single character c to s.
+ *
+ * When there is no room the character is only counted in s->s_len, so
+ * the caller can still learn how long the full output would have been.
+ * A terminating NUL (c == 0) is not counted in s->s_len, and the last
+ * byte of the buffer is additionally set to NUL so that truncated
+ * output is still terminated.
+ */
+static void
+addchar(struct str *s, int c)
+{
+       if (addspace(s, 1) == -1)
+               s->s_len++;
+       else
+               s->s_ptr[s->s_len++] = c;
+       if (c == 0) {
+               --s->s_len;
+               /*
+                * With a zero-sized buffer there is no last byte:
+                * s_max - 1 would wrap and index far outside s_ptr.
+                */
+               if (s->s_max != 0)
+                       s->s_ptr[s->s_max - 1] = c;
+       }
+}
+
+/*
+ * Append len bytes taken from buf.  If addspace() cannot provide the
+ * room nothing is copied, but s->s_len is advanced all the same so the
+ * total length keeps being tracked.
+ */
+static void
+addnstr(struct str *s, const char *buf, size_t len)
+{
+       const int fits = addspace(s, len) != -1;
+
+       if (fits)
+               memcpy(&s->s_ptr[s->s_len], buf, len);
+
+       s->s_len += len;
+}
+
+/*
+ * Prepare s for output of capacity len.  A non-NULL buf is used as a
+ * fixed buffer; otherwise len bytes are obtained from malloc().
+ * Returns 0 on success, -1 if no buffer could be obtained.
+ */
+static int
+initstr(struct str *s, char *buf, size_t len)
+{
+       s->s_len = 0;
+       s->s_max = len;
+
+       if (buf != NULL) {
+               /* Caller owns the storage; it must never be reallocated. */
+               s->s_fixed = 1;
+               s->s_ptr = buf;
+               return 0;
+       }
+
+       s->s_fixed = 0;
+       s->s_ptr = malloc(len);
+       if (s->s_ptr == NULL)
+               return -1;
+       return 0;
+}
+
+/*
+ * Expand the substitution template sub into *buf.
+ *
+ * In the template, '&' stands for match 0 and a backslash followed by
+ * a digit stands for that numbered match; the matched text is copied
+ * from str using the offsets in rm.  A backslash followed by '\' or
+ * '&' yields that character literally; everything else is copied
+ * unchanged.  Matches whose rm_so or rm_eo is -1 expand to nothing.
+ *
+ * If *buf is non-NULL it is used as a fixed buffer of len bytes;
+ * otherwise a buffer is allocated and stored in *buf on success.
+ * Returns the accumulated output length (without the terminating
+ * NUL), or -1 if the buffer could not be set up or the allocated
+ * buffer turned out too small for the output.
+ */
+static ssize_t
+regsub1(char **buf, size_t len, const char *sub,
+    const regmatch_t *rm, const char *str)
+{
+       struct str out;
+       ssize_t idx;
+       char ch;
+
+       if (initstr(&out, *buf, len) == -1)
+               return -1;
+
+       while ((ch = *sub++) != '\0') {
+               /* Work out which match, if any, this token refers to. */
+               if (ch == '&')
+                       idx = 0;
+               else if (ch == '\\' && isdigit((unsigned char)*sub))
+                       idx = *sub++ - '0';
+               else
+                       idx = -1;
+
+               if (idx != -1) {
+                       const regmatch_t *m = &rm[idx];
+
+                       /* Unset matches contribute no output. */
+                       if (m->rm_so == -1 || m->rm_eo == -1)
+                               continue;
+                       addnstr(&out, str + m->rm_so,
+                           (size_t)(m->rm_eo - m->rm_so));
+                       continue;
+               }
+
+               /* Literal character; "\\" and "\&" drop the backslash. */
+               if (ch == '\\' && (*sub == '\\' || *sub == '&'))
+                       ch = *sub++;
+               addchar(&out, ch);
+       }
+
+       addchar(&out, '\0');
+
+       if (out.s_fixed)
+               return out.s_len;
+
+       if (out.s_len >= out.s_max) {
+               free(out.s_ptr);
+               return -1;
+       }
+       *buf = out.s_ptr;
+       return out.s_len;
+}
+
+ssize_t
+regsub(char *buf, size_t len, const char *sub, const regmatch_t *rm,
+    const char *str)
+{
+       return regsub1(&buf, len, sub, rm, str);



Home | Main Index | Thread Index | Old Index