pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/textproc/split-thai



Module Name:    pkgsrc
Committed By:   scole
Date:           Thu Aug 20 14:20:27 UTC 2020

Modified Files:
        pkgsrc/textproc/split-thai: Makefile
        pkgsrc/textproc/split-thai/files: st-icu.cc

Log Message:
Update to 0.7
- don't skip strings with numbers [0-9] for st-icu


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.2 -r1.3 pkgsrc/textproc/split-thai/files/st-icu.cc

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.6 pkgsrc/textproc/split-thai/Makefile:1.7
--- pkgsrc/textproc/split-thai/Makefile:1.6     Wed Aug 19 16:52:25 2020
+++ pkgsrc/textproc/split-thai/Makefile Thu Aug 20 14:20:27 2020
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.6 2020/08/19 16:52:25 scole Exp $
+# $NetBSD: Makefile,v 1.7 2020/08/20 14:20:27 scole Exp $
 
-PKGNAME=       split-thai-0.6
+PKGNAME=       split-thai-0.7
 CATEGORIES=    textproc
 MAINTAINER=    pkgsrc-users%NetBSD.org@localhost
 COMMENT=       Utilities to split UTF-8 Thai text into words

Index: pkgsrc/textproc/split-thai/files/st-icu.cc
diff -u pkgsrc/textproc/split-thai/files/st-icu.cc:1.2 pkgsrc/textproc/split-thai/files/st-icu.cc:1.3
--- pkgsrc/textproc/split-thai/files/st-icu.cc:1.2      Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/st-icu.cc  Thu Aug 20 14:20:27 2020
@@ -7,8 +7,8 @@
 #include <unicode/brkiter.h>
 #include <unicode/regex.h>
 #include <unicode/ucnv.h>
-#include <unicode/ustream.h>
 #include <unicode/ustdio.h>
+#include <unicode/ustream.h>
 
 using namespace std;
 using namespace icu;
@@ -18,6 +18,7 @@ using namespace icu;
 const UnicodeString thai_rexp = "[\\u0e00-\\u0e7f]+";
 const UnicodeString thai_consonant = "[\\u0e01-\\u0e2e]+";
 const UnicodeString thai_num_rexp = "[\\u0e50-\\u0e59]+";
+const UnicodeString number_rexp = "[0-9\\u0e50-\\u0e59]+";
 const UnicodeString thai_nonnum_rexp = "[\\u0e01-\\u0e4f\\u0e5a-\\u0e7f]+";
 
 void usage() {
@@ -56,7 +57,7 @@ bool matches_regexp(const UnicodeString 
 // add spaces to string with thai numbers
 UnicodeString space_thai_numbers(const UnicodeString &s) {
        // return string unmodified if no numbers
-       if ( ! matches_regexp(s, thai_num_rexp) ) {
+       if ( ! matches_regexp(s, number_rexp) ) {
                return s;
        }
 
@@ -105,7 +106,7 @@ UnicodeString split_words_consolidated(c
        }
 
        // only one word found, trim and done
-       if ( vbreak.size() == 1 ) {
+       if ( vbreak.size() <= 1 ) {
                UnicodeString ss(s);
                return ss.trim();
        }



Home | Main Index | Thread Index | Old Index