pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc/split-thai
Module Name: pkgsrc
Committed By: scole
Date: Thu Aug 20 14:20:27 UTC 2020
Modified Files:
pkgsrc/textproc/split-thai: Makefile
pkgsrc/textproc/split-thai/files: st-icu.cc
Log Message:
Update to 0.7
- st-icu: don't skip strings that contain ASCII digits [0-9] (previously only Thai digits were treated as numbers)
To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.2 -r1.3 pkgsrc/textproc/split-thai/files/st-icu.cc
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.6 pkgsrc/textproc/split-thai/Makefile:1.7
--- pkgsrc/textproc/split-thai/Makefile:1.6 Wed Aug 19 16:52:25 2020
+++ pkgsrc/textproc/split-thai/Makefile Thu Aug 20 14:20:27 2020
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.6 2020/08/19 16:52:25 scole Exp $
+# $NetBSD: Makefile,v 1.7 2020/08/20 14:20:27 scole Exp $
-PKGNAME= split-thai-0.6
+PKGNAME= split-thai-0.7
CATEGORIES= textproc
MAINTAINER= pkgsrc-users%NetBSD.org@localhost
COMMENT= Utilities to split UTF-8 Thai text into words
Index: pkgsrc/textproc/split-thai/files/st-icu.cc
diff -u pkgsrc/textproc/split-thai/files/st-icu.cc:1.2 pkgsrc/textproc/split-thai/files/st-icu.cc:1.3
--- pkgsrc/textproc/split-thai/files/st-icu.cc:1.2 Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/st-icu.cc Thu Aug 20 14:20:27 2020
@@ -7,8 +7,8 @@
#include <unicode/brkiter.h>
#include <unicode/regex.h>
#include <unicode/ucnv.h>
-#include <unicode/ustream.h>
#include <unicode/ustdio.h>
+#include <unicode/ustream.h>
using namespace std;
using namespace icu;
@@ -18,6 +18,7 @@ using namespace icu;
const UnicodeString thai_rexp = "[\\u0e00-\\u0e7f]+";
const UnicodeString thai_consonant = "[\\u0e01-\\u0e2e]+";
const UnicodeString thai_num_rexp = "[\\u0e50-\\u0e59]+";
+const UnicodeString number_rexp = "[0-9\\u0e50-\\u0e59]+";
const UnicodeString thai_nonnum_rexp = "[\\u0e01-\\u0e4f\\u0e5a-\\u0e7f]+";
void usage() {
@@ -56,7 +57,7 @@ bool matches_regexp(const UnicodeString
// add spaces to string with thai numbers
UnicodeString space_thai_numbers(const UnicodeString &s) {
// return string unmodified if no numbers
- if ( ! matches_regexp(s, thai_num_rexp) ) {
+ if ( ! matches_regexp(s, number_rexp) ) {
return s;
}
@@ -105,7 +106,7 @@ UnicodeString split_words_consolidated(c
}
// only one word found, trim and done
- if ( vbreak.size() == 1 ) {
+ if ( vbreak.size() <= 1 ) {
UnicodeString ss(s);
return ss.trim();
}
Home |
Main Index |
Thread Index |
Old Index