pkgsrc-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc/textproc Restore split-thai 2.0 package after moving s...



details:   https://anonhg.NetBSD.org/pkgsrc/rev/a51927454785
branches:  trunk
changeset: 449072:a51927454785
user:      scole <scole%pkgsrc.org@localhost>
date:      Sat Mar 20 15:46:23 2021 +0000

description:
Restore split-thai 2.0 package after moving source from pkgsrc to MASTER_SITE_LOCAL

diffstat:

 textproc/Makefile            |   3 +-
 textproc/split-thai/DESCR    |   8 ++++
 textproc/split-thai/Makefile |  88 ++++++++++++++++++++++++++++++++++++++++++++
 textproc/split-thai/PLIST    |  11 +++++
 textproc/split-thai/distinfo |  10 +++++
 5 files changed, 119 insertions(+), 1 deletions(-)

diffs (150 lines):

diff -r 775fc2b9a99c -r a51927454785 textproc/Makefile
--- a/textproc/Makefile Sat Mar 20 15:29:39 2021 +0000
+++ b/textproc/Makefile Sat Mar 20 15:46:23 2021 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.1203 2021/03/19 22:18:13 markd Exp $
+# $NetBSD: Makefile,v 1.1204 2021/03/20 15:46:23 scole Exp $
 #
 
 COMMENT=       Text processing utilities (does not include desktop publishing)
@@ -1116,6 +1116,7 @@
 SUBDIR+=       sord
 SUBDIR+=       source-highlight
 SUBDIR+=       sphinxsearch
+SUBDIR+=       split-thai
 SUBDIR+=       stardic
 SUBDIR+=       stava
 SUBDIR+=       sub2srt
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/DESCR
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/DESCR Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,8 @@
+A collection of utilities to split Thai Unicode UTF-8 text by word
+boundaries, also known as word tokenization or word breaking.  The
+utilities use Emacs, swath, Perl, and a C++ ICU-project program.  All
+use dictionary-based word splitting.
+
+Also included is a merged dictionary file of Thai words, a Perl script
+to grep Thai UTF-8 words, and an Emacs library that can split and play
+audio for Thai words.
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/Makefile      Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,88 @@
+# $NetBSD: Makefile,v 1.16 2021/03/20 15:46:23 scole Exp $
+
+# Upstream version; shared by PKGNAME and the local distfile name below.
+ST_VERSION=    2.0
+PKGNAME=       split-thai-${ST_VERSION}
+PKGREVISION=   1
+CATEGORIES=    textproc
+
+MAINTAINER=    scole%NetBSD.org@localhost
+HOMEPAGE=      https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
+COMMENT=       Utilities and an emacs library to split UTF-8 Thai text into words
+# Licenses of the bundled parts -- presumably in the same order as the
+# LICENSE terms below: pthai.el, other code, ICU dictionary, swath dictionary.
+LICENSE=       2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
+
+# The ICU Thai dictionary is fetched at one exact unicode-org/icu commit, so
+# the file matches the checksums recorded in distinfo.
+GITHUB_ICU_TAG=        61607c27732906d36c5bd4d23ecc092f89f53a2b
+DISTFILES=     split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt
+
+# Per-distfile fetch locations.  The leading "-" marks the second entry as a
+# complete URL whose download is stored under the DISTFILES name.
+# ${MASTER_SITE_GITHUB:=unicode-org/} already ends in "/", so the path that
+# follows must not begin with another slash.
+SITES.split-thai-${ST_VERSION}.tgz=    ${MASTER_SITE_LOCAL}
+SITES.thaidict-${GITHUB_ICU_TAG}.txt=  -${MASTER_SITE_GITHUB:=unicode-org/}icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
+
+# C++11 appears to have been needed at least on Darwin (unconfirmed).
+USE_LANGUAGES= c++11
+
+# Tools used by the build recipes; the ":run" entries are also needed by the
+# installed sh and perl scripts.  Append rather than assign so that tools
+# already requested elsewhere are not discarded.
+USE_TOOLS+=    pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
+USE_TOOLS+=    perl:run
+# libdatrie presumably supplies trietool, which is only run during the build.
+# NOTE(review): consider TOOL_DEPENDS if cross-compilation must be supported.
+BUILD_DEPENDS+=        libdatrie-[0-9]*:../../devel/libdatrie
+# emacs and swath are also invoked by the pre-build dictionary recipes.
+DEPENDS+=      emacs-[0-9]*:../../editors/emacs
+DEPENDS+=      swath-[0-9]*:../../textproc/swath
+DEPENDS+=      mpg123-[0-9]*:../../audio/mpg123
+
+# Scripts whose interpreter line is rewritten to the pkgsrc perl / sh.
+REPLACE_PERL=  st-wordbreak tgrep
+REPLACE_SH=    st-swath
+
+# Command prefix used by the dictionary recipes to run under a UTF-8 locale.
+UTF8_ENV=      env LC_ALL=C.UTF-8
+
+# Install locations relative to ${PREFIX}.  The same values replace the
+# ST_SHARE_DIR / ST_SHARE_BIN placeholders in the scripts (dictionary-app).
+ST_SHARE_DIR=          share/split-thai
+ST_SHARE_BIN=          bin
+INSTALLATION_DIRS=     ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+
+# Data files copied from ${WRKSRC} by do-install.
+ST_SHARE_FILES=                README.txt pthai.el sampledict.txt words words.tri
+
+# xxx: presumably there is no REPLACE_PERL/REPLACE_SH-style helper for emacs
+# scripts (a REPLACE_EMACS_SCRIPT would be the analogue), so the interpreter
+# path in st-emacs is rewritten to ${PREFIX}/bin/emacs with SUBST instead.
+SUBST_CLASSES+=                        st-emacs-app
+SUBST_STAGE.st-emacs-app=      pre-configure
+SUBST_MESSAGE.st-emacs-app=    Fixing emacs script paths.
+SUBST_FILES.st-emacs-app=      st-emacs
+SUBST_SED.st-emacs-app=                -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+
+# Replace the literal ST_SHARE_DIR / ST_SHARE_BIN placeholders in the scripts
+# and in pthai.el with the absolute installed paths.
+SUBST_CLASSES+=                        dictionary-app
+SUBST_STAGE.dictionary-app=    post-extract
+SUBST_MESSAGE.dictionary-app=  Fixing dictionary paths.
+SUBST_FILES.dictionary-app=    st-emacs st-swath st-wordbreak pthai.el
+SUBST_SED.dictionary-app=      -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
+SUBST_SED.dictionary-app+=     -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
+
+# Build the merged dictionary in ${WRKSRC}: the plain list "words" and the
+# trie that trietool maintains for it (presumably words.tri), combining the
+# pthai.el word table, the ICU dictionary and the swath dictionary.
+pre-build:
+# Have pthai.el write its word table to thai-word-dict.
+       cd ${WRKSRC} && \
+               ${UTF8_ENV} emacs --batch \
+                       --eval='(setq pthai-bootstrap t)' \
+                       --eval='(load-file "pthai.el")' \
+                       --eval='(pthai-twt-table-save "thai-word-dict")'
+# The ICU dictionary distfile is used as fetched.
+       cp ${WRKDIR}/thaidict-${GITHUB_ICU_TAG}.txt ${WRKSRC}/icu-dict
+# List the installed swath trie and keep the first field of every line.
+       cd ${PREFIX}/share/swath && \
+               ${UTF8_ENV} trietool swathdic list | \
+                       awk '{print $$1}' > ${WRKSRC}/swath-dict
+# Merge the three lists, dropping lines containing "#" and duplicates.
+       cd ${WRKSRC} && \
+               ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
+               grep -v '#' | \
+               sort | \
+               uniq > words
+# Load the merged list into the "words" trie.
+       cd ${WRKSRC} && \
+               ${UTF8_ENV} trietool words add-list -e utf-8 words
+# Report the size of each input list and of the merged result.
+.for dict in thai-word-dict icu-dict swath-dict
+       @${ECHO} $$(wc -l ${WRKSRC}/${dict} | awk '{print $$1}') words in ${dict}
+.endfor
+       @${ECHO} $$(wc -l ${WRKSRC}/words | awk '{print $$1}') \
+               unique words in combined dictionary
+
+# Compile the ICU-based splitter st-icu from st-icu.cc.  CXXFLAGS and LDFLAGS
+# are passed along with CPPFLAGS so that compiler and linker flags set in the
+# build environment are honoured; ICU's own flags come from pkg-config.
+do-build:
+       cd ${WRKSRC} && \
+               ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} -o st-icu st-icu.cc \
+               `pkg-config --libs --cflags icu-io`
+
+# Install the scripts and the compiled st-icu into ${ST_SHARE_BIN} and the
+# data files into ${ST_SHARE_DIR}.  The directory variables are used instead
+# of literal paths so this target stays in step with INSTALLATION_DIRS and
+# with the paths substituted into the scripts.
+do-install:
+       ${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath \
+               ${WRKSRC}/st-wordbreak ${WRKSRC}/tgrep \
+               ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+       ${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+.for f in ${ST_SHARE_FILES}
+       ${INSTALL_DATA} ${WRKSRC}/${f} ${DESTDIR}${PREFIX}/${ST_SHARE_DIR}
+.endfor
+
+.include "../../textproc/icu/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/PLIST
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/PLIST Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,11 @@
+@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
+bin/st-emacs
+bin/st-icu
+bin/st-swath
+bin/st-wordbreak
+bin/tgrep
+share/split-thai/README.txt
+share/split-thai/pthai.el
+share/split-thai/sampledict.txt
+share/split-thai/words
+share/split-thai/words.tri
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/distinfo
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/distinfo      Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,10 @@
+$NetBSD: distinfo,v 1.3 2021/03/20 15:46:23 scole Exp $
+
+SHA1 (split-thai-2.0.tgz) = 80eb473e2038d889f12fd684388f88017fdec2d2
+RMD160 (split-thai-2.0.tgz) = 2280da7813940dc9eee1a2680425ba7366901566
+SHA512 (split-thai-2.0.tgz) = a9e0a101718857b8b3817918ff3cb4e9e0b6436a0df5cf0d2871ed5afd94f635cf07b1dba624c75aea1ce98da3a4e4403077f518683b92800aa383d71aa53829
+Size (split-thai-2.0.tgz) = 28541 bytes
+SHA1 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 2a2ad127cc279835cb4df04eb69401a0d4927774
+RMD160 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 0a6df7b7dd6ef502c5dd20020e37b2ca1a5514a2
+SHA512 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 88800fe2a453fc40f16ff54c21c852a8ea8e1496e42d5d187e5b5ac0ff58050830fc0816239e4f88cb23ed301f894d1ca52eb4676fd85c13c285cec815ae7c42
+Size (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 493044 bytes



Home | Main Index | Thread Index | Old Index