pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/textproc



Module Name:    pkgsrc
Committed By:   scole
Date:           Sat Mar 20 15:46:23 UTC 2021

Modified Files:
        pkgsrc/textproc: Makefile
Added Files:
        pkgsrc/textproc/split-thai: DESCR Makefile PLIST distinfo

Log Message:
Restore split-thai 2.0 package after moving source from pkgsrc to MASTER_SITE_LOCAL


To generate a diff of this commit:
cvs rdiff -u -r1.1203 -r1.1204 pkgsrc/textproc/Makefile
cvs rdiff -u -r0 -r1.6 pkgsrc/textproc/split-thai/DESCR
cvs rdiff -u -r0 -r1.16 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r0 -r1.7 pkgsrc/textproc/split-thai/PLIST
cvs rdiff -u -r0 -r1.3 pkgsrc/textproc/split-thai/distinfo

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/textproc/Makefile
diff -u pkgsrc/textproc/Makefile:1.1203 pkgsrc/textproc/Makefile:1.1204
--- pkgsrc/textproc/Makefile:1.1203     Fri Mar 19 22:18:13 2021
+++ pkgsrc/textproc/Makefile    Sat Mar 20 15:46:23 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.1203 2021/03/19 22:18:13 markd Exp $
+# $NetBSD: Makefile,v 1.1204 2021/03/20 15:46:23 scole Exp $
 #
 
 COMMENT=       Text processing utilities (does not include desktop publishing)
@@ -1116,6 +1116,7 @@ SUBDIR+=  soprano
 SUBDIR+=       sord
 SUBDIR+=       source-highlight
 SUBDIR+=       sphinxsearch
+SUBDIR+=       split-thai
 SUBDIR+=       stardic
 SUBDIR+=       stava
 SUBDIR+=       sub2srt

Added files:

Index: pkgsrc/textproc/split-thai/DESCR
diff -u /dev/null pkgsrc/textproc/split-thai/DESCR:1.6
--- /dev/null   Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/DESCR    Sat Mar 20 15:46:23 2021
@@ -0,0 +1,8 @@
+A collection of utilities to split Thai Unicode UTF-8 text by word
+boundaries, also known as word tokenization or word breaking.  The
+utilities use Emacs, swath, Perl, and a C++ icu-project program.  All
+use dictionary-based word splitting.
+
+Also included are a merged dictionary file of Thai words, a Perl script
+to grep Thai UTF-8 words, and an Emacs library that can split and play
+audio for Thai words.

Index: pkgsrc/textproc/split-thai/Makefile
diff -u /dev/null pkgsrc/textproc/split-thai/Makefile:1.16
--- /dev/null   Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/Makefile Sat Mar 20 15:46:23 2021
@@ -0,0 +1,88 @@
+# $NetBSD: Makefile,v 1.16 2021/03/20 15:46:23 scole Exp $
+
+ST_VERSION=    2.0
+PKGNAME=       split-thai-${ST_VERSION}
+PKGREVISION=   1
+CATEGORIES=    textproc
+
+MAINTAINER=    scole@NetBSD.org
+HOMEPAGE=      https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
+COMMENT=       Utilities and an emacs library to split UTF-8 Thai text into words
+# License components (presumably in LICENSE order): pthai.el, other code, icu dict, swath dict
+LICENSE=       2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
+
+GITHUB_ICU_TAG=        61607c27732906d36c5bd4d23ecc092f89f53a2b
+DISTFILES=     split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt
+# A leading "-" in SITES.* marks the value as the complete URL of the file to fetch.
+SITES.split-thai-${ST_VERSION}.tgz=    ${MASTER_SITE_LOCAL}
+SITES.thaidict-${GITHUB_ICU_TAG}.txt=  -${MASTER_SITE_GITHUB:=unicode-org}/icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
+
+USE_LANGUAGES= c++11   # darwin needed 11?
+
+USE_TOOLS=     pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
+USE_TOOLS+=    perl:run
+BUILD_DEPENDS+=        libdatrie-[0-9]*:../../devel/libdatrie  # presumably supplies trietool, run in pre-build
+DEPENDS+=      emacs-[0-9]*:../../editors/emacs        # also run in batch mode by pre-build
+DEPENDS+=      swath-[0-9]*:../../textproc/swath       # pre-build reads swathdic under ${PREFIX}/share/swath
+DEPENDS+=      mpg123-[0-9]*:../../audio/mpg123        # presumably the audio player used by pthai.el
+
+REPLACE_PERL=  st-wordbreak tgrep
+REPLACE_SH=    st-swath
+
+UTF8_ENV=      env LC_ALL=C.UTF-8      # command prefix for the emacs/trietool/cat steps in pre-build
+
+ST_SHARE_DIR=          share/split-thai
+ST_SHARE_BIN=          bin
+INSTALLATION_DIRS=     ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+
+ST_SHARE_FILES=                README.txt pthai.el sampledict.txt words words.tri      # data files copied by do-install
+
+# XXX: a REPLACE_EMACS_SCRIPT-style interpreter rewrite would presumably be nicer; done by hand below
+SUBST_CLASSES+=                        st-emacs-app    # point the "!/bin/emacs" interpreter line at ${PREFIX}
+SUBST_STAGE.st-emacs-app=      pre-configure
+SUBST_MESSAGE.st-emacs-app=    Fixing emacs script paths.
+SUBST_FILES.st-emacs-app=      st-emacs
+SUBST_SED.st-emacs-app=                -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+
+SUBST_CLASSES+=                        dictionary-app  # expand ST_SHARE_DIR / ST_SHARE_BIN placeholders
+SUBST_STAGE.dictionary-app=    post-extract
+SUBST_MESSAGE.dictionary-app=  Fixing dictionary paths.
+SUBST_FILES.dictionary-app=    st-emacs st-swath st-wordbreak pthai.el
+SUBST_SED.dictionary-app=      -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
+SUBST_SED.dictionary-app+=     -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
+
+pre-build:     # merge the pthai.el, ICU and swath word lists into "words", add them to the trie, print counts
+       cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
+               --eval='(setq pthai-bootstrap t)' \
+               --eval='(load-file "pthai.el")' \
+               --eval='(pthai-twt-table-save "thai-word-dict")'
+       cp ${WRKDIR}/thaidict-${GITHUB_ICU_TAG}.txt ${WRKSRC}/icu-dict
+       cd ${PREFIX}/share/swath && \
+               ${UTF8_ENV} trietool swathdic list | \
+               awk '{print $$1}' > ${WRKSRC}/swath-dict
+       cd ${WRKSRC} && \
+               ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
+                       grep -v '#' | sort | uniq > words
+       cd ${WRKSRC} && \
+               ${UTF8_ENV} trietool words add-list -e utf-8 words
+.for i in thai-word-dict icu-dict swath-dict
+       @${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
+.endfor
+       @${ECHO} `wc -l ${WRKSRC}/words | awk '{print $$1}'` \
+               unique words in combined dictionary
+
+do-build:      # compile and link st-icu in one step; ICU flags come from pkg-config
+       cd ${WRKSRC} && \
+               ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} -o st-icu st-icu.cc \
+               `pkg-config --libs --cflags icu-io`
+
+do-install:    # scripts and st-icu go to ST_SHARE_BIN, data files to ST_SHARE_DIR (see INSTALLATION_DIRS)
+       ${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath ${WRKSRC}/st-wordbreak \
+               ${WRKSRC}/tgrep ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+       ${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+.for i in ${ST_SHARE_FILES}
+       ${INSTALL_DATA} ${WRKSRC}/${i} ${DESTDIR}${PREFIX}/${ST_SHARE_DIR}
+.endfor
+
+.include "../../textproc/icu/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"

Index: pkgsrc/textproc/split-thai/PLIST
diff -u /dev/null pkgsrc/textproc/split-thai/PLIST:1.7
--- /dev/null   Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/PLIST    Sat Mar 20 15:46:23 2021
@@ -0,0 +1,11 @@
+@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
+bin/st-emacs
+bin/st-icu
+bin/st-swath
+bin/st-wordbreak
+bin/tgrep
+share/split-thai/README.txt
+share/split-thai/pthai.el
+share/split-thai/sampledict.txt
+share/split-thai/words
+share/split-thai/words.tri

Index: pkgsrc/textproc/split-thai/distinfo
diff -u /dev/null pkgsrc/textproc/split-thai/distinfo:1.3
--- /dev/null   Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/distinfo Sat Mar 20 15:46:23 2021
@@ -0,0 +1,10 @@
+$NetBSD: distinfo,v 1.3 2021/03/20 15:46:23 scole Exp $
+
+SHA1 (split-thai-2.0.tgz) = 80eb473e2038d889f12fd684388f88017fdec2d2
+RMD160 (split-thai-2.0.tgz) = 2280da7813940dc9eee1a2680425ba7366901566
+SHA512 (split-thai-2.0.tgz) = a9e0a101718857b8b3817918ff3cb4e9e0b6436a0df5cf0d2871ed5afd94f635cf07b1dba624c75aea1ce98da3a4e4403077f518683b92800aa383d71aa53829
+Size (split-thai-2.0.tgz) = 28541 bytes
+SHA1 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 2a2ad127cc279835cb4df04eb69401a0d4927774
+RMD160 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 0a6df7b7dd6ef502c5dd20020e37b2ca1a5514a2
+SHA512 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 88800fe2a453fc40f16ff54c21c852a8ea8e1496e42d5d187e5b5ac0ff58050830fc0816239e4f88cb23ed301f894d1ca52eb4676fd85c13c285cec815ae7c42
+Size (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 493044 bytes



Home | Main Index | Thread Index | Old Index