pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc
Module Name: pkgsrc
Committed By: scole
Date: Sat Mar 20 15:46:23 UTC 2021
Modified Files:
pkgsrc/textproc: Makefile
Added Files:
pkgsrc/textproc/split-thai: DESCR Makefile PLIST distinfo
Log Message:
Restore split-thai 2.0 package after moving source from pkgsrc to MASTER_SITE_LOCAL
To generate a diff of this commit:
cvs rdiff -u -r1.1203 -r1.1204 pkgsrc/textproc/Makefile
cvs rdiff -u -r0 -r1.6 pkgsrc/textproc/split-thai/DESCR
cvs rdiff -u -r0 -r1.16 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r0 -r1.7 pkgsrc/textproc/split-thai/PLIST
cvs rdiff -u -r0 -r1.3 pkgsrc/textproc/split-thai/distinfo
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/Makefile
diff -u pkgsrc/textproc/Makefile:1.1203 pkgsrc/textproc/Makefile:1.1204
--- pkgsrc/textproc/Makefile:1.1203 Fri Mar 19 22:18:13 2021
+++ pkgsrc/textproc/Makefile Sat Mar 20 15:46:23 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.1203 2021/03/19 22:18:13 markd Exp $
+# $NetBSD: Makefile,v 1.1204 2021/03/20 15:46:23 scole Exp $
#
COMMENT= Text processing utilities (does not include desktop publishing)
@@ -1116,6 +1116,7 @@ SUBDIR+= soprano
SUBDIR+= sord
SUBDIR+= source-highlight
SUBDIR+= sphinxsearch
+SUBDIR+= split-thai
SUBDIR+= stardic
SUBDIR+= stava
SUBDIR+= sub2srt
Added files:
Index: pkgsrc/textproc/split-thai/DESCR
diff -u /dev/null pkgsrc/textproc/split-thai/DESCR:1.6
--- /dev/null Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/DESCR Sat Mar 20 15:46:23 2021
@@ -0,0 +1,8 @@
+A collection of utilities to split Thai Unicode UTF-8 text by word
+boundaries, also known as word tokenization or word breaking. The
+utilities use Emacs, swath, Perl, and a C++ program based on ICU. All
+use dictionary-based word splitting.
+
+Also included are a merged dictionary file of Thai words, a Perl script
+to grep Thai UTF-8 words, and an Emacs library that can split Thai text
+into words and play audio for Thai words.
Index: pkgsrc/textproc/split-thai/Makefile
diff -u /dev/null pkgsrc/textproc/split-thai/Makefile:1.16
--- /dev/null Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/Makefile Sat Mar 20 15:46:23 2021
@@ -0,0 +1,88 @@
+# $NetBSD: Makefile,v 1.16 2021/03/20 15:46:23 scole Exp $
+
+ST_VERSION= 2.0
+PKGNAME= split-thai-${ST_VERSION}
+PKGREVISION= 1
+CATEGORIES= textproc
+
+MAINTAINER= scole@NetBSD.org
+HOMEPAGE= https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
+COMMENT= Utilities and an emacs library to split UTF-8 Thai text into words
+# Licenses, presumably listed in this order: pthai.el, other code, ICU dictionary, swath dictionary
+LICENSE= 2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
+
+# Revision of the unicode-org/icu repository that thaidict.txt is fetched from.
+GITHUB_ICU_TAG= 61607c27732906d36c5bd4d23ecc092f89f53a2b
+DISTFILES= split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt
+
+SITES.split-thai-${ST_VERSION}.tgz= ${MASTER_SITE_LOCAL}
+# The leading "-" makes pkgsrc fetch this exact URL and save it under the
+# distfile name, instead of appending the distfile name to the site URL.
+# ${MASTER_SITE_GITHUB:=unicode-org/} already ends in "/", so no extra slash
+# is added before "icu".
+SITES.thaidict-${GITHUB_ICU_TAG}.txt= -${MASTER_SITE_GITHUB:=unicode-org/}icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
+
+USE_LANGUAGES= c++11 # darwin needed 11?
+
+USE_TOOLS= pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
+USE_TOOLS+= perl:run
+# libdatrie is only needed while building (presumably for trietool, which
+# pre-build runs), so it is a tool dependency rather than a build dependency.
+TOOL_DEPENDS+= libdatrie-[0-9]*:../../devel/libdatrie
+# emacs and the installed swath dictionary are also used by pre-build.
+DEPENDS+= emacs-[0-9]*:../../editors/emacs
+DEPENDS+= swath-[0-9]*:../../textproc/swath
+DEPENDS+= mpg123-[0-9]*:../../audio/mpg123
+
+# Scripts handed to pkgsrc's perl and sh interpreter-path replacement.
+REPLACE_PERL= st-wordbreak tgrep
+REPLACE_SH= st-swath
+
+# Command prefix that runs a program with LC_ALL set to C.UTF-8 (used in pre-build).
+UTF8_ENV= env LC_ALL=C.UTF-8
+
+# Installation directories, relative to ${PREFIX}.
+ST_SHARE_DIR= share/split-thai
+ST_SHARE_BIN= bin
+INSTALLATION_DIRS= ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+
+# Data files that do-install copies from ${WRKSRC}.
+ST_SHARE_FILES= README.txt pthai.el sampledict.txt words words.tri
+
+# XXX There seems to be no REPLACE_EMACS_SCRIPT counterpart to REPLACE_PERL,
+# so the "#!/bin/emacs" interpreter path in st-emacs is rewritten with SUBST.
+SUBST_CLASSES+= st-emacs-app
+SUBST_STAGE.st-emacs-app= pre-configure
+SUBST_MESSAGE.st-emacs-app= Fixing emacs script paths.
+SUBST_FILES.st-emacs-app= st-emacs
+SUBST_SED.st-emacs-app= -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+
+# Replace the literal ST_SHARE_DIR and ST_SHARE_BIN placeholders in the scripts
+# and in pthai.el with the absolute installed paths.
+SUBST_CLASSES+= dictionary-app
+SUBST_STAGE.dictionary-app= post-extract
+SUBST_MESSAGE.dictionary-app= Fixing dictionary paths.
+SUBST_FILES.dictionary-app= st-emacs st-swath st-wordbreak pthai.el
+SUBST_SED.dictionary-app= -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
+SUBST_SED.dictionary-app+= -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
+
+# Build the combined word list "words" from three sources (thai-word-dict
+# written by emacs, the ICU dictionary distfile, and the installed swath
+# dictionary), pass it to trietool, and print a word count per source.
+# NOTE(review): ${UTF8_ENV} only applies to the first command of each
+# pipeline (cat, trietool); grep, sort, uniq and awk run in the build locale.
+pre-build:
+# Load pthai.el in batch mode and have it save "thai-word-dict"
+# (presumably emacs's own Thai word table -- confirm against pthai.el).
+ cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
+ --eval='(setq pthai-bootstrap t)' \
+ --eval='(load-file "pthai.el")' \
+ --eval='(pthai-twt-table-save "thai-word-dict")'
+# The ICU dictionary distfile is a plain text file; copy it next to the others.
+ cp ${WRKDIR}/thaidict-${GITHUB_ICU_TAG}.txt ${WRKSRC}/icu-dict
+# List the installed swath dictionary and keep only the first field of each line.
+ cd ${PREFIX}/share/swath && \
+ ${UTF8_ENV} trietool swathdic list | \
+ awk '{print $$1}' > ${WRKSRC}/swath-dict
+# Merge the three lists, drop every line containing "#", and de-duplicate.
+ cd ${WRKSRC} && \
+ ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
+ grep -v '#' | sort | uniq > words
+# Add the merged list to a trie named "words" (presumably producing the
+# words.tri file that is installed later -- confirm).
+ cd ${WRKSRC} && \
+ ${UTF8_ENV} trietool words add-list -e utf-8 words
+# Informational only: report the line count of each source and of the result.
+.for i in thai-word-dict icu-dict swath-dict
+ @${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
+.endfor
+ @${ECHO} `wc -l ${WRKSRC}/words | awk '{print $$1}'` \
+ unique words in combined dictionary
+
+# Compile the ICU-based splitter st-icu from st-icu.cc. CXXFLAGS and LDFLAGS
+# are passed along with CPPFLAGS so that compiler and linker settings from
+# pkgsrc and the user are honored; ICU flags come from pkg-config.
+do-build:
+	cd ${WRKSRC} && \
+		${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} -o st-icu st-icu.cc \
+		`pkg-config --libs --cflags icu-io`
+
+# Install the scripts and the st-icu binary into ${ST_SHARE_BIN} and the data
+# files into ${ST_SHARE_DIR}. The same variables feed INSTALLATION_DIRS and
+# the SUBST path rewriting, so the install locations stay in sync with them.
+do-install:
+	${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath \
+		${WRKSRC}/st-wordbreak ${WRKSRC}/tgrep \
+		${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+	${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+.for i in ${ST_SHARE_FILES}
+	${INSTALL_DATA} ${WRKSRC}/${i} ${DESTDIR}${PREFIX}/${ST_SHARE_DIR}
+.endfor
+
+.include "../../textproc/icu/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
Index: pkgsrc/textproc/split-thai/PLIST
diff -u /dev/null pkgsrc/textproc/split-thai/PLIST:1.7
--- /dev/null Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/PLIST Sat Mar 20 15:46:23 2021
@@ -0,0 +1,11 @@
+@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
+bin/st-emacs
+bin/st-icu
+bin/st-swath
+bin/st-wordbreak
+bin/tgrep
+share/split-thai/README.txt
+share/split-thai/pthai.el
+share/split-thai/sampledict.txt
+share/split-thai/words
+share/split-thai/words.tri
Index: pkgsrc/textproc/split-thai/distinfo
diff -u /dev/null pkgsrc/textproc/split-thai/distinfo:1.3
--- /dev/null Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/distinfo Sat Mar 20 15:46:23 2021
@@ -0,0 +1,10 @@
+$NetBSD: distinfo,v 1.3 2021/03/20 15:46:23 scole Exp $
+
+SHA1 (split-thai-2.0.tgz) = 80eb473e2038d889f12fd684388f88017fdec2d2
+RMD160 (split-thai-2.0.tgz) = 2280da7813940dc9eee1a2680425ba7366901566
+SHA512 (split-thai-2.0.tgz) = a9e0a101718857b8b3817918ff3cb4e9e0b6436a0df5cf0d2871ed5afd94f635cf07b1dba624c75aea1ce98da3a4e4403077f518683b92800aa383d71aa53829
+Size (split-thai-2.0.tgz) = 28541 bytes
+SHA1 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 2a2ad127cc279835cb4df04eb69401a0d4927774
+RMD160 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 0a6df7b7dd6ef502c5dd20020e37b2ca1a5514a2
+SHA512 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 88800fe2a453fc40f16ff54c21c852a8ea8e1496e42d5d187e5b5ac0ff58050830fc0816239e4f88cb23ed301f894d1ca52eb4676fd85c13c285cec815ae7c42
+Size (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 493044 bytes
Home |
Main Index |
Thread Index |
Old Index