pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/textproc/split-thai



Module Name:    pkgsrc
Committed By:   scole
Date:           Wed Jul 17 17:32:16 UTC 2024

Modified Files:
        pkgsrc/textproc/split-thai: DESCR Makefile PLIST distinfo

Log Message:
Update to version 2.29
- add 'pthai-copy-word-at-point and 'pthai-copy-string-at-point
- remove all external-program word splitting functionality associated
  with icu and swath
- for word splitting, use only internal elisp functions
- remove 'pthai-twt-lock and 'pthai-twt-splitter-enable.  This results in
  the 'thai-word-table always being modified when adding or removing words
- rename pthai-splitter-max-recurse-word-length to pthai-rwb-max-word-length
- remove pthai-temperature-* functions


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 pkgsrc/textproc/split-thai/DESCR \
    pkgsrc/textproc/split-thai/PLIST
cvs rdiff -u -r1.56 -r1.57 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.33 -r1.34 pkgsrc/textproc/split-thai/distinfo

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/textproc/split-thai/DESCR
diff -u pkgsrc/textproc/split-thai/DESCR:1.7 pkgsrc/textproc/split-thai/DESCR:1.8
--- pkgsrc/textproc/split-thai/DESCR:1.7        Sun Oct  3 20:40:08 2021
+++ pkgsrc/textproc/split-thai/DESCR    Wed Jul 17 17:32:15 2024
@@ -1,8 +1,6 @@
-A collection of utilities to split Thai Unicode UTF-8 text by word
-boundaries, also known as word tokenization or word breaking.  The
-utilities use emacs, swath, perl, and a c++ icu-project program.  All
-use dictionary-based word splitting.
+An emacs library for handling Thai Unicode UTF-8 words.  It can split,
+unsplit, spellcheck, download and play audio for Thai text, practice
+vocabulary, and more.
 
-Also included is a merged dictionary file of Thai words, a perl script
-to grep Thai UTF-8 words, and an emacs library that can split,
-unsplit, spellcheck, and play audio for Thai words.
+Also included is a merged dictionary file of Thai words gathered from
+the libreoffice, icu, and swath websites.
Index: pkgsrc/textproc/split-thai/PLIST
diff -u pkgsrc/textproc/split-thai/PLIST:1.7 pkgsrc/textproc/split-thai/PLIST:1.8
--- pkgsrc/textproc/split-thai/PLIST:1.7        Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/PLIST    Wed Jul 17 17:32:16 2024
@@ -1,11 +1,5 @@
-@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
-bin/st-emacs
-bin/st-icu
-bin/st-swath
-bin/st-wordbreak
-bin/tgrep
+@comment $NetBSD: PLIST,v 1.8 2024/07/17 17:32:16 scole Exp $
 share/split-thai/README.txt
 share/split-thai/pthai.el
 share/split-thai/sampledict.txt
 share/split-thai/words
-share/split-thai/words.tri

Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.56 pkgsrc/textproc/split-thai/Makefile:1.57
--- pkgsrc/textproc/split-thai/Makefile:1.56    Mon Jun  3 13:44:14 2024
+++ pkgsrc/textproc/split-thai/Makefile Wed Jul 17 17:32:16 2024
@@ -1,15 +1,14 @@
-# $NetBSD: Makefile,v 1.56 2024/06/03 13:44:14 ryoon Exp $
+# $NetBSD: Makefile,v 1.57 2024/07/17 17:32:16 scole Exp $
 
-ST_VERSION=    2.28
+ST_VERSION=    2.29
 PKGNAME=       split-thai-${ST_VERSION}
-PKGREVISION=   1
 CATEGORIES=    textproc
 
 MAINTAINER=    scole%NetBSD.org@localhost
 HOMEPAGE=      https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
-COMMENT=       Utilities and an emacs library to split UTF-8 Thai text into words
-# pthai.el, other code, icu dict, swath dict, libreoffice dict
-LICENSE=       2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2 AND gnu-lgpl-v3
+COMMENT=       Emacs library to split UTF-8 Thai text into words and more
+# pthai.el, icu dict, swath dict, libreoffice dict
+LICENSE=       2-clause-bsd AND mit AND gnu-gpl-v2 AND gnu-lgpl-v3
 
 GITHUB_ICU_TAG=                929cf40ecbf464bb133113995185c7353f2e106d
 LIBREOFFICE_VERSION=   7-6-4
@@ -20,40 +19,24 @@ SITES.split-thai-${ST_VERSION}.tgz= ${MA
 SITES.thaidict-${GITHUB_ICU_TAG}.txt=  -${MASTER_SITE_GITHUB:=unicode-org/}/icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
 SITES.th_TH-${LIBREOFFICE_VERSION}.dic=        -${LIBREOFFICE_SITE}
 
-USE_LANGUAGES=         c++
-FORCE_CXX_STD=         c++17
-
-USE_TOOLS=     pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
-USE_TOOLS+=    perl:run
+USE_TOOLS=     mkdir cp env awk cat sort uniq grep wc echo
 TOOL_DEPENDS+= libdatrie-[0-9]*:../../devel/libdatrie
+TOOL_DEPENDS+= swath-[0-9]*:../../textproc/swath
 DEPENDS+=      emacs-[0-9]*:../../editors/emacs
-DEPENDS+=      swath-[0-9]*:../../textproc/swath
 DEPENDS+=      sox-[0-9]*:../../audio/sox
 
-REPLACE_PERL=  st-wordbreak tgrep
-REPLACE_SH=    st-swath
-
 UTF8_ENV=      env LC_ALL=C.UTF-8
 
 ST_SHARE_DIR=          share/split-thai
-ST_SHARE_BIN=          bin
-INSTALLATION_DIRS=     ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+INSTALLATION_DIRS=     ${ST_SHARE_DIR}
 
-ST_SHARE_FILES=                README.txt pthai.el sampledict.txt words words.tri
-
-# xxx REPLACE_EMACS_SCRIPT
-SUBST_CLASSES+=                        st-emacs-app
-SUBST_STAGE.st-emacs-app=      pre-configure
-SUBST_MESSAGE.st-emacs-app=    Fixing emacs script paths.
-SUBST_FILES.st-emacs-app=      st-emacs
-SUBST_SED.st-emacs-app=                -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+ST_SHARE_FILES=                README.txt pthai.el sampledict.txt words
 
 SUBST_CLASSES+=                        dictionary-app
 SUBST_STAGE.dictionary-app=    pre-configure
 SUBST_MESSAGE.dictionary-app=  Fixing dictionary paths.
-SUBST_FILES.dictionary-app=    st-emacs st-swath st-wordbreak pthai.el
+SUBST_FILES.dictionary-app=    README.txt pthai.el
 SUBST_SED.dictionary-app=      -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
-SUBST_SED.dictionary-app+=     -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
 
 # there are some dictionary entries with '/' and '"' that emacs
 # puthash doesn't like.  skip them with grep so consoles without utf8
@@ -71,8 +54,6 @@ pre-build:
        cd ${WRKSRC} && \
                ${UTF8_ENV} cat icu-dict swath-dict th_TH.dic thai-word-dict | \
                        grep -v '[#0123456789/"]' | sort | uniq > words
-       cd ${WRKSRC} && \
-               ${UTF8_ENV} trietool words add-list -e utf-8 words
 .for i in icu-dict thai-word-dict th_TH.dic swath-dict
        @${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
 .endfor
@@ -80,17 +61,10 @@ pre-build:
                unique words in combined dictionary
 
 do-build:
-       cd ${WRKSRC} && \
-               ${CXX} ${CPPFLAGS} -o st-icu st-icu.cc \
-               `pkg-config --libs --cflags icu-io`
 
 do-install:
-       ${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath \
-               ${WRKSRC}/st-wordbreak ${WRKSRC}/tgrep ${DESTDIR}${PREFIX}/bin
-       ${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/bin
 .for i in ${ST_SHARE_FILES}
        ${INSTALL_DATA} ${WRKSRC}/${i} ${DESTDIR}${PREFIX}/share/split-thai
 .endfor
 
-.include "../../textproc/icu/buildlink3.mk"
 .include "../../mk/bsd.pkg.mk"

Index: pkgsrc/textproc/split-thai/distinfo
diff -u pkgsrc/textproc/split-thai/distinfo:1.33 pkgsrc/textproc/split-thai/distinfo:1.34
--- pkgsrc/textproc/split-thai/distinfo:1.33    Sun Jan  7 03:28:09 2024
+++ pkgsrc/textproc/split-thai/distinfo Wed Jul 17 17:32:16 2024
@@ -1,8 +1,8 @@
-$NetBSD: distinfo,v 1.33 2024/01/07 03:28:09 scole Exp $
+$NetBSD: distinfo,v 1.34 2024/07/17 17:32:16 scole Exp $
 
-BLAKE2s (split-thai-2.28.tgz) = f8926c4225774cb67180dc4aa09089880d8671c35399a0ed79191fdd65462768
-SHA512 (split-thai-2.28.tgz) = 2872f3dffba2e592493241aae2f6153c6566217d7c3fb070ade7885db176c772d35221b78222df5a0a264a2056205f82e39c91150b06c47ac87d30bf9a89b40b
-Size (split-thai-2.28.tgz) = 41273 bytes
+BLAKE2s (split-thai-2.29.tgz) = 37ed06703ca9fdcd7cda20017d82661075a4f8624772fa7257508292f22493ce
+SHA512 (split-thai-2.29.tgz) = aeb6ec0e6210adf7b509f357856d5571b853a1e1cc5dcdc73c87c50182762028a345844ec301513c86bb600cf40bdf6f2f2face7fe340ecf667f0ec6befc793a
+Size (split-thai-2.29.tgz) = 31649 bytes
 BLAKE2s (th_TH-7-6-4.dic) = 976e7d64726682c474f8ae202e5aa94e495a862712e1f6351332d1ff324ef99d
 SHA512 (th_TH-7-6-4.dic) = e2280e4a15fea69deaa20868909010342341ad8622337291492e2a4faf84fabc3441f5d34f47ace1d74cf41a21e0b983848ec686f1e4b5adea920f601f2e634b
 Size (th_TH-7-6-4.dic) = 1251425 bytes



Home | Main Index | Thread Index | Old Index