pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc/split-thai
Module Name: pkgsrc
Committed By: scole
Date: Thu Mar 18 17:53:42 UTC 2021
Modified Files:
pkgsrc/textproc/split-thai: DESCR Makefile PLIST
pkgsrc/textproc/split-thai/files: README.txt st-emacs st-swath
st-wordbreak
Added Files:
pkgsrc/textproc/split-thai/files: pthai.el sampledict.txt words.abm
Removed Files:
pkgsrc/textproc/split-thai/files: thai-utility.el thaidict.abm
Log Message:
Update to version 2.0
- add new emacs module pthai.el, merging all emacs lisp code into it
- rename files thaidict* to words*
- modify other scripts to use new file names
To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 pkgsrc/textproc/split-thai/DESCR
cvs rdiff -u -r1.13 -r1.14 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/PLIST
cvs rdiff -u -r1.5 -r1.6 pkgsrc/textproc/split-thai/files/README.txt
cvs rdiff -u -r0 -r1.1 pkgsrc/textproc/split-thai/files/pthai.el \
pkgsrc/textproc/split-thai/files/sampledict.txt \
pkgsrc/textproc/split-thai/files/words.abm
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/files/st-emacs
cvs rdiff -u -r1.2 -r1.3 pkgsrc/textproc/split-thai/files/st-swath
cvs rdiff -u -r1.1 -r1.2 pkgsrc/textproc/split-thai/files/st-wordbreak
cvs rdiff -u -r1.8 -r0 pkgsrc/textproc/split-thai/files/thai-utility.el
cvs rdiff -u -r1.1 -r0 pkgsrc/textproc/split-thai/files/thaidict.abm
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/split-thai/DESCR
diff -u pkgsrc/textproc/split-thai/DESCR:1.3 pkgsrc/textproc/split-thai/DESCR:1.4
--- pkgsrc/textproc/split-thai/DESCR:1.3 Sat Sep 5 18:02:36 2020
+++ pkgsrc/textproc/split-thai/DESCR Thu Mar 18 17:53:41 2021
@@ -3,5 +3,6 @@ boundaries, also known as word tokenizat
utilities use emacs, swath, perl, and a c++ icu-project program. All
use dictionary-based word splitting.
-Also included is a merged dictionary file of Thai words and a perl
-script to grep Thai UTF-8 words.
+Also included is a merged dictionary file of Thai words, a perl script
+to grep Thai UTF-8 words, and an emacs library that can split and play
+audio for Thai words.
Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.13 pkgsrc/textproc/split-thai/Makefile:1.14
--- pkgsrc/textproc/split-thai/Makefile:1.13 Thu Nov 5 09:09:15 2020
+++ pkgsrc/textproc/split-thai/Makefile Thu Mar 18 17:53:41 2021
@@ -1,11 +1,11 @@
-# $NetBSD: Makefile,v 1.13 2020/11/05 09:09:15 ryoon Exp $
+# $NetBSD: Makefile,v 1.14 2021/03/18 17:53:41 scole Exp $
-PKGNAME= split-thai-1.1
-PKGREVISION= 1
+PKGNAME= split-thai-2.0
CATEGORIES= textproc
-MAINTAINER= pkgsrc-users%NetBSD.org@localhost
-COMMENT= Utilities to split UTF-8 Thai text into words
-LICENSE= public-domain AND mit AND gnu-gpl-v2 # code, icu dict, swath dict
+MAINTAINER= scole%NetBSD.org@localhost
+COMMENT= Utilities and an emacs library to split UTF-8 Thai text into words
+# pthai.el, other code, icu dict, swath dict
+LICENSE= 2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
# xxx fetching a specific version of a file out of a github project
EXTRACT_SUFX= # none
@@ -20,6 +20,7 @@ USE_TOOLS+= perl:run
BUILD_DEPENDS+= libdatrie-[0-9]*:../../devel/libdatrie
DEPENDS+= emacs-[0-9]*:../../editors/emacs
DEPENDS+= swath-[0-9]*:../../textproc/swath
+DEPENDS+= mpg123-[0-9]*:../../audio/mpg123
REPLACE_PERL= st-wordbreak tgrep
REPLACE_SH= st-swath
@@ -30,8 +31,7 @@ ST_SHARE_DIR= share/split-thai
ST_SHARE_BIN= bin
INSTALLATION_DIRS= ${ST_SHARE_BIN} ${ST_SHARE_DIR}
-ST_SHARE_FILES= README.txt thaidict thai-dict.el thai-dict.elc
-ST_SHARE_FILES+= thai-utility.el thai-utility.elc thaidict.tri
+ST_SHARE_FILES= README.txt pthai.el sampledict.txt words words.tri
# xxx REPLACE_EMACS_SCRIPT
SUBST_CLASSES+= st-emacs-app
@@ -41,39 +41,36 @@ SUBST_FILES.st-emacs-app= st-emacs
SUBST_SED.st-emacs-app= -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
SUBST_CLASSES+= dictionary-app
-SUBST_STAGE.dictionary-app= pre-configure
+SUBST_STAGE.dictionary-app= post-extract
SUBST_MESSAGE.dictionary-app= Fixing dictionary paths.
-SUBST_FILES.dictionary-app= st-emacs st-swath st-wordbreak
+SUBST_FILES.dictionary-app= st-emacs st-swath st-wordbreak pthai.el
SUBST_SED.dictionary-app= -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
SUBST_SED.dictionary-app+= -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
pre-extract:
mkdir -p ${WRKSRC}
- cd files && cp README.txt st-emacs st-icu.cc st-swath \
- st-wordbreak tgrep thai-utility.el thaidict.abm ${WRKSRC}
+ cd files && cp README.txt pthai.el sampledict.txt \
+ st-emacs st-icu.cc st-swath st-wordbreak tgrep \
+ words.abm ${WRKSRC}
-post-extract:
+pre-build:
cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
- -f batch-byte-compile thai-utility.el
- cd ${WRKSRC} && ${UTF8_ENV} emacs --batch -l thai-utility.elc \
- --eval '(thai-word-table-save "emacs-dict")'
+ --eval='(setq pthai-bootstrap t)' \
+ --eval='(load-file "pthai.el")' \
+ --eval='(pthai-twt-table-save "thai-word-dict")'
cp ${WRKDIR}/${DISTFILES} ${WRKSRC}/icu-dict
cd ${PREFIX}/share/swath && \
${UTF8_ENV} trietool swathdic list | \
awk '{print $$1}' > ${WRKSRC}/swath-dict
cd ${WRKSRC} && \
- ${UTF8_ENV} cat icu-dict swath-dict emacs-dict | \
- grep -v '#' | sort | uniq > thaidict
+ ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
+ grep -v '#' | sort | uniq > words
cd ${WRKSRC} && \
- ${UTF8_ENV} trietool thaidict add-list -e utf-8 thaidict
- cd ${WRKSRC} && ${UTF8_ENV} emacs --batch -l thai-utility.elc \
- --eval '(thai-word-table-save-defvar "thaidict" "thai-dict.el")'
- cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
- -f batch-byte-compile thai-dict.el
-.for i in emacs-dict icu-dict swath-dict
+ ${UTF8_ENV} trietool words add-list -e utf-8 words
+.for i in thai-word-dict icu-dict swath-dict
@${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
.endfor
- @${ECHO} `wc -l ${WRKSRC}/thaidict | awk '{print $$1}'` \
+ @${ECHO} `wc -l ${WRKSRC}/words | awk '{print $$1}'` \
unique words in combined dictionary
do-build:
Index: pkgsrc/textproc/split-thai/PLIST
diff -u pkgsrc/textproc/split-thai/PLIST:1.4 pkgsrc/textproc/split-thai/PLIST:1.5
--- pkgsrc/textproc/split-thai/PLIST:1.4 Sat Sep 5 18:02:36 2020
+++ pkgsrc/textproc/split-thai/PLIST Thu Mar 18 17:53:41 2021
@@ -1,13 +1,11 @@
-@comment $NetBSD: PLIST,v 1.4 2020/09/05 18:02:36 scole Exp $
+@comment $NetBSD: PLIST,v 1.5 2021/03/18 17:53:41 scole Exp $
bin/st-emacs
bin/st-icu
bin/st-swath
bin/st-wordbreak
bin/tgrep
share/split-thai/README.txt
-share/split-thai/thai-dict.el
-share/split-thai/thai-dict.elc
-share/split-thai/thai-utility.el
-share/split-thai/thai-utility.elc
-share/split-thai/thaidict
-share/split-thai/thaidict.tri
+share/split-thai/pthai.el
+share/split-thai/sampledict.txt
+share/split-thai/words
+share/split-thai/words.tri
Index: pkgsrc/textproc/split-thai/files/README.txt
diff -u pkgsrc/textproc/split-thai/files/README.txt:1.5 pkgsrc/textproc/split-thai/files/README.txt:1.6
--- pkgsrc/textproc/split-thai/files/README.txt:1.5 Sat Sep 5 18:02:36 2020
+++ pkgsrc/textproc/split-thai/files/README.txt Thu Mar 18 17:53:42 2021
@@ -4,6 +4,7 @@ NAME
st-swath
st-wordbreak
tgrep
+ pthai.el
SYNOPSIS
st-emacs|st-icu|st-swath|st-wordbreak [filename|text1 text2 ...|'blank']
@@ -25,6 +26,9 @@ DESCRIPTION
tgrep: grep-like utility using perl, see "tgrep -h"
+ pthai.el: emacs library for handling thai text in an emacs buffer,
+ including word splitting
+
EXAMPLES
split one or more text strings:
# st-swath แมวและหมา
@@ -74,8 +78,13 @@ NOTES
icu4c/source/data/brkitr/dictionaries/thaidict.txt and then
rebuild the whole library.
+ Also included in this package is an emacs library called "pthai"
+ (practice-thai). It can do word splitting, play mp3 audio for
+ thai words and a few other things.
+
SEE ALSO
swath(1), libthai(1), emacs(1), locale(1), uconv(1), iconv(1)
+ trans(1) from pkgsrc/textproc/translate-shell
BUGS
st-icu should also use the combined dictionary words.
Index: pkgsrc/textproc/split-thai/files/st-emacs
diff -u pkgsrc/textproc/split-thai/files/st-emacs:1.4 pkgsrc/textproc/split-thai/files/st-emacs:1.5
--- pkgsrc/textproc/split-thai/files/st-emacs:1.4 Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/st-emacs Thu Mar 18 17:53:42 2021
@@ -1,24 +1,27 @@
#!/bin/emacs --script
;;
-;; break thai string into words separated by spaces
+;; break thai string into words separated by spaces, using only
+;; 'thai-break-words from 'thai-word library with merged wordlist
;;
;; - if no args, process stdin
;; - if one arg and file exists with arg name, process file
-;; - else join get remainder of args and process
+;; - else join remainder of args and process
;;
;;(toggle-debug-on-error) ;; debug
-;; load custom dictionary first, 'thai-word-table is defvar
-(load "ST_SHARE_DIR/thai-dict" nil t)
-(load "ST_SHARE_DIR/thai-utility" nil t)
+;; turn off loading messages
+(setq pthai-verbose-wordloads nil)
+
+;; load pthai library
+(load "ST_SHARE_DIR/pthai" nil t)
;; split a thai line by spaces, return new line
(defun process-thai-line(line)
(with-temp-buffer
(insert line)
(goto-char (point-min))
- (split-thai-line)
+ (thai-break-words " " (line-end-position))
(buffer-string)))
;; hack to process stdin
@@ -48,6 +51,6 @@
(insert (mapconcat 'identity (cdddr command-line-args) " "))
(insert "\n"))
(goto-char (point-min))
- (split-thai)
+ (thai-break-words " " (point-max))
(write-region nil nil "/dev/stdout"))
(kill-emacs 0)
Index: pkgsrc/textproc/split-thai/files/st-swath
diff -u pkgsrc/textproc/split-thai/files/st-swath:1.2 pkgsrc/textproc/split-thai/files/st-swath:1.3
--- pkgsrc/textproc/split-thai/files/st-swath:1.2 Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/st-swath Thu Mar 18 17:53:42 2021
@@ -10,7 +10,7 @@ swath_cmd=ST_SHARE_BIN/swath
# use merged dictionary unless specified otherwise
if [ -z "$SWATHDICT" ]; then
- dictarg="-d ST_SHARE_DIR/thaidict.tri"
+ dictarg="-d ST_SHARE_DIR/words.tri"
fi
if [ "$#" -eq 0 ]; then
Index: pkgsrc/textproc/split-thai/files/st-wordbreak
diff -u pkgsrc/textproc/split-thai/files/st-wordbreak:1.1 pkgsrc/textproc/split-thai/files/st-wordbreak:1.2
--- pkgsrc/textproc/split-thai/files/st-wordbreak:1.1 Sat Sep 5 18:02:36 2020
+++ pkgsrc/textproc/split-thai/files/st-wordbreak Thu Mar 18 17:53:42 2021
@@ -25,7 +25,7 @@ my @tmp_combos = ();
#
sub read_dictionary {
# default dictionary
- my $fname = "ST_SHARE_DIR/thaidict";
+ my $fname = "ST_SHARE_DIR/words";
$fname = $opt_d if defined $opt_d;
die "error reading dictionary $fname" unless -f $fname && -s $fname;
Added files:
Index: pkgsrc/textproc/split-thai/files/pthai.el
diff -u /dev/null pkgsrc/textproc/split-thai/files/pthai.el:1.1
--- /dev/null Thu Mar 18 17:53:42 2021
+++ pkgsrc/textproc/split-thai/files/pthai.el Thu Mar 18 17:53:42 2021
@@ -0,0 +1,1848 @@
+;; Copyright (c) 2021 Sean Cole <scole%NetBSD.org@localhost>
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions
+;; are met:
+;;
+;; 1. Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;; 2. Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+;; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+;; BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;; POSSIBILITY OF SUCH DAMAGE.
+
+;; utilities for working with thai text in emacs buffers. includes
+;; functions to split/unsplit thai strings, thai time conversion,
+;; download and play mp3 audio for thai words from thai-language.com,
+;; clickthai-online.com, and thai2english.com, and currency
+;; conversions from x-rates.com. Also, it can look up thai words if
+;; vocabulary files are created. The 'customize interface is available
+;; for a few settings.
+;;
+;; HOW TO USE
+;; - in ~/.emacs/init.el, add library path and a require, e.g.:
+;; (add-to-list 'load-path "ST_SHARE_DIR")
+;; (require 'pthai)
+;; or load module directly:
+;; (load-file "ST_SHARE_DIR/pthai.el")
+;;
+;; - also in ~/.emacs/init.el, possibly bind keys to some commonly
+;; used functions:
+;; (global-set-key [f8] 'pthai-lookup)
+;; (global-set-key [f9] 'pthai-say-word)
+;; (global-set-key [f10] 'pthai-say-line)
+;; - 'customize can be used to set or add paths to words lists and
+;; dictionaries
+;; - M-x apropos pthai for available functions
+;;
+;; TODO
+;; - make info file for this module
+;; - when splitting, handle unknown/misspelled words better
+;; - look up word definitions on the fly (?)
+;; - keep original spacing when possible when splitting (?)
+;; - interface with pkgsrc/textproc/translate-shell (?),
+;; not sure about licensing issues
+;; - don't try to download and say single letters in pthai-say except maybe ๆ. this
+;; happens sometimes when word misspelled or unknown words found
+;; - do better breaking of words with ๆ in dictionaries, like สั้นๆ
+;; - for pthai-rwb, maybe use non-brute-force/dynamic-programming algorithm
+;; - get byte compile working
+;; - get initial loading of default wordlist and sample dictionary working
+;; with customize*
+;; - create directories as needed through customize interface (?)
+;; pthai-default-directory ~/.emacs.d/pthai
+;; pthai-audio-directory ~/.emacs.d/pthai/audio or specified
+;; dictionaries ~/.emacs.d/pthai/dictionary or specified w/ pthai-dictionary-list
+;; wordlists ~/.emacs.d/pthai/wordlist or specified w/ pthai-wordlist-list
+
+;; known issues:
+;; - doesn't always handle "long" strings well
+;; - pthai-say-word after pthai-split line, last word is not always played.
+;; this seems to be emacs *shell* issue
+;; - when linting or compiling file, need to "(require 'seq)" first(?)
+;;
+(require 'cus-edit) ;; custom* customize*
+(require 'ido) ;; ido-completing-read*
+(require 'mule-util) ;; nested alist functions
+(require 'seq) ;; seq-* functions
+(require 'subr-x) ;; string-trim* functions
+(require 'thai-word) ;; thai-word-table
+(require 'thingatpt) ;; thing-at-point*
+(require 'url) ;; url-* functions
+
+;; xxx "special" vars to set before loading module, not the emacs way?
+(unless (boundp 'pthai-bootstrap)
+ (defvar pthai-bootstrap nil "nil unless building for pkgsrc"))
+
+(unless (boundp 'pthai-verbose-wordloads)
+ (defvar pthai-verbose-wordloads t
+ "if non-nil, display word counts when loading dictionaries or wordlists"))
+
+;; "normal" module variables
+(defvar pthai-default-directory (concat user-emacs-directory "pthai/")
+ "default pthai directory (ensure ends with directory separator)")
+
+(defvar pthai-wordlist (make-hash-table :test 'equal)
+ "hash table of thai words mapped to 1")
+
+(defvar pthai-dictionary (make-hash-table :test 'equal)
+ "thaiword => '( def eng_class thai_class where definition, eng_classifiers, thai_classifiers are all lists of strings. empty definitions should be defined as nil \"ไก่\" => ( '(\"chicken\") nil
'(\"ตัว\") )")
+
+(defvar pthai-misc-punctuation-regexp
+ (regexp-opt
+ (list "~" "`" "!" "@" "#" "\$" "%" "^" "&" "*" "(" ")"
+ "-" "_" "=" "+" "\\" "|" "{" "}" "[" "]"
+ ";" ":" "'" "\"" "<" ">" "." "," "/" "?"
+ "ๆ" "ฯาฯ" "ฯ" "฿" "๏" "๚" "๛"))
+ "regexp of misc punctuation used for word splitting")
+
+(defvar pthai-rwb-tmp nil "temporary variable for pthai-rwb")
+
+(defgroup pthai nil
+ "Pthai dictionary, wordlist, and word-splitting."
+ :group 'applications)
+
+(defcustom pthai-use-external-splitters t
+ "use external programs to help word splitting, which may be slower"
+ :group 'pthai
+ :type 'boolean)
+
+(defcustom pthai-split-mode "biggest"
+ "Type of word splitting"
+ :group 'pthai
+ :set (lambda (sym val) (set-default sym val))
+ :type '(radio (const :tag "biggest words possible/fewest words" :value "biggest")
+ (const :tag "smallest words possible/most words" :value "smallest")
+ (const :tag "interactively display choices" :value "interactive")))
+
+(defcustom pthai-mp3-player nil
+ "default command of audio player for mp3 files"
+ :group 'pthai
+ :type 'string)
+
+(defcustom pthai-mp3-player-flags nil
+ "default command flag arguments used for pthai-mp3-player"
+ :group 'pthai
+ :type '(repeat string))
+
+(defun pthai-directory-create(dir)
+ "prompt user to create a directory if it doesn't exist"
+ (unless (file-directory-p dir)
+ (if (yes-or-no-p (concat dir " does not exist, create?"))
+ (or (make-directory dir)
+ (error "failed to create dir %s" dir)))))
+
+(defcustom pthai-audio-directory
+ (concat pthai-default-directory "audio")
+ "directory where mp3 audio files are download and saved to, as
+well as read from"
+ :group 'pthai
+ :set (lambda (sym val)
+ (set-default sym val)
+ (unless load-in-progress
+ (pthai-directory-create val)))
+ :type 'directory)
+
+;; utilities to manipulate the 'thai-word-table from the 'thai-word
+;; library. these functions should be merged into the 'thai-word
+;; module itself, but that module looks to have been written when
+;; 'thai-tis620 encoding was commonly used instead of utf-8 for thai
+;;
+;; 'thai-word-table uses a nested-alist from mule-util which looks
+;; like this:
+;; '(3585 1 ;; ก word ก
+;; (3591 1 ;; ง word กง
+;; (3585 t ;; ก
+;; (3634 t ;; า
+;; (3619 1))));; ร word กงการ
+;; (3585 1 ;; ก word กก
+;; (3621 1)))) ;; ล word กกล
+;;
+
+(defun pthai-twt-extract(nlist thaistr)
+ "helper function to reconstruct thai words from a nested alist,
+uses recursion"
+ (let ((ucode)
+ (complete))
+ (cond
+ ;; finished
+ ((not nlist) nil)
+
+ ;; (3591 1 ...
+ ((integerp (car nlist))
+ ;; xxx care about coding-system vars here?
+ (setq ucode (char-to-string (car nlist)))
+ (setq complete (cadr nlist))
+ (setq thaistr (concat thaistr ucode))
+ (cond
+ ;; t => no word at this depth
+ ((equal complete t)
+ (pthai-twt-extract (cddr nlist) thaistr))
+ ;; 1 => word at this depth
+ ((equal complete 1)
+ (append (list thaistr)
+ (pthai-twt-extract (cddr nlist) thaistr)))
+ (t
+ (error "invalid parsing for complete var"))))
+
+ ;; not finished
+ (t
+ (append (pthai-twt-extract (car nlist) thaistr)
+ (pthai-twt-extract (cdr nlist) thaistr))))))
+
+(defun pthai-twt-in-p(thaiword)
+ "return t if thaiword is in 'thai-word-table, nil otherwise"
+ (let ((result
+ (lookup-nested-alist
+ thaiword thai-word-table (length thaiword) 0 t)))
+ (and (listp result)
+ (integerp (car result))
+ (= 1 (car result)))))
+
+(defun pthai-twt-table-save(filename &optional alist)
+ "save thai words extracted from a nested-alist table to
+filename in utf8 format, one word per line. default is to save
+'thai-word-table if no alist argument given. Returns number of
+dictionary words."
+ (interactive "FName of file to save to: \nP")
+ (let ((thaiwords)
+ (elem)
+ (line_count)
+ (coding-system-for-read 'utf-8)
+ (coding-system-for-write 'utf-8)
+ (buffer-file-coding-system 'utf-8))
+ ;; default list or not
+ (setq alist (or alist
+ thai-word-table))
+
+ (unless (nested-alist-p alist)
+ (error "Invalid argument %s" alist))
+
+ ;; remove 'thai-words from 'thai-word-table
+ (setq alist (cdr alist))
+
+ (with-temp-buffer
+ ;; process per-letter list one at a time. could process whole
+ ;; list at once but maybe try to conserve memory resources
+ (while (setq elem (car alist))
+ (setq alist (cdr alist))
+ (setq thaiwords (pthai-twt-extract elem ""))
+
+ (dolist (elem thaiwords)
+ (insert elem "\n")))
+
+ (sort-lines nil (point-min) (point-max))
+ (setq line_count (count-lines (point-min) (point-max)))
+ (write-region nil nil filename)
+ line_count)))
+
+(defun pthai-twt-count (&optional alist)
+ "Count number of words in a nested alist. if no arg given,
+count 'thai-word-table words"
+ (interactive)
+ (let ((count 0)
+ (elem)
+ (thaiwords))
+ ;; default list or not
+ (setq alist (or alist thai-word-table))
+ (unless (nested-alist-p alist)
+ (error "Invalid argument %s" alist))
+ ;; remove 'thai-words from 'thai-word-table
+ (setq alist (cdr alist))
+ (while (setq elem (car alist))
+ (setq alist (cdr alist))
+ (setq thaiwords (pthai-twt-extract elem ""))
+ (setq count (+ count (length thaiwords))))
+ count))
+
+(defun pthai-twt-update (file &optional append)
+ "Update Thai word table by replacing the current word list with
+FILE, which is utf-8. If called with optional append argument,
+FILE is appended instead to the current word list. Does the same
+as 'thai-update-word-table, except that function expects
+'thai-tis620 encoding which appears to be the default format for
+at least emacs version <= 28.0"
+ (interactive "FThai word table file: \nP")
+ (let* ((coding-system-for-read 'utf-8)
+ (coding-system-for-write 'utf-8)
+ (buffer-file-coding-system 'utf-8)
+ (temp_file (make-temp-file "pthai-twt-update-620_")))
+ (unwind-protect
+ (with-temp-buffer
+ (insert-file-contents file)
+ (setq coding-system-for-write 'thai-tis620)
+ (write-file temp_file))
+ (thai-update-word-table temp_file append)
+ (delete-file temp_file)
+ (if pthai-verbose-wordloads
+ (message "%d words in 'thai-word-table" (pthai-twt-count)))
+ thai-word-table)))
+
+(defun pthai-split-tokenizer(text)
+ "subdivide text string into list of string tokens for hopefully
+more optimal word splitting, e.g., แมว99ไก่แม่ => '(แมว 99 ไก่แม่)"
+ (let (tmp tlist
+ (misc_punc (concat "^\\(" pthai-misc-punctuation-regexp "+\\)")))
+ (while (> (length text) 0)
+ ;; misc punctuation, numbers, thai text w/o numbers, thai numbers, non-thai text
+ (or (string-match misc_punc text)
+ (string-match "^\\([\u0e00-\u0e4f\u0e5a-\u0e7f]+\\)" text)
+ (string-match "^\\([\u0e50-\u0e59\.]+\\)" text)
+ (string-match "^\\(\\Ct+\\)" text)
+ (error (concat "invalid text/match for : " text)))
+ (setq tmp (match-string 1 text))
+ (setq text (replace-match "" t nil text))
+ ;; if match contains arabic number or space, split up more
+ (if (string-match-p "[0-9 \f\t\n\r\v]" tmp)
+ (setq tlist (append (reverse (pthai-split-subtokenizer tmp)) tlist))
+ (setq tlist (cons tmp tlist))))
+ (reverse tlist)))
+
+(defun pthai-split-subtokenizer(text)
+ "subdivide string into string list of numbers, whitespace, and
+non-numbers/non-whitespace. helper function for
+pthai-split-tokenizer"
+ (let (tmp tlist)
+ (while (> (length text) 0)
+ (or (string-match "^\\([0-9]+\\)" text) ;; number
+ (string-match "^\\([ \f\t\n\r\v]+\\)" text) ;; whitespace
+ (string-match "^\\([^0-9 \f\t\n\r\v]+\\)" text) ;; other
+ (error "invalid match"))
+ (setq tmp (match-string 1 text))
+ (setq text (replace-match "" t nil text))
+ (setq tlist (cons tmp tlist)))
+ (reverse tlist)))
+
+(defun pthai-split-all(text)
+ "function to split up thai text string using various
+programs. returns all found word segmentations as a list of
+strings, where each string is a group of words separated by
+spaces"
+ (let (tmp x slist olist)
+ (dolist (tmp (pthai-split-tokenizer text))
+ (if (string-match-p "^\\Ct+$" tmp)
+ ;; no thai chars, just keep as is
+ (setq olist (nconc olist (list (list tmp))))
+ ;; else match thai chars
+ (setq slist (pthai-thai-break-words tmp)) ;; thai-break-words
+ (when pthai-use-external-splitters
+ (dolist (x (list "st-icu" "st-swath")) ;; st-icu st-swath
+ (if (executable-find x)
+ (setq slist
+ (append
+ (split-string
+ (shell-command-to-string
+ (concat x " \"" tmp "\"")) "\n" t)
+ slist)))))
+ ;; pthai-rwb & st-wordbreak are recursive, only use with smaller strings
+ (when (< (length tmp) 60)
+ (setq slist (append (pthai-rwb tmp) slist))
+ (and pthai-use-external-splitters
+ (executable-find "st-wordbreak")
+ (setq slist
+ (append
+ (split-string
+ (shell-command-to-string
+ (concat "st-wordbreak -a " " \"" tmp "\"")) "\n" t)
+ slist))))
+ (setq slist (seq-uniq slist))
+ (setq olist (nconc olist (list slist)))))
+
+ (if (<= (length olist) 1)
+ (setq olist (car olist))
+ (setq olist (pthai-combinate olist))
+ (setq olist (mapcar 'pthai-join-strings olist)))
+
+ (setq olist (seq-uniq olist))))
+
+(defun pthai-split-string(text &optional smode)
+ "split up thai string into thai words. words are broken up
+either by biggest, smallest or interactively depending on
+'pthai-split-mode setting. split mode can be specified with
+optional arg smode using same values as 'pthai-split-mode
+setting. returns a string containing words separated by spaces"
+ (interactive "sword: ")
+ (let (slist)
+ (setq smode (or smode pthai-split-mode))
+ (setq slist (pthai-split-all text))
+ (setq slist (sort slist (lambda (a b) (< (length (split-string a))
+ (length (split-string b))))))
+ (cond
+ ((< (length slist) 1)
+ text)
+ ((or (= (length slist) 1)
+ (string= smode "biggest"))
+ (car slist))
+ ((string= smode "smallest")
+ (car (reverse slist)))
+ ((string= smode "interactive")
+ (ido-completing-read "split words: " slist))
+ (t
+ (car slist)))))
+
+(defun pthai-cycle-split-mode()
+ "cycle to next word splitting mode. return string of current
+mode. message user when called interactively"
+ (interactive)
+ (cond ((string= pthai-split-mode "biggest")
+ (setq pthai-split-mode "smallest"))
+ ((string= pthai-split-mode "smallest")
+ (setq pthai-split-mode "interactive"))
+ ((string= pthai-split-mode "interactive")
+ (setq pthai-split-mode "biggest")))
+ (if (called-interactively-p 'any)
+ (message pthai-split-mode))
+ pthai-split-mode)
+
+(defun pthai-thai-break-words(text)
+ "split thai string using thai-break-words from 'thai-word
+library. returns list of lines with word-strings separated by
+spaces"
+ (with-temp-buffer
+ (insert text)
+ (goto-char (point-min))
+ (thai-break-words " ")
+ (split-string (buffer-string) "\n" t)))
+
+(defun pthai-join-strings(alist)
+ "join list of strings to form a string of words, only adding
+ spaces where needed, ('a ' 'b' ' d ') => 'a b d '"
+ (let (n tmp rlist
+ (f_space "^[ \t\n\r]+")
+ (b_space "[ \t\n\r]+$"))
+
+ (if (<= (length alist) 1)
+ (setq rlist alist)
+ (dotimes (n (- (length alist) 1))
+ (setq tmp (nth n alist))
+ (setq rlist (cons tmp rlist))
+ (unless (or (string-match-p b_space tmp)
+ (string-match-p f_space (nth (+ 1 n) alist)))
+ (setq rlist (cons " " rlist))))
+ (setq rlist (cons (nth (- (length alist) 1) alist) rlist))
+ (setq rlist (reverse rlist)))
+ (string-join rlist)))
+
+(defun pthai-unsplit-line()
+ "remove spaces for thai words in line starting at point"
+ (interactive)
+ (pthai-unsplit-region (point) (line-end-position)))
+
+(defun pthai-unsplit-buffer()
+ "remove spaces for thai words in buffer"
+ (interactive)
+ (pthai-unsplit-region (point-min) (point-max)))
+
+(defun pthai-unsplit-region(&optional p1 p2)
+ "remove space and tabs from thai words in region"
+ (interactive)
+ (let ((num_rexp "\\([0-9\u0e50-\u0e59]+\\)")
+ (nonnum_rexp "\\([\u0e00-\u0e4f\u0e5a-\u0e7f]\\)")
+ (thai_rexp "\\(\\ct+\\)")
+ (nonthai_rexp "\\(\\Ct+\\)")
+ (tmp_regexp))
+ (cond
+ ((and p1 p2)
+ nil)
+ ((use-region-p)
+ (setq p1 (region-beginning))
+ (setq p2 (region-end)))
+ (t
+ (error "no region/text selected")))
+ (save-restriction
+ (narrow-to-region p1 p2)
+ (goto-char (point-min))
+ (setq tmp_regexp (concat nonnum_rexp "\\([ \t]+\\)" nonnum_rexp))
+ ;; remove spaces between thai words excluding numbers
+ (while (re-search-forward tmp_regexp nil t)
+ (replace-match (concat (match-string 1) (match-string 3)))))
+ (goto-char p1)))
+
+(defun pthai-split-region(&optional p1 p2)
+ "in a region defined by p1 and p2, split thai words into
+subwords in separated by spaces"
+ (interactive)
+ (let (line)
+ (cond
+ ((and p1 p2)
+ nil)
+ ((use-region-p)
+ (setq p1 (region-beginning))
+ (setq p2 (region-end)))
+ (t
+ (error "no region/text selected")))
+ (save-excursion
+ (save-restriction
+ (narrow-to-region p1 p2)
+ (goto-char (point-min))
+ (while (not (eobp))
+ (setq line
+ (buffer-substring-no-properties (line-beginning-position)
+ (line-end-position)))
+ (when (string-match-p "\\ct" line)
+ (kill-line)
+ (insert (mapconcat 'pthai-split-string (pthai-split-tokenizer line) "")))
+ (forward-line 1))
+ (goto-char p1)))))
+
+(defun pthai-split-word()
+ "split thai word at point"
+ (interactive)
+ (let (bounds p1 p2)
+ (setq bounds (bounds-of-thing-at-point 'word))
+ (setq p1 (car bounds))
+ (setq p2 (cdr bounds))
+ (unless (and p1 p2)
+ (error "word not found"))
+ (pthai-split-region p1 p2)))
+
+(defun pthai-split-line()
+ "split thai words from point to end of line"
+ (interactive)
+ (pthai-split-region (point) (line-end-position)))
+
+(defun pthai-split-buffer()
+ "split thai words in buffer"
+ (interactive)
+ (pthai-split-region (point-min) (point-max)))
+
+(defun pthai-combinate(a)
+ "take a list of lists, returns a list of lists with 1 element
+ from each list combined:
+ '((A B) (1) (c d)) => '((A 1 c) (A 1 d) (B 1 c) (B 1 d))"
+ (let (ans_elem ans_list left_elem left_list tmp_elem tmp_list)
+ (setq ans_list (mapcar 'list (car a)))
+ (setq left_list a)
+ (while (setq left_list (cdr left_list))
+ (setq tmp_list nil)
+ (dolist (ans_elem ans_list)
+ (dolist (left_elem (nth 0 left_list))
+ (setq tmp_elem (append ans_elem (list left_elem)))
+ (setq tmp_list (append tmp_list (list tmp_elem)))))
+ (setq ans_list tmp_list))
+ ans_list))
+
+(defun pthai-url-clickthai(thaiword)
+ "return mp3 url for a thai word from clickthai-online.com or
+nil if unavailable"
+ (let ((url "https://www.clickthai-online.com/wbtde/dictionary.php")
+ (url-request-method "POST")
+ (url-request-extra-headers
+ `(("Content-Type" . "application/x-www-form-urlencoded; charset=utf-8")))
+ (url-request-data
+ (concat "action=search&search=" (url-encode-url thaiword)))
+ (play_url "http://www.clickthai-online.com/cgi-bin/playmp3.pl?")
+ (sound_url)
+ (tmp)
+ (tmp_url))
+ (with-current-buffer (url-retrieve-synchronously url)
+ (goto-char (point-min))
+ ;; <a href="javascript:play('../cgi-bin/playmp3.pl?322476');">วัน</a>
+ (while (and (not sound_url)
+ (re-search-forward "playmp3\\.pl\\?\\([0-9]+\\)');\">\\(.*\\)</a>" nil t))
+ (and (setq tmp (decode-coding-string (match-string 2) 'utf-8))
+ (setq tmp_url (concat play_url (match-string 1)))
+ (string= tmp thaiword)
+ (setq sound_url tmp_url))))
+ sound_url))
+
+(defun pthai-url-thai-language(thaiword)
+ "return mp3 url for a thai word from thai-language.com or nil
+if unavailable"
+ (let* ((title_word)
+ ;; <title>thai-language.com - แมว</title>
+ (title_regexp "<title>thai-language.com - \\(.*\\)</title>")
+ (id)
+ ;; <link rel="canonical" href="http://www.thai-language.com/id/131188" />
+ (id_regexp "<link rel=\"canonical\" href=\"http://www.thai-language.com/id/\\([0-9]+\\)\"")
+ (click_regexp)
+ (sound_url)
+ (tmmode 0) ;; exact=0 contains=1 starts_with=2 ends_with=3
+ (mp3_url "http://thai-language.com/mp3/")
+ (url "http://thai-language.com/dict")
+ (url-request-method "POST")
+ (url-request-extra-headers
+ `(("Content-Type" . "application/x-www-form-urlencoded; charset=utf-8")))
+ (url-request-data
+ (concat "action=/default.aspx&tmode="
+ (number-to-string tmmode) "&search=" (url-encode-url thaiword))))
+ (with-current-buffer (url-retrieve-synchronously url)
+ (and (goto-char (point-min))
+ (re-search-forward title_regexp nil t 1)
+ (setq title_word (decode-coding-string (match-string 1) 'utf-8)))
+ (and (goto-char (point-min))
+ (re-search-forward id_regexp nil t 1)
+ (setq id (match-string 1)))
+ (and id
+ (goto-char (point-min))
+ ;; <a onClick=PlayAudioFile('/mp3/E131188.mp3') style='cursor:pointer;'>
+ ;; <img src='/img/speaker.gif' /></a>
+ (setq click_regexp
+ (concat "a onClick=PlayAudioFile('/mp3/\\([A-Z]" id "\\.mp3\\)') "))
+ (re-search-forward click_regexp nil t 1)
+ (setq sound_url (concat mp3_url (match-string 1)))))
+ ;; title_word needs to match thaiword because query may return
+ ;; other pages, e.g., "บักหำ" returns page for "หำ"
+ ;;(message (concat "title=" title_word " id=" id " url=" sound_url)) ;; debug
+ (and title_word
+ (string= title_word thaiword)
+ id
+ sound_url)))
+
+(defun pthai-url-thai2english(thaiword)
+ "return mp3 url for a thai word from thai2english.com or nil if
+unavailable"
+ (let* ((t2e_url "https://www.thai2english.com")
+ (fetch_url (url-encode-url (concat t2e_url "/search?q=" thaiword)))
+ (id_match (concat t2e_url "/dictionary/\\([0-9]+\\)\\.html"))
+ (speaker_url "https://ast.thai2english.com/img/circle-icons/speaker.svg")
+ (sounds_url "https://ast.thai2english.com/sounds/"))
+ (with-current-buffer (url-retrieve-synchronously fetch_url)
+ (goto-char (point-min))
+ (and (search-forward speaker_url nil t)
+ (goto-char (point-min))
+ (re-search-forward id_match nil t 1)
+ (concat sounds_url (match-string 1) ".mp3")))))
+
+(defun pthai-x-rates-currencies()
+ "from www.x-rates.com, get an alist of supported currencies and their names
+'((\"USD\" . \"US Dollar\") (\"THB\" . \"Thai Baht\") ... )"
+ (let ((x_url "https://www.x-rates.com")
+ (clist)
+ ;; match a string like
+ ;;<li><a href='https://www.x-rates.com/table/?from=ARS' onclick="submitConverterArgs(this)" rel='ratestable'>Argentine Peso</a></li>
+ (currency_rexp
+ (concat "<li><a href='https://www.x-rates.com/table/\\?from=\\([A-Z]\\{3\\}\\)' "
+ "onclick=.* rel='ratestable'>\\(.*\\)</a></li>")))
+ (with-current-buffer (url-retrieve-synchronously x_url)
+ (goto-char (point-min))
+ (while (re-search-forward currency_rexp nil t)
+ (setq clist (cons (cons (match-string 1) (match-string 2)) clist))))
+ (unless (> (length clist) 40)
+ (error "error fetching currency types"))
+ (reverse clist)))
+
+(defun pthai-x-rates-convert(from to amount)
+ "convert a currency (USD THB ...) amount to another currency,
+from x-rates.com. returns converted amount as a number. message
+user conversion info if called interactively"
+ (let ((x_url "https://www.x-rates.com")
+ (curr_alist (pthai-x-rates-currencies))
+ (from_name)
+ (to_name)
+ (url)
+ (mstr)
+ (converted))
+ (unless (and amount (numberp amount) (> amount 0.0))
+ (error "invalid monetary amount"))
+ (setq amount (number-to-string amount))
+ ;; sanity check dollar & baht, and then look up names
+ (unless (and (assoc "USD" curr_alist)
+ (assoc "THB" curr_alist)
+ (setq to_name (cdr (assoc to curr_alist)))
+ (setq from_name (cdr (assoc from curr_alist))))
+ (error (concat "unknown currency to/from type(s): to=" to " from=" from)))
+ ;; fetch current rates, then match something like
+ ;; <a href='https://www.x-rates.com/graph/?from=USD&to=THB'>30.171404</a>
+ (setq url (concat x_url "/table/?from=" from "&amount=" amount))
+ (setq mstr (concat "?from=" from "&to=" to "'>"))
+ (with-current-buffer (url-retrieve-synchronously url)
+ (goto-char (point-min))
+ (and (search-forward mstr nil t)
+ (re-search-forward "\\([0-9\.]+\\)" nil t)
+ (setq converted (format "%3.2f" (string-to-number (match-string 1))))))
+ (unless converted
+ (error (concat "download conversion match not found for " url)))
+ (if (not (or executing-kbd-macro noninteractive))
+ (message "%s %s = %s %s" amount from_name converted to_name))
+ (string-to-number converted)))
+
+(defun pthai-currency-dollar-to-baht(&optional amount)
+ "convert us dollars to baht"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "dollars: " 1)))
+ (pthai-x-rates-convert "USD" "THB" amount))
+
+(defun pthai-currency-baht-to-dollar(&optional amount)
+ "convert baht to us dollars"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "baht: " 1)))
+ (pthai-x-rates-convert "THB" "USD" amount))
+
+(defun pthai-currency-dollar-to-euro(&optional amount)
+ "convert us dollars to euros"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "dollars: " 1)))
+ (pthai-x-rates-convert "USD" "EUR" amount))
+
+(defun pthai-currency-euro-to-dollar(&optional amount)
+ "convert euros to us dollars"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "euros: " 1)))
+ (pthai-x-rates-convert "EUR" "USD" amount))
+
+(defun pthai-currency-dollar-to-pound(&optional amount)
+ "convert us dollars to british pounds"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "dollars: " 1)))
+ (pthai-x-rates-convert "USD" "GBP" amount))
+
+(defun pthai-currency-pound-to-dollar(&optional amount)
+ "convert british pounds to us dollars"
+ (interactive)
+ (unless (numberp amount)
+ (setq amount (read-number "pounds: " 1)))
+ (pthai-x-rates-convert "GBP" "USD" amount))
+
+(defun pthai-randomize(lst)
+ "take a list and return it in random order"
+ (let (rlst n)
+ (while lst
+ (setq n (random (length lst)))
+ (setq rlst (cons (nth n lst) rlst))
+ ;; drop nth element
+ (setq lst (append (seq-take lst n) (seq-drop lst (1+ n)))))
+ rlst))
+
+(defun pthai-soundurl(thaiword)
+ "return mp3 sound url for thai word from either thai-language,
+clickthai or thai2english, or nil if not found. returns nil if no
+thaiword given"
+ (let ((fetchers (list 'pthai-url-clickthai
+ 'pthai-url-thai-language
+ 'pthai-url-thai2english))
+ (tmp)
+ (url))
+ (when (string-match-p "\\ct+" thaiword)
+ (setq fetchers (pthai-randomize fetchers))
+ (while (and (not url) fetchers)
+ (setq tmp (car fetchers))
+ (setq fetchers (cdr fetchers))
+ (setq url (funcall tmp thaiword))))
+ url))
+
+(defun pthai-download(thaiword)
+ "fetch mp3 soundfile for thai word download from web to default
+audio directory. returns path to downloaded file or nil if unable
+to download"
+ (let ((file (pthai-soundpath thaiword))
+ (url (pthai-soundurl thaiword)))
+ (and (string-match-p "\\ct+" thaiword)
+ url
+ (url-copy-file url file t)
+ file)))
+
+(defun pthai-soundpath(thaiword)
+ "generate default sound path for a thaiword. file may or may
+not already exist"
+ (unless (and pthai-audio-directory
+ (file-directory-p pthai-audio-directory))
+ (error "pthai-audio-directory \"%s\" not found" pthai-audio-directory))
+ (expand-file-name (concat pthai-audio-directory "/" thaiword ".mp3")))
+
+(defun pthai-soundfile-downloaded-p(thaiword)
+ "return non-nil if a soundfile download already attempted for a
+thai word"
+ (file-exists-p (pthai-soundpath thaiword)))
+
+(defun pthai-soundfile-playable-p(thaiword)
+ "return non-nil if a soundfile downloaded successfully for a
+thai word"
+ (and (pthai-soundfile-downloaded-p thaiword)
+ (> (file-attribute-size (file-attributes (pthai-soundpath thaiword))) 0)
+ t))
+
+(defun pthai-soundfiles-available-p(tlist)
+ "return non-nil if the all sound files for a list of thai words
+are available in audio directory"
+ (let (tmp)
+ (setq tlist (mapcar 'pthai-soundfile-playable-p tlist))
+ (setq tmp (seq-filter (lambda (x) x) tlist))
+ (= (length tmp) (length tlist))))
+
+(defun pthai-soundfiles-sort(tlist)
+ "sort list of list of thai strings in order of most available
+audio files as a percentage, or containing the biggest words"
+ (sort tlist
+ (lambda (a b)
+ (let (a_total a_found a_percent b_total b_found b_percent)
+ (setq a_total (length a))
+ (setq b_total (length b))
+ (setq a_found (length (seq-filter (lambda (x) x) (mapcar 'pthai-soundfile-playable-p a))))
+ (setq b_found (length (seq-filter (lambda (x) x) (mapcar 'pthai-soundfile-playable-p b))))
+ (setq a_percent (round (* 100.0 (/ (float a_found) (float a_total)))))
+ (setq b_percent (round (* 100.0 (/ (float b_found) (float b_total)))))
+ (cond
+ ;; use biggest length of first word if all rest same
+ ((and (= a_total b_total)
+ (= a_percent b_percent))
+ (> (length (car a)) (length (car b))))
+ ;; use biggest words if percentage same
+ ((= a_percent b_percent)
+ (< a_total b_total))
+ ;; else highest percent of words available
+ (t
+ (> a_percent b_percent)))))))
+
+(defun pthai-say(&optional text force_dl no_message)
+ "split up text string, download and play audio for thai
+words. return definitions of words as a string. also say arabic
+or thai numbers in thai. how words are split depends on variable
+'pthai-split-mode and words in dictionary and wordlist"
+ (interactive "sword: ")
+ (let (audio-p tmp tlist word words defs)
+ (unless text
+ (setq text (read-string "word: ")))
+ (setq text (downcase (string-trim text)))
+
+ ;; check input word as given, subdividing as necessary
+ (cond
+ ;; empty string, do nothing
+ ((< (length text) 1)
+ nil)
+ ;; "known" thai word
+ ((pthai-any-p text)
+ (setq tlist (list text)))
+ ;; non-thai word that has thai-word definition
+ ((and (string-match-p "^\\Ct+$" text)
+ (setq tmp (pthai-lookup text)))
+ (setq tlist tmp))
+ ;; everything else: thai words, mixed, english words. doing
+ ;; multiple splits here trying to keep the string lengths "small"
+ (t
+ (setq tlist
+ (apply #'append
+ (mapcar 'split-string
+ (mapcar 'pthai-split-string (split-string text)))))))
+
+ ;; "translate" each word if not thai
+ (dolist (word tlist)
+ (cond
+ ;; number
+ ((or (string-match-p "^[0-9]+$" word)
+ (string-match-p "^[๐๑๒๓๔๕๖๗๘๙]+$" word))
+ (setq words (cons word words)))
+ ;; non-thai word with definition
+ ((and (string-match-p "^\\Ct+$" word)
+ (setq tmp (pthai-lookup word)))
+ (setq words (nconc tmp words)))
+ ;; thai word, non-thai word w/o definition, remainder
+ (t
+ (setq words (cons word words)))))
+ (setq words (reverse words))
+
+ ;; play audio and display meaning for each word when possible
+ (dolist (word words)
+ (cond
+ ;; arabic numbers & thai numbers more than one digit
+ ((or (string-match-p "^[0-9]+" word)
+ (string-match-p "^[๐๑๒๓๔๕๖๗๘๙]\\{2,\\}" word))
+ (pthai-say-number word)
+ (setq tmp (format "%s = %s" word (pthai-number word)))
+ (setq defs (cons tmp defs)))
+ ;; english words, implies thai meaning not found
+ ((string-match-p "[A-Za-z]" word)
+ (setq tmp (format "? = %s" word))
+ (unless no_message (message tmp))
+ (setq defs (cons tmp defs)))
+ ;; thai strings & single thai digits
+ ((string-match-p "^\\ct+$" word)
+ (setq audio-p t)
+ (catch 'pthai-say-word
+ ;; file already there and no forced download
+ (and (not force_dl)
+ (pthai-soundfile-playable-p word)
+ (pthai-play-thaiword word)
+ (throw 'pthai-say-word t))
+ ;; file not there or forced download, and word in dictionary
+ (when (and (or force_dl (not (pthai-soundfile-downloaded-p word)))
+ (pthai-any-p word))
+ (if (not (pthai-download word))
+ (write-region "" nil (pthai-soundpath word))
+ (pthai-play-thaiword word)
+ (throw 'pthai-say-word t)))
+ ;; split up word into possible combinations
+ (setq tlist (mapcar 'split-string (pthai-split-all word)))
+ ;; delete original since if here it wasn't playable
+ ;;(setq tlist (delete (list word) tlist))
+ ;; sort by fewest missing pieces
+ (setq tlist
+ (sort tlist
+ (lambda (a b)
+ (< (length (seq-filter
+ (lambda (x) x) (mapcar 'pthai-soundfile-playable-p a)))
+ (length (seq-filter
+ (lambda (x) x) (mapcar 'pthai-soundfile-playable-p b)))))))
+ ;; play if all pieces found
+ (dolist (tmp tlist)
+ (when (pthai-soundfiles-available-p tmp)
+ (mapc 'pthai-play-thaiword tmp)
+ (throw 'pthai-say-word t)))
+ ;; for each split group, try to download missing pieces and play if successful
+ (dolist (tmp tlist)
+ (mapc (lambda (x)
+ (when (or force_dl (not (pthai-soundfile-downloaded-p x)))
+ (or (pthai-download x)
+ (write-region "" nil (pthai-soundpath x)))))
+ tmp)
+ (when (pthai-soundfiles-available-p tmp)
+ (mapc 'pthai-play-thaiword tmp)
+ (throw 'pthai-say-word t)))
+ ;; if any audio available, play the splitting with the most
+ ;; words available e.g., เสื้อโค้ทน้ำผลไม้ปั่นแม่น้ำใจ
+ (setq tlist (pthai-soundfiles-sort tlist))
+ (when (and (setq tmp (car tlist))
+ (seq-contains-p (mapcar 'pthai-soundfile-playable-p tmp) 't))
+ (mapc (lambda (x)
+ (and (pthai-soundfile-playable-p x)
+ (pthai-play-thaiword x)))
+ tmp)
+ (throw 'pthai-say-word t))
+ ;; if made it here, no audio found for word(s)
+ (setq audio-p nil))
+
+ ;; message meaning after playing
+ (setq tmp (or (pthai-lookup word)
+ (and (pthai-classifier-p word) (list "classifier"))))
+ (if tmp
+ (setq tmp (format "%s = %s" word (string-join tmp ", ")))
+ (setq tmp (format "%s = ?" word)))
+ (when (and (not audio-p) (not no_message))
+ (setq tmp (concat tmp " (audio not found)")))
+ (unless no_message (message tmp))
+ (setq defs (cons tmp defs)))))
+ (setq defs (reverse defs))
+ ;; message all defs found at end
+ (unless (or no_message (not defs))
+ (message (string-join defs "\n")))
+ defs))
+
+(defun pthai-play-thaiword(thaiword &optional exe exe_args)
+ "play mp3 audio file for thai word, assumes thaiword in default
+sound directory. depends on external mp3 player. optionally,
+pass executable and list of command args to play audio
+or set 'pthai-mp3-player/'pthai-mp3-player-flags. returns t if
+playing successful"
+ ;; there is play-sound-file built-in with emacs but it only supports
+ ;; .wav or .au, not .mp3
+ (let
+ ;; list of command and its args. args for quiet not required
+ ;; since output discarded anyway
+ ((commands (list (list "mpg123" "-q")
+ (list "afplay")
+ (list "mplayer" "-really-quiet")
+ (list "mpv" "--really-quiet")
+ (list "play" "-q") ;; sox
+ (list "amp" "-q")
+ (list "mpg321" "-q")))
+ (command)
+ (tmp))
+ ;; custom setting
+ (when pthai-mp3-player
+ (if (not pthai-mp3-player-flags)
+ (setq tmp (list pthai-mp3-player))
+ (setq tmp (cons pthai-mp3-player pthai-mp3-player-flags)))
+ (setq commands (cons tmp commands)))
+ ;; passed-in commands
+ (when exe
+ (if (not exe_args)
+ (setq tmp (list exe))
+ (setq tmp (cons exe exe_args)))
+ (setq commands (cons tmp commands)))
+ ;; find first audio command available, from passed-in command,
+ ;; custom setting or default values
+ (setq command
+ (seq-find (lambda (x) (executable-find (car x))) commands))
+
+ (cond
+ ((not command)
+ (error "no mp3 player found"))
+ ((not (pthai-soundfile-playable-p thaiword))
+ (error "file not playable or existing: %s" (pthai-soundpath thaiword)))
+ (t
+ (setq tmp
+ (apply #'call-process (car command) nil nil nil
+ (append (cdr command)
+ (list (expand-file-name (pthai-soundpath thaiword))))))
+ (unless (= tmp 0)
+ (error "non-zero exit (%d) for \"%s %s\""
+ tmp
+ (string-join command " ")
+ (pthai-soundpath thaiword)))
+ tmp))))
+
+(defun pthai-say-word(&optional thaiword force_dl)
+ "download and play audio for thai word. if no word given, get
+word at point. if no word at point, prompt user. optionally
+force downloading of audio. see also pthai-say"
+ (interactive)
+ (setq thaiword (or thaiword
+ (thing-at-point 'word t)
+ (read-string "thai word: ")))
+ (pthai-say thaiword force_dl))
+
+(defun pthai-say-line(&optional thailine force_dl)
+ "download and play audio for current thai line, optionally force
+downloading of audio"
+ (interactive)
+ (setq thailine (or thailine
+ (thing-at-point 'line)
+ (read-string "thai line: ")))
+ (setq thailine (string-trim thailine))
+ (pthai-say thailine force_dl))
+
+(defun pthai-say-buffer(&optional force_dl)
+ "download and play audio for words in buffer, optionally force
+downloading of audio"
+ (interactive)
+ (let (line)
+ (goto-char (point-min))
+ ;; process line-by-line to help reduce memory/stack usage
+ (save-excursion
+ (while (not (eobp))
+ (setq line
+ (buffer-substring-no-properties (line-beginning-position)
+ (line-end-position)))
+ (setq line (string-trim line))
+ (pthai-say line force_dl)
+ (forward-line 1)))))
+
+(defun pthai-say-region(&optional force_dl)
+ "download and play audio for highlighted region"
+ (interactive)
+ (save-excursion
+ (let (line region)
+ (unless (use-region-p)
+ (error "no region selected"))
+ (setq region (buffer-substring-no-properties
+ (region-beginning) (region-end)))
+ (with-temp-buffer
+ (insert region)
+ (pthai-say-buffer force_dl)))))
+
+(defun pthai-say-number(num)
+ "say number (thai or arabic) in thai. if arabic number, first
+convert to thai. returns thai number (string), or nil if can't
+convert. only integers < abs(10 million) are supported. for
+interactive use, use 'pthai-say instead"
+ (let* ((powers '(("1" . "เอ็ด") ("10" . "สิบ") ("20" . "ยี่สิบ")
+ ("100" . "ร้อย") ("1000" . "พัน") ("10000" . "หมื่น")
+ ("100000" . "แสน") ("1000000" . "ล้าน")))
+ (fn2w (lambda (x) (cdr (assoc x powers))))
+ (negative "ลบ")
+ (max_num 9999999)
+ (is_neg)
+ (anum)
+ (rnum)
+ (tlist)
+ (tmp)
+ (power)
+ (n))
+ (unless (stringp num)
+ (setq num (number-to-string num)))
+ ;; negative number
+ (when (string-match-p "^-" num)
+ (setq num (replace-regexp-in-string "^-" "" num))
+ (setq is_neg t))
+ (unless (string-match-p "^[0-9๐๑๒๓๔๕๖๗๘๙]+" num)
+ (error (concat "not valid number " num)))
+ (if (string-match-p "^[๐๑๒๓๔๕๖๗๘๙]+" num)
+ (setq num (pthai-number num)))
+ (unless (string-match-p "^[0-9]+$" num)
+ (setq num (replace-regexp-in-string "[^0-9]" "" num)))
+ (setq anum (string-to-number num))
+ (cond ((> anum max_num)
+ (warn (concat "audio only for numbers <= abs(" (number-to-string max_num) ")")))
+ ((< anum 10)
+ (setq tlist (list anum)))
+ (t
+ (setq rnum (reverse (split-string (pthai-number num) "" t)))
+ ;; digit 1
+ (setq tmp (nth 0 rnum))
+ (cond ((string= "๑" tmp)
+ (setq tlist (cons (funcall fn2w "1") tlist)))
+ ((not (string= "๐" tmp))
+ (setq tlist (cons tmp tlist))))
+ ;; digit 2
+ (when (setq tmp (nth 1 rnum))
+ (cond ((string= "๑" tmp)
+ (setq tlist (cons (funcall fn2w "10") tlist)))
+ ((string= "๒" tmp)
+ (setq tlist (cons (funcall fn2w "20") tlist)))
+ ((not (string= "๐" tmp))
+ (setq tlist (cons (funcall fn2w "10") tlist))
+ (setq tlist (cons tmp tlist)))))
+ ;; digits 3-7
+ (setq power "10")
+ (dotimes (n 5)
+ (when (setq tmp (nth (+ n 2) rnum))
+ (setq power (concat power "0"))
+ (when (not (string= tmp "๐"))
+ (setq tlist (cons (funcall fn2w power) tlist))
+ (setq tlist (cons tmp tlist)))))))
+ ;; convert all to strings
+ (setq tlist
+ (mapcar (lambda (x) (if (numberp x) (number-to-string x) x))
+ tlist))
+ ;; convert all arabic numbers to thai
+ (setq tlist
+ (mapcar (lambda (x) (if (string-match-p "^[0-9]+" x)
+ (pthai-number x) x))
+ tlist))
+ ;; negative
+ (if is_neg (setq tlist (cons negative tlist)))
+ ;; say the number
+ (dolist (tmp tlist)
+ (pthai-say tmp))
+ ;; return thai number as a string
+ (setq tmp (pthai-number num))
+ (if is_neg (setq tmp (concat "-" tmp)))
+ (if (called-interactively-p 'any)
+ (message tmp))
+ tmp))
+
+(defun pthai-rwb(thaistr)
+ "return list of space separated string of possible word breaks
+for a thai string, uses recursion. can only handle thai words"
+ (cond ((<= (length thaistr) 0)
+ nil)
+ ((string-match-p "\\Ct" thaistr)
+ (error (concat "cannot split non-thai strings: " thaistr)))
+ (t (let ((retval))
+ (setq thaistr (replace-regexp-in-string "[[:space:]]+" "" thaistr))
+ (setq pthai-rwb-tmp nil)
+ (pthai-rwb-helper thaistr (length thaistr) "")
+ (setq retval pthai-rwb-tmp)
+ (setq pthai-rwb-tmp nil)
+ (or retval
+ (list thaistr))))))
+
+(defun pthai-rwb-helper(str n result)
+ "recursive helper function for pthai-rwb"
+ (let ((i 1)
+ (prefix)
+ (ss))
+ (catch 'done
+ (while (<= i n)
+ (setq prefix (substring-no-properties str 0 i))
+ (when (pthai-any-p prefix)
+ (when (= i n)
+ (setq result (concat result prefix))
+ (setq pthai-rwb-tmp (cons result pthai-rwb-tmp))
+ (throw 'done nil))
+ (setq ss (substring-no-properties str i nil))
+ (pthai-rwb-helper ss (length ss) (concat result prefix " ")))
+ (setq i (1+ i))))))
+
+(defun pthai-number(&optional num)
+ "convert a number between thai and arabic, and vice versa.
+message user converted value if called interactively"
+ (interactive)
+ (let ((nlist '(("0" . "๐") ("1" . "๑") ("2" . "๒") ("3" . "๓")
+ ("4" . "๔") ("5" . "๕") ("6" . "๖") ("7" . "๗")
+ ("8" . "๘") ("9" . "๙")))
+ (numa)
+ (res))
+ (unless num
+ (setq num (read-string "number: ")))
+ (unless (stringp num)
+ (setq num (number-to-string num)))
+ (setq numa (split-string num "" t))
+ (if (string-match-p "[0-9]+" num)
+ (setq res
+ (mapconcat (lambda(x) (or (cdr (assoc x nlist)) x)) numa ""))
+ (setq res
+ (mapconcat (lambda(x) (or (car (rassoc x nlist)) x)) numa "")))
+ (if (called-interactively-p 'any)
+ (message res))
+ res))
+
+(defun pthai-parse-hour-minute-second(timestr &optional thaitime)
+ "parse hour-minute-second strings like 7 1pm 12:32pm 22:07
+9:45p.m. 0645 3.27am 15:23:55 or \"now\". returns list of (SEC
+MIN HOUR), with nil for each element not parsed"
+ (let ((ampm_rexp "\\([AaPp]\.?[Mm]\.?\\)$")
+ am_pm second minute hour tlist)
+ ;; "now"
+ (if (string-match-p "^[Nn][Oo][Ww]$" timestr)
+ (setq timestr
+ (nth 3 (split-string
+ (pthai-time-now (not thaitime))))))
+ ;; strip out anything that doesn't look like a time
+ (setq timestr
+ (replace-regexp-in-string "[^0-9:\.pPaAmM]" "" timestr))
+ ;; determine am/pm and remove from string
+ (when (string-match-p ampm_rexp timestr)
+ (if (string-match-p "[Aa]" timestr)
+ (setq am_pm "am")
+ (setq am_pm "pm"))
+ (setq timestr
+ (replace-regexp-in-string ampm_rexp "" timestr)))
+ ;; parse hour/minute/seconds digits
+ (cond
+ ;; 1 or 2 digit, 1 11
+ ((string-match "^\\([0-9]\\{1,2\\}\\)$" timestr)
+ (setq hour (match-string 1 timestr))
+ (setq minute "0")
+ (setq second "0"))
+ ;; 4 digits, 0645
+ ((string-match "^\\([0-9]\\{4\\}\\)$" timestr)
+ (setq hour (substring timestr 0 2))
+ (setq minute (substring timestr 2 4))
+ (setq second "0"))
+ ;; 6 digits, 064532
+ ((string-match "^\\([0-9]\\{6\\}\\)$" timestr)
+ (setq hour (substring timestr 0 2))
+ (setq minute (substring timestr 2 4))
+ (setq second (substring timestr 4 6)))
+ ;; 12:32 22:07 3.27 2:35
+ ((string-match "^\\([0-9]\\{1,2\\}\\)[:\.]\\([0-9]\\{1,2\\}\\)$" timestr)
+ (setq hour (match-string 1 timestr))
+ (setq minute (match-string 2 timestr))
+ (setq second "0"))
+ ;; 12:43:54 or 12.43.54
+ ((string-match
+ "^\\([0-9]\\{1,2\\}\\)[:\.]\\([0-9]\\{1,2\\}\\)[:\.]\\([0-9]\\{1,2\\}\\)$" timestr)
+ (setq hour (match-string 1 timestr))
+ (setq minute (match-string 2 timestr))
+ (setq second (match-string 3 timestr))))
+ ;; convert to strings
+ (if second (setq second (string-to-number second)))
+ (if minute (setq minute (string-to-number minute)))
+ (if hour (setq hour (string-to-number hour)))
+ ;; adjust/sanity checks for am/pm
+ (when (and am_pm hour)
+ (cond
+ ((and am_pm (> hour 12))
+ (setq hour nil))
+ ((and (string= am_pm "am") (= 12 hour))
+ (setq hour 0))
+ ((and (string= am_pm "pm") (< hour 12))
+ (setq hour (+ 12 hour)))))
+ ;; return nil unless all seem valid
+ (if (and hour (>= hour 0) (<= hour 24)
+ minute (>= minute 0) (< minute 60)
+ second (>= second 0) (< second 60))
+ (list second minute hour)
+ (list nil nil nil))))
+
+(defun pthai-time-loc2th(timestr &optional reverse)
+"convert an hour:minute:second time string to thai time. use
+\"now\" for current time. if optional reverse, convert thai time
+to local time"
+ (interactive "stime: ")
+ (let* ((c_time (current-time))
+ (time_format (concat "%a %b %e %H:%M:%S %Z %Y"))
+ (thai_offset (* 7 3600)) ;; UTC+7 or ICT or +07 or "Asia/Bangkok"
+ (thai_zone (current-time-zone nil thai_offset))
+ (tlist)
+ (enc_time) (loc_fts) (thai_fts) (o_str))
+ ;; keep reading until valid second/minute/hour
+ (while (not
+ (and (setq tlist (pthai-parse-hour-minute-second timestr reverse))
+ (nth 0 tlist)
+ (nth 1 tlist)
+ (nth 2 tlist)))
+ (setq timestr (read-string "time: ")))
+ (cond
+ ;; thai time
+ (reverse
+ (setq tlist (append tlist (nthcdr 3 (decode-time c_time thai_zone))))
+ (setq enc_time (apply 'encode-time tlist))
+ (setq loc_fts (format-time-string time_format enc_time (current-time-zone)))
+ (setq thai_fts (format-time-string time_format enc_time thai_zone))
+ (setq o_str (concat "thai time = " thai_fts "\n" "time = " loc_fts)))
+ ;; local time
+ (t
+ (setq tlist (append tlist (nthcdr 3 (decode-time c_time))))
+ (setq enc_time (apply 'encode-time tlist))
+ (setq loc_fts (format-time-string time_format enc_time (current-time-zone)))
+ (setq thai_fts (format-time-string time_format enc_time thai_zone))
+ (setq o_str (concat "time = " loc_fts "\n" "thai time = " thai_fts))))
+ (message o_str)))
+
+(defun pthai-time-th2loc(timestr)
+"convert a thai time (hour/minute/second) to local time. use
+\"now\" for current time"
+ (interactive "stime: ")
+ (pthai-time-loc2th timestr t))
+
+(defun pthai-time-now(&optional loctime)
+ "return thai current time. if optional loctime, return current
+local time. message user time if called interactively"
+ (interactive "P")
+ (let ((c_zone (current-time-zone))
+ (t_zone (current-time-zone nil (* 7 3600)))
+ (time_format (concat "%a %b %e %H:%M:%S %Z %Y"))
+ (tstr))
+ (setq tstr
+ (format-time-string time_format (current-time)
+ (if loctime c_zone t_zone)))
+ (if (called-interactively-p 'any)
+ (message tstr))
+ tstr))
+
+(defun pthai-say-time(timestr)
+ "say a time in thai, use time like 1pm, 12:45pm, 17:34. use
+\"now\" for current time. returns time string"
+ (interactive "stime: ")
+ (let (tlist hour minute mhour tmp)
+ (while (not (and (setq tlist (pthai-parse-hour-minute-second timestr))
+ (setq minute (nth 1 tlist))
+ (setq hour (nth 2 tlist))))
+ (setq timestr (read-string "time: ")))
+ (setq mhour (mod hour 6))
+ ;; get hour
+ (cond
+ ;; 12am = เที่ยงคืน or หกทุ่ม or สองยาม
+ ((or (< hour 1) (and (= hour 24) (= minute 0)))
+ (setq tmp (random 4))
+ (cond ((< tmp 2)
+ (setq tlist (list "เที่ยงคืน")))
+ ((= tmp 2)
+ (setq tlist (list "หกทุ่ม")))
+ (t
+ (setq tlist (list "สองยาม")))))
+ ;; 1am 2am 3am 4am 5am = ตี + number[1-5]
+ ((< hour 6)
+ (setq tlist (list "ตี" hour)))
+ ;; 6am = หก โมง เช้า or ตีหก or ย่ำรุ่ง
+ ((< hour 7)
+ (setq tmp (random 4))
+ (cond ((< tmp 2)
+ (setq tlist (list "หก" "โมง" "เช้า")))
+ ((= tmp 2)
+ (setq tlist (list "ตีหก")))
+ (t
+ (setq tlist (list "ย่ำรุ่ง")))))
+ ;; 7am 8am 9am 10am 11am = 1-5 + โมง + เช้า
+ ;; = 7-11 + โมง + (เช้า)
+ ((< hour 12)
+ (if (= 0 (random 2)) ;; 1-5
+ (if (= mhour 1)
+ (setq tlist (list "โมง" "เช้า"))
+ (setq tlist (list mhour "โมง" "เช้า")))
+ (setq tlist (list hour "โมง")) ;; 7-11
+ (if (= 0 (random 2))
+ (setq tlist (append tlist (list "เช้า"))))))
+ ;; 12pm = เที่ยงวัน
+ ((< hour 13)
+ (setq tlist (list "เที่ยงวัน")))
+ ;; 1pm 2pm 3pm 4pm 5pm = บ่าย + number[1-5] + โมง or
+ ;; 4pm = สี่ + โมง + เย็น or
+ ;; 5pm = ห้า + โมง + เย็น
+ ((< hour 18)
+ (cond
+ ((= hour 13) ;; 1pm
+ (setq tlist (list "บ่าย" "โมง")))
+ ((or (= hour 14) (= hour 15))
+ (setq tlist (list "บ่าย" mhour "โมง")))
+ ((= hour 16) ;; 4pm
+ (if (= 0 (random 2))
+ (setq tlist (list "สี่" "โมง" "เย็น"))
+ (setq tlist (list "บ่าย" mhour "โมง"))))
+ (t ;; 5pm
+ (if (= 0 (random 2))
+ (setq tlist (list "ห้า" "โมง" "เย็น"))
+ (setq tlist (list "บ่าย" mhour "โมง"))))))
+ ;; 6pm = หก โมง เย็น or ย่ำค่ำ
+ ((< hour 19)
+ (if (= 0 (random 3))
+ (setq tlist (list "ย่ำค่ำ"))
+ (setq tlist (list "หก" "โมง" "เย็น"))))
+ ;; 7pm 8pm 9pm 10pm 11pm = number[1-5] + ทุ่ม
+ ((< hour 24)
+ (setq tlist (list mhour "ทุ่ม")))
+ (t
+ (error (concat "bad hour " hour))))
+ ;; minutes are appended 10:07am = สิบ โมง (เช้า) + เจ็ด นาที
+ (unless (or (= minute 0) (= hour 24))
+ (setq tlist (append tlist (list minute "นาที"))))
+ ;; convert everything to strings
+ (setq tlist (mapcar (lambda (x) (if (numberp x) (number-to-string x) x))
+ tlist))
+ ;; play audio
+ (dolist (tmp tlist)
+ (pthai-say tmp))
+ (setq tmp
+ (concat (number-to-string hour) ":" (format "%02d" minute)))
+ (message tmp)
+ tmp))
+
+(defun pthai-wordlist-read-thai-word-table(&optional clear)
+ "read 'thai-word-table from 'thai-word into wordlist,
+ optionally clearing existing wordlist"
+ (interactive)
+ (let ((temp_file (make-temp-file (concat "thai-word-table-" user-login-name)))
+ (count 0))
+ (if clear
+ (clrhash pthai-wordlist))
+ (with-temp-file temp_file
+ (insert (string-join (pthai-twt-extract (cdr thai-word-table) "") "\n"))
+ (goto-char (point-min))
+ (while (not (eobp))
+ (setq count (+ 1 count))
+ (forward-line)))
+ (pthai-wordlist-read temp_file)
+ (delete-file temp_file)
+ count))
+
+(defun pthai-parse-word-files(files)
+ "take a list of files and directories, return full paths of
+ files, and non-dotfiles in directories"
+ (let (tmp dfiles rfiles)
+ (while files
+ (setq files (mapcar 'expand-file-name files))
+ (setq files (sort (seq-uniq files) 'string-lessp))
+ (setq tmp (car files))
+ (setq files (cdr files))
+ (when tmp
+ (cond
+ ;; skip dot files
+ ((string-match "^\\." (file-name-nondirectory tmp))
+ nil)
+ ;; save list of files in dirs, skipping dot files and subdirs
+ ((file-directory-p tmp)
+ (setq dfiles (directory-files tmp nil "^\\([^.]\\|\\.[^.]\\|\\.\\..\\)"))
+ (setq dfiles (mapcar (lambda (x) (concat tmp "/" x)) dfiles))
+ (setq dfiles (seq-filter (lambda (x) (not (file-directory-p x))) dfiles))
+ (setq files (append files dfiles)))
+ ;; normal file
+ ((file-exists-p tmp)
+ (setq rfiles (append rfiles (list tmp)))))))
+ (setq rfiles (sort (seq-uniq rfiles) 'string-lessp))))
+
+(defcustom pthai-wordlist-list
+ (list (concat pthai-default-directory "wordlist"))
+ "list of files that contain thai word lists. note
+'thai-word-table from 'thai-word is used as well"
+ :group 'pthai
+ :set (lambda (sym val)
+ (set-default sym val)
+ (if (seq-find #'file-exists-p val)
+ (pthai-wordlist-readall t)))
+ :type '(repeat file))
+
+(defun pthai-wordlist-readall(&optional clear)
+ "read wordlists from pthai-wordlist-list, optionally clearing
+previously loaded words. returns count of unique words"
+ (interactive "P")
+ (let (tmp)
+ (if clear
+ (clrhash pthai-wordlist))
+ (dolist (tmp (pthai-parse-word-files pthai-wordlist-list))
+ (pthai-wordlist-read tmp))
+ (pthai-wordlist-read-thai-word-table)
+ (setq tmp (hash-table-count pthai-wordlist))
+ (if pthai-verbose-wordloads
+ (message "%d unique words in pthai-wordlist" tmp))
+ tmp))
+
+(defun pthai-wordlist-read(fname &optional clear)
+ "read a wordlist file into 'pthai-wordlist hash. return count
+of words in file. assumes file is in utf-8 format and 1 word per
+line at start of line. returns count of words"
+ (interactive "fenter path to wordlist: \n")
+ (let ((coding-system-for-read 'utf-8)
+ (coding-system-for-write 'utf-8)
+ (buffer-file-coding-system 'utf-8)
+ (count 0))
+ (unless (and (> (length fname) 0)
+ (setq fname (expand-file-name fname))
+ (file-exists-p fname)
+ (not (file-directory-p fname)))
+ (error (concat "invalid wordlist \"" fname "\"")))
+ (if clear
+ (clrhash pthai-wordlist))
+ (with-temp-buffer
+ (insert-file-contents fname)
+ (goto-char (point-min))
+ (while (re-search-forward "^\\(\\ct+\\)$" nil t)
+ (setq count (+ 1 count))
+ (puthash (match-string 1) 1 pthai-wordlist)))
+ (if pthai-verbose-wordloads
+ (message "%d words in %s" count fname))
+ count))
+
+(defun pthai-wordlist-add-word(thaiword)
+ "add thai word to pthai-wordlist. return true if word not in
+list, nil if word already in list"
+ (interactive "sthai word to add: ")
+ (cond ((gethash thaiword pthai-wordlist)
+ nil)
+ (t
+ (puthash thaiword 1 pthai-wordlist)
+ t)))
+
+(defun pthai-wordlist-remove-word(thaiword)
+ "remove thai word from pthai-wordlist. return t if word was in
+wordlist, nil otherwise"
+ (interactive "sthai word to remove: ")
+ (cond ((gethash thaiword pthai-wordlist)
+ (remhash thaiword pthai-wordlist)
+ t)
+ (t
+ nil)))
+
+(defun pthai-dictionary-add-word(thaiword def english_class thai_class)
+ "update pthai-dictionary with new entry or merge with existing
+entry. thaiword is a thai string, other args can be nil, string,
+or a list of strings. returns merged definition of word that was
+inserted"
+ (interactive)
+ (let (ilist tmp)
+ ;; convert to lists
+ (setq ilist (mapcar (lambda(x) (if (listp x) x (list x)))
+ (list def english_class thai_class)))
+ ;; merge if entry already exists
+ (if (setq tmp (gethash thaiword pthai-dictionary))
+ (setq ilist (seq-mapn #'append ilist tmp)))
+ ;; delete duplicates, nil
+ (setq ilist
+ (mapcar (lambda(x) (sort (delq nil (delete-dups x)) 'string-lessp))
+ ilist))
+ (puthash thaiword ilist pthai-dictionary)))
+
+(defun pthai-dictionary-remove-word(thaiword)
+ "remove word from dictionary, return t if word in dictionary,
+ nil otherwise"
+ (interactive "sthai word to remove: ")
+ (cond ((gethash thaiword pthai-dictionary)
+ (remhash thaiword pthai-dictionary)
+ t)
+ (t
+ nil)))
+
+(defun pthai-parse-classifiers(str)
+ "helper function to parse definition line and return list like
+((a b c) (ก ด ห))"
+ (let (eclass tclass tmp pos)
+ (setq pos 0)
+ (while (string-match "\\({.*?}\\)" str pos)
+ (setq tmp (match-string 1 str))
+ (setq pos (match-end 1))
+ (if (string-match-p "\\ct+" tmp)
+ (setq tclass (append (split-string tmp " *[{} \t]+ *" t) tclass))
+ (setq eclass (append (split-string tmp " *[{},\t]+ *" t) eclass))))
+ (list eclass tclass)))
+
+(defun pthai-dictionary-read(fname &optional clear)
+ "read a dictionary file, adding words to pthai-dictionary hash.
+if optional clear_dict arg, reset dictionary first. returns
+number of words read in file"
+ (interactive "fpath to dictionary name: \n")
+ (let ((coding-system-for-read 'utf-8)
+ (coding-system-for-write 'utf-8)
+ (buffer-file-coding-system 'utf-8)
+ (count 0)
+ (tword)
+ (def_line)
+ (class_info))
+ (unless (and (> (length fname) 0)
+ (setq fname (expand-file-name fname))
+ (file-exists-p fname)
+ (not (file-directory-p fname)))
+ (error (concat "invalid dictionary \"" fname "\"")))
+ (if clear
+ (clrhash pthai-dictionary))
+ (with-temp-buffer
+ (insert-file-contents fname)
+ (goto-char (point-min)) ;; remove # comments
+ (while (re-search-forward "#.*$" nil t)
+ (replace-match ""))
+ (goto-char (point-min)) ;; remove stuff in parentheses
+ (while (re-search-forward "[ \t]*(.*?)[ \t]*" nil t)
+ (replace-match " "))
+ (goto-char (point-min)) ;; consolidate spaces
+ (while (re-search-forward "[ \t]+" nil t)
+ (replace-match " "))
+ (goto-char (point-min)) ;; remove double quote marks
+ (while (re-search-forward "\"" nil t)
+ (replace-match ""))
+ ;; convert all to lowercase
+ (downcase-region (point-min) (point-max))
+
+ (goto-char (point-min))
+ (while (re-search-forward "^\\(\\ct+\\) \\(.*?\\) *$" nil t)
+ (when (and (setq tword (match-string 1))
+ (setq def_line (match-string 2))
+ (> (length def_line) 0))
+ (cond
+ ;; parse classifier info and strip from definition
+ ((string-match-p "[{}]" def_line)
+ (setq class_info (pthai-parse-classifiers def_line))
+ (setq def_line (replace-regexp-in-string " ?{.*?} ?" "" def_line)))
+ (t
+ (setq class_info nil)))
+ ;; save thai word, definition, classifier info
+ (pthai-dictionary-add-word tword
+ (split-string def_line " ?, ?" t)
+ (nth 0 class_info)
+ (nth 1 class_info))
+ (setq count (+ 1 count)))))
+ (if pthai-verbose-wordloads
+ (message "%d words in %s" count fname))
+ count))
+
+(defcustom pthai-dictionary-list
+ (list (concat pthai-default-directory "dictionary"))
+ "list of files and directories that contain thai dictionary files"
+ :group 'pthai
+ :set (lambda (sym val)
+ (set-default sym val)
+ (let (tmp)
+ (dolist (tmp (pthai-parse-word-files val))
+ (if (file-exists-p tmp)
+ (pthai-dictionary-read tmp)))
+ (unless pthai-bootstrap
+ (when pthai-verbose-wordloads
+ (setq tmp (hash-table-count pthai-dictionary))
+ (message "%d unique words in pthai-dictionary" tmp)))))
+ :type '(repeat file))
+
+(defun pthai-dictionary-readall(&optional clear)
+ "read dictionary files defined in pthai-dictionary-list. if
+optional clear, clear existing entries. returns total number of
+words in dictionary"
+ (interactive "P")
+ (let ((files (pthai-parse-word-files pthai-dictionary-list))
+ (tmp))
+ (if clear
+ (clrhash pthai-dictionary))
+ (dolist (tmp files)
+ (pthai-dictionary-read tmp))
+ (when pthai-verbose-wordloads
+ (setq tmp (hash-table-count pthai-dictionary))
+ (message "%d unique words in pthai-dictionary" tmp))
+ tmp))
+
+(defun pthai-reverse-lookup(word ltype)
+ "helper function for reverse look ups:
+ word ltype returns (list of ... or nil if not found) example
+ ==== ===== ================ =======
+ english word 'def thai definitions dog => (สุนัข หมา)
+ english classifier 'eclass thai words that are classified by english word laws => (กฎ)
+ thai classifier 'tclass thai words that are classified by thai word วงกบ => (กรอบ วง)"
+ (let* (tmp vals rlist
+ (ltypes (list 'def 'eclass 'tclass))
+ (n (seq-position ltypes ltype)))
+ (unless n
+ (error "invalid ltype: %s" ltype))
+ (dolist (tmp (hash-table-keys pthai-dictionary))
+ (setq vals (nth n (gethash tmp pthai-dictionary)))
+ (if (member word vals)
+ (setq rlist (append rlist (list tmp)))))
+ (setq rlist (sort (delete-dups rlist) 'string-lessp))))
+
+(defun pthai-dictionary-p(thaiword)
+ "return non-nil if thaiword in dictionary"
+ (interactive)
+ (if (gethash thaiword pthai-dictionary)
+ t nil))
+
+(defun pthai-wordlist-p(thaiword)
+ "return non-nil if thaiword in word list"
+ (interactive)
+ (if (gethash thaiword pthai-wordlist)
+ t nil))
+
+(defun pthai-any-p(thaiword)
+ "return non-nil if thaiword in dictionary or word list"
+ (interactive)
+ (if (or (gethash thaiword pthai-wordlist)
+ (gethash thaiword pthai-dictionary))
+ t nil))
+
+(defun pthai-classifier-p(thaiword)
+ "return non-nil if thaiword is a classifier in dictionary"
+ (interactive "sthai word: ")
+ (let (tmp)
+ (if (and (setq tmp (gethash thaiword pthai-dictionary))
+ (or (nth 1 tmp) (nth 2 tmp)))
+ t nil)))
+
+(defun pthai-lookup(word)
+ "look up definition of thai or english word. returns list of
+english or thai words. if called interactively, message user
+definition"
+ (interactive "sword: \n")
+ (let (wlist)
+ (setq word (downcase word))
+ (if (string-match-p "\\ct+" word)
+ (setq wlist (nth 0 (gethash word pthai-dictionary)))
+ (setq wlist (pthai-reverse-lookup word 'def)))
+ (when (called-interactively-p 'any)
+ (if wlist
+ (message "%s = %s" word (string-join wlist ", "))
+ (message "%s: not in dictionary" word)))
+ wlist))
+
+(defun pthai-lookup-and-insert(word)
+ "look up a word (english or thai) and insert meaning at point.
+if more than one definition found, select from choices found"
+ (interactive "sword: ")
+ (let (tlist tlen)
+ (save-excursion
+ (setq word (downcase word)
+ tlist (pthai-lookup word)
+ tlen (length tlist))
+ (cond
+ ((= 0 tlen)
+ (message "%s not found in dictionary" word))
+ ((= 1 tlen)
+ (insert (car tlist)))
+ (t
+ (insert (ido-completing-read "meanings: " tlist)))))))
+
+(defun pthai-lookup-classifier-detail(word)
+ "return detailed classifier information for word. see
+'pthai-lookup-classifier"
+ (interactive "sword to lookup: \n")
+ (pthai-lookup-classifier word t))
+
+(defun pthai-lookup-classifier(word &optional detail)
+ "return classifier information for word:
+word return value (list of ...)
+==== ============
+thai word, classifier '( (defintions) '(eng classifiers) '(thai classifiers) )
+thai word, not classifer '( thai classifiers for thai word )
+english word '( thai classifiers for english word )
+
+ message user information if called interactively. if optional
+detail arg given, print out complete definitions for all
+classifier words (for 2nd and 3rd cases, 1st case will by default
+when interactive"
+ (interactive "sword to lookup: \n")
+ (let (tmp clist ret)
+ (setq word (downcase word))
+ (cond
+ ;; thai word that is a classifier, e.g. ตัว
+ ((pthai-classifier-p word)
+ (setq ret (gethash word pthai-dictionary))
+ (setq clist (list word)))
+ ;; thai word that has classifiers, e.g. แมว
+ ((string-match-p "\\ct+" word)
+ (setq ret (pthai-reverse-lookup word 'tclass))
+ (setq clist ret))
+ ;; english word, e.g. dog
+ ((string-match-p "^\\Ct+$" word)
+ ;; look up classifiers directly and reverse lookup thai defs and
+ ;; the classifiers for them
+ (setq clist (pthai-reverse-lookup word 'eclass))
+ (dolist (tmp (pthai-lookup word))
+ (setq clist (append (pthai-reverse-lookup tmp 'tclass) clist)))
+ (setq clist (seq-filter (lambda (x) x) clist))
+ (setq clist (sort (delete-dups clist) 'string-lessp))
+ (setq ret clist))
+ (t
+ (error "error parsing classifier info")))
+
+ (when (or detail (called-interactively-p 'any))
+ (cond
+ ;; word is a classifier, print out info
+ ((pthai-classifier-p word)
+ (message (pthai-classifier-info word)))
+ ;; found classifiers for word, print list of classifiers, or details
+ ;; for each classifier found
+ (clist
+ (if detail
+ (message (mapconcat 'pthai-classifier-info clist "\n"))
+ (message "%s classified by: %s" word (string-join clist ", "))))
+ ;; no classifier info found
+ (t
+ (message (pthai-classifier-info word)))))
+ ret))
+
+(defun pthai-classifier-info(word)
+ "return classifer information as a string"
+ (let (cinfo cistr)
+ (if (not (pthai-classifier-p word))
+ (setq cistr (concat "no classifier info found for " word))
+
+ (setq cinfo (gethash word pthai-dictionary))
+ (if (nth 0 cinfo)
+ (setq cistr (concat word " = " (string-join (nth 0 cinfo) ", ") "\n"))
+ (setq cistr (concat word " is a classifier\n")))
+ (if (nth 1 cinfo)
+ (setq cistr (concat cistr "classifier for: "
+ (string-join (nth 1 cinfo) ", ") "\n")))
+ (if (nth 2 cinfo)
+ (setq cistr (concat cistr "classifier for: "
+ (string-join (nth 2 cinfo) " ") "\n")))
+ (setq cistr (string-trim-right cistr)))
+ cistr))
+
+(defun pthai-temperature-cel2fah(celsius)
+ "convert celsius to fahrenheit. message user if called
+interactively"
+ (interactive "ncelsius: ")
+ (let (fah)
+ (setq fah (+ (* celsius (/ 9.0 5.0)) 32.0))
+ (if (called-interactively-p 'any)
+ (message "%3.1f C = %3.1f F" celsius fah))
+ fah))
+
+(defun pthai-temperature-fah2cel(fahrenheit)
+ "convert fahrenheit to celsius. message user if called
+interactively"
+ (interactive "nfahrenheit: ")
+ (let (cel)
+ (setq cel (* (- fahrenheit 32.0) (/ 5.0 9.0)))
+ (if (called-interactively-p 'any)
+ (message "%3.1f F = %3.1f C" fahrenheit cel))
+ cel))
+
+(defun pthai-year-greg2bud(&optional year)
+ "convert gregorian year (A.D.) to buddhist year (B.E.). message
+user if called interactively"
+ (interactive)
+ (let ((bud_year)
+ (cur_year (string-to-number (format-time-string "%Y"))))
+ (unless (integerp year)
+ (setq year (round (read-number "year: " cur_year))))
+ (setq bud_year (+ 543 year))
+ (when (called-interactively-p 'any)
+ (if (>= year 0)
+ (message "%d A.D. = B.E. %d" year bud_year)
+ (message "%d B.C. = B.E. %d" (* -1 year) bud_year)))
+ bud_year))
+
+(defun pthai-year-bud2greg(&optional year)
+ "convert buddhist year (B.E.) to gregorian year (A.D./B.C.).
+message user if called interactively"
+ (interactive)
+ (let ((greg_year)
+ (cur_year (+ 543
+ (string-to-number (format-time-string "%Y")))))
+ (unless (integerp year)
+ (setq year (round (read-number "year: " cur_year))))
+ (setq greg_year (- year 543))
+ (when (called-interactively-p 'any)
+ (if (>= greg_year 0)
+ (message "B.E. %d = %d A.D." year greg_year)
+ (message "B.E. %d = %d B.C. " year (* -1 greg_year))))
+ greg_year))
+
+;; initialize default wordlist and dictionary
+(unless pthai-bootstrap
+ (pthai-twt-update "ST_SHARE_DIR/words" t)
+ (pthai-dictionary-read "ST_SHARE_DIR/sampledict.txt"))
+
+(provide 'pthai)
Index: pkgsrc/textproc/split-thai/files/sampledict.txt
diff -u /dev/null pkgsrc/textproc/split-thai/files/sampledict.txt:1.1
--- /dev/null Thu Mar 18 17:53:42 2021
+++ pkgsrc/textproc/split-thai/files/sampledict.txt Thu Mar 18 17:53:42 2021
@@ -0,0 +1,45 @@
+# sample dictionary file for pthai.el emacs module
+#
+# format of vocabulary files:
+# thai_word[whitespace]eng1,eng2,eng3, {thaiclass1 thaiclass2}, {engclass1, engclass1, engclass1}
+# definitions can be arranged in any order
+#
+# anything after '#' is ignored
+# anything in () inclusively is ignored
+# empty lines/whitespace/no-thai-character-in-line are ignored
+#
+# thai words with multiple definition lines will be merged
+#
+# anything in definition of word within {} is classifier info,
+# either the actual thai word(s) used with classifier, or english
+# definitions of what the word classifies, note english classifiers
+# need to be separated by commas, thai classifiers by spaces.
+#
+# see http://thai-language.com/ref/classifier-list for a large list of classifiers
+#
+
+# thai word with one definition
+แมว cat
+
+# thai words that similar meanings, with some comments
+หมา dog
+สุนัข dog (more polite)
+ดิฉัน I (female)
+ผม I (male)
+
+# thai word with multiple definitions
+แม่น accurately, exactly, precisely
+
+# thai word that includes classifier information
+# กรง word that has a definition and is also a classifier
+# cage one of definitions of กรง
+# coop one of definitions of กรง
+# {cage} means กรง is a classifier for "cage"
+# {กรง} means กรง is a classifier for "กรง"
+กรง cage, coop, {cage}, {กรง}
+
+# classifier-only defintion of thai word
+# กระทง classifier
+# statements, points, plots of land กระทง is a classifier for these english words
+# กระทง ข้อหา กระทงความ กระทง is a classifer for these thai words
+กระทง {statements, points, plots of land}, {กระทง ข้อหา กระทงความ}
Index: pkgsrc/textproc/split-thai/files/words.abm
diff -u /dev/null pkgsrc/textproc/split-thai/files/words.abm:1.1
--- /dev/null Thu Mar 18 17:53:42 2021
+++ pkgsrc/textproc/split-thai/files/words.abm Thu Mar 18 17:53:42 2021
@@ -0,0 +1,2 @@
+[0x002d,0x002e]
+[0x0e01,0x0e5b]
Home |
Main Index |
Thread Index |
Old Index