pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc/split-thai
Module Name: pkgsrc
Committed By: scole
Date: Sat Aug 15 16:52:29 UTC 2020
Modified Files:
pkgsrc/textproc/split-thai: Makefile
pkgsrc/textproc/split-thai/files: st-emacs thai-utility.el
Log Message:
Update to version 0.3
all changes for emacs splitter:
- load custom dictionary first because 'thai-word-table is a defvar
- add count function and return word counts for a few funcs
- add lisp wrapper functions split-thai and split-thai-line, which
can split Thai text in an emacs buffer using 'thai-break-words
To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.2 -r1.3 pkgsrc/textproc/split-thai/files/st-emacs \
pkgsrc/textproc/split-thai/files/thai-utility.el
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.2 pkgsrc/textproc/split-thai/Makefile:1.3
--- pkgsrc/textproc/split-thai/Makefile:1.2 Fri Aug 14 17:31:34 2020
+++ pkgsrc/textproc/split-thai/Makefile Sat Aug 15 16:52:28 2020
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.2 2020/08/14 17:31:34 scole Exp $
+# $NetBSD: Makefile,v 1.3 2020/08/15 16:52:28 scole Exp $
-PKGNAME= split-thai-0.2
+PKGNAME= split-thai-0.3
CATEGORIES= textproc
MAINTAINER= pkgsrc-users@NetBSD.org
COMMENT= Utilities to split UTF-8 Thai text into words
Index: pkgsrc/textproc/split-thai/files/st-emacs
diff -u pkgsrc/textproc/split-thai/files/st-emacs:1.2 pkgsrc/textproc/split-thai/files/st-emacs:1.3
--- pkgsrc/textproc/split-thai/files/st-emacs:1.2 Fri Aug 14 17:31:34 2020
+++ pkgsrc/textproc/split-thai/files/st-emacs Sat Aug 15 16:52:29 2020
@@ -8,11 +8,10 @@
;;
;;(toggle-debug-on-error) ;; debug
-(require 'thai-word)
-;; load custom dictionary
-(load "ST_SHARE_DIR/thai-utility" nil t)
+;; load custom dictionary first, 'thai-word-table is defvar
(load "ST_SHARE_DIR/thai-dict" nil t)
+(load "ST_SHARE_DIR/thai-utility" nil t)
;; split a thai line by spaces, return new line
(defun process-thai-line(line)
Index: pkgsrc/textproc/split-thai/files/thai-utility.el
diff -u pkgsrc/textproc/split-thai/files/thai-utility.el:1.2 pkgsrc/textproc/split-thai/files/thai-utility.el:1.3
--- pkgsrc/textproc/split-thai/files/thai-utility.el:1.2 Fri Aug 14 17:31:34 2020
+++ pkgsrc/textproc/split-thai/files/thai-utility.el Sat Aug 15 16:52:29 2020
@@ -44,10 +44,12 @@ uses recursion"
(defun thai-word-table-save(filename &optional alist)
"save thai words extracted from a nested-alist table to
filename in utf8 format, one word per line. default is to save
-'thai-word-table if no alist argument given."
- (interactive)
+'thai-word-table if no alist argument given. Returns number of
+dictionary words."
+ (interactive "FName of file to save to: \nP")
(let ((thaiwords)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
@@ -72,8 +74,29 @@ filename in utf8 format, one word per li
(insert elem "\n")))
(sort-lines nil (point-min) (point-max))
+ (setq line_count (count-lines (point-min) (point-max)))
(write-region nil nil filename)
- (buffer-string))))
+ line_count)))
+
+(defun count-words-nested-alist (&optional alist)
+ "Count number of words in a nested alist. if no arg given,
+count 'thai-word-table words"
+ (interactive)
+ (let ((count 0)
+ (elem)
+ (thaiwords))
+ ;; default list or not
+ (setq alist (or alist thai-word-table))
+ (or (nested-alist-p alist)
+ (error "Invalid argument %s" alist))
+ ;; remove 'thai-words from 'thai-word-table
+ (setq alist (cdr alist))
+ (while (setq elem (car alist))
+ (setq alist (cdr alist))
+ (setq thaiwords (extract-thai-na elem ""))
+ (setq count (+ count (length thaiwords))))
+ (message "%d words in nested alist" count)
+ count))
;; 'thai-tis620 is default for emacs <= 28
(defun thai-update-word-table-utf8 (file &optional append)
@@ -99,25 +122,32 @@ is appended instead to the current word
(defun thai-word-table-save-defvar(dictfile lispfile)
"read a utf8 thai dictionary file and save to a lisp file
suitable for initializing the 'thai-word-table as a \"defvar\".
-Overwrites the lisp file if it exists."
+Overwrites the lisp file if it exists. Returns count of
+dictionary words."
(interactive)
(let ((header)
(footer)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
- (setq header (list "(defvar thai-word-table"
- "(let ((table (list 'thai-words)))"
- "(dolist (elt"
- "'(" ))
- (setq footer (list "))"
- "(set-nested-alist elt 1 table))"
- "table)"
- "\"Nested alist of Thai words.\")" ))
+ (setq header (list
+ ";; file auto-generated from thai-word-table-save-defvar"
+ ""
+ "(defvar thai-word-table"
+ "(let ((table (list 'thai-words)))"
+ "(dolist (elt"
+ "'(" ))
+ (setq footer (list
+ "))"
+ "(set-nested-alist elt 1 table))"
+ "table)"
+ "\"Nested alist of Thai words.\")" ))
(with-temp-buffer
(insert-file-contents dictfile)
(goto-char (point-min))
+ (setq line_count (count-lines (point-min) (point-max)))
;; quote each thai word
(while (not (eobp))
(beginning-of-line)
@@ -135,4 +165,18 @@ Overwrites the lisp file if it exists."
(insert elem "\n"))
(lisp-mode)
(indent-region (point-min) (point-max))
- (write-region nil nil lispfile))))
+ (write-region nil nil lispfile))
+ line_count))
+
+(defun split-thai-line(&optional separator)
+ "Break Thai words from point to end of line by inserting a
+separator string at word boundaries. (wrapper for 'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (line-end-position)))
+
+(defun split-thai(&optional separator)
+ "Break Thai words from point to end of buffer by inserting a
+separator string at word boundaries. (wrapper for
+'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (point-max)))
Home | Main Index | Thread Index | Old Index