Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to version 0.3
details:   https://anonhg.NetBSD.org/pkgsrc/rev/2602c57302c0
branches:  trunk
changeset: 437064:2602c57302c0
user:      scole <scole%pkgsrc.org@localhost>
date:      Sat Aug 15 16:52:28 2020 +0000
description:
Update to version 0.3
all changes for emacs splitter:
- load custom dictionary first because 'thai-word-table is a defvar
  (a defvar does not override a value that is already bound)
- add a word-count function and return word counts from a few functions
- add lisp wrapper functions split-thai and split-thai-line, which
  can split Thai text in an Emacs buffer using 'thai-break-words
diffstat:
 textproc/split-thai/Makefile              |   4 +-
 textproc/split-thai/files/st-emacs        |   5 +-
 textproc/split-thai/files/thai-utility.el |  70 +++++++++++++++++++++++++-----
 3 files changed, 61 insertions(+), 18 deletions(-)
diffs (140 lines):
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile      Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/Makefile      Sat Aug 15 16:52:28 2020 +0000
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.2 2020/08/14 17:31:34 scole Exp $
+# $NetBSD: Makefile,v 1.3 2020/08/15 16:52:28 scole Exp $
 
-PKGNAME=       split-thai-0.2
+PKGNAME=       split-thai-0.3
 CATEGORIES=    textproc
 MAINTAINER=    pkgsrc-users%NetBSD.org@localhost
 COMMENT=       Utilities to split UTF-8 Thai text into words
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/files/st-emacs
--- a/textproc/split-thai/files/st-emacs        Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/files/st-emacs        Sat Aug 15 16:52:28 2020 +0000
@@ -8,11 +8,10 @@
 ;;
 
 ;;(toggle-debug-on-error) ;; debug
-(require 'thai-word)
 
-;; load custom dictionary
+;; load custom dictionary first, 'thai-word-table is defvar
+(load "ST_SHARE_DIR/thai-dict" nil t)
 (load "ST_SHARE_DIR/thai-utility" nil t)
-(load "ST_SHARE_DIR/thai-dict" nil t)
 
 ;; split a thai line by spaces, return new line
 (defun process-thai-line(line)
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/files/thai-utility.el
--- a/textproc/split-thai/files/thai-utility.el Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/files/thai-utility.el Sat Aug 15 16:52:28 2020 +0000
@@ -44,10 +44,12 @@
 (defun thai-word-table-save(filename &optional alist)
   "save thai words extracted from a nested-alist table to
 filename in utf8 format, one word per line.  default is to save
-'thai-word-table if no alist argument given."
-  (interactive)
+'thai-word-table if no alist argument given.  Returns number of
+dictionary words."
+  (interactive "FName of file to save to: \nP")
   (let ((thaiwords)
        (elem)
+       (line_count)
        (coding-system-for-read 'utf-8)
        (coding-system-for-write 'utf-8)
        (buffer-file-coding-system 'utf-8))
@@ -72,8 +74,29 @@
          (insert elem "\n")))
 
       (sort-lines nil (point-min) (point-max))
+      (setq line_count (count-lines (point-min) (point-max)))
       (write-region nil nil filename)
-      (buffer-string))))
+      line_count)))
+
+(defun count-words-nested-alist (&optional alist)
+  "Count number of words in a nested alist. if no arg given,
+count 'thai-word-table words"
+  (interactive)
+  (let ((count 0)
+       (elem)
+       (thaiwords))
+    ;; default list or not
+    (setq alist (or alist thai-word-table))
+    (or (nested-alist-p alist)
+       (error "Invalid argument %s" alist))
+    ;; remove 'thai-words from 'thai-word-table
+    (setq alist (cdr alist))
+    (while (setq elem (car alist))
+      (setq alist (cdr alist))
+      (setq thaiwords (extract-thai-na elem ""))
+      (setq count (+ count (length thaiwords))))
+    (message "%d words in nested alist" count)
+    count))
 
 ;; 'thai-tis620 is default for emacs <= 28
 (defun thai-update-word-table-utf8 (file &optional append)
@@ -99,25 +122,32 @@
 (defun thai-word-table-save-defvar(dictfile lispfile)
   "read a utf8 thai dictionary file and save to a lisp file
 suitable for initializing the 'thai-word-table as a \"defvar\".
-Overwrites the lisp file if it exists."
+Overwrites the lisp file if it exists.  Returns count of
+dictionary words."
   (interactive)
   (let ((header)
        (footer)
        (elem)
+       (line_count)
        (coding-system-for-read 'utf-8)
        (coding-system-for-write 'utf-8)
        (buffer-file-coding-system 'utf-8))
-    (setq header (list "(defvar thai-word-table"
-                      "(let ((table (list 'thai-words)))"
-                      "(dolist (elt"
-                      "'(" ))
-    (setq footer (list "))"
-                      "(set-nested-alist elt 1 table))"
-                      "table)"
-                      "\"Nested alist of Thai words.\")" ))
+    (setq header (list
+                 ";; file auto-generated from thai-word-table-save-defvar"
+                 ""
+                 "(defvar thai-word-table"
+                 "(let ((table (list 'thai-words)))"
+                 "(dolist (elt"
+                 "'(" ))
+    (setq footer (list
+                 "))"
+                 "(set-nested-alist elt 1 table))"
+                 "table)"
+                 "\"Nested alist of Thai words.\")" ))
     (with-temp-buffer
       (insert-file-contents dictfile)
       (goto-char (point-min))
+      (setq line_count (count-lines (point-min) (point-max)))
       ;; quote each thai word
       (while (not (eobp))
        (beginning-of-line)
@@ -135,4 +165,18 @@
        (insert elem "\n"))
       (lisp-mode)
       (indent-region (point-min) (point-max))
-      (write-region nil nil lispfile))))
+      (write-region nil nil lispfile))
+    line_count))
+
+(defun split-thai-line(&optional separator)
+  "Break Thai words from point to end of line by inserting a
+separator string at word boundaries. (wrapper for 'thai-break-words)"
+  (interactive)
+    (thai-break-words (or separator " ") (line-end-position)))
+
+(defun split-thai(&optional separator)
+  "Break Thai words from point to end of buffer by inserting a
+separator string at word boundaries. (wrapper for
+'thai-break-words)"
+  (interactive)
+    (thai-break-words (or separator " ") (point-max)))
Home |
Main Index |
Thread Index |
Old Index