Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to 0.5



details:   https://anonhg.NetBSD.org/pkgsrc/rev/1317708f5891
branches:  trunk
changeset: 437206:1317708f5891
user:      scole <scole%pkgsrc.org@localhost>
date:      Tue Aug 18 17:10:28 2020 +0000

description:
Update to 0.5
- clean up some comments and simplify lisp code a bit

diffstat:

 textproc/split-thai/Makefile              |   4 ++--
 textproc/split-thai/files/thai-utility.el |  31 ++++++++++++-------------------
 2 files changed, 14 insertions(+), 21 deletions(-)

diffs (91 lines):

diff -r 35614c88b960 -r 1317708f5891 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile      Tue Aug 18 14:28:27 2020 +0000
+++ b/textproc/split-thai/Makefile      Tue Aug 18 17:10:28 2020 +0000
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.4 2020/08/17 17:43:15 scole Exp $
+# $NetBSD: Makefile,v 1.5 2020/08/18 17:10:28 scole Exp $
 
-PKGNAME=       split-thai-0.4
+PKGNAME=       split-thai-0.5
 CATEGORIES=    textproc
 MAINTAINER=    pkgsrc-users%NetBSD.org@localhost
 COMMENT=       Utilities to split UTF-8 Thai text into words
diff -r 35614c88b960 -r 1317708f5891 textproc/split-thai/files/thai-utility.el
--- a/textproc/split-thai/files/thai-utility.el Tue Aug 18 14:28:27 2020 +0000
+++ b/textproc/split-thai/files/thai-utility.el Tue Aug 18 17:10:28 2020 +0000
@@ -98,13 +98,13 @@
     (message "%d words in nested alist" count)
     count))
 
-;; 'thai-tis620 is default for emacs <= 28
 (defun thai-update-word-table-utf8 (file &optional append)
   "Update Thai word table by replacing the current word list with
-FILE, which is in utf-8.  If called with a prefix argument, FILE
-is appended instead to the current word list.  Does the same as
+FILE, which is utf-8.  If called with a prefix argument, FILE is
+appended instead to the current word list.  Does the same as
 'thai-update-word-table, except that function expects
-'thai-tis620 encoding"
+'thai-tis620 encoding which appears to be the default format for
+at least emacs version <= 28"
   (interactive "FThai word table file: \nP")
   (let* ((coding-system-for-read 'utf-8)
         (coding-system-for-write 'utf-8)
@@ -146,23 +146,19 @@
                  "\"Nested alist of Thai words.\")" ))
     (with-temp-buffer
       (insert-file-contents dictfile)
+      ;; quote each thai word
       (goto-char (point-min))
+      (while (search-forward-regexp "\\ct+" nil t)
+       (replace-match (concat "\"" (match-string 0) "\"")))
       (setq line_count (count-lines (point-min) (point-max)))
-      ;; quote each thai word
-      (while (not (eobp))
-       (beginning-of-line)
-       (insert "\"")
-       (end-of-line)
-       (insert "\"")
-       (forward-line 1))
-
+      ;; insert lisp code for defvar 
       (goto-char (point-min))
       (dolist (elem header)
        (insert elem "\n"))
-
       (goto-char (point-max))
       (dolist (elem footer)
        (insert elem "\n"))
+      ;; indent for lisp and save
       (lisp-mode)
       (indent-region (point-min) (point-max))
       (write-region nil nil lispfile))
@@ -170,15 +166,14 @@
 
 (defun split-thai-line()
   "Break Thai words from point to end of line by inserting a
-separator string at word boundaries. (wrapper for 'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
   (interactive)
   (thai-break-words " " (line-end-position))
   (split-thai-numbers (point) (line-end-position)))
 
 (defun split-thai()
   "Break Thai words from point to end of buffer by inserting a
-separator string at word boundaries. (wrapper for
-'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
   (interactive)
   (thai-break-words " " (point-max))
   (split-thai-numbers (point) (point-max)))
@@ -188,9 +183,7 @@
 'thai-break-words doesn't always split numbers properly. this may
 improve tokenization somewhat."
   ;; xxx this really should be fixed in 'thai-word lib
-  (let* (
-        ;; "\\([๐๑๒๓๔๕๖๗๘๙0123456789]+\\)"
-        (num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
+  (let* ((num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
         (nonnum_rexp "\\([\u0e00-\u0e4f\u0e5a-\u0e7f]\\)") ;; "non-numbers"
         (trailing_rexp (concat num_rexp nonnum_rexp))
         (leading_rexp (concat nonnum_rexp num_rexp)))


Home | Main Index | Thread Index | Old Index