pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/textproc/split-thai



Module Name:    pkgsrc
Committed By:   scole
Date:           Tue Aug 18 17:10:28 UTC 2020

Modified Files:
        pkgsrc/textproc/split-thai: Makefile
        pkgsrc/textproc/split-thai/files: thai-utility.el

Log Message:
Update to 0.5
- clean up some comments and simplify lisp code a bit


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/files/thai-utility.el

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.4 pkgsrc/textproc/split-thai/Makefile:1.5
--- pkgsrc/textproc/split-thai/Makefile:1.4     Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/Makefile Tue Aug 18 17:10:28 2020
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.4 2020/08/17 17:43:15 scole Exp $
+# $NetBSD: Makefile,v 1.5 2020/08/18 17:10:28 scole Exp $
 
-PKGNAME=       split-thai-0.4
+PKGNAME=       split-thai-0.5
 CATEGORIES=    textproc
 MAINTAINER=    pkgsrc-users@NetBSD.org
 COMMENT=       Utilities to split UTF-8 Thai text into words

Index: pkgsrc/textproc/split-thai/files/thai-utility.el
diff -u pkgsrc/textproc/split-thai/files/thai-utility.el:1.4 pkgsrc/textproc/split-thai/files/thai-utility.el:1.5
--- pkgsrc/textproc/split-thai/files/thai-utility.el:1.4        Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/thai-utility.el    Tue Aug 18 17:10:28 2020
@@ -98,13 +98,13 @@ count 'thai-word-table words"
     (message "%d words in nested alist" count)
     count))
 
-;; 'thai-tis620 is default for emacs <= 28
 (defun thai-update-word-table-utf8 (file &optional append)
   "Update Thai word table by replacing the current word list with
-FILE, which is in utf-8.  If called with a prefix argument, FILE
-is appended instead to the current word list.  Does the same as
+FILE, which is utf-8.  If called with a prefix argument, FILE is
+appended instead to the current word list.  Does the same as
 'thai-update-word-table, except that function expects
-'thai-tis620 encoding"
+'thai-tis620 encoding which appears to be the default format for
+at least emacs version <= 28"
   (interactive "FThai word table file: \nP")
   (let* ((coding-system-for-read 'utf-8)
         (coding-system-for-write 'utf-8)
@@ -146,23 +146,19 @@ dictionary words."
                  "\"Nested alist of Thai words.\")" ))
     (with-temp-buffer
       (insert-file-contents dictfile)
+      ;; quote each thai word
       (goto-char (point-min))
+      (while (search-forward-regexp "\\ct+" nil t)
+       (replace-match (concat "\"" (match-string 0) "\"")))
       (setq line_count (count-lines (point-min) (point-max)))
-      ;; quote each thai word
-      (while (not (eobp))
-       (beginning-of-line)
-       (insert "\"")
-       (end-of-line)
-       (insert "\"")
-       (forward-line 1))
-
+      ;; insert lisp code for defvar 
       (goto-char (point-min))
       (dolist (elem header)
        (insert elem "\n"))
-
       (goto-char (point-max))
       (dolist (elem footer)
        (insert elem "\n"))
+      ;; indent for lisp and save
       (lisp-mode)
       (indent-region (point-min) (point-max))
       (write-region nil nil lispfile))
@@ -170,15 +166,14 @@ dictionary words."
 
 (defun split-thai-line()
   "Break Thai words from point to end of line by inserting a
-separator string at word boundaries. (wrapper for 'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
   (interactive)
   (thai-break-words " " (line-end-position))
   (split-thai-numbers (point) (line-end-position)))
 
 (defun split-thai()
   "Break Thai words from point to end of buffer by inserting a
-separator string at word boundaries. (wrapper for
-'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
   (interactive)
   (thai-break-words " " (point-max))
   (split-thai-numbers (point) (point-max)))
@@ -188,9 +183,7 @@ separator string at word boundaries. (wr
 'thai-break-words doesn't always split numbers properly. this may
 improve tokenization somewhat."
   ;; xxx this really should be fixed in 'thai-word lib
-  (let* (
-        ;; "\\([๐๑๒๓๔๕๖๗๘๙0123456789]+\\)"
-        (num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
+  (let* ((num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
         (nonnum_rexp "\\([\u0e00-\u0e4f\u0e5a-\u0e7f]\\)") ;; "non-numbers"
         (trailing_rexp (concat num_rexp nonnum_rexp))
         (leading_rexp (concat nonnum_rexp num_rexp)))



Home | Main Index | Thread Index | Old Index