pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc/split-thai
Module Name: pkgsrc
Committed By: scole
Date: Tue Aug 18 17:10:28 UTC 2020
Modified Files:
pkgsrc/textproc/split-thai: Makefile
pkgsrc/textproc/split-thai/files: thai-utility.el
Log Message:
Update split-thai to 0.5
- Clean up some comments and simplify the Emacs Lisp code a bit
To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.4 -r1.5 pkgsrc/textproc/split-thai/files/thai-utility.el
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.4 pkgsrc/textproc/split-thai/Makefile:1.5
--- pkgsrc/textproc/split-thai/Makefile:1.4 Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/Makefile Tue Aug 18 17:10:28 2020
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.4 2020/08/17 17:43:15 scole Exp $
+# $NetBSD: Makefile,v 1.5 2020/08/18 17:10:28 scole Exp $
-PKGNAME= split-thai-0.4
+PKGNAME= split-thai-0.5
CATEGORIES= textproc
MAINTAINER= pkgsrc-users%NetBSD.org@localhost
COMMENT= Utilities to split UTF-8 Thai text into words
Index: pkgsrc/textproc/split-thai/files/thai-utility.el
diff -u pkgsrc/textproc/split-thai/files/thai-utility.el:1.4 pkgsrc/textproc/split-thai/files/thai-utility.el:1.5
--- pkgsrc/textproc/split-thai/files/thai-utility.el:1.4 Mon Aug 17 17:43:15 2020
+++ pkgsrc/textproc/split-thai/files/thai-utility.el Tue Aug 18 17:10:28 2020
@@ -98,13 +98,13 @@ count 'thai-word-table words"
(message "%d words in nested alist" count)
count))
-;; 'thai-tis620 is default for emacs <= 28
(defun thai-update-word-table-utf8 (file &optional append)
"Update Thai word table by replacing the current word list with
-FILE, which is in utf-8. If called with a prefix argument, FILE
-is appended instead to the current word list. Does the same as
+FILE, which is utf-8. If called with a prefix argument, FILE is
+appended instead to the current word list. Does the same as
'thai-update-word-table, except that function expects
-'thai-tis620 encoding"
+'thai-tis620 encoding which appears to be the default format for
+at least emacs version <= 28"
(interactive "FThai word table file: \nP")
(let* ((coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
@@ -146,23 +146,19 @@ dictionary words."
"\"Nested alist of Thai words.\")" ))
(with-temp-buffer
(insert-file-contents dictfile)
+ ;; quote each thai word
(goto-char (point-min))
+ (while (search-forward-regexp "\\ct+" nil t)
+ (replace-match (concat "\"" (match-string 0) "\"")))
(setq line_count (count-lines (point-min) (point-max)))
- ;; quote each thai word
- (while (not (eobp))
- (beginning-of-line)
- (insert "\"")
- (end-of-line)
- (insert "\"")
- (forward-line 1))
-
+ ;; insert lisp code for defvar
(goto-char (point-min))
(dolist (elem header)
(insert elem "\n"))
-
(goto-char (point-max))
(dolist (elem footer)
(insert elem "\n"))
+ ;; indent for lisp and save
(lisp-mode)
(indent-region (point-min) (point-max))
(write-region nil nil lispfile))
@@ -170,15 +166,14 @@ dictionary words."
(defun split-thai-line()
"Break Thai words from point to end of line by inserting a
-separator string at word boundaries. (wrapper for 'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
(interactive)
(thai-break-words " " (line-end-position))
(split-thai-numbers (point) (line-end-position)))
(defun split-thai()
"Break Thai words from point to end of buffer by inserting a
-separator string at word boundaries. (wrapper for
-'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
(interactive)
(thai-break-words " " (point-max))
(split-thai-numbers (point) (point-max)))
@@ -188,9 +183,7 @@ separator string at word boundaries. (wr
'thai-break-words doesn't always split numbers properly. this may
improve tokenization somewhat."
;; xxx this really should be fixed in 'thai-word lib
- (let* (
- ;; "\\([๐๑๒๓๔๕๖๗๘๙0123456789]+\\)"
- (num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
+ (let* ((num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
(nonnum_rexp "\\([\u0e00-\u0e4f\u0e5a-\u0e7f]\\)") ;; "non-numbers"
(trailing_rexp (concat num_rexp nonnum_rexp))
(leading_rexp (concat nonnum_rexp num_rexp)))
Home |
Main Index |
Thread Index |
Old Index