pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/textproc/word2vec



Module Name:    pkgsrc
Committed By:   minskim
Date:           Mon Dec  2 02:00:42 UTC 2019

Added Files:
        pkgsrc/textproc/word2vec: DESCR Makefile PLIST distinfo
        pkgsrc/textproc/word2vec/patches: patch-makefile patch-word2phrase.c
            patch-word2vec.c

Log Message:
textproc/word2vec: Import version 0.1c

word2vec is an implementation of the Continuous Bag-of-Words (CBOW)
and the Skip-gram model (SG), as well as several demo scripts.  Given
a text corpus, the word2vec tool learns a vector for every word in the
vocabulary using the Continuous Bag-of-Words or the Skip-Gram neural
network architectures.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 pkgsrc/textproc/word2vec/DESCR \
    pkgsrc/textproc/word2vec/Makefile pkgsrc/textproc/word2vec/PLIST \
    pkgsrc/textproc/word2vec/distinfo
cvs rdiff -u -r0 -r1.1 pkgsrc/textproc/word2vec/patches/patch-makefile \
    pkgsrc/textproc/word2vec/patches/patch-word2phrase.c \
    pkgsrc/textproc/word2vec/patches/patch-word2vec.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: pkgsrc/textproc/word2vec/DESCR
diff -u /dev/null pkgsrc/textproc/word2vec/DESCR:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/DESCR      Mon Dec  2 02:00:41 2019
@@ -0,0 +1,5 @@
+word2vec is an implementation of the Continuous Bag-of-Words (CBOW)
+and the Skip-gram model (SG), as well as several demo scripts.  Given
+a text corpus, the word2vec tool learns a vector for every word in the
+vocabulary using the Continuous Bag-of-Words or the Skip-Gram neural
+network architectures.
Index: pkgsrc/textproc/word2vec/Makefile
diff -u /dev/null pkgsrc/textproc/word2vec/Makefile:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/Makefile   Mon Dec  2 02:00:41 2019
@@ -0,0 +1,22 @@
+# $NetBSD: Makefile,v 1.1 2019/12/02 02:00:41 minskim Exp $
+
+DISTNAME=      word2vec-0.1c
+CATEGORIES=    textproc
+MASTER_SITES=  ${MASTER_SITE_GITHUB:=tmikolov/}
+GITHUB_TAG=    20c129af10659f7c50e86e3be406df663beff438
+
+MAINTAINER=    minskim%NetBSD.org@localhost
+HOMEPAGE=      https://github.com/tmikolov/word2vec
+COMMENT=       Tools for computing distributed representations of words
+LICENSE=       apache-2.0
+
+NO_CONFIGURE=  yes
+
+INSTALLATION_DIRS+=    bin
+
+do-install:
+.for cmd in compute-accuracy distance word2phrase word2vec word-analogy
+       ${INSTALL_PROGRAM} ${WRKSRC}/${cmd} ${DESTDIR}${PREFIX}/bin
+.endfor
+
+.include "../../mk/bsd.pkg.mk"
Index: pkgsrc/textproc/word2vec/PLIST
diff -u /dev/null pkgsrc/textproc/word2vec/PLIST:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/PLIST      Mon Dec  2 02:00:41 2019
@@ -0,0 +1,6 @@
+@comment $NetBSD: PLIST,v 1.1 2019/12/02 02:00:41 minskim Exp $
+bin/compute-accuracy
+bin/distance
+bin/word-analogy
+bin/word2phrase
+bin/word2vec
Index: pkgsrc/textproc/word2vec/distinfo
diff -u /dev/null pkgsrc/textproc/word2vec/distinfo:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/distinfo   Mon Dec  2 02:00:41 2019
@@ -0,0 +1,9 @@
+$NetBSD: distinfo,v 1.1 2019/12/02 02:00:41 minskim Exp $
+
+SHA1 (word2vec-0.1c-20c129af10659f7c50e86e3be406df663beff438.tar.gz) = 4f0e872348d60223ba3b8412c0b9ccd7dbd07551
+RMD160 (word2vec-0.1c-20c129af10659f7c50e86e3be406df663beff438.tar.gz) = de98886c52303242566eacd5a3eaf4459026bd71
+SHA512 (word2vec-0.1c-20c129af10659f7c50e86e3be406df663beff438.tar.gz) = 698fa7e2e3ce3be4e4ecbe59bfe7f83640f4bc004b089b2b2cd9daa8233e98fbc5b541433317c647a0c796dd9aa2cd3aa186a1f8287e9f536104ed5fc6c1f65c
+Size (word2vec-0.1c-20c129af10659f7c50e86e3be406df663beff438.tar.gz) = 104875 bytes
+SHA1 (patch-makefile) = 2e32c5af8922008c2961fb2a7a4f59fd31ae0df9
+SHA1 (patch-word2phrase.c) = 47ccf0897b76960a6ef48ddfffc60cc4c59afaee
+SHA1 (patch-word2vec.c) = 1f0e2cf42c6156268f60075aa0a60ab750bc8bfd

Index: pkgsrc/textproc/word2vec/patches/patch-makefile
diff -u /dev/null pkgsrc/textproc/word2vec/patches/patch-makefile:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/patches/patch-makefile     Mon Dec  2 02:00:41 2019
@@ -0,0 +1,15 @@
+$NetBSD: patch-makefile,v 1.1 2019/12/02 02:00:41 minskim Exp $
+
+Do not override compiler set by pkgsrc.
+
+--- makefile.orig      2017-07-16 22:46:08.000000000 +0000
++++ makefile
+@@ -1,6 +1,6 @@
+-CC = gcc
++CC?= gcc
+ #Using -Ofast instead of -O3 might result in faster code, but is supported only by newer GCC versions
+-CFLAGS = -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
++CFLAGS+= -lm -pthread -Wall -funroll-loops -Wno-unused-result
+ 
+ all: word2vec word2phrase distance word-analogy compute-accuracy
+ 
Index: pkgsrc/textproc/word2vec/patches/patch-word2phrase.c
diff -u /dev/null pkgsrc/textproc/word2vec/patches/patch-word2phrase.c:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/patches/patch-word2phrase.c        Mon Dec  2 02:00:41 2019
@@ -0,0 +1,41 @@
+$NetBSD: patch-word2phrase.c,v 1.1 2019/12/02 02:00:41 minskim Exp $
+
+Portability fixes.
+https://github.com/tmikolov/word2vec/pull/40
+
+--- word2phrase.c.orig 2017-07-16 22:46:08.000000000 +0000
++++ word2phrase.c
+@@ -42,7 +42,7 @@ unsigned long long next_random = 1;
+ void ReadWord(char *word, FILE *fin, char *eof) {
+   int a = 0, ch;
+   while (1) {
+-    ch = fgetc_unlocked(fin);
++    ch = getc_unlocked(fin);
+     if (ch == EOF) {
+       *eof = 1;
+       break;
+@@ -246,7 +246,7 @@ void TrainModel() {
+     if (eof) break;
+     if (word[0] == '\n') {
+       //fprintf(fo, "\n");
+-      fputc_unlocked('\n', fo);
++      putc_unlocked('\n', fo);
+       continue;
+     }
+     cn++;
+@@ -286,12 +286,12 @@ void TrainModel() {
+     next_random = next_random * (unsigned long long)25214903917 + 11;
+     //if (next_random & 0x10000) score = 0;
+     if (score > threshold) {
+-      fputc_unlocked('_', fo);
++      putc_unlocked('_', fo);
+       pb = 0;
+-    } else fputc_unlocked(' ', fo);
++    } else putc_unlocked(' ', fo);
+     a = 0;
+     while (word[a]) {
+-      fputc_unlocked(word[a], fo);
++      putc_unlocked(word[a], fo);
+       a++;
+     }
+     pa = pb;
Index: pkgsrc/textproc/word2vec/patches/patch-word2vec.c
diff -u /dev/null pkgsrc/textproc/word2vec/patches/patch-word2vec.c:1.1
--- /dev/null   Mon Dec  2 02:00:42 2019
+++ pkgsrc/textproc/word2vec/patches/patch-word2vec.c   Mon Dec  2 02:00:41 2019
@@ -0,0 +1,16 @@
+$NetBSD: patch-word2vec.c,v 1.1 2019/12/02 02:00:41 minskim Exp $
+
+Portability fix.
+https://github.com/tmikolov/word2vec/pull/40
+
+--- word2vec.c.orig    2017-07-16 22:46:08.000000000 +0000
++++ word2vec.c
+@@ -71,7 +71,7 @@ void InitUnigramTable() {
+ void ReadWord(char *word, FILE *fin, char *eof) {
+   int a = 0, ch;
+   while (1) {
+-    ch = fgetc_unlocked(fin);
++    ch = getc_unlocked(fin);
+     if (ch == EOF) {
+       *eof = 1;
+       break;



Home | Main Index | Thread Index | Old Index