word2vector NodeJS Interface
This is a Node.js interface for Google's word2vec.
Here is an example of how to load a large model such as GoogleNews-vectors-negative300.bin with this package.
Supports both binary model and raw text model.
Installation
Linux and Unix operating systems are supported. Install it via npm:
npm install word2vector --save
In Node.js, require the module as below:
```js
var w2v = require("word2vector");
```
API Document:
Overview
train load getVector getVectors getSimilarWords getNeighbors similarity substract add
w2v.train( trainFile, modelFile, options, callback )
Click here to see example TrainFile format.
Example:
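```js
var w2v = require("word2vector");
var trainFile = "./data/train.data",
    modelFile = "./data/test.model.bin";
// Pass options and callback as described in the signature above.
w2v.train(trainFile, modelFile, options, callback);
```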
w2v.load( modelFile, ?readType = "bin" )
A model must be loaded before calling any calculation functions.
Params | Description | Default Value |
---|---|---|
readType | Model format, pass "utf-8" if using a raw text model. | "bin" |
```js
var w2v = require("word2vector");
var modelFile = "./test.model.bin";
w2v.load(modelFile);
// console.log(w2v.getSimilarWords());
```
w2v.getVector(word="word")
Params | Description | Default Value |
---|---|---|
word | String to be searched. | "word" |
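```js
'use strict';
var w2v = require("word2vector");
var modelFile = "./data/test.model.bin";
w2v.load(modelFile);
console.log(w2v.getVector("孫悟空"));
console.log(w2v.getVector("李洵"));
```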
Sample Output:
```js
// Array Type Only
[ 0.104406, -0.160019, -0.604506, -0.622804, 0.039482, -0.120058, 0.073555, 0.05646, 0.099059, -0.419282 ]
null // Return null if this word is not in model.
```
w2v.getVectors(words=["word1", "word2"], ?options = {})
Params | Description | Default Value |
---|---|---|
words | Array of strings to be searched. | ["word1", "word2"] |
```js
var w2v = require("word2vector");
var modelFile = "./data/test.model.bin";
w2v.load(modelFile);
console.log(w2v.getVectors(["孫悟空", "李洵"]));
```
Sample Output:
```js
[ { word: '孫悟空',
    vector: [ 0.104406, -0.160019, -0.604506, -0.622804, 0.039482, -0.120058, 0.073555, 0.05646, 0.099059, -0.419282 ] },
  { word: '李洵', vector: null } ]
// This will trigger an error log in the console:
// '李洵' is not found in the model.
```
w2v.getSimilarWords(word = "word", ?options = {})
Returns about 40 words that are similar to "word".
Params | Description | Default Value |
---|---|---|
word | String to be searched. | "word" |
options.N | return topN results | Array |
var w2v = ;var modelFile = "./data/test.model.bin";w2v;console;console;
Sample Output:
```js
// Array Type
[ { word: '孫悟空', similarity: 0.974369 },
  { word: '吳承恩', similarity: 0.96686 },
  { word: '林黛玉', similarity: 0.966664 },
  { word: '北地', similarity: 0.96264 },
  { word: '賈寶玉', similarity: 0.962137 },
  { word: '楚霸王', similarity: 0.955795 },
  { word: '梁山泊', similarity: 0.932804 },
  { word: '濮陽', similarity: 0.927542 },
  { word: '黃天霸', similarity: 0.927459 },
  { word: '英雄豪傑', similarity: 0.921575 } ]
// Return empty [] if this word is not in model.
// '李洵' is not found in the model
```
w2v.getNeighbors(vector, ?options = {})
Params | Description | Default Value |
---|---|---|
vector | Vector to be searched. | No default value |
options.N | return topN results | Array |
var w2v = ;var modelFile = "./data/test.model.bin";w2v;var a = w2v;// These are equal to use w2v.getSimilarWords("唐三藏");console;
Sample Output1:
```js
[ { word: '唐三藏', similarity: 0.9999993515200001 },
  { word: '孫悟空', similarity: 0.974368825898 },
  { word: '吳承恩', similarity: 0.966859435824 },
  { word: '林黛玉', similarity: 0.966663471323 },
  { word: '北地', similarity: 0.962639240211 },
  { word: '賈寶玉', similarity: 0.9621371820049999 },
  { word: '楚霸王', similarity: 0.9557946924850002 },
  { word: '梁山泊', similarity: 0.9328033548890001 },
  { word: '濮陽', similarity: 0.9275417727409999 } ]

{ '唐三藏': 0.9999993515200001,
  '孫悟空': 0.974368825898,
  '吳承恩': 0.966859435824,
  '林黛玉': 0.966663471323,
  '北地': 0.962639240211,
  '賈寶玉': 0.9621371820049999,
  '楚霸王': 0.9557946924850002,
  '梁山泊': 0.9328033548890001,
  '濮陽': 0.9275417727409999 }
```
w2v.similarity(word1 = "word1", word2 = "word2")
w2v.similarity(vector1 = [], word2 = "word2")
w2v.similarity(word1 = "word1", vector2 = [])
w2v.similarity(vector1 = [], vector2 = [])
Computes the [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) between the two vectors. If a word is passed, its vector is looked up in the model automatically. Returns false if the word is not found.
Params | Description | Default Value |
---|---|---|
word1 | First string to be compared. | No default value |
word2 | Second string to be compared. | No default value |
vector1 | First Vector to be compared. | No default value |
vector2 | Second Vector to be compared. | No default value |
```js
'use strict';
var w2v = require("word2vector");
var modelFile = "./data/test.model.bin";
w2v.load(modelFile);
var a = w2v.similarity("唐三藏", "孫悟空"); // 0.974368825898
console.log(a);
var b = w2v.similarity("唐三藏", "李洵"); // false, '李洵' is not in the model
// same as var b = w2v.similarity("唐三藏", w2v.getVector("李洵"));
// same as var b = w2v.similarity(w2v.getVector("唐三藏"), "李洵");
// same as var b = w2v.similarity(w2v.getVector("唐三藏"), w2v.getVector("李洵"));
console.log(b);
```
Sample Output:
```js
0.974368825898
// '李洵' is not found in the model. // error alert in console
false
```
w2v.substract(word1 = "word1", word2 = "word2")
w2v.substract(vector1 = [], word2 = "word2")
w2v.substract(word1 = "word1", vector2 = [])
w2v.substract(vector1 = [], vector2 = [])
Subtract vector1 from vector2. If a word is passed, its vector is looked up in the model automatically. Returns false if the word is not found.
Params | Description | Default Value |
---|---|---|
word1 | Subtrahend | No default value |
word2 | Minuend | No default value |
Example:
'use strict';var w2v = ;var modelFile = "./data/test.model.bin";w2v;var a = w2v;console;
Sample Output:
0 0 0 0 0 0 0 0 0 0
w2v.add(word1 = "word1", word2 = "word2")
w2v.add(vector1 = [], word2 = "word2")
w2v.add(word1 = "word1", vector2 = [])
w2v.add(vector1 = [], vector2 = [])
Add vector1 to vector2. If a word is passed, its vector is looked up in the model automatically. Returns false if the word is not found.
Params | Description | Default Value |
---|---|---|
word1 | Summand | No default value |
word2 | Addend | No default value |
Example:
'use strict';var w2v = ;var modelFile = "./data/test.model.bin";w2v;var a = w2v;var b = w2v;console;console;
Sample Output:
```js
[ 0.208812, -0.320038, -1.209012, -1.245608, 0.078964, -0.240116, 0.14711, 0.11292, 0.198118, -0.838564 ]
[ 0.104406, -0.160019, -0.604506, -0.622804, 0.039482, -0.120058, 0.073555, 0.05646, 0.099059, -0.419282 ]
```