Usage examples for org.deeplearning4j.models.embeddings.wordvectors.WordVectors#getWordVectorMatrix
INDArray getWordVectorMatrix(String word);
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java
License:Open Source License
/**
 * Looks up the vector of a single word and wraps it in a {@link ListCell} whose elements are
 * {@link DoubleCell}s, one per vector component.
 *
 * @param wordVec the {@link WordVectors} model that is queried for the vector
 * @param word the word whose vector should be converted
 * @return a {@link ListCell} holding the word vector as {@link DoubleCell}s
 */
private ListCell wordToListCell(final WordVectors wordVec, final String word) {
    final INDArray vector = wordVec.getWordVectorMatrix(word);
    return CollectionCellFactory.createListCell(NDArrayUtils.toListOfDoubleCells(vector));
}
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java
License:Open Source License
/**
 * Calculates the mean vector of the word vectors of all words of a document that are known to
 * the model. The document is tokenized with a {@link DefaultTokenizerFactory} using a
 * {@link CommonPreprocessor}. Empty tokens are ignored; non-empty tokens that the model does not
 * know are recorded in {@code m_unknownWords} and do not contribute to the mean.
 *
 * If the document contains no known word, the mean is taken over a single untouched row, i.e.
 * the result is expected to be a zero vector (assumes {@code Nd4j.create} returns a zero-filled
 * array - TODO confirm).
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param document the document for which the mean should be calculated
 * @return {@link INDArray} containing the mean vector of the document
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final String document) {
    final TokenizerFactory tokenizerFac = new DefaultTokenizerFactory();
    tokenizerFac.setTokenPreProcessor(new CommonPreprocessor());
    final Tokenizer t = tokenizerFac.create(document);
    final List<String> tokens = t.getTokens();

    // Count with exactly the same condition the fill loop below uses, so the number of
    // allocated rows always equals the number of rows that get written.
    int numberOfWordsMatchingWithVoc = 0;
    for (final String token : tokens) {
        if (!token.isEmpty() && wordVec.hasWord(token)) {
            numberOfWordsMatchingWithVoc++;
        }
    }

    // Keep at least one row so a document without any known word does not request a
    // zero-row matrix.
    final int rows = Math.max(1, numberOfWordsMatchingWithVoc);
    final INDArray documentWordVectors = Nd4j.create(rows, wordVec.lookupTable().layerSize());

    int i = 0;
    for (final String token : tokens) {
        if (token.isEmpty()) {
            continue;
        }
        if (wordVec.hasWord(token)) {
            documentWordVectors.putRow(i, wordVec.getWordVectorMatrix(token));
            i++;
        } else {
            m_unknownWords.add(token);
        }
    }

    // Average over dimension 0, i.e. across the rows (one row per known word).
    return documentWordVectors.mean(0);
}
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel2.java
License:Open Source License
/** * Calculates the mean vector of all word vectors of all words contained in a document. * * @param wordVec the {@link WordVectors} model to use * @param document the document for which the mean should be calculated * @return {@link INDArray} containing the mean vector of the document *///from ww w .j ava 2 s . c om private INDArray calculateDocumentMean(final WordVectors wordVec, final List<String> tokens) { final INDArray documentWordVectors = Nd4j.create(tokens.size(), wordVec.lookupTable().layerSize()); int i = 0; for (final String token : tokens) { if (!token.isEmpty()) { documentWordVectors.putRow(i, wordVec.getWordVectorMatrix(token)); i++; } } final INDArray documentMeanVector = documentWordVectors.mean(0); return documentMeanVector; }
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.extract.VocabularyExtractorNodeModel2.java
License:Open Source License
/**
 * Builds a {@link BufferedDataTable} with one row per word. Each row consists of two cells: the
 * word itself as a {@link StringCell} and its word vector as a {@link ListCell}. Rows are keyed
 * "Row0", "Row1", ... in the iteration order of the word list.
 *
 * @param tableSpec the spec used to create the data container
 * @param words the words to write, one per row
 * @param wv the model that supplies the vector for each word
 * @param exec the execution context used to create the container and to report progress
 * @return the table containing the words and their vectors
 */
private BufferedDataTable createWordVectorTableFromWordList(final DataTableSpec tableSpec,
    final List<String> words, final WordVectors wv, final ExecutionContext exec) {
    final BufferedDataContainer container = exec.createDataContainer(tableSpec);

    int rowIndex = 0;
    for (final String word : words) {
        incrementProgessAndSetMessage(exec, word);

        final List<DataCell> rowCells = new ArrayList<>();
        rowCells.add(new StringCell(word));
        rowCells.add(
            CollectionCellFactory.createListCell(NDArrayUtils.toListOfDoubleCells(wv.getWordVectorMatrix(word))));

        final RowKey key = new RowKey("Row" + rowIndex);
        container.addRowToTable(new DefaultRow(key, rowCells));
        rowIndex++;
    }

    // The container is closed before its table is requested.
    container.close();
    return container.getTable();
}