Example usage for org.deeplearning4j.models.embeddings.wordvectors WordVectors getWordVectorMatrix

List of usage examples for org.deeplearning4j.models.embeddings.wordvectors WordVectors getWordVectorMatrix

Introduction

In this page you can find the example usage for org.deeplearning4j.models.embeddings.wordvectors WordVectors getWordVectorMatrix.

Prototype

INDArray getWordVectorMatrix(String word);

Source Link

Document

Get the word vector matrix for a given word

Usage

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java

License:Open Source License

/**
 * Wraps the word vector of the given word into a {@link ListCell} whose elements are
 * {@link DoubleCell}s, one per component of the vector.
 *
 * @param wordVec the {@link WordVectors} model to look the word up in
 * @param word the word whose vector should be converted
 * @return a {@link ListCell} holding the vector components as {@link DoubleCell}s
 */
private ListCell wordToListCell(final WordVectors wordVec, final String word) {
    final INDArray vector = wordVec.getWordVectorMatrix(word);
    return CollectionCellFactory.createListCell(NDArrayUtils.toListOfDoubleCells(vector));
}

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java

License:Open Source License

/**
 * Calculates the mean vector of all word vectors of all words contained in a document.
 * Words not contained in the model vocabulary are collected in {@code m_unknownWords}
 * and excluded from the mean.
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param document the document for which the mean should be calculated
 * @return {@link INDArray} containing the mean vector of the document
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final String document) {
    final TokenizerFactory tokenizerFac = new DefaultTokenizerFactory();
    tokenizerFac.setTokenPreProcessor(new CommonPreprocessor());

    final Tokenizer t = tokenizerFac.create(document);
    final List<String> tokens = t.getTokens();

    // Count rows with the SAME predicate used when filling the matrix below.
    // Previously the count skipped the isEmpty() check, so an empty token accepted
    // by hasWord() inflated the row count and left an all-zero row skewing the mean.
    int numberOfWordsMatchingWithVoc = 0;
    for (final String token : tokens) {
        if (!token.isEmpty() && wordVec.hasWord(token)) {
            numberOfWordsMatchingWithVoc++;
        }
    }

    // One row per in-vocabulary token; mean(0) averages over rows.
    final INDArray documentWordVectors = Nd4j.create(numberOfWordsMatchingWithVoc,
            wordVec.lookupTable().layerSize());

    int i = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            if (wordVec.hasWord(token)) {
                documentWordVectors.putRow(i, wordVec.getWordVectorMatrix(token));
                i++;
            } else {
                // Remember out-of-vocabulary tokens for later reporting.
                m_unknownWords.add(token);
            }
        }
    }
    // NOTE(review): if no token matches the vocabulary this averages a 0-row matrix;
    // confirm the desired behavior (NaN vector vs. missing cell) with the caller.
    return documentWordVectors.mean(0);
}

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel2.java

License:Open Source License

/**
 * Calculates the mean vector of all word vectors of the given tokens.
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param tokens the tokens for which the mean vector should be calculated
 * @return {@link INDArray} containing the mean vector over the non-empty tokens
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final List<String> tokens) {
    // Size the matrix by the number of tokens actually written. Sizing it by
    // tokens.size() while skipping empty tokens left all-zero rows that skewed
    // the mean whenever an empty token was present.
    int nonEmptyTokens = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            nonEmptyTokens++;
        }
    }

    final INDArray documentWordVectors = Nd4j.create(nonEmptyTokens, wordVec.lookupTable().layerSize());

    int i = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            // NOTE(review): no hasWord() check here — getWordVectorMatrix() may yield
            // null/zeros for out-of-vocabulary tokens; verify callers pre-filter tokens.
            documentWordVectors.putRow(i, wordVec.getWordVectorMatrix(token));
            i++;
        }
    }
    return documentWordVectors.mean(0);
}

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.extract.VocabularyExtractorNodeModel2.java

License:Open Source License

/**
 * Creates a BufferedDataTable with two columns: the word and its word vector.
 *
 * @param tableSpec the spec to use for container creation
 * @param words the words to write to the container
 * @param wv the model containing the words and vectors
 * @param exec the execution context to use for container creation
 * @return table containing the words and vectors
 */
private BufferedDataTable createWordVectorTableFromWordList(final DataTableSpec tableSpec,
        final List<String> words, final WordVectors wv, final ExecutionContext exec) {
    final BufferedDataContainer container = exec.createDataContainer(tableSpec);
    int rowIndex = 0;
    for (final String word : words) {
        incrementProgessAndSetMessage(exec, word);

        // Look up the vector and wrap it as a collection cell of doubles.
        final INDArray vector = wv.getWordVectorMatrix(word);
        final ListCell vectorCell =
                CollectionCellFactory.createListCell(NDArrayUtils.toListOfDoubleCells(vector));

        final List<DataCell> cells = new ArrayList<>();
        cells.add(new StringCell(word));
        cells.add(vectorCell);

        container.addRowToTable(new DefaultRow(new RowKey("Row" + rowIndex), cells));
        rowIndex++;
    }
    container.close();
    return container.getTable();
}