Example usage for org.deeplearning4j.models.embeddings.wordvectors WordVectors lookupTable

List of usage examples for org.deeplearning4j.models.embeddings.wordvectors WordVectors lookupTable

Introduction

On this page you can find example usages of the lookupTable method of org.deeplearning4j.models.embeddings.wordvectors.WordVectors.

Prototype

WeightLookupTable lookupTable();

Source Link

Document

Lookup table for the vectors

Usage

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java

License:Open Source License

/**
 * Calculates the mean vector of all word vectors of all words contained in a document.
 *
 * <p>The document is tokenized with a {@link DefaultTokenizerFactory} configured with a
 * {@link CommonPreprocessor}. Only non-empty tokens known to the model contribute a row to the mean.
 * Non-empty tokens that the model does not know are added to {@code m_unknownWords}.
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param document the document for which the mean should be calculated
 * @return {@link INDArray} containing the mean vector of the document
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final String document) {
    final TokenizerFactory tokenizerFac = new DefaultTokenizerFactory();
    tokenizerFac.setTokenPreProcessor(new CommonPreprocessor());

    final Tokenizer t = tokenizerFac.create(document);
    final List<String> tokens = t.getTokens();

    // Count rows with exactly the same predicate the fill loop uses below. Counting empty tokens
    // that happen to be in the vocabulary would allocate rows that are never filled, and those
    // all-zero rows would pull the mean towards zero.
    int numberOfWordsMatchingWithVoc = 0;
    for (final String token : tokens) {
        if (!token.isEmpty() && wordVec.hasWord(token)) {
            numberOfWordsMatchingWithVoc++;
        }
    }

    // NOTE(review): when no token matches, a matrix with zero rows is requested, exactly as
    // before for documents without known words - confirm how callers expect that case to behave.
    final INDArray documentWordVectors = Nd4j.create(numberOfWordsMatchingWithVoc,
            wordVec.lookupTable().layerSize());

    int row = 0;
    for (final String token : tokens) {
        if (token.isEmpty()) {
            continue;
        }
        if (wordVec.hasWord(token)) {
            documentWordVectors.putRow(row, wordVec.getWordVectorMatrix(token));
            row++;
        } else {
            m_unknownWords.add(token);
        }
    }

    // One row per matched word, so reducing along dimension 0 averages over the words.
    return documentWordVectors.mean(0);
}

From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel2.java

License:Open Source License

/**
 * Calculates the mean vector of all word vectors of all words contained in a document.
 *
 * <p>Empty tokens are ignored: they neither contribute a word vector nor count towards the number of
 * rows the mean is taken over.
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param tokens the tokens of the document for which the mean should be calculated
 * @return {@link INDArray} containing the mean vector of the document
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final List<String> tokens) {
    int numberOfNonEmptyTokens = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            numberOfNonEmptyTokens++;
        }
    }

    // Size the matrix by the tokens that are actually written. Sizing it by tokens.size() left an
    // all-zero row for every skipped empty token, which biased the mean towards zero.
    // If every token is empty, keep the previous sizing so that degenerate case still yields the
    // same result as before instead of requesting a zero-row matrix.
    final int rows = numberOfNonEmptyTokens > 0 ? numberOfNonEmptyTokens : tokens.size();
    final INDArray documentWordVectors = Nd4j.create(rows, wordVec.lookupTable().layerSize());

    // NOTE(review): tokens are not checked with hasWord here - presumably the caller only passes
    // tokens the model knows; confirm against the call site.
    int row = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            documentWordVectors.putRow(row, wordVec.getWordVectorMatrix(token));
            row++;
        }
    }

    // One row per non-empty token, so reducing along dimension 0 averages over the words.
    return documentWordVectors.mean(0);
}

From source file:org.knime.ext.textprocessing.dl4j.util.WordVectorPortObjectUtils.java

License:Open Source License

/**
 * Builds a new {@link Word2Vec} from the given word vectors by copying over its {@link WeightLookupTable} and
 * its {@link VocabCache}. Nothing else is transferred, so depending on the concrete word vector type
 * information may be lost, e.g. the labels of {@link ParagraphVectors}.
 *
 * @param wordVectors the word vectors to take the lookup table and the vocabulary from
 * @return Word2Vec containing vocab and lookup table
 */
public static Word2Vec wordVectorsToWord2Vec(final WordVectors wordVectors) {
    final Word2Vec converted = new Word2Vec();

    // Only these two parts of the source model are carried over.
    converted.setLookupTable(wordVectors.lookupTable());
    converted.setVocab(wordVectors.vocab());

    return converted;
}