List of usage examples for org.deeplearning4j.models.embeddings.wordvectors.WordVectors#lookupTable()
WeightLookupTable lookupTable();
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel.java
License:Open Source License
/** * Calculates the mean vector of all word vectors of all words contained in a document. * * @param wordVec the {@link WordVectors} model to use * @param document the document for which the mean should be calculated * @return {@link INDArray} containing the mean vector of the document *///from w ww . j a v a2 s .c om private INDArray calculateDocumentMean(final WordVectors wordVec, final String document) { final TokenizerFactory tokenizerFac = new DefaultTokenizerFactory(); tokenizerFac.setTokenPreProcessor(new CommonPreprocessor()); final Tokenizer t = tokenizerFac.create(document); final List<String> tokens = t.getTokens(); int numberOfWordsMatchingWithVoc = 0; for (final String token : tokens) { if (wordVec.hasWord(token)) { numberOfWordsMatchingWithVoc++; } } final INDArray documentWordVectors = Nd4j.create(numberOfWordsMatchingWithVoc, wordVec.lookupTable().layerSize()); int i = 0; for (final String token : tokens) { if (!token.isEmpty()) { if (wordVec.hasWord(token)) { documentWordVectors.putRow(i, wordVec.getWordVectorMatrix(token)); i++; } else { m_unknownWords.add(token); } } } final INDArray documentMeanVector = documentWordVectors.mean(0); return documentMeanVector; }
From source file:org.knime.ext.textprocessing.dl4j.nodes.embeddings.apply.WordVectorApplyNodeModel2.java
License:Open Source License
/**
 * Calculates the mean vector of all word vectors of all words contained in a document.
 *
 * <p>Empty tokens are skipped and do not count towards the mean. The tokens are looked up
 * without a vocabulary check (presumably the caller passes only tokens known to the model -
 * TODO confirm).
 *
 * @param wordVec the {@link WordVectors} model to use
 * @param tokens the tokens of the document for which the mean should be calculated
 * @return {@link INDArray} containing the mean vector of the document
 */
private INDArray calculateDocumentMean(final WordVectors wordVec, final List<String> tokens) {
    // Allocated for the worst case of one row per token; rows that are never written are
    // assumed to stay zero-filled and therefore do not affect the column sums.
    final INDArray documentWordVectors = Nd4j.create(tokens.size(), wordVec.lookupTable().layerSize());

    int filledRows = 0;
    for (final String token : tokens) {
        if (!token.isEmpty()) {
            documentWordVectors.putRow(filledRows, wordVec.getWordVectorMatrix(token));
            filledRows++;
        }
    }

    final INDArray columnSums = documentWordVectors.sum(0);
    if (filledRows == 0) {
        // Nothing was written: return the sums unchanged instead of dividing by zero.
        return columnSums;
    }

    // Divide by the number of rows actually written rather than by tokens.size(), so that
    // skipped empty tokens do not pull the mean towards zero.
    return columnSums.div(filledRows);
}
From source file:org.knime.ext.textprocessing.dl4j.util.WordVectorPortObjectUtils.java
License:Open Source License
/**
 * Builds a {@link Word2Vec} instance from the given word vectors by handing over their
 * {@link WeightLookupTable} and {@link VocabCache}. Depending on the concrete word vector
 * type this may lose information, e.g. the labels of {@link ParagraphVectors}.
 *
 * @param wordVectors the word vectors whose lookup table and vocabulary are taken over
 * @return Word2Vec containing vocab and lookup table
 */
public static Word2Vec wordVectorsToWord2Vec(final WordVectors wordVectors) {
    final Word2Vec converted = new Word2Vec();

    // Lookup table first, then vocabulary.
    converted.setLookupTable(wordVectors.lookupTable());
    converted.setVocab(wordVectors.vocab());

    return converted;
}