Usage examples for the LabelsSource constructor of org.deeplearning4j.text.documentiterator.LabelsSource
public LabelsSource(@NonNull List<String> labels)
From source file:doc2vec.LuceneDocIterator.java
void learnDocEmbeddings(File indexDir) throws Exception { boolean storedLabels = Boolean.parseBoolean(prop.getProperty("word.labels", "false")); SentenceIterator iter = new LuceneDocIterator(indexDir, stopFile, storedLabels); InMemoryLookupCache cache = new InMemoryLookupCache(); TokenizerFactory t = new DefaultTokenizerFactory(); t.setTokenPreProcessor(new CommonPreprocessor()); LabelsSource source = new LabelsSource("DOCNO_"); vec = new ParagraphVectors.Builder().minWordFrequency(minwordfreq).iterations(3).epochs(5) .layerSize(numDimensions).learningRate(0.025).labelsSource(source).windowSize(5).iterate(iter) .vocabCache(cache).tokenizerFactory(t).sampling(0.1f).workers(4).trainWordVectors(true).build(); vec.fit();// w w w . java 2 s.c om }
From source file:doc2vec.LuceneDocIterator.java
/**
 * Trains a {@code ParagraphVectors} model on the lines read by a
 * {@code BasicLineIterator} over the given file and stores the fitted model
 * in {@code vec}.
 *
 * @param docFile path passed to {@code BasicLineIterator}
 *                (presumably one document per line; confirm with the caller)
 * @throws Exception if opening the file or fitting the model fails
 */
void learnDocEmbeddings(String docFile) throws Exception {
    final SentenceIterator lines = new BasicLineIterator(docFile);
    final InMemoryLookupCache vocab = new InMemoryLookupCache();

    final TokenizerFactory tokenizer = new DefaultTokenizerFactory();
    tokenizer.setTokenPreProcessor(new CommonPreprocessor());

    // NOTE(review): "DOCNO_" presumably serves as the prefix/template for
    // generated document labels; confirm against LabelsSource.
    final LabelsSource docLabels = new LabelsSource("DOCNO_");

    vec = new ParagraphVectors.Builder()
            .minWordFrequency(minwordfreq)
            .iterations(3)
            .epochs(5)
            .layerSize(numDimensions)
            .learningRate(0.025)
            .labelsSource(docLabels)
            .windowSize(5)
            .iterate(lines)
            .vocabCache(vocab)
            .tokenizerFactory(tokenizer)
            .sampling(0.1f)
            .workers(4)
            .trainWordVectors(true)
            .build();

    vec.fit();
}
From source file:org.knime.ext.textprocessing.dl4j.data.BufferedDataTableLabelledDocumentIterator.java
License:Open Source License
/** * Iterates over {@link BufferedDataTable} and collects all labels. * * @return {@link LabelsSource} containing the collected labels. *///w w w . j a v a2 s . c om private LabelsSource initLabelsSource() { while (m_tableIterator.hasNext()) { final DataRow row = m_tableIterator.next(); final DataCell labelCell = row.getCell(m_labelColumnIndex); if (m_skipMissing && labelCell.isMissing()) { continue; } try { m_labels.add(ConverterUtils.convertDataCellToJava(labelCell, String.class)); } catch (DataCellConversionException e) { throw new RuntimeException("Error in row " + row.getKey() + " : " + e.getMessage(), e); } } reset(); return new LabelsSource(m_labels); }