Example usage for the org.deeplearning4j.text.documentiterator.LabelsSource constructor

List of usage examples for the org.deeplearning4j.text.documentiterator.LabelsSource constructor

Introduction

This page collects example usages of the org.deeplearning4j.text.documentiterator.LabelsSource constructor.

Prototype

public LabelsSource(@NonNull List<String> labels) 

Source Link

Document

Builds a LabelsSource from an externally defined list of string labels.

Usage

From source file:doc2vec.LuceneDocIterator.java

/**
 * Configures a {@code ParagraphVectors} model whose sentences come from a
 * {@code LuceneDocIterator} built on {@code indexDir}, assigns it to {@code vec}
 * and calls {@code fit()} on it.
 *
 * @param indexDir passed as the first argument to the {@code LuceneDocIterator} constructor
 * @throws Exception declared by the signature; nothing is caught inside this method
 */
void learnDocEmbeddings(File indexDir) throws Exception {
    // The "word.labels" property falls back to "false" when it is not set.
    final String wordLabelsSetting = prop.getProperty("word.labels", "false");
    final boolean useStoredLabels = Boolean.parseBoolean(wordLabelsSetting);

    final SentenceIterator sentences = new LuceneDocIterator(indexDir, stopFile, useStoredLabels);
    final InMemoryLookupCache vocabulary = new InMemoryLookupCache();

    final TokenizerFactory tokenizers = new DefaultTokenizerFactory();
    tokenizers.setTokenPreProcessor(new CommonPreprocessor());

    // Uses the single-String constructor; presumably "DOCNO_" serves as a label
    // prefix/template -- confirm against the LabelsSource documentation.
    final LabelsSource docLabels = new LabelsSource("DOCNO_");

    // Builder calls are kept in their original order.
    vec = new ParagraphVectors.Builder()
            .minWordFrequency(minwordfreq)
            .iterations(3)
            .epochs(5)
            .layerSize(numDimensions)
            .learningRate(0.025)
            .labelsSource(docLabels)
            .windowSize(5)
            .iterate(sentences)
            .vocabCache(vocabulary)
            .tokenizerFactory(tokenizers)
            .sampling(0.1f)
            .workers(4)
            .trainWordVectors(true)
            .build();
    vec.fit();
}

From source file:doc2vec.LuceneDocIterator.java

/**
 * Configures a {@code ParagraphVectors} model whose sentences come from a
 * {@code BasicLineIterator} built on {@code docFile}, assigns it to {@code vec}
 * and calls {@code fit()} on it.
 *
 * @param docFile passed to the {@code BasicLineIterator} constructor
 * @throws Exception declared by the signature; nothing is caught inside this method
 */
void learnDocEmbeddings(String docFile) throws Exception {
    final SentenceIterator lines = new BasicLineIterator(docFile);
    final InMemoryLookupCache vocabulary = new InMemoryLookupCache();

    final TokenizerFactory tokenizers = new DefaultTokenizerFactory();
    tokenizers.setTokenPreProcessor(new CommonPreprocessor());

    // Uses the single-String constructor; presumably "DOCNO_" serves as a label
    // prefix/template -- confirm against the LabelsSource documentation.
    final LabelsSource docLabels = new LabelsSource("DOCNO_");

    // Builder calls are kept in their original order.
    vec = new ParagraphVectors.Builder()
            .minWordFrequency(minwordfreq)
            .iterations(3)
            .epochs(5)
            .layerSize(numDimensions)
            .learningRate(0.025)
            .labelsSource(docLabels)
            .windowSize(5)
            .iterate(lines)
            .vocabCache(vocabulary)
            .tokenizerFactory(tokenizers)
            .sampling(0.1f)
            .workers(4)
            .trainWordVectors(true)
            .build();
    vec.fit();
}

From source file:org.knime.ext.textprocessing.dl4j.data.BufferedDataTableLabelledDocumentIterator.java

License:Open Source License

/**
 * Drains {@code m_tableIterator}, converting the cell at {@code m_labelColumnIndex}
 * of every row to a {@link String} and appending it to {@code m_labels}. Rows whose
 * label cell is missing are skipped when {@code m_skipMissing} is set. Afterwards
 * {@code reset()} is called and the labels are wrapped in a {@link LabelsSource}.
 *
 * @return {@link LabelsSource} constructed from {@code m_labels}.
 * @throws RuntimeException wrapping the {@link DataCellConversionException} if a label
 *             cell cannot be converted; the message names the key of the offending row.
 */
private LabelsSource initLabelsSource() {
    while (m_tableIterator.hasNext()) {
        final DataRow currentRow = m_tableIterator.next();
        final DataCell cell = currentRow.getCell(m_labelColumnIndex);

        // isMissing() is only consulted when skipping is enabled (short-circuit).
        final boolean skipThisRow = m_skipMissing && cell.isMissing();
        if (!skipThisRow) {
            try {
                m_labels.add(ConverterUtils.convertDataCellToJava(cell, String.class));
            } catch (DataCellConversionException conversionFailure) {
                final String message =
                    "Error in row " + currentRow.getKey() + " : " + conversionFailure.getMessage();
                throw new RuntimeException(message, conversionFailure);
            }
        }
    }
    reset();
    return new LabelsSource(m_labels);
}