List of usage examples for the org.deeplearning4j.text.documentiterator.LabelledDocument constructor LabelledDocument()
LabelledDocument
From source file:dollar.learner.smart.ParagraphVectorsClassifierExample.java
License:Apache License
public List<Pair<String, Double>> predict(@NotNull String name, @NotNull SourceSegment source, @NotNull List<var> inputs) { /*/*from w w w. ja v a 2 s .com*/ Now we'll iterate over unlabeled data, and check which label it could be assigned to Please note: for many domains it's normal to have 1 document fall into few labels at once, with different "weight" for each. */ MeansBuilder meansBuilder = new MeansBuilder( (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable(), tokenizerFactory); LabelSeeker seeker = new LabelSeeker(iterator.getLabelsSource().getLabels(), (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable()); LabelledDocument document = new LabelledDocument(); document.setContent(signatureToText(name, inputs)); INDArray documentAsCentroid = meansBuilder.documentAsVector(document); List<Pair<String, Double>> scores = seeker.getScores(documentAsCentroid); return scores; }
From source file:org.knime.ext.textprocessing.dl4j.data.BufferedDataTableLabelledDocumentIterator.java
License:Open Source License
/**
 * Returns the next {@link LabelledDocument} containing a document and a corresponding label from the
 * {@link BufferedDataTable}.
 *
 * <p>When {@code m_skipMissing} is set, rows for which {@code containsMissing} returns true are
 * consumed and skipped. {@code m_currentRow} is advanced for every consumed row, while
 * {@code m_nonEmptyRowCounter} is advanced only for rows that are returned; the label is looked up
 * in {@code m_labels} by that non-empty counter.
 *
 * @return the next labelled document
 * @throws RuntimeException if the document cell cannot be converted to a {@code String}
 */
@Override
public LabelledDocument nextDocument() {
    DataRow row = m_tableIterator.next();

    // Skip rows with missing values iteratively; recursing once per skipped row could exhaust
    // the stack on a long run of missing rows.
    while (m_skipMissing && containsMissing(row)) {
        m_currentRow++;
        row = m_tableIterator.next();
    }

    final DataCell documentCell = row.getCell(m_documentColumnIndex);

    final String documentContent;
    try {
        documentContent = ConverterUtils.convertDataCellToJava(documentCell, String.class);
    } catch (DataCellConversionException e) {
        throw new RuntimeException("Error in row " + row.getKey() + " : " + e.getMessage(), e);
    }

    final String documentLabel = m_labels.get(m_nonEmptyRowCounter);

    final LabelledDocument output = new LabelledDocument();
    output.setContent(documentContent);
    output.setLabel(documentLabel);

    m_nonEmptyRowCounter++;
    m_currentRow++;

    return output;
}