Usage examples for org.deeplearning4j.text.documentiterator.FileLabelAwareIterator#nextDocument
@Override
public LabelledDocument nextDocument()
From source file: dollar.learner.smart.ParagraphVectorsClassifierExample.java
License: Apache License
/**
 * Scores every unlabeled document against the known labels and logs the result.
 *
 * <p>This step assumes the model has already been built. Documents are read from the
 * {@code paravec/unlabeled} classpath resource, each one is turned into a vector via
 * {@code MeansBuilder.documentAsVector}, and {@code LabelSeeker.getScores} supplies the
 * per-label scores that get written to the log.
 *
 * @throws FileNotFoundException declared by the signature; presumably raised when the
 *         {@code paravec/unlabeled} resource cannot be resolved to a file (confirm against
 *         {@code ClassPathResource.getFile()})
 */
void checkUnlabeledData() throws FileNotFoundException {
    // Open the folder holding the documents we want to classify.
    ClassPathResource unlabeledResource = new ClassPathResource("paravec/unlabeled");
    FileLabelAwareIterator unlabeledDocs = new FileLabelAwareIterator.Builder()
            .addSourceFolder(unlabeledResource.getFile())
            .build();

    // Helper that converts a document into a vector, backed by the model's lookup table.
    InMemoryLookupTable<VocabWord> builderTable =
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable();
    MeansBuilder vectorBuilder = new MeansBuilder(builderTable, tokenizerFactory);

    // Helper that scores a document vector against each label known to the training iterator.
    List<String> knownLabels = iterator.getLabelsSource().getLabels();
    InMemoryLookupTable<VocabWord> seekerTable =
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable();
    LabelSeeker labelSeeker = new LabelSeeker(knownLabels, seekerTable);

    // A single document may match several labels at once, each with its own weight,
    // so every score is reported rather than only the best one.
    while (unlabeledDocs.hasNextDocument()) {
        LabelledDocument current = unlabeledDocs.nextDocument();
        INDArray centroid = vectorBuilder.documentAsVector(current);
        List<Pair<String, Double>> labelScores = labelSeeker.getScores(centroid);

        // The document's own label serves only as a title here, identifying which
        // document the scores below belong to.
        String heading = "Document '" + current.getLabel() + "' falls into the following categories: ";
        log.info(heading);

        for (Pair<String, Double> labelScore : labelScores) {
            log.info(" " + labelScore.getFirst() + ": " + labelScore.getSecond());
        }
    }
}