Example usage for opennlp.tools.doccat DocumentSample DocumentSample

List of usage examples for opennlp.tools.doccat DocumentSample DocumentSample

Introduction

On this page you can find example usage of the opennlp.tools.doccat.DocumentSample constructor.

Prototype

public DocumentSample(String category, String[] text) 

Source Link

Usage

From source file: io.learningbox.controller.APIController.java

/**
 * Trains a document categorizer from the learning sets found for {@code area}
 * and returns its sorted score map for {@code input}.
 *
 * <p>A new model is trained on each invocation: the learning sets returned by
 * {@code repository.findByArea(area)} are fed to
 * {@code DocumentCategorizerME.train} with 1000 iterations and a cutoff of 1.
 *
 * @param area  path variable used to look up the learning sets
 * @param input request body handed to the trained categorizer
 * @return the value of {@code sortedScoreMap(input)} on the trained categorizer
 *         (presumably score -> categories with that score; confirm against the
 *         OpenNLP version in use)
 * @throws IOException propagated from the sample stream or the training call
 */
@RequestMapping(value = "/categorize/{area}", method = RequestMethod.POST)
public SortedMap<Double, Set<String>> categorize(@PathVariable final String area, @RequestBody String input)
        throws IOException {
    final List<LearningSet> learningSets = repository.findByArea(area);
    final Iterator<LearningSet> remaining = learningSets.iterator();

    // Single-pass adapter exposing the learning sets as training samples.
    final ObjectStream<DocumentSample> samples = new ObjectStream<DocumentSample>() {

        @Override
        public void close() throws IOException {
            // Nothing to release: the stream is backed by an in-memory iterator.
        }

        @Override
        public void reset() throws IOException, UnsupportedOperationException {
            // The backing iterator cannot be rewound.
            throw new UnsupportedOperationException();
        }

        @Override
        public DocumentSample read() throws IOException {
            // Returns null once every learning set has been consumed.
            if (!remaining.hasNext()) {
                return null;
            }
            final LearningSet current = remaining.next();
            return new DocumentSample(current.getCategory(), current.getText());
        }
    };

    final TrainingParameters params = TrainingParameters.defaultParams();
    params.put(TrainingParameters.ITERATIONS_PARAM, String.valueOf(1000));
    params.put(TrainingParameters.CUTOFF_PARAM, String.valueOf(1));

    final DoccatModel trainedModel = DocumentCategorizerME.train("en", samples, params, new DoccatFactory());
    return new DocumentCategorizerME(trainedModel).sortedScoreMap(input);
}