Example usage for opennlp.tools.doccat DocumentCategorizer getIndex

List of usage examples for opennlp.tools.doccat DocumentCategorizer getIndex

Introduction

On this page you can find example usage of the getIndex method of opennlp.tools.doccat.DocumentCategorizer.

Prototype

int getIndex(String category);

Source Link

Document

Gets the index of the given category.

Usage

From source file: com.tamingtext.classifier.maxent.TestMaxent.java

/**
 * Categorizes every well-formed line of the given files and reports the results.
 *
 * <p>Each line is split on a tab character; lines that do not split into exactly
 * two fields are skipped. The second field is tokenized and categorized, and the
 * best category together with its probability is handed to {@code resultAnalyzer}
 * alongside the first field (presumably the expected label -- confirm against
 * {@code ResultAnalyzer.addInstance}). After all files have been processed, the
 * analyzer's string form is written to standard error.
 *
 * @param inputFiles     files to read, one document per line
 * @param categorizer    categorizer used to score each tokenized document
 * @param tokenizer      tokenizer applied to the document text
 * @param resultAnalyzer collector that receives one result per processed line
 * @throws FileNotFoundException if an input file cannot be opened for reading
 * @throws IOException           if reading from or closing an input file fails
 */
private static void runTest(File[] inputFiles, DocumentCategorizer categorizer, Tokenizer tokenizer,
        ResultAnalyzer resultAnalyzer) throws FileNotFoundException, IOException {
    //<start id="maxent.examples.test.execute"/>
    for (File ff : inputFiles) {
        // try-with-resources closes the reader even when reading or categorizing throws,
        // so a failure part-way through a file no longer leaks the file handle.
        // NOTE(review): FileReader decodes with the platform default charset -- confirm
        // that this matches the encoding of the test data.
        try (BufferedReader in = new BufferedReader(new FileReader(ff))) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] parts = line.split("\t");
                if (parts.length != 2) {
                    // Skip lines that do not have exactly two tab-separated fields.
                    continue;
                }

                String docText = parts[1]; //<co id="tmt.preprocess"/>
                String[] tokens = tokenizer.tokenize(docText);

                double[] probs = categorizer.categorize(tokens); //<co id="tmt.categorize"/>
                String label = categorizer.getBestCategory(probs);
                // Look up the winning category's position to read its probability.
                int bestIndex = categorizer.getIndex(label);
                double score = probs[bestIndex];

                ClassifierResult result //<co id="tmt.collect"/>
                        = new ClassifierResult(label, score);
                resultAnalyzer.addInstance(parts[0], result);
            }
        }
    }

    System.err.println(resultAnalyzer.toString()); //<co id="tmt.summarize"/>
    /*<calloutlist>
     * <callout arearefs="tmt.preprocess">Preprocess text</callout>
     * <callout arearefs="tmt.categorize">Categorize</callout>
     * <callout arearefs="tmt.collect">Analyze Results</callout>
     * <callout arearefs="tmt.summarize">Present Results</callout>
     * </calloutlist>*/
    //<end id="maxent.examples.test.execute"/>
}