Example usage for opennlp.tools.doccat DocumentCategorizer getIndex

Introduction

In this page you can find the example usage for opennlp.tools.doccat DocumentCategorizer getIndex.

Prototype

int getIndex(String category);

Source Link

Document

get the index of a certain category

Usage

From source file:com.tamingtext.classifier.maxent.TestMaxent.java

private static void runTest(File[] inputFiles, DocumentCategorizer categorizer, Tokenizer tokenizer,
        ResultAnalyzer resultAnalyzer) throws FileNotFoundException, IOException {
    String line;/*  ww w.j a v a  2 s .com*/
    //<start id="maxent.examples.test.execute"/>
    for (File ff : inputFiles) {
        BufferedReader in = new BufferedReader(new FileReader(ff));
        while ((line = in.readLine()) != null) {
            String[] parts = line.split("\t");
            if (parts.length != 2)
                continue;

            String docText = parts[1]; //<co id="tmt.preprocess"/>
            String[] tokens = tokenizer.tokenize(docText);

            double[] probs = categorizer.categorize(tokens); //<co id="tmt.categorize"/>
            String label = categorizer.getBestCategory(probs);
            int bestIndex = categorizer.getIndex(label);
            double score = probs[bestIndex];

            ClassifierResult result //<co id="tmt.collect"/>
                    = new ClassifierResult(label, score);
            resultAnalyzer.addInstance(parts[0], result);
        }
        in.close();
    }

    System.err.println(resultAnalyzer.toString()); //<co id="tmt.summarize"/>
    /*<calloutlist>
     * <callout arearefs="tmt.preprocess">Preprocess text</callout>
     * <callout arearefs="tmt.categorize">Categorize</callout>
     * <callout arearefs="tmt.collect">Analyze Results</callout>
     * <callout arearefs="tmt.summarize">Present Results</callout>
     * </calloutlist>*/
    //<end id="maxent.examples.test.execute"/>
}