Usage examples for org.apache.mahout.classifier.ClassifierResult#getScore()
public double getScore()
From source file: com.tamingtext.classifier.bayes.ClassifyDocument.java
License: Apache License
public static void main(String[] args) { log.info("Command-line arguments: " + Arrays.toString(args)); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("Input file").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("model").withMinimum(1).withMaximum(1).create()) .withDescription("Model to use when classifying data").withShortName("m").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create();//from ww w.j a v a2 s.co m Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(modelOpt).withOption(helpOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } File inputFile = new File(cmdLine.getValue(inputOpt).toString()); if (!inputFile.isFile()) { throw new IllegalArgumentException(inputFile + " does not exist or is not a file"); } File modelDir = new File(cmdLine.getValue(modelOpt).toString()); if (!modelDir.isDirectory()) { throw new IllegalArgumentException(modelDir + " does not exist or is not a directory"); } BayesParameters p = new BayesParameters(); p.set("basePath", modelDir.getCanonicalPath()); Datastore ds = new InMemoryBayesDatastore(p); Algorithm a = new BayesAlgorithm(); ClassifierContext ctx = new ClassifierContext(a, ds); ctx.initialize(); //TODO: make the analyzer configurable StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); TokenStream ts = analyzer.tokenStream(null, new InputStreamReader(new FileInputStream(inputFile), "UTF-8")); ArrayList<String> tokens = new 
ArrayList<String>(1000); while (ts.incrementToken()) { tokens.add(ts.getAttribute(CharTermAttribute.class).toString()); } String[] document = tokens.toArray(new String[tokens.size()]); ClassifierResult[] cr = ctx.classifyDocument(document, "unknown", 5); for (ClassifierResult r : cr) { System.err.println(r.getLabel() + "\t" + r.getScore()); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } catch (IOException e) { log.error("IOException", e); } catch (InvalidDatastoreException e) { log.error("InvalidDataStoreException", e); } finally { } }