List of usage examples for the org.apache.mahout.classifier.ResultAnalyzer constructor
public ResultAnalyzer(Collection<String> labelSet, String defaultLabel)
From source file:com.luca.filipponi.tweetAnalysis.SentimentClassifier.CustomTestNaiveBayesDriver.java
License:Apache License
/**
 * Classifies the input vectors with a trained naive Bayes model and logs the analysis of the
 * results.
 *
 * <p>Registers and parses the command-line options, deletes the output path when the overwrite
 * option is present, classifies either sequentially in this JVM or through
 * {@code runMapReduce}, then reads the label index and the written score vectors and hands them
 * to {@code analyzeResults} together with a {@link ResultAnalyzer}.
 *
 * @param args command-line arguments matching the options registered at the top of this method
 * @return {@code 0} on success; {@code -1} when {@code parseArguments} returns {@code null} or
 *     {@code runMapReduce} reports failure
 * @throws Exception if any of the invoked Hadoop or Mahout operations throws
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }

        // Both files are closed in finally blocks so that a failure while reading or appending
        // does not leak the underlying streams.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, getInputPath(), getConf());
            try {
                Text key = new Text();
                VectorWritable vw = new VectorWritable();
                while (reader.next(key, vw)) {
                    // The output key is element [1] of the input key split by SLASH.
                    // NOTE(review): SLASH is declared outside this method; presumably it splits
                    // on '/', so keys are expected to look like "/label/..." - confirm.
                    writer.append(new Text(SLASH.split(key.toString())[1]),
                            new VectorWritable(classifier.classifyFull(vw.get())));
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    // load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    // loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable =
            new SequenceFileDirIterable<Text, VectorWritable>(
                    getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}
From source file:com.memonews.mahout.sentiment.SentimentModelTester.java
License:Apache License
public void run(final PrintWriter output) throws IOException { final File base = new File(inputFile); // contains the best model final OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class); final Dictionary newsGroups = new Dictionary(); final Multiset<String> overallCounts = HashMultiset.create(); final List<File> files = Lists.newArrayList(); for (final File newsgroup : base.listFiles()) { if (newsgroup.isDirectory()) { newsGroups.intern(newsgroup.getName()); files.addAll(Arrays.asList(newsgroup.listFiles())); }/* w ww . ja v a 2 s . c o m*/ } System.out.printf("%d test files\n", files.size()); final ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT"); for (final File file : files) { final String ng = file.getParentFile().getName(); final int actual = newsGroups.intern(ng); final SentimentModelHelper helper = new SentimentModelHelper(); final Vector input = helper.encodeFeatureVector(file, overallCounts);// no // leak // type // ensures // this // is // a // normal // vector final Vector result = classifier.classifyFull(input); final int cat = result.maxValueIndex(); final double score = result.maxValue(); final double ll = classifier.logLikelihood(actual, input); final ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll); ra.addInstance(newsGroups.values().get(actual), cr); } output.printf("%s\n\n", ra.toString()); }
From source file:com.missionsky.scp.dataanalysis.mahout.TestNaiveBayesDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/* w w w .ja v a 2 s . com*/ addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption( buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); } boolean complementary = hasOption("testComplementary"); boolean sequential = hasOption("runSequential"); if (sequential) { FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); AbstractNaiveBayesClassifier classifier; if (complementary) { classifier = new ComplementaryNaiveBayesClassifier(model); } else { classifier = new StandardNaiveBayesClassifier(model); } SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class); Reader reader = new Reader(fs, getInputPath(), getConf()); Text key = new Text(); VectorWritable vw = new VectorWritable(); while (reader.next(key, vw)) { writer.append(new Text(SLASH.split(key.toString())[1]), new VectorWritable(classifier.classifyFull(vw.get()))); } writer.close(); reader.close(); } else { boolean succeeded = runMapReduce(parsedArgs); if (!succeeded) { return -1; } } //load the labels Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); //loop over the results and create the confusion matrix SequenceFileDirIterable<Text, VectorWritable> dirIterable = new 
SequenceFileDirIterable<Text, VectorWritable>( getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer); log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer); return 0; }
From source file:com.tamingtext.classifier.maxent.TestMaxent.java
License:Apache License
private static void execute(File[] inputFiles, File modelFile) throws IOException, FileNotFoundException { //<start id="maxent.examples.test.setup"/> NameFinderFeatureGenerator nffg //<co id="tmx.feature"/> = new NameFinderFeatureGenerator(); BagOfWordsFeatureGenerator bowfg = new BagOfWordsFeatureGenerator(); InputStream modelStream = //<co id="tmx.modelreader"/> new FileInputStream(modelFile); DoccatModel model = new DoccatModel(modelStream); DocumentCategorizer categorizer //<co id="tmx.categorizer"/> = new DocumentCategorizerME(model, nffg, bowfg); Tokenizer tokenizer = SimpleTokenizer.INSTANCE; int catCount = categorizer.getNumberOfCategories(); Collection<String> categories = new ArrayList<String>(catCount); for (int i = 0; i < catCount; i++) { categories.add(categorizer.getCategory(i)); }/* ww w .j av a 2 s . c o m*/ ResultAnalyzer resultAnalyzer = //<co id="tmx.results"/> new ResultAnalyzer(categories, "unknown"); runTest(inputFiles, categorizer, tokenizer, resultAnalyzer); //<co id="tmx.run"/> /*<calloutlist> <callout arearefs="tmx.feature">Setup Feature Generators</callout> <callout arearefs="tmx.modelreader">Load Model</callout> <callout arearefs="tmx.categorizer">Create Categorizer</callout> <callout arearefs="tmx.results">Prepare Result Analyzer</callout> <callout arearefs="tmx.run">Execute Test</callout> </calloutlist>*/ //<end id="maxent.examples.test.setup"/> }
From source file:com.tamingtext.classifier.mlt.TestMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing category information").withShortName("catf") .create();/*from w w w . ja va 2 s . co m*/ Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing content information").withShortName("contf") .create(); Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. 
Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt) .withOption(categoryFieldOpt).withOption(maxResultsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } int maxResults = 10; if (cmdLine.hasOption(maxResultsOpt)) { maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String modelPath = (String) cmdLine.getValue(modelOpt); String categoryField = (String) cmdLine.getValue(categoryFieldOpt); String contentField = (String) cmdLine.getValue(contentFieldOpt); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } Directory directory = FSDirectory.open(new File(modelPath)); IndexReader indexReader = IndexReader.open(directory); Analyzer analyzer //<co id="mlt.analyzersetup"/> = new EnglishAnalyzer(Version.LUCENE_36); MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField); categorizer.setAnalyzer(analyzer); categorizer.setMatchMode(mode); categorizer.setFieldNames(new 
String[] { contentField }); categorizer.setMaxResults(maxResults); categorizer.setNgramSize(gramSize); File f = new File(inputPath); if (!f.isDirectory()) { throw new IllegalArgumentException(f + " is not a directory or does not exit"); } File[] inputFiles = FileUtil.buildFileList(f); String line = null; //<start id="lucene.examples.mlt.test"/> final ClassifierResult UNKNOWN = new ClassifierResult("unknown", 1.0); ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/> new ResultAnalyzer(categorizer.getCategories(), UNKNOWN.getLabel()); for (File ff : inputFiles) { //<co id="co.mlt.read"/> BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8")); while ((line = in.readLine()) != null) { String[] parts = line.split("\t"); if (parts.length != 2) { continue; } CategoryHits[] hits //<co id="co.mlt.cat"/> = categorizer.categorize(new StringReader(parts[1])); ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN; resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/> } in.close(); } System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/> /* <calloutlist> <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout> <callout arearefs="co.mlt.read">Read Test data</callout> <callout arearefs="co.mlt.cat">Categorize</callout> <callout arearefs="co.mlt.an">Collect Results</callout> <callout arearefs="co.mlt.print">Display Results</callout> </calloutlist> */ //<end id="lucene.examples.mlt.test"/> } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.wsc.myexample.decisionForest.MyTestForest.java
License:Apache License
private void sequential() throws IOException { log.info("Loading the forest..."); MyDecisionForest forest = MyDecisionForest.load(modelPath); if (forest == null) { log.error("No Decision Forest found!"); return;/*from w w w . jav a 2 s .c om*/ } // load the dataset Dataset dataset = MyDataset.load(datasetPath); DataConverter converter = new DataConverter(dataset); log.info("Sequential classification..."); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); // List<double[]> resList = new ArrayList<double[]>(); //----------------0711--------------- ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); //----------------0711--------------- if (new File(dataPath).isDirectory()) { //the input is a directory of files testDirectory(outputPath, converter, forest, dataset, /*resList,*/ rng, analyzer); } else { // the input is one single file testFile(dataPath, outputPath, converter, forest, dataset, /*resList,*/ rng, analyzer); } time = System.currentTimeMillis() - time; log.info("Classification Time: {}", DFUtils.elapsedTime(time)); log.info("{}", analyzer); // if (analyze) { // if (dataset.isNumerical(dataset.getLabelId())) { // RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); // double[][] results = new double[resList.size()][2]; // regressionAnalyzer.setInstances(resList.toArray(results)); // log.info("{}", regressionAnalyzer); // } else { // ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); // for (double[] r : resList) { // analyzer.addInstance(dataset.getLabelString(r[0]), // new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); // } // log.info("{}", analyzer); // } // } }
From source file:guipart.view.GUIOverviewController.java
@FXML void handleClassifyRF(ActionEvent event) throws IOException { String outputFile = "data/out"; Path dataPath = new Path(textFieldCSVRF.getText()); // test data path Path datasetPath = new Path(textFieldDatasetRF.getText()); //info file about data set Path modelPath = new Path(textFieldModelRF.getText()); // path where the forest is stored Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileSystem outFS = FileSystem.get(conf); System.out.println("Loading the forest"); DecisionForest forest = DecisionForest.load(conf, modelPath); if (forest == null) System.err.println("No decision forest found!"); // load the dataset Dataset dataset = Dataset.load(conf, datasetPath); DataConverter converter = new DataConverter(dataset); System.out.println("Sequential classification"); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); List<double[]> resList = Lists.newArrayList(); if (fs.getFileStatus(dataPath).isDir()) { //the input is a directory of files Utils.rfTestDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS, guiPart);//w w w .j a v a 2 s . 
c o m } else { // the input is one single file Utils.rfTestFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs, guiPart); } time = System.currentTimeMillis() - time; //log.info("Classification Time: {}", DFUtils.elapsedTime(time)); System.out.println("Classification time: " + DFUtils.elapsedTime(time)); if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); double[][] results = new double[resList.size()][2]; regressionAnalyzer.setInstances(resList.toArray(results)); //log.info("{}", regressionAnalyzer); System.out.println(regressionAnalyzer.toString()); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] r : resList) { analyzer.addInstance(dataset.getLabelString(r[0]), new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); } //log.info("{}", analyzer); System.out.println(analyzer.toString()); textAnalyze.setText(analyzer.toString()); } }
From source file:imageClassify.TestForest.java
License:Apache License
private void mapreduce() throws ClassNotFoundException, IOException, InterruptedException { if (outputPath == null) { throw new IllegalArgumentException( "You must specify the ouputPath when using the mapreduce implementation"); }//from www. j av a 2 s . c o m Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, getConf()); classifier.run(); if (analyze) { double[][] results = classifier.getResults(); if (results != null) { Dataset dataset = Dataset.load(getConf(), datasetPath); if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); regressionAnalyzer.setInstances(results); log.info("{}", regressionAnalyzer); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] res : results) { analyzer.addInstance(dataset.getLabelString(res[0]), new ClassifierResult(dataset.getLabelString(res[1]), 1.0)); } log.info("{}", analyzer); } } } }
From source file:imageClassify.TestForest.java
License:Apache License
private void sequential() throws IOException { log.info("Loading the forest..."); DecisionForest forest = DecisionForest.load(getConf(), modelPath); if (forest == null) { log.error("No Decision Forest found!"); return;// www . java2 s . c om } // load the dataset Dataset dataset = Dataset.load(getConf(), datasetPath); DataConverter converter = new DataConverter(dataset); log.info("Sequential classification..."); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); List<double[]> resList = Lists.newArrayList(); if (dataFS.getFileStatus(dataPath).isDir()) { //the input is a directory of files testDirectory(outputPath, converter, forest, dataset, resList, rng); } else { // the input is one single file testFile(dataPath, outputPath, converter, forest, dataset, resList, rng); } time = System.currentTimeMillis() - time; log.info("Classification Time: {}", DFUtils.elapsedTime(time)); if (analyze) { if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); double[][] results = new double[resList.size()][2]; regressionAnalyzer.setInstances(resList.toArray(results)); log.info("{}", regressionAnalyzer); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] r : resList) { analyzer.addInstance(dataset.getLabelString(r[0]), new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); } log.info("{}", analyzer); } } }
From source file:javaapplication3.runRandomForest.java
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException { String outputFile = "data/lule24"; String inputFile = "data/DataFraud1MTest.csv"; String modelFile = "data/forest.seq"; String infoFile = "data/DataFraud1M.info"; Path dataPath = new Path(inputFile); // test data path Path datasetPath = new Path(infoFile); Path modelPath = new Path(modelFile); // path where the forest is stored Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); /*/*from w w w .j av a2 s. c o m*/ p = Runtime.getRuntime().exec("bash /home/ivan/hadoop-1.2.1/bin/start-all.sh"); p.waitFor();*/ if (outputPath == null) { throw new IllegalArgumentException( "You must specify the ouputPath when using the mapreduce implementation"); } Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, conf); classifier.run(); double[][] results = classifier.getResults(); if (results != null) { Dataset dataset = Dataset.load(conf, datasetPath); Data data = DataLoader.loadData(dataset, fs, dataPath); Instance inst; for (int i = 0; i < data.size(); i++) { inst = data.get(i); //System.out.println("Prediction:"+inst.get(7)+" Real value:"+results[i][1]); System.out.println(inst.get(0) + " " + inst.get(1) + " " + inst.get(2) + " " + inst.get(3) + " " + inst.get(4) + " " + inst.get(5) + " " + inst.get(6) + " " + inst.get(7) + " "); } ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] res : results) { analyzer.addInstance(dataset.getLabelString(res[0]), new ClassifierResult(dataset.getLabelString(res[1]), 1.0)); System.out.println("Prvi shit:" + res[0] + " Drugi Shit" + res[1]); } System.out.println(analyzer.toString()); } }