List of usage examples for org.apache.mahout.classifier.df.data Dataset getLabelString
public String getLabelString(double code)
From source file:com.wsc.myexample.decisionForest.MyTestForest.java
License:Apache License
private void testFile(String inPath, String outPath, DataConverter converter, MyDecisionForest forest, Dataset dataset, /*List<double[]> results,*/ Random rng, ResultAnalyzer analyzer) throws IOException { // create the predictions file DataOutputStream ofile = null; if (outPath != null) { ofile = new DataOutputStream(new FileOutputStream(outPath)); }/*from ww w .j a va 2 s. c om*/ DataInputStream input = new DataInputStream(new FileInputStream(inPath)); try { Scanner scanner = new Scanner(input); while (scanner.hasNextLine()) { String line = scanner.nextLine(); if (line.isEmpty()) { continue; // skip empty lines } Instance instance = converter.convert(line); if (instance == null) continue; double prediction = forest.classify(dataset, rng, instance); if (ofile != null) { ofile.writeChars(Double.toString(prediction)); // write the prediction ofile.writeChar('\n'); } // results.add(new double[] {dataset.getLabel(instance), prediction}); analyzer.addInstance(dataset.getLabelString(dataset.getLabel(instance)), new ClassifierResult(dataset.getLabelString(prediction), 1.0)); } scanner.close(); } finally { Closeables.closeQuietly(input); ofile.close(); } }
From source file:guipart.view.GUIOverviewController.java
@FXML void handleClassifyRF(ActionEvent event) throws IOException { String outputFile = "data/out"; Path dataPath = new Path(textFieldCSVRF.getText()); // test data path Path datasetPath = new Path(textFieldDatasetRF.getText()); //info file about data set Path modelPath = new Path(textFieldModelRF.getText()); // path where the forest is stored Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileSystem outFS = FileSystem.get(conf); System.out.println("Loading the forest"); DecisionForest forest = DecisionForest.load(conf, modelPath); if (forest == null) System.err.println("No decision forest found!"); // load the dataset Dataset dataset = Dataset.load(conf, datasetPath); DataConverter converter = new DataConverter(dataset); System.out.println("Sequential classification"); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); List<double[]> resList = Lists.newArrayList(); if (fs.getFileStatus(dataPath).isDir()) { //the input is a directory of files Utils.rfTestDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS, guiPart);/* www . j a v a2 s. 
c o m*/ } else { // the input is one single file Utils.rfTestFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs, guiPart); } time = System.currentTimeMillis() - time; //log.info("Classification Time: {}", DFUtils.elapsedTime(time)); System.out.println("Classification time: " + DFUtils.elapsedTime(time)); if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); double[][] results = new double[resList.size()][2]; regressionAnalyzer.setInstances(resList.toArray(results)); //log.info("{}", regressionAnalyzer); System.out.println(regressionAnalyzer.toString()); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] r : resList) { analyzer.addInstance(dataset.getLabelString(r[0]), new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); } //log.info("{}", analyzer); System.out.println(analyzer.toString()); textAnalyze.setText(analyzer.toString()); } }
From source file:imageClassify.TestForest.java
License:Apache License
/**
 * Runs the classification through {@code Classifier} and, when analysis is enabled and the
 * classifier produced results, logs an analysis of them.
 *
 * <p>A numerical label yields a {@code RegressionResultAnalyzer} report; any other label
 * yields a {@code ResultAnalyzer} report built from the label strings of each result row.
 *
 * @throws IllegalArgumentException if {@code outputPath} is {@code null}
 * @throws ClassNotFoundException declared for the {@code Classifier} run
 * @throws IOException declared for the {@code Classifier} run and dataset loading
 * @throws InterruptedException declared for the {@code Classifier} run
 */
private void mapreduce() throws ClassNotFoundException, IOException, InterruptedException {
    if (outputPath == null) {
        throw new IllegalArgumentException(
                "You must specify the ouputPath when using the mapreduce implementation");
    }

    Classifier job = new Classifier(modelPath, dataPath, datasetPath, outputPath, getConf());
    job.run();

    // Results are only fetched when an analysis was requested.
    if (!analyze) {
        return;
    }
    double[][] rows = job.getResults();
    if (rows == null) {
        return;
    }

    Dataset dataset = Dataset.load(getConf(), datasetPath);
    boolean numericalLabel = dataset.isNumerical(dataset.getLabelId());
    if (numericalLabel) {
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        regressionAnalyzer.setInstances(rows);
        log.info("{}", regressionAnalyzer);
        return;
    }

    ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
    for (int i = 0; i < rows.length; i++) {
        // NOTE(review): column 0 is presumably the actual label and column 1 the
        // prediction; confirm against Classifier.getResults().
        String first = dataset.getLabelString(rows[i][0]);
        String second = dataset.getLabelString(rows[i][1]);
        analyzer.addInstance(first, new ClassifierResult(second, 1.0));
    }
    log.info("{}", analyzer);
}
From source file:imageClassify.TestForest.java
License:Apache License
private void sequential() throws IOException { log.info("Loading the forest..."); DecisionForest forest = DecisionForest.load(getConf(), modelPath); if (forest == null) { log.error("No Decision Forest found!"); return;/* ww w. ja v a2 s.c om*/ } // load the dataset Dataset dataset = Dataset.load(getConf(), datasetPath); DataConverter converter = new DataConverter(dataset); log.info("Sequential classification..."); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); List<double[]> resList = Lists.newArrayList(); if (dataFS.getFileStatus(dataPath).isDir()) { //the input is a directory of files testDirectory(outputPath, converter, forest, dataset, resList, rng); } else { // the input is one single file testFile(dataPath, outputPath, converter, forest, dataset, resList, rng); } time = System.currentTimeMillis() - time; log.info("Classification Time: {}", DFUtils.elapsedTime(time)); if (analyze) { if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); double[][] results = new double[resList.size()][2]; regressionAnalyzer.setInstances(resList.toArray(results)); log.info("{}", regressionAnalyzer); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] r : resList) { analyzer.addInstance(dataset.getLabelString(r[0]), new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); } log.info("{}", analyzer); } } }
From source file:javaapplication3.runRandomForest.java
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException { String outputFile = "data/lule24"; String inputFile = "data/DataFraud1MTest.csv"; String modelFile = "data/forest.seq"; String infoFile = "data/DataFraud1M.info"; Path dataPath = new Path(inputFile); // test data path Path datasetPath = new Path(infoFile); Path modelPath = new Path(modelFile); // path where the forest is stored Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); /*//from ww w . j av a 2 s .c om p = Runtime.getRuntime().exec("bash /home/ivan/hadoop-1.2.1/bin/start-all.sh"); p.waitFor();*/ if (outputPath == null) { throw new IllegalArgumentException( "You must specify the ouputPath when using the mapreduce implementation"); } Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, conf); classifier.run(); double[][] results = classifier.getResults(); if (results != null) { Dataset dataset = Dataset.load(conf, datasetPath); Data data = DataLoader.loadData(dataset, fs, dataPath); Instance inst; for (int i = 0; i < data.size(); i++) { inst = data.get(i); //System.out.println("Prediction:"+inst.get(7)+" Real value:"+results[i][1]); System.out.println(inst.get(0) + " " + inst.get(1) + " " + inst.get(2) + " " + inst.get(3) + " " + inst.get(4) + " " + inst.get(5) + " " + inst.get(6) + " " + inst.get(7) + " "); } ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] res : results) { analyzer.addInstance(dataset.getLabelString(res[0]), new ClassifierResult(dataset.getLabelString(res[1]), 1.0)); System.out.println("Prvi shit:" + res[0] + " Drugi Shit" + res[1]); } System.out.println(analyzer.toString()); } }
From source file:javaapplication3.RunRandomForestSeq.java
public static void main(String[] args) throws IOException { String outputFile = "data/out"; String inputFile = "data/DataFraud1MTest.csv"; String modelFile = "data/forest.seq"; String infoFile = "data/DataFraud1M.info"; Path dataPath = new Path(inputFile); // test data path Path datasetPath = new Path(infoFile); Path modelPath = new Path(modelFile); // path where the forest is stored Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileSystem outFS = FileSystem.get(conf); //log.info("Loading the forest..."); System.out.println("Loading the forest"); DecisionForest forest = DecisionForest.load(conf, modelPath); if (forest == null) System.err.println("No decision forest found!"); //log.error("No Decision Forest found!"); // load the dataset Dataset dataset = Dataset.load(conf, datasetPath); DataConverter converter = new DataConverter(dataset); //log.info("Sequential classification..."); System.out.println("Sequential classification"); long time = System.currentTimeMillis(); Random rng = RandomUtils.getRandom(); List<double[]> resList = Lists.newArrayList(); if (fs.getFileStatus(dataPath).isDir()) { //the input is a directory of files testDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS); } else {//from ww w.ja va2s. 
co m // the input is one single file testFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs); } time = System.currentTimeMillis() - time; //log.info("Classification Time: {}", DFUtils.elapsedTime(time)); System.out.println("Classification time: " + DFUtils.elapsedTime(time)); if (dataset.isNumerical(dataset.getLabelId())) { RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); double[][] results = new double[resList.size()][2]; regressionAnalyzer.setInstances(resList.toArray(results)); //log.info("{}", regressionAnalyzer); System.out.println(regressionAnalyzer.toString()); } else { ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); for (double[] r : resList) { analyzer.addInstance(dataset.getLabelString(r[0]), new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); } //log.info("{}", analyzer); System.out.println(analyzer.toString()); } }