Example usage for org.apache.mahout.classifier.df.data Dataset labels

List of usage examples for org.apache.mahout.classifier.df.data Dataset labels

Introduction

On this page you can find example usages of the labels() method of org.apache.mahout.classifier.df.data.Dataset.

Prototype

public String[] labels() 

Source Link

Usage

From source file:com.wsc.myexample.decisionForest.MyTestForest.java

License:Apache License

/**
 * Classifies the input at {@code dataPath} in-process using the forest stored at
 * {@code modelPath}, then logs the elapsed time and the result analyzer.
 *
 * <p>The analyzer is created from the dataset's labels (default label "unknown") and is
 * handed to {@code testFile} / {@code testDirectory}, which are defined elsewhere.
 *
 * @throws IOException declared by this method; raised by the loading/classification helpers
 */
private void sequential() throws IOException {
    log.info("Loading the forest...");
    final MyDecisionForest forest = MyDecisionForest.load(modelPath);
    if (forest == null) {
        // Nothing to classify with: report and give up.
        log.error("No Decision Forest found!");
        return;
    }

    // The dataset description drives both the converter and the analyzer's label set.
    final Dataset dataset = MyDataset.load(datasetPath);
    final DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    final long startMillis = System.currentTimeMillis();

    final Random rng = RandomUtils.getRandom();
    final ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");

    final boolean singleFile = !new File(dataPath).isDirectory();
    if (singleFile) {
        // the input is one single file
        testFile(dataPath, outputPath, converter, forest, dataset, rng, analyzer);
    } else {
        // the input is a directory of files
        testDirectory(outputPath, converter, forest, dataset, rng, analyzer);
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    log.info("Classification Time: {}", DFUtils.elapsedTime(elapsedMillis));
    log.info("{}", analyzer);
}

From source file:guipart.view.GUIOverviewController.java

/**
 * FXML handler that classifies the data named in {@code textFieldCSVRF} with the forest
 * named in {@code textFieldModelRF}, using the dataset description named in
 * {@code textFieldDatasetRF}.
 *
 * <p>{@code data/out} is passed to the {@code Utils} helpers as the predictions output path.
 * For a numerical label a regression summary is printed to standard output; otherwise a
 * classification summary is printed and also shown in {@code textAnalyze}.
 *
 * @param event the triggering UI event (not inspected)
 * @throws IOException declared by this method; raised by the Hadoop/Mahout calls it makes
 */
@FXML
void handleClassifyRF(ActionEvent event) throws IOException {

    String outputFile = "data/out";

    Path dataPath = new Path(textFieldCSVRF.getText()); // test data path
    Path datasetPath = new Path(textFieldDatasetRF.getText()); // info file about data set
    Path modelPath = new Path(textFieldModelRF.getText()); // path where the forest is stored
    Path outputPath = new Path(outputFile); // path to predictions file

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem outFS = FileSystem.get(conf);

    System.out.println("Loading the forest");
    DecisionForest forest = DecisionForest.load(conf, modelPath);

    if (forest == null) {
        // Without a forest there is nothing to classify with; continuing would hand a
        // null forest to the classification helpers below.
        System.err.println("No decision forest found!");
        return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(conf, datasetPath);
    DataConverter converter = new DataConverter(dataset);

    System.out.println("Sequential classification");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();

    List<double[]> resList = Lists.newArrayList();
    if (fs.getFileStatus(dataPath).isDir()) {
        // the input is a directory of files
        Utils.rfTestDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS,
                guiPart);
    } else {
        // the input is one single file
        Utils.rfTestFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs, guiPart);
    }

    time = System.currentTimeMillis() - time;
    System.out.println("Classification time: " + DFUtils.elapsedTime(time));

    if (dataset.isNumerical(dataset.getLabelId())) {
        // Numerical label: summarise the collected result rows as a regression.
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        double[][] results = new double[resList.size()][2];
        regressionAnalyzer.setInstances(resList.toArray(results));
        System.out.println(regressionAnalyzer.toString());
    } else {
        // Categorical label: map both codes of every row back to label strings.
        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] r : resList) {
            analyzer.addInstance(dataset.getLabelString(r[0]),
                    new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
        }
        // Render once; the same report goes to the console and to the GUI.
        String report = analyzer.toString();
        System.out.println(report);
        textAnalyze.setText(report);
    }

}

From source file:imageClassify.TestForest.java

License:Apache License

/**
 * Runs the {@code Classifier} job over {@code dataPath} and, when {@code analyze} is set
 * and the job produced results, logs a regression or classification summary.
 *
 * @throws IllegalArgumentException if {@code outputPath} is {@code null}
 * @throws ClassNotFoundException declared by this method; see {@code Classifier.run()}
 * @throws IOException declared by this method; raised by the job or dataset loading
 * @throws InterruptedException declared by this method; see {@code Classifier.run()}
 */
private void mapreduce() throws ClassNotFoundException, IOException, InterruptedException {
    if (outputPath == null) {
        throw new IllegalArgumentException(
                "You must specify the ouputPath when using the mapreduce implementation");
    }

    final Classifier job = new Classifier(modelPath, dataPath, datasetPath, outputPath, getConf());
    job.run();

    if (!analyze) {
        return;
    }

    final double[][] results = job.getResults();
    if (results == null) {
        return;
    }

    final Dataset dataset = Dataset.load(getConf(), datasetPath);
    final boolean numericalLabel = dataset.isNumerical(dataset.getLabelId());

    if (numericalLabel) {
        final RegressionResultAnalyzer regression = new RegressionResultAnalyzer();
        regression.setInstances(results);
        log.info("{}", regression);
        return;
    }

    // Categorical label: translate both codes of each row into label strings.
    final ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
    for (int row = 0; row < results.length; row++) {
        final double[] pair = results[row];
        final String firstLabel = dataset.getLabelString(pair[0]);
        final String secondLabel = dataset.getLabelString(pair[1]);
        analyzer.addInstance(firstLabel, new ClassifierResult(secondLabel, 1.0));
    }
    log.info("{}", analyzer);
}

From source file:imageClassify.TestForest.java

License:Apache License

/**
 * Classifies the input at {@code dataPath} in-process with the forest stored at
 * {@code modelPath}, logs the elapsed time and, when {@code analyze} is set, logs a
 * regression or classification summary of the collected result rows.
 *
 * @throws IOException declared by this method; raised by the loading/classification helpers
 */
private void sequential() throws IOException {
    log.info("Loading the forest...");
    final DecisionForest forest = DecisionForest.load(getConf(), modelPath);
    if (forest == null) {
        // Nothing to classify with: report and give up.
        log.error("No Decision Forest found!");
        return;
    }

    final Dataset dataset = Dataset.load(getConf(), datasetPath);
    final DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    final long startMillis = System.currentTimeMillis();

    final Random rng = RandomUtils.getRandom();
    final List<double[]> rows = Lists.newArrayList();

    final boolean singleFile = !dataFS.getFileStatus(dataPath).isDir();
    if (singleFile) {
        // the input is one single file
        testFile(dataPath, outputPath, converter, forest, dataset, rows, rng);
    } else {
        // the input is a directory of files
        testDirectory(outputPath, converter, forest, dataset, rows, rng);
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    log.info("Classification Time: {}", DFUtils.elapsedTime(elapsedMillis));

    if (!analyze) {
        return;
    }

    if (dataset.isNumerical(dataset.getLabelId())) {
        final RegressionResultAnalyzer regression = new RegressionResultAnalyzer();
        final double[][] asArray = rows.toArray(new double[rows.size()][2]);
        regression.setInstances(asArray);
        log.info("{}", regression);
        return;
    }

    // Categorical label: translate both codes of each row into label strings.
    final ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
    for (final double[] pair : rows) {
        final String firstLabel = dataset.getLabelString(pair[0]);
        final String secondLabel = dataset.getLabelString(pair[1]);
        analyzer.addInstance(firstLabel, new ClassifierResult(secondLabel, 1.0));
    }
    log.info("{}", analyzer);
}

From source file:javaapplication3.runRandomForest.java

/**
 * Runs the {@code Classifier} job with hard-coded paths under {@code data/}, prints the
 * first eight attribute values of every loaded test instance, and prints a
 * classification summary of the job results.
 *
 * @param args command-line arguments (not used)
 * @throws InterruptedException declared by this method; see {@code Classifier.run()}
 * @throws IOException declared by this method; raised by the Hadoop/Mahout calls it makes
 * @throws ClassNotFoundException declared by this method; see {@code Classifier.run()}
 */
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

    final String outputFile = "data/lule24";
    final String inputFile = "data/DataFraud1MTest.csv";
    final String modelFile = "data/forest.seq";
    final String infoFile = "data/DataFraud1M.info";

    final Path dataPath = new Path(inputFile); // test data path
    final Path datasetPath = new Path(infoFile); // dataset description
    final Path modelPath = new Path(modelFile); // path where the forest is stored
    final Path outputPath = new Path(outputFile); // path to predictions file

    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(conf);

    // NOTE(review): outputPath was just created with `new`, so this check cannot fire here.
    if (outputPath == null) {
        throw new IllegalArgumentException(
                "You must specify the ouputPath when using the mapreduce implementation");
    }

    final Classifier job = new Classifier(modelPath, dataPath, datasetPath, outputPath, conf);
    job.run();

    final double[][] results = job.getResults();
    if (results == null) {
        return;
    }

    final Dataset dataset = Dataset.load(conf, datasetPath);
    final Data data = DataLoader.loadData(dataset, fs, dataPath);

    // Dump attributes 0..7 of each instance, each value followed by a single space.
    final int printedAttributes = 8;
    for (int i = 0; i < data.size(); i++) {
        final Instance inst = data.get(i);
        final StringBuilder line = new StringBuilder();
        for (int attr = 0; attr < printedAttributes; attr++) {
            line.append(inst.get(attr)).append(" ");
        }
        System.out.println(line.toString());
    }

    final ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
    for (final double[] pair : results) {
        final String firstLabel = dataset.getLabelString(pair[0]);
        final String secondLabel = dataset.getLabelString(pair[1]);
        analyzer.addInstance(firstLabel, new ClassifierResult(secondLabel, 1.0));
        System.out.println("Prvi shit:" + pair[0] + " Drugi Shit" + pair[1]);
    }

    System.out.println(analyzer.toString());

}

From source file:javaapplication3.RunRandomForestSeq.java

/**
 * Classifies the hard-coded test file under {@code data/} in-process with the forest in
 * {@code data/forest.seq}, then prints the elapsed time and a regression or
 * classification summary to standard output.
 *
 * <p>Exits early, after reporting on standard error, when no forest could be loaded.
 *
 * @param args command-line arguments (not used)
 * @throws IOException declared by this method; raised by the Hadoop/Mahout calls it makes
 */
public static void main(String[] args) throws IOException {

    String outputFile = "data/out";
    String inputFile = "data/DataFraud1MTest.csv";
    String modelFile = "data/forest.seq";
    String infoFile = "data/DataFraud1M.info";

    Path dataPath = new Path(inputFile); // test data path
    Path datasetPath = new Path(infoFile); // dataset description
    Path modelPath = new Path(modelFile); // path where the forest is stored
    Path outputPath = new Path(outputFile); // path to predictions file

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem outFS = FileSystem.get(conf);

    System.out.println("Loading the forest");
    DecisionForest forest = DecisionForest.load(conf, modelPath);

    if (forest == null) {
        // Without a forest there is nothing to classify with; continuing would hand a
        // null forest to testDirectory/testFile below.
        System.err.println("No decision forest found!");
        return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(conf, datasetPath);
    DataConverter converter = new DataConverter(dataset);

    System.out.println("Sequential classification");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();

    List<double[]> resList = Lists.newArrayList();
    if (fs.getFileStatus(dataPath).isDir()) {
        // the input is a directory of files
        testDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS);
    } else {
        // the input is one single file
        testFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs);
    }

    time = System.currentTimeMillis() - time;
    System.out.println("Classification time: " + DFUtils.elapsedTime(time));

    if (dataset.isNumerical(dataset.getLabelId())) {
        // Numerical label: summarise the collected result rows as a regression.
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        double[][] results = new double[resList.size()][2];
        regressionAnalyzer.setInstances(resList.toArray(results));
        System.out.println(regressionAnalyzer.toString());
    } else {
        // Categorical label: map both codes of every row back to label strings.
        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] r : resList) {
            analyzer.addInstance(dataset.getLabelString(r[0]),
                    new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
        }
        System.out.println(analyzer.toString());
    }

}