Example usage for org.apache.mahout.classifier.df.data DataLoader loadData

List of usage examples for org.apache.mahout.classifier.df.data DataLoader loadData

Introduction

On this page you can find example usages of org.apache.mahout.classifier.df.data DataLoader loadData.

Prototype

public static Data loadData(Dataset dataset, FileSystem fs, Path[] pathes) throws IOException 

Source Link

Document

Loads the data from the multiple paths specified by the pathes parameter.

Usage

From source file:bigimp.BuildForest.java

License:Apache License

/**
 * Loads the data found at {@code dataPath}, interpreting it with {@code dataset}.
 *
 * <p>The file system is obtained from {@code dataPath} using {@code conf}, and the
 * actual loading is delegated to {@code DataLoader.loadData}. An info message is
 * logged before and after the load.
 *
 * @param conf     configuration used to resolve the file system of {@code dataPath}
 * @param dataPath location of the data to load
 * @param dataset  dataset description handed to the loader
 * @return the loaded data
 * @throws IOException if resolving the file system or loading the data fails
 */
protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
    log.info("Loading the data...");
    Data loaded = DataLoader.loadData(dataset, dataPath.getFileSystem(conf), dataPath);
    log.info("Data Loaded");
    return loaded;
}

From source file:javaapplication3.runRandomForest.java

public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

    String outputFile = "data/lule24";
    String inputFile = "data/DataFraud1MTest.csv";
    String modelFile = "data/forest.seq";
    String infoFile = "data/DataFraud1M.info";

    Path dataPath = new Path(inputFile); // test data path
    Path datasetPath = new Path(infoFile);
    Path modelPath = new Path(modelFile); // path where the forest is stored
    Path outputPath = new Path(outputFile); // path to predictions file, if null do not output the predictions

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    /*/*from w w w  .j  a v a 2  s.  c om*/
    p = Runtime.getRuntime().exec("bash /home/ivan/hadoop-1.2.1/bin/start-all.sh");
    p.waitFor();*/

    if (outputPath == null) {
        throw new IllegalArgumentException(
                "You must specify the ouputPath when using the mapreduce implementation");
    }

    Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, conf);

    classifier.run();

    double[][] results = classifier.getResults();

    if (results != null) {

        Dataset dataset = Dataset.load(conf, datasetPath);
        Data data = DataLoader.loadData(dataset, fs, dataPath);

        Instance inst;

        for (int i = 0; i < data.size(); i++) {
            inst = data.get(i);

            //System.out.println("Prediction:"+inst.get(7)+" Real value:"+results[i][1]);
            System.out.println(inst.get(0) + " " + inst.get(1) + " " + inst.get(2) + " " + inst.get(3) + " "
                    + inst.get(4) + " " + inst.get(5) + " " + inst.get(6) + " " + inst.get(7) + " ");
        }

        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");

        for (double[] res : results) {
            analyzer.addInstance(dataset.getLabelString(res[0]),
                    new ClassifierResult(dataset.getLabelString(res[1]), 1.0));
            System.out.println("Prvi shit:" + res[0] + " Drugi Shit" + res[1]);
        }

        System.out.println(analyzer.toString());

    }

}