Example usage of the org.apache.mahout.classifier.df.data.DataConverter constructor DataConverter(Dataset)

List of usage examples for the org.apache.mahout.classifier.df.data.DataConverter constructor

Introduction

On this page you can find example usages of the constructor of org.apache.mahout.classifier.df.data.DataConverter.

Prototype

public DataConverter(Dataset dataset) 

Source Link

Usage

From source file:com.wsc.myexample.decisionForest.MyDataLoader.java

License:Apache License

/**
 * Loads the data from a file, converting each line to an instance.
 *
 * <p>Empty lines, and lines for which the converter returns {@code null}
 * (missing values), are skipped with a warning.
 *
 * @param dataset
 *          dataset description handed to the converter and to the returned data
 * @param fpath
 *          data file path
 * @return the data built from every line that converted to a non-null instance
 * @throws IOException
 *           if the file cannot be opened or a read error occurs part-way through
 */
public static Data loadData(Dataset dataset, String fpath) throws IOException {
    Scanner scanner = new Scanner(new File(fpath));
    try {
        List<Instance> instances = Lists.newArrayList();

        DataConverter converter = new DataConverter(dataset);

        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (line.isEmpty()) {
                log.warn("{}: empty string", instances.size());
                continue;
            }

            Instance instance = converter.convert(line);
            if (instance == null) {
                // missing values found
                log.warn("{}: missing values", instances.size());
                continue;
            }

            instances.add(instance);
        }

        // Scanner swallows read errors and simply reports "no more lines";
        // surface them so a truncated read is not mistaken for a complete load.
        IOException readFailure = scanner.ioException();
        if (readFailure != null) {
            throw readFailure;
        }

        return new Data(dataset, instances);
    } finally {
        // Always release the file handle, including when conversion throws.
        scanner.close();
    }
}

From source file:com.wsc.myexample.decisionForest.MyDataLoader.java

License:Apache License

/**
 * Loads the data from a String array, one instance per element.
 *
 * <p>Empty elements, and elements for which the converter returns
 * {@code null} (missing values), are skipped with a warning.
 *
 * @param dataset
 *          dataset description handed to the converter and to the returned data
 * @param data
 *          the lines to convert
 * @return the data built from every element that converted to a non-null instance
 */
public static Data loadData(Dataset dataset, String[] data) {
    DataConverter lineConverter = new DataConverter(dataset);
    List<Instance> loaded = Lists.newArrayList();

    for (int i = 0; i < data.length; i++) {
        String row = data[i];
        if (row.isEmpty()) {
            log.warn("{}: empty string", loaded.size());
            continue;
        }

        Instance parsed = lineConverter.convert(row);
        if (parsed != null) {
            loaded.add(parsed);
        } else {
            // missing values found
            log.warn("{}: missing values", loaded.size());
        }
    }

    return new Data(dataset, loaded);
}

From source file:com.wsc.myexample.decisionForest.MyTestForest.java

License:Apache License

/**
 * Loads the forest and the dataset, classifies the input sequentially and
 * logs the elapsed time followed by the result analyzer.
 *
 * <p>Returns without classifying when no forest could be loaded.
 *
 * @throws IOException
 *           propagated from loading or from the test helpers
 */
private void sequential() throws IOException {

    log.info("Loading the forest...");
    MyDecisionForest forest = MyDecisionForest.load(modelPath);
    if (forest == null) {
        log.error("No Decision Forest found!");
        return;
    }

    // load the dataset
    Dataset dataset = MyDataset.load(datasetPath);
    DataConverter lineConverter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long startedAt = System.currentTimeMillis();

    Random random = RandomUtils.getRandom();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");

    boolean inputIsDirectory = new File(dataPath).isDirectory();
    if (!inputIsDirectory) {
        // the input is one single file
        testFile(dataPath, outputPath, lineConverter, forest, dataset, random, resultAnalyzer);
    } else {
        // the input is a directory of files
        testDirectory(outputPath, lineConverter, forest, dataset, random, resultAnalyzer);
    }

    long elapsed = System.currentTimeMillis() - startedAt;
    log.info("Classification Time: {}", DFUtils.elapsedTime(elapsed));
    log.info("{}", resultAnalyzer);
}

From source file:guipart.view.GUIOverviewController.java

/**
 * Handles the "classify with random forest" action: loads the forest and the
 * dataset description from the paths typed into the GUI text fields,
 * classifies the test data sequentially and prints the analysis.
 *
 * <p>Predictions are written to the fixed path {@code data/out}. For a
 * categorical label the analysis is also shown in {@code textAnalyze}.
 * NOTE(review): the regression branch only prints to stdout and does not
 * update {@code textAnalyze} — confirm whether that is intended.
 *
 * @param event
 *          the triggering UI event (not used)
 * @throws IOException
 *           propagated from the file system, the loaders or the test helpers
 */
@FXML
void handleClassifyRF(ActionEvent event) throws IOException {

    String outputFile = "data/out";

    Path dataPath = new Path(textFieldCSVRF.getText()); // test data path
    Path datasetPath = new Path(textFieldDatasetRF.getText()); // info file about data set
    Path modelPath = new Path(textFieldModelRF.getText()); // path where the forest is stored
    Path outputPath = new Path(outputFile); // path to predictions file

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    FileSystem outFS = FileSystem.get(conf);

    System.out.println("Loading the forest");
    DecisionForest forest = DecisionForest.load(conf, modelPath);

    if (forest == null) {
        System.err.println("No decision forest found!");
        // Without a forest there is nothing to classify with; stop here
        // instead of handing a null forest to the test helpers.
        return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(conf, datasetPath);
    DataConverter converter = new DataConverter(dataset);

    System.out.println("Sequential classification");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();

    List<double[]> resList = Lists.newArrayList();
    if (fs.getFileStatus(dataPath).isDir()) {
        // the input is a directory of files
        Utils.rfTestDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS,
                guiPart);
    } else {
        // the input is one single file
        Utils.rfTestFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs, guiPart);
    }

    time = System.currentTimeMillis() - time;
    System.out.println("Classification time: " + DFUtils.elapsedTime(time));

    if (dataset.isNumerical(dataset.getLabelId())) {
        // numerical label: regression analysis over the collected result pairs
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        double[][] results = new double[resList.size()][2];
        regressionAnalyzer.setInstances(resList.toArray(results));
        System.out.println(regressionAnalyzer.toString());

    } else {
        // categorical label: each result pair is mapped back to label strings
        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] r : resList) {
            analyzer.addInstance(dataset.getLabelString(r[0]),
                    new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
        }
        System.out.println(analyzer.toString());
        textAnalyze.setText(analyzer.toString());
    }

}

From source file:imageClassify.TestForest.java

License:Apache License

/**
 * Loads the forest and the dataset, classifies the input sequentially, logs
 * the elapsed time and, when {@code analyze} is set, logs an analysis of the
 * collected results.
 *
 * <p>Returns without classifying when no forest could be loaded.
 *
 * @throws IOException
 *           propagated from the file system, the loaders or the test helpers
 */
private void sequential() throws IOException {

    log.info("Loading the forest...");
    DecisionForest forest = DecisionForest.load(getConf(), modelPath);
    if (forest == null) {
        log.error("No Decision Forest found!");
        return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter lineConverter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long startedAt = System.currentTimeMillis();

    Random random = RandomUtils.getRandom();

    List<double[]> outcomes = Lists.newArrayList();
    boolean inputIsDirectory = dataFS.getFileStatus(dataPath).isDir();
    if (!inputIsDirectory) {
        // the input is one single file
        testFile(dataPath, outputPath, lineConverter, forest, dataset, outcomes, random);
    } else {
        // the input is a directory of files
        testDirectory(outputPath, lineConverter, forest, dataset, outcomes, random);
    }

    long elapsed = System.currentTimeMillis() - startedAt;
    log.info("Classification Time: {}", DFUtils.elapsedTime(elapsed));

    if (!analyze) {
        return;
    }

    if (!dataset.isNumerical(dataset.getLabelId())) {
        // categorical label: each result pair is mapped back to label strings
        ResultAnalyzer resultAnalyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] pair : outcomes) {
            ClassifierResult predicted = new ClassifierResult(dataset.getLabelString(pair[1]), 1.0);
            resultAnalyzer.addInstance(dataset.getLabelString(pair[0]), predicted);
        }
        log.info("{}", resultAnalyzer);
    } else {
        // numerical label: regression analysis over the collected result pairs
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        regressionAnalyzer.setInstances(outcomes.toArray(new double[outcomes.size()][2]));
        log.info("{}", regressionAnalyzer);
    }
}

From source file:javaapplication3.RunRandomForestSeq.java

/**
 * Runs a sequential random-forest classification over hard-coded paths under
 * {@code data/} and prints the elapsed time and the result analysis.
 *
 * @param args
 *          command-line arguments (not used)
 * @throws IOException
 *           propagated from the file system, the loaders or the test helpers
 */
public static void main(String[] args) throws IOException {

    String outputFile = "data/out";
    String inputFile = "data/DataFraud1MTest.csv";
    String modelFile = "data/forest.seq";
    String infoFile = "data/DataFraud1M.info";

    Path dataPath = new Path(inputFile); // test data path
    Path datasetPath = new Path(infoFile); // info file about data set
    Path modelPath = new Path(modelFile); // path where the forest is stored
    Path outputPath = new Path(outputFile); // path to predictions file

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    FileSystem outFS = FileSystem.get(conf);

    System.out.println("Loading the forest");
    DecisionForest forest = DecisionForest.load(conf, modelPath);

    if (forest == null) {
        System.err.println("No decision forest found!");
        // Without a forest there is nothing to classify with; stop here
        // instead of handing a null forest to the test helpers.
        return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(conf, datasetPath);
    DataConverter converter = new DataConverter(dataset);

    System.out.println("Sequential classification");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();

    List<double[]> resList = Lists.newArrayList();
    if (fs.getFileStatus(dataPath).isDir()) {
        // the input is a directory of files
        testDirectory(outputPath, converter, forest, dataset, resList, rng, fs, dataPath, outFS);
    } else {
        // the input is one single file
        testFile(dataPath, outputPath, converter, forest, dataset, resList, rng, outFS, fs);
    }

    time = System.currentTimeMillis() - time;
    System.out.println("Classification time: " + DFUtils.elapsedTime(time));

    if (dataset.isNumerical(dataset.getLabelId())) {
        // numerical label: regression analysis over the collected result pairs
        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        double[][] results = new double[resList.size()][2];
        regressionAnalyzer.setInstances(resList.toArray(results));
        System.out.println(regressionAnalyzer.toString());

    } else {
        // categorical label: each result pair is mapped back to label strings
        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] r : resList) {
            analyzer.addInstance(dataset.getLabelString(r[0]),
                    new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
        }
        System.out.println(analyzer.toString());
    }

}