Example usage of the org.apache.mahout.classifier.df.mapreduce.inmem.InMemBuilder constructor

List of usage examples for the org.apache.mahout.classifier.df.mapreduce.inmem.InMemBuilder constructor

Introduction

On this page you can find example usage of the org.apache.mahout.classifier.df.mapreduce.inmem.InMemBuilder constructor.

Prototype

public InMemBuilder(TreeBuilder treeBuilder, Path dataPath, Path datasetPath, Long seed, Configuration conf) 

Source Link

Usage

From source file: bigimp.BuildForest.java

License:Apache License

/**
 * Builds a decision forest and writes it to {@code forest.seq} under the output path.
 *
 * <p>If the output path already exists, an error is logged and nothing is built. Otherwise a
 * {@link DecisionTreeBuilder} is configured from the optional settings ({@code m},
 * {@code minSplitNum}, {@code minVarianceProportion} are applied only when non-null), and the
 * forest is built with either a {@link PartialBuilder} or an {@link InMemBuilder} depending on
 * {@code isPartial}. Build time and forest statistics are logged before the forest is stored.
 *
 * @throws IOException propagated from the file system, the forest builder, or the store step
 * @throws ClassNotFoundException propagated from the forest builder
 * @throws InterruptedException propagated from the forest builder
 */
private void buildForest() throws IOException, ClassNotFoundException, InterruptedException {
    // Refuse to run when the output location is already present.
    FileSystem outputFs = outputPath.getFileSystem(getConf());
    if (outputFs.exists(outputPath)) {
        log.error("Output path already exists");
        return;
    }

    // Configure the per-tree builder; unset (null) options are left at the builder's own values.
    DecisionTreeBuilder treeBuilder = new DecisionTreeBuilder();
    if (m != null) {
        treeBuilder.setM(m);
    }
    treeBuilder.setComplemented(complemented);
    if (minSplitNum != null) {
        treeBuilder.setMinSplitNum(minSplitNum);
    }
    if (minVarianceProportion != null) {
        treeBuilder.setMinVarianceProportion(minVarianceProportion);
    }

    // Choose the map-reduce implementation.
    Builder forestBuilder;
    if (!isPartial) {
        log.info("InMem Mapred implementation");
        forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
        log.info("Partial Mapred implementation");
        forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }
    forestBuilder.setOutputDirName(outputPath.getName());

    // Build the forest, measuring wall-clock time around the build call only.
    log.info("Building the forest...");
    long startMillis = System.currentTimeMillis();
    DecisionForest forest = forestBuilder.build(nbTrees);
    long elapsedMillis = System.currentTimeMillis() - startMillis;

    log.info("Build Time: {}", DFUtils.elapsedTime(elapsedMillis));
    log.info("Forest num Nodes: {}", forest.nbNodes());
    log.info("Forest mean num Nodes: {}", forest.meanNbNodes());
    log.info("Forest mean max Depth: {}", forest.meanMaxDepth());

    // Persist the forest inside the output directory.
    Path forestPath = new Path(outputPath, "forest.seq");
    log.info("Storing the forest in: {}", forestPath);
    DFUtils.storeWritable(getConf(), forestPath, forest);
}