List of usage examples for org.apache.mahout.clustering.topdown.postprocessor.ClusterOutputPostProcessorDriver#run
public static void run(Path input, Path output, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException
From source file: root.benchmark.ReutersBenchmarkJob.java
License: Apache License
/** * {@inheritDoc}/* w w w . j a v a2s .com*/ */ @Override public int run(String[] args) throws Exception { constructParameterList(); if (parseArguments(args) == null) { return -1; } initializeConfigurationParameters(); printJobHeader(); Configuration conf = getConf(); URI workingURI = new URI(conf.get("fs.default.name")); URI inputURI = new URI(inputDirectory); FileSystem workingFS = FileSystem.get(workingURI, conf); FileSystem inputFS = FileSystem.get(inputURI, conf); Path inputDirectoryPath = new Path(inputDirectory); if (!inputFS.exists(inputDirectoryPath)) { throw new Exception("Input directory not found."); } Path workingDirectoryPath = new Path(workingDirectory); if (workingFS.exists(workingDirectoryPath)) { throw new Exception("Working Directory already exists."); } if (!workingFS.mkdirs(workingDirectoryPath)) { throw new Exception("Failed to create Working Directory."); } String[] vectorizationArgs = { "-i", inputDirectory, "-o", workingDirectory + vectorizationOutputDirectory, "-x", exclusionThreshold, "-mdf", minimumDocumentFrequency, }; System.out.println(); ToolRunner.run(conf, new ReutersVectorizationJob(), vectorizationArgs); long starttime, stoptime, deltatime; starttime = System.currentTimeMillis(); String[] canopyArgs = { "-i", workingDirectory + vectorizationOutputDirectory + "/vectorFiles/tf-vectors", "-o", workingDirectory + canopyOutputDirectory, "-dm", distanceMetric, "-t1", threshold1, "-t2", threshold2 }; ToolRunner.run(conf, new CanopyJob(), canopyArgs); String[] kmeansArgs = { "-i", workingDirectory + vectorizationOutputDirectory + "/vectorFiles/tf-vectors", "-o", workingDirectory + kmeansOutputDirectory, "-c", workingDirectory + canopyOutputDirectory + "/clusters-0-final", "-dm", distanceMetric, "-cd", convergenceDelta, "-mIter", numIterations, "-k", "3" }; ToolRunner.run(conf, new KMeansJob(), kmeansArgs); Path input = new Path(workingDirectory + kmeansOutputDirectory); Path output = new Path(workingDirectory + outputDirectory); 
boolean sequential = false; ClusterOutputPostProcessorDriver.run(input, output, sequential); stoptime = System.currentTimeMillis(); deltatime = stoptime - starttime; conf.setStrings(CONF_PREFIX + "Dataset", "Image"); conf.setStrings(CONF_PREFIX + "Iterations", numIterations); conf.setStrings(CONF_PREFIX + "Levels", convergenceDelta); writeTimestamp(conf, workingDirectory + timeStamp, deltatime); return 0; }