List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:avro.mr.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*from ww w. ja v a 2s. c o m*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:avro.mr.MapReduceColorCountSeqFile.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/* w w w .j a v a 2s. c om*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCountSeqFile.class); job.setJobName("Color Count 2"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroSequenceFileInputFormat.class); AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG)); AvroJob.setInputValueSchema(job, User.getClassSchema()); job.setMapperClass(ColorCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroSequenceFileOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:basic.PartitionGraph.java
License:Apache License
/** * Runs this tool.//from ww w . j ava 2 s . c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions") .create(NUM_PARTITIONS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(NUM_PARTITIONS)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inPath = cmdline.getOptionValue(INPUT); String outPath = cmdline.getOptionValue(OUTPUT); int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS)); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: " + PartitionGraph.class.getSimpleName()); LOG.info(" - input dir: " + inPath); LOG.info(" - output dir: " + outPath); LOG.info(" - num partitions: " + numParts); LOG.info(" - node cnt: " + nodeCount); LOG.info(" - use range partitioner: " + useRange); Configuration conf = getConf(); conf.setInt("NodeCount", nodeCount); Job job = Job.getInstance(conf); job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath); job.setJarByClass(PartitionGraph.class); job.setNumReduceTasks(numParts); FileInputFormat.setInputPaths(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); if (useRange) { job.setPartitionerClass(RangePartitioner.class); } FileSystem.get(conf).delete(new Path(outPath), true); job.waitForCompletion(true); return 0; }
From source file:bb.BranchAndBound.java
License:Apache License
static Job getJob(String input, String output, String dataDir, int iteration) throws Exception { Configuration conf = new Configuration(); FileSystem hdfs = FileSystem.get(conf); FileStatus[] fileStatus = hdfs.listStatus(new Path(input)); for (int i = 0; i < fileStatus.length; ++i) { if (fileStatus[i].getLen() == 0) { hdfs.delete(fileStatus[i].getPath()); }// w ww . ja va2 s . c o m } DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf); Job ret = new Job(conf, dataDir + "_iteration_" + iteration); ret.setJarByClass(BranchAndBound.class); ret.setMapperClass(BBMapper1.class); ret.setReducerClass(BBReducer.class); //ret.setReducerClass(MergeReducer.class); FileInputFormat.setInputPaths(ret, new Path(input)); //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864); FileOutputFormat.setOutputPath(ret, new Path(output)); ret.setOutputKeyClass(NullWritable.class); ret.setOutputValueClass(Text.class); return ret; }
From source file:bdss.cmu.edu.Sort.java
License:Apache License
/** * The main driver for sort program.//from w w w. ja v a 2s . c o m * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */ public int run(String[] args) throws Exception { Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = conf.get(REDUCES_PER_HOST); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = BytesWritable.class; List<String> otherArgs = new ArrayList<String>(); InputSampler.Sampler<K, V> sampler = null; for (int i = 0; i < args.length; ++i) { try { if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-totalOrder".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE; sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs job = new Job(conf); job.setJobName("sorter"); job.setJarByClass(Sort.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(num_reduces); job.setInputFormatClass(inputFormatClass); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); // Make sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(job, otherArgs.get(0)); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1))); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); job.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(job)[0]; inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile); InputSampler.<K, V>writePartitionFile(job, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, conf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return ret; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { HalvadeConf.setIsPass2(pass1Conf, false); HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true, halvadeOpts.useBamInput);//from w w w . j a v a2 s . co m Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline"); pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries)); pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class); FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf); try { if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) { // add every file in directory FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in)); for (FileStatus file : files) { if (!file.isDirectory()) { FileInputFormat.addInputPath(pass1Job, file.getPath()); } } } else { FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in)); } } catch (IOException | IllegalArgumentException e) { Logger.EXCEPTION(e); } FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf); boolean skipPass1 = false; if (outFs.exists(new Path(tmpOutDir))) { // check if genome already exists skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS")); if (skipPass1) Logger.DEBUG("pass1 genome already created, skipping pass 1"); else { Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists."); Logger.INFO("ERROR: Please remove this directory before trying again."); System.exit(-2); } } if (!skipPass1) { FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir)); pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class); pass1Job.setInputFormatClass(HalvadeTextInputFormat.class); pass1Job.setMapOutputKeyClass(GenomeSJ.class); pass1Job.setMapOutputValueClass(Text.class); pass1Job.setSortComparatorClass(GenomeSJSortComparator.class); pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class); pass1Job.setNumReduceTasks(1); pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class); pass1Job.setOutputKeyClass(LongWritable.class); pass1Job.setOutputValueClass(Text.class); return runTimedJob(pass1Job, "Halvade pass 1 Job"); } else return 0; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType) throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException { String pipeline = ""; if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) { HalvadeConf.setIsPass2(halvadeConf, true); HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput); pipeline = RNA_PASS2;// ww w . ja v a 2 s .c o m } else if (jobType == HalvadeResourceManager.DNA) { HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput); pipeline = DNA; } HalvadeConf.setOutDir(halvadeConf, tmpOutDir); FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf); if (outFs.exists(new Path(tmpOutDir))) { Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists."); Logger.INFO("ERROR: Please remove this directory before trying again."); System.exit(-2); } if (halvadeOpts.useBamInput) setHeaderFile(halvadeOpts.in, halvadeConf); Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline); halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries)); halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class); addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob); FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir)); if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) { halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class); halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class); } else if (jobType == HalvadeResourceManager.DNA) { halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]); halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class); } halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class); halvadeJob.setMapOutputValueClass(SAMRecordWritable.class); halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class); halvadeJob.setOutputKeyClass(Text.class); if (halvadeOpts.mergeBam) { halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class); halvadeJob.setOutputValueClass(SAMRecordWritable.class); } else { halvadeJob.setPartitionerClass(ChrRgPartitioner.class); halvadeJob.setSortComparatorClass(ChrRgSortComparator.class); halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class); halvadeJob.setOutputValueClass(VariantContextWritable.class); } if (halvadeOpts.justAlign) halvadeJob.setNumReduceTasks(0); else if (halvadeOpts.mergeBam) { halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class); halvadeJob.setNumReduceTasks(1); } else halvadeJob.setNumReduceTasks(halvadeOpts.reduces); if (halvadeOpts.useBamInput) { halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class); halvadeJob.setInputFormatClass(BAMInputFormat.class); } return runTimedJob(halvadeJob, "Halvade Job"); }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount) throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException { Configuration combineConf = getConf(); if (!halvadeOpts.out.endsWith("/")) halvadeOpts.out += "/"; HalvadeConf.setInputDir(combineConf, halvadeOutDir); HalvadeConf.setOutDir(combineConf, mergeOutDir); FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf); if (outFs.exists(new Path(mergeOutDir))) { Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists."); Logger.INFO("ERROR: Please remove this directory before trying again."); System.exit(-2);//from w w w .j av a 2 s . c o m } HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll); HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false, halvadeOpts.useBamInput); Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF"); combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class); addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf"); FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir)); combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class); combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class); combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class); combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class); combineJob.setNumReduceTasks(1); combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class); combineJob.setOutputKeyClass(Text.class); combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class); return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job"); }
From source file:be_uclouvain_ingi2145_lab05.GiraphJobRunner.java
@Override public int run(String[] strings) throws Exception { GiraphConfiguration gconf = new GiraphConfiguration(conf); //gconf.setVertexClass(SimpleShortestPathsComputation.class); /*gconf.setVertexInputFormatClass( SimpleShortestPathsVertexInputFormat.class); gconf.setVertexOutputFormatClass(/*w w w .j a va2 s . co m*/ SimpleShortestPathsVertexOutputFormat.class); */ CommandLine cmd = ConfigurationUtils.parseArgs(gconf, strings); if (null == cmd) { return 0; } //GiraphYarnClient job = new GiraphYarnClient(gconf,gconf.getClass().getName()); GiraphJob job = new GiraphJob(gconf, getClass().getName()); job.getInternalJob().setJarByClass(getClass()); if (cmd.hasOption("vof") || cmd.hasOption("eof")) { if (cmd.hasOption("op")) { Path outputPath = new Path(cmd.getOptionValue("op")); FileSystem fs = FileSystem.get(outputPath.toUri(), conf); /*Check if output path (args[1])exist or not*/ if (fs.exists(outputPath)) { /*If exist delete the output path*/ fs.delete(outputPath, true); } FileOutputFormat.setOutputPath(job.getInternalJob(), outputPath); } } /* if (cmd.hasOption("vif") || cmd.hasOption("eif")) { if (cmd.hasOption("vip")) { FileInputFormat.addInputPath(job.getInternalJob(), new Path(cmd.getOptionValue("op"))); } }*/ //If there is a custom option specified if (cmd.hasOption("ca")) { String[] args = cmd.getOptionValues("ca"); LOG.fatal("" + Arrays.toString(args)); gconf.set("ca", args[0].split("=")[1]); LOG.fatal("" + gconf.get("ca")); gconf.setWorkerConfiguration(Integer.parseInt(cmd.getOptionValue("w")), Integer.parseInt(cmd.getOptionValue("w")), 100.0f); } /* if (cmd.hasOption("cf")) { DistributedCache.addCacheFile(new URI(cmd.getOptionValue("cf")), job.getConfiguration()); } */ return job.run(true) ? 0 : -1; }
From source file:biglayer.AutoCoder.java
License:Apache License
/** * Runs this tool./*from w ww .j a va2 s.c om*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; }*/ //String inputPath = cmdline.getOptionValue(INPUT); //String outputPath = cmdline.getOptionValue(OUTPUT); String inputPath = "qiwang321/MNIST-mingled-key/part*"; String outputPath = "shangfu/layeroutput"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); conf.setInt("num_reduce_task", reduceTasks); conf.set("sidepath", outputPath + "_side/"); Job job0 = Job.getInstance(conf); job0.setJobName(AutoCoder.class.getSimpleName()); job0.setJarByClass(AutoCoder.class); job0.setNumReduceTasks(reduceTasks); job0.getConfiguration().setInt("layer_ind", 0); FileInputFormat.setInputPaths(job0, new Path(inputPath)); FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0")); job0.setInputFormatClass(KeyValueTextInputFormat.class); job0.setOutputFormatClass(SequenceFileOutputFormat.class); job0.setMapOutputKeyClass(PairOfInts.class); job0.setMapOutputValueClass(ModelNode.class); job0.setOutputKeyClass(PairOfInts.class); job0.setOutputValueClass(ModelNode.class); job0.setMapperClass(MyMapper0.class); job0.setReducerClass(MyReducer0.class); job0.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath + "_0"); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); long codeStart = System.currentTimeMillis(); double codeTimeSum = 0; job0.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) { Job job1 = Job.getInstance(conf); job1.setJobName(AutoCoder.class.getSimpleName()); job1.setJarByClass(AutoCoder.class); job1.setNumReduceTasks(reduceTasks); job1.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1))); FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train")); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + (iterations - 1)); LOG.info(" - output path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - number of reducers: " + reduceTasks); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); job1.setMapOutputKeyClass(PairOfInts.class); job1.setMapOutputValueClass(ModelNode.class); job1.setOutputKeyClass(PairOfInts.class); job1.setOutputValueClass(ModelNode.class); job1.setMapperClass(MyMapper.class); job1.setReducerClass(MyReducer_Train.class); job1.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations + "_train"); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job1.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Job job2 = Job.getInstance(conf); job2.setJobName(AutoCoder.class.getSimpleName()); job2.setJarByClass(AutoCoder.class); job2.setNumReduceTasks(reduceTasks); job2.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train"))); FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations)); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - output path: " + outputPath + "_" + iterations); LOG.info(" - number of reducers: " + reduceTasks); job2.setInputFormatClass(SequenceFileInputFormat.class); job2.setOutputFormatClass(SequenceFileOutputFormat.class); job2.setMapOutputKeyClass(PairOfInts.class); job2.setMapOutputValueClass(ModelNode.class); job2.setOutputKeyClass(PairOfInts.class); job2.setOutputValueClass(ModelNode.class); job2.setMapperClass(MyMapper.class); job2.setReducerClass(MyReducer_GenData.class); job2.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job2.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; } LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER); LOG.info(" - output path: " + outputPath); reduceTasks = 1; LOG.info(" - number of reducers: " + reduceTasks); Job job_super = Job.getInstance(conf); job_super.setJobName(AutoCoder.class.getSimpleName()); job_super.setJarByClass(AutoCoder.class); job_super.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER)); FileOutputFormat.setOutputPath(job_super, new Path(outputPath)); job_super.setInputFormatClass(SequenceFileInputFormat.class); job_super.setOutputFormatClass(SequenceFileOutputFormat.class); job_super.setMapOutputKeyClass(PairOfInts.class); job_super.setMapOutputValueClass(ModelNode.class); job_super.setOutputKeyClass(NullWritable.class); job_super.setOutputValueClass(NullWritable.class); job_super.setMapperClass(MyMapper_Super.class); job_super.setReducerClass(MyReducer_Super.class); job_super.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job_super.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + codeTimeSum + " seconds."); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }