List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
From source file:contrail.correct.InvokeFlash.java
License:Apache License
public RunningJob runJob() throws Exception { JobConf conf = new JobConf(InvokeFlash.class); conf.setJobName("Flash invocation"); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); String flashPath = (String) stage_options.get("flash_binary"); if (flashPath.length() == 0) { throw new Exception("Flash binary location required"); }// w w w.ja va 2 s . c om DistributedCache.addCacheFile(new Path(flashPath).toUri(), conf); //Sets the parameters in JobConf initializeJobConfiguration(conf); AvroJob.setMapperClass(conf, RunFlashMapper.class); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); //Input MatePair read = new MatePair(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new fastqrecord().getSchema()); //Map Only Job conf.setNumReduceTasks(0); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; }
From source file:contrail.correct.InvokeQuakeForMatePairs.java
License:Apache License
public RunningJob runJob() throws Exception { JobConf conf = new JobConf(InvokeQuakeForMatePairs.class); conf.setJobName("Quake Paired invocation"); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); String quakePath = (String) stage_options.get("quake_binary"); String bithashPath = (String) stage_options.get("bithashpath"); //User wants to run quake if (quakePath.length() != 0) { DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf); } else {/*from w w w .j a v a2 s . c om*/ throw new Exception("Please specify Quake binary path"); } if (bithashPath.length() != 0) { DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf); } else { throw new Exception("Please specify bithash path"); } //Sets the parameters in JobConf initializeJobConfiguration(conf); AvroJob.setMapperClass(conf, RunQuakeMapper.class); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); ///Input MatePair read = new MatePair(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new fastqrecord().getSchema()); //Map Only Job conf.setNumReduceTasks(0); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; }
From source file:contrail.correct.InvokeQuakeForSingles.java
License:Apache License
public RunningJob runJob() throws Exception { JobConf conf = new JobConf(InvokeQuakeForSingles.class); conf.setJobName("Quake Singles invocation"); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); String quakePath = (String) stage_options.get("quake_binary"); String bithashPath = (String) stage_options.get("bithashpath"); //User wants to run quake if (quakePath.length() != 0) { DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf); } else {/*from w w w . j a va 2s.co m*/ throw new Exception("Please specify Quake path"); } if (bithashPath.length() != 0) { DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf); } else { throw new Exception("Please specify bithash path"); } //Sets the parameters in JobConf initializeJobConfiguration(conf); AvroJob.setMapperClass(conf, RunQuakeMapper.class); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); ///Input fastqrecord read = new fastqrecord(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new fastqrecord().getSchema()); //Map Only Job conf.setNumReduceTasks(0); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; }
From source file:contrail.stages.GraphStats.java
License:Open Source License
@Override public RunningJob runJob() throws Exception { String[] required_args = { "inputpath", "outputpath" }; checkHasParametersOrDie(required_args); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); Configuration base_conf = getConf(); JobConf conf = null; if (base_conf != null) { conf = new JobConf(getConf(), PairMergeAvro.class); } else {/*ww w .java 2 s . com*/ conf = new JobConf(PairMergeAvro.class); } conf.setJobName("GraphStats " + inputPath); initializeJobConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); Pair<Integer, GraphStatsData> mapOutput = new Pair<Integer, GraphStatsData>(0, new GraphStatsData()); GraphNodeData nodeData = new GraphNodeData(); AvroJob.setInputSchema(conf, nodeData.getSchema()); AvroJob.setMapOutputSchema(conf, mapOutput.getSchema()); AvroJob.setOutputSchema(conf, mapOutput.value().getSchema()); AvroJob.setMapperClass(conf, GraphStatsMapper.class); AvroJob.setCombinerClass(conf, GraphStatsCombiner.class); AvroJob.setReducerClass(conf, GraphStatsReducer.class); // Use a single reducer task that we accumulate all the stats in one // reducer. conf.setNumReduceTasks(1); if (stage_options.containsKey("writeconfig")) { writeJobConfig(conf); } else { // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { // TODO(jlewi): We should only delete an existing directory // if explicitly told to do so. 
sLogger.info("Deleting output path: " + out_path.toString() + " " + "because it already exists."); FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob job = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) ((endtime - starttime) / 1000.0); System.out.println("Runtime: " + diff + " s"); // Create iterators to read the output Iterator<GraphStatsData> binsIterator = createOutputIterator(); // Compute the N50 stats for each bin. ArrayList<GraphN50StatsData> N50Stats = computeN50Stats(binsIterator); // Write the N50 stats to a file. writeN50StatsToFile(N50Stats); Integer topn_contigs = (Integer) stage_options.get("topn_contigs"); if (topn_contigs > 0) { // Get the lengths of the n contigs. binsIterator = createOutputIterator(); List<Integer> topN = topNContigs(binsIterator, topn_contigs); writeTopNContigs(topN); } return job; } return null; }
From source file:contrail.stages.GraphToFasta.java
License:Open Source License
@Override public RunningJob runJob() throws Exception { String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); sLogger.info(" - inputpath: " + inputPath); sLogger.info(" - outputpath: " + outputPath); JobConf conf = new JobConf(GraphToFasta.class); AvroJob.setInputSchema(conf, GraphNodeData.SCHEMA$); initializeJobConfiguration(conf);//from w ww . j a v a 2 s . com FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); AvroInputFormat<GraphNodeData> input_format = new AvroInputFormat<GraphNodeData>(); conf.setInputFormat(input_format.getClass()); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); // Make it mapper only. conf.setNumReduceTasks(0); conf.setMapperClass(GraphToFastqMapper.class); if (stage_options.containsKey("writeconfig")) { writeJobConfig(conf); } else { // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { // TODO(jlewi): We should only delete an existing directory // if explicitly told to do so. sLogger.info("Deleting output path: " + out_path.toString() + " " + "because it already exists."); FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) ((endtime - starttime) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; } return null; }
From source file:Corrector.Config.java
License:Apache License
public static void initializeConfiguration(JobConf conf) { validateConfiguration();// w w w . java2s . co m conf.setNumMapTasks(HADOOP_MAPPERS); conf.setNumReduceTasks(HADOOP_REDUCERS); conf.set("mapred.child.java.opts", HADOOP_JAVAOPTS); conf.set("mapred.task.timeout", Long.toString(HADOOP_TIMEOUT)); conf.setLong("LOCALNODES", HADOOP_LOCALNODES); conf.setLong("RANDOM_PASS", RANDOM_PASS); conf.setLong("UP_KMER", UP_KMER); conf.setLong("LOW_KMER", LOW_KMER); conf.setLong("K", K); conf.setLong("READLENGTH", READLEN); }
From source file:Corrector.Graph2Fasta.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Graph2Fasta [7/7]"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Graph2Fasta.class); conf.setJobName("Graph2Fasta " + inputPath); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Graph2FastaMapper.class); Config.initializeConfiguration(conf); conf.setNumReduceTasks(0); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.Graph2Sfa.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Graph2Sfa [0/7]"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Graph2Sfa.class); conf.setJobName("Graph2Sfa " + inputPath); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Graph2SfaMapper.class); Config.initializeConfiguration(conf); conf.setNumReduceTasks(0); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.PreProcessReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: PreProcessReads"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(PreProcessReads.class); conf.setJobName("PreProcessReads " + inputPath + " " + Config.K); Config.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(PreProcessReadsMapper.class); //conf.setReducerClass(PreProcessReadsReducer.class); conf.setNumReduceTasks(0); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Runs a map-only Avro job whose input, map-output and output schemas are all the schema
 * parsed from the given schema file.
 *
 * @param args exactly three arguments: input path, output path and schema file
 * @return {@code -1} after printing usage when the argument count is not three,
 *     otherwise {@code 0} once {@code JobClient.runJob} has returned
 * @throws Exception if the schema cannot be parsed or the job fails
 */
@Override
public int run(String[] args) throws Exception {
  boolean wrongArgCount = args.length != 3;
  if (wrongArgCount) {
    System.err.printf("Usage: %s [generic options] <input> <output> <schema-file>\n",
        getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  String inputArg = args[0];
  String outputArg = args[1];
  String schemaArg = args[2];

  JobConf jobConf = new JobConf(getConf(), getClass());
  jobConf.setJobName("Avro projection");

  FileInputFormat.addInputPath(jobConf, new Path(inputArg));
  FileOutputFormat.setOutputPath(jobConf, new Path(outputArg));

  // One schema is used for reading, for the map output and for the final output.
  File schemaSource = new File(schemaArg);
  Schema projection = new Schema.Parser().parse(schemaSource);
  AvroJob.setInputSchema(jobConf, projection);
  AvroJob.setMapOutputSchema(jobConf, projection);
  AvroJob.setOutputSchema(jobConf, projection);

  // No reduce phase.
  jobConf.setNumReduceTasks(0);

  JobClient.runJob(jobConf);
  return 0;
}