List of usage examples for org.apache.hadoop.mapred.JobConf.setJobName
public void setJobName(String name)
From source file:contrail.correct.InvokeQuakeForMatePairs.java
License:Apache License
public RunningJob runJob() throws Exception { JobConf conf = new JobConf(InvokeQuakeForMatePairs.class); conf.setJobName("Quake Paired invocation"); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); String quakePath = (String) stage_options.get("quake_binary"); String bithashPath = (String) stage_options.get("bithashpath"); //User wants to run quake if (quakePath.length() != 0) { DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf); } else {//from ww w. j a va 2 s . c o m throw new Exception("Please specify Quake binary path"); } if (bithashPath.length() != 0) { DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf); } else { throw new Exception("Please specify bithash path"); } //Sets the parameters in JobConf initializeJobConfiguration(conf); AvroJob.setMapperClass(conf, RunQuakeMapper.class); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); ///Input MatePair read = new MatePair(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new fastqrecord().getSchema()); //Map Only Job conf.setNumReduceTasks(0); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; }
From source file:contrail.correct.InvokeQuakeForSingles.java
License:Apache License
public RunningJob runJob() throws Exception { JobConf conf = new JobConf(InvokeQuakeForSingles.class); conf.setJobName("Quake Singles invocation"); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); String quakePath = (String) stage_options.get("quake_binary"); String bithashPath = (String) stage_options.get("bithashpath"); //User wants to run quake if (quakePath.length() != 0) { DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf); } else {// w ww .ja v a2 s .c om throw new Exception("Please specify Quake path"); } if (bithashPath.length() != 0) { DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf); } else { throw new Exception("Please specify bithash path"); } //Sets the parameters in JobConf initializeJobConfiguration(conf); AvroJob.setMapperClass(conf, RunQuakeMapper.class); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); ///Input fastqrecord read = new fastqrecord(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new fastqrecord().getSchema()); //Map Only Job conf.setNumReduceTasks(0); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob result = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return result; }
From source file:contrail.correct.KmerCounter.java
License:Open Source License
public RunningJob runJob() throws Exception { String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); JobConf conf = new JobConf(KmerCounter.class); conf.setJobName("Kmer Counter "); initializeJobConfiguration(conf);//from w w w .j a v a2s. c om FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); fastqrecord read = new fastqrecord(); AvroJob.setInputSchema(conf, read.getSchema()); AvroJob.setOutputSchema(conf, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(conf, KmerCounterMapper.class); AvroJob.setReducerClass(conf, KmerCounterReducer.class); // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob run = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) (((float) (endtime - starttime)) / 1000.0); System.out.println("Runtime: " + diff + " s"); return run; }
From source file:contrail.stages.GraphStats.java
License:Open Source License
@Override public RunningJob runJob() throws Exception { String[] required_args = { "inputpath", "outputpath" }; checkHasParametersOrDie(required_args); String inputPath = (String) stage_options.get("inputpath"); String outputPath = (String) stage_options.get("outputpath"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); Configuration base_conf = getConf(); JobConf conf = null; if (base_conf != null) { conf = new JobConf(getConf(), PairMergeAvro.class); } else {//from w w w .ja v a 2s. c om conf = new JobConf(PairMergeAvro.class); } conf.setJobName("GraphStats " + inputPath); initializeJobConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); Pair<Integer, GraphStatsData> mapOutput = new Pair<Integer, GraphStatsData>(0, new GraphStatsData()); GraphNodeData nodeData = new GraphNodeData(); AvroJob.setInputSchema(conf, nodeData.getSchema()); AvroJob.setMapOutputSchema(conf, mapOutput.getSchema()); AvroJob.setOutputSchema(conf, mapOutput.value().getSchema()); AvroJob.setMapperClass(conf, GraphStatsMapper.class); AvroJob.setCombinerClass(conf, GraphStatsCombiner.class); AvroJob.setReducerClass(conf, GraphStatsReducer.class); // Use a single reducer task that we accumulate all the stats in one // reducer. conf.setNumReduceTasks(1); if (stage_options.containsKey("writeconfig")) { writeJobConfig(conf); } else { // Delete the output directory if it exists already Path out_path = new Path(outputPath); if (FileSystem.get(conf).exists(out_path)) { // TODO(jlewi): We should only delete an existing directory // if explicitly told to do so. 
sLogger.info("Deleting output path: " + out_path.toString() + " " + "because it already exists."); FileSystem.get(conf).delete(out_path, true); } long starttime = System.currentTimeMillis(); RunningJob job = JobClient.runJob(conf); long endtime = System.currentTimeMillis(); float diff = (float) ((endtime - starttime) / 1000.0); System.out.println("Runtime: " + diff + " s"); // Create iterators to read the output Iterator<GraphStatsData> binsIterator = createOutputIterator(); // Compute the N50 stats for each bin. ArrayList<GraphN50StatsData> N50Stats = computeN50Stats(binsIterator); // Write the N50 stats to a file. writeN50StatsToFile(N50Stats); Integer topn_contigs = (Integer) stage_options.get("topn_contigs"); if (topn_contigs > 0) { // Get the lengths of the n contigs. binsIterator = createOutputIterator(); List<Integer> topN = topNContigs(binsIterator, topn_contigs); writeTopNContigs(topN); } return job; } return null; }
From source file:Corrector.Correction.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Correction [0/7]"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Correction.class); conf.setJobName("Correction " + inputPath + " " + Config.K); Config.initializeConfiguration(conf); FileInputFormat.addInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(CorrectionMapper.class); conf.setReducerClass(CorrectionReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.FindError.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, int idx, String hkmerlist) throws Exception { sLogger.info("Tool name: FindError"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(FindError.class); conf.setJobName("FindError " + inputPath + " " + Config.K); conf.setLong("IDX", idx); //\\/* w w w . ja v a2 s.c om*/ DistributedCache.addCacheFile(new URI(hkmerlist), conf); //\\ Config.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(FindErrorMapper.class); conf.setReducerClass(FindErrorReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.Graph2Fasta.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Graph2Fasta [7/7]"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Graph2Fasta.class); conf.setJobName("Graph2Fasta " + inputPath); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Graph2FastaMapper.class); Config.initializeConfiguration(conf); conf.setNumReduceTasks(0);/*from ww w . j a v a2 s.c o m*/ //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.Graph2Sfa.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Graph2Sfa [0/7]"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Graph2Sfa.class); conf.setJobName("Graph2Sfa " + inputPath); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Graph2SfaMapper.class); Config.initializeConfiguration(conf); conf.setNumReduceTasks(0);//from w ww . j ava 2 s . c o m //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.IdentifyTrustedReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, long kmer_threshold) throws Exception { sLogger.info("Tool name: IdentifyTrustedReads"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(IdentifyTrustedReads.class); conf.setJobName("IdentifyTrustedReads " + inputPath + " " + Config.K); conf.setLong("KmerThreshold", kmer_threshold); // conf.setLong("AllKmer", allkmer); Config.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(IdentifyTrustedReads.IdentifyTrustedReadsMapper.class); conf.setReducerClass(IdentifyTrustedReads.IdentifyTrustedReadsReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Corrector.KmerFrequencyOfReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: KmerFrequencyOfReads"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(KmerFrequencyOfReads.class); conf.setJobName("KmerFrequencyOfReads " + inputPath + " " + Config.K); Config.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); //conf.setMapOutputValueClass(IntWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(KmerFrequencyOfReads.KmerFrequencyOfReadsMapper.class); conf.setReducerClass(KmerFrequencyOfReads.KmerFrequencyOfReadsReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }