List of usage examples for org.apache.hadoop.mapred.JobConf.setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
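Every example below follows the same basic recipe: build a JobConf, choose the OutputFormat implementation with setOutputFormat, declare output key/value classes that match what the reducer emits, set the input and output paths, and submit the job. The following is a minimal, self-contained sketch of that recipe using the old org.apache.hadoop.mapred API; the class name SetOutputFormatSketch and the command-line paths are illustrative placeholders, not taken from any of the sources listed below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetOutputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetOutputFormatSketch.class);
        conf.setJobName("setOutputFormat-sketch");

        // Identity mapper/reducer: records pass through unchanged, so with
        // TextInputFormat the keys are byte offsets and the values are lines.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // setOutputFormat selects the OutputFormat that writes the final
        // (key, value) pairs; TextOutputFormat emits "key<TAB>value" lines.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // The declared key/value classes must match what the reducer emits.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Hypothetical input/output paths taken from the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}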
From source file:babel.prep.merge.PageMerger.java
License:Apache License
/** Configures a reduce-only page merge job. */
protected JobConf createJobConf(String crawlDir, String pagesSubDirOne, String pagesSubDirTwo)
        throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("merge pages in " + pagesSubDirOne + " and " + pagesSubDirTwo);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setReducerClass(PageMergeReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirOne));
    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirTwo));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.merge." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}
From source file:boa.datagen.SeqSort.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println(inPath);

    JobConf jobConf = new JobConf(getConf(), SeqSort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(BytesWritable.class);

    SequenceFileOutputFormat.setCompressOutput(jobConf, true);
    SequenceFileOutputFormat.setOutputCompressorClass(jobConf, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(jobConf, inPath);
    FileOutputFormat.setOutputPath(jobConf, new Path(outPath));

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:br.eti.kinoshita.hadoop.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJarByClass(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca."));
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IndexerMain.class);
    conf.setJobName("indexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IndexerMap.class);
    conf.setCombinerClass(IndexerReduce.class);
    conf.setReducerClass(IndexerReduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:Brush.AdjustMateEdge.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, long reads, long ctg_sum) throws Exception {
    sLogger.info("Tool name: AdjustMateEdge");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(AdjustMateEdge.class);
    conf.setJobName("AdjustMateEdge " + inputPath);
    conf.setLong("READS", reads);
    conf.setLong("CTG_SUM", ctg_sum);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AdjustMateEdgeMapper.class);
    conf.setReducerClass(AdjustMateEdgeReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.Compressible.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Compressible");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(Compressible.class);
    conf.setJobName("Compressible " + inputPath);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CompressibleMapper.class);
    conf.setReducerClass(CompressibleReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.CountBraid.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: CountBraid");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(CountBraid.class);
    conf.setJobName("CountBraid " + inputPath + " " + BrushConfig.K);
    //conf.setFloat("Error_Rate", ErrorRate);
    //conf.setFloat("Exp_Cov", Exp_Cov);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CountBraidMapper.class);
    conf.setReducerClass(CountBraidReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.CountKmer.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: CountKmer");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(CountKmer.class);
    conf.setJobName("CountKmer " + inputPath + " " + BrushConfig.K);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    //conf.setMapOutputValueClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    //conf.setBoolean("mapred.output.compress", true);

    conf.setMapperClass(CountKmerMapper.class);
    conf.setReducerClass(CountKmerReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.CountReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: CountReads");
    sLogger.info(" - input: " + inputPath);

    JobConf conf = new JobConf(CountReads.class);
    conf.setJobName("CountReads " + inputPath + " " + BrushConfig.K);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CountReadsMapper.class);
    //conf.setReducerClass(CountReadsReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.CutChimericLinks.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: CutChimericLinks");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(CutChimericLinks.class);
    conf.setJobName("CutChimericLinks " + inputPath + " " + BrushConfig.K);
    //conf.setFloat("Error_Rate", ErrorRate);
    //conf.setFloat("Exp_Cov", Exp_Cov);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CutChimericLinksMapper.class);
    conf.setReducerClass(CutChimericLinksReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}