List of usage examples for org.apache.hadoop.mapred.JobConf
Constructor under discussion: public JobConf(boolean loadDefaults)
From source file:Brush.RedundantRemoval.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: RedundantRemoval"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(RedundantRemoval.class); conf.setJobName("RedundantRemoval " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(RedundantRemovalMapper.class); conf.setReducerClass(RedundantRemovalReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.RemoveLowCoverage.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: RemoveLowCoverage"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(RemoveLowCoverage.class); conf.setJobName("RemoveLowCoverage " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(RemoveLowCoverageMapper.class); conf.setReducerClass(RemoveLowCoverageReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.Stats.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: Stats"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(Stats.class); conf.setJobName("Stats " + inputPath); BrushConfig.initializeConfiguration(conf); conf.setNumReduceTasks(1);//from w ww . j ava 2 s . com FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(StatsMapper.class); conf.setReducerClass(StatsReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.TipsRemoval.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: TipsRemoval"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); JobConf conf = new JobConf(TipsRemoval.class); conf.setJobName("RemoveTips " + inputPath + " " + BrushConfig.K); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(TipsRemovalMapper.class); conf.setReducerClass(TipsRemovalReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.TransitiveReduction.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: TransitiveReduction"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); //JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(TransitiveReduction.class); conf.setJobName("TransitiveReduction " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); conf.setMapperClass(TransitiveReductionMapper.class); conf.setReducerClass(TransitiveReductionReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:Brush.VerifyOverlap.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception { sLogger.info("Tool name: VerifyOverlap"); sLogger.info(" - input: " + inputPath); sLogger.info(" - output: " + outputPath); //JobConf conf = new JobConf(Stats.class); JobConf conf = new JobConf(VerifyOverlap.class); conf.setJobName("VerifyOverlap " + inputPath); BrushConfig.initializeConfiguration(conf); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //conf.setBoolean("mapred.output.compress", true); //conf.setClass("mapred.output.compression.codec", GzipCodec.class,CompressionCodec.class); conf.setMapperClass(VerifyOverlapMapper.class); conf.setReducerClass(VerifyOverlapReducer.class); //delete the output directory if it exists already FileSystem.get(conf).delete(new Path(outputPath), true); return JobClient.runJob(conf); }
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRGraphBuilder(String args[], int iterCnt) { Job theJob = null;//from w w w.ja v a2 s .c o m conf = new JobConf(SpeciesIterDriver2.class); conf.setJobName("Species Graph Builder"); conf.setNumReduceTasks(5); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapper.class); conf.setReducerClass(SpeciesGraphBuilderReducer.class); // Reading in XML. conf.setInputFormat(StreamInputFormat.class); conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); // Look for the <page> record in the XML. conf.set("stream.recordreader.begin", "<page>"); conf.set("stream.recordreader.end", "</page>"); inputpath = args[0]; outputpath = args[1] + iterCnt; FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); try { theJob = new Job(conf, "SpeciesIter"); theJob.submit(); } catch (Exception e) { e.printStackTrace(); } try { if (theJob != null) { theJob.waitForCompletion(true); } } catch (Exception e) { e.printStackTrace(); } return true; }
From source file:BU.MET.CS755.SpeciesIterDriver2.java
/**
 * Iteratively runs the species-rank MapReduce job until the rank values
 * stabilize (plus {@code numExtraIterations} extra passes).
 *
 * <p>Each pass reads {@code args[1] + iterCnt} and writes
 * {@code args[1] + (iterCnt + 1)}; convergence is detected via the
 * ITERATIONS_NEEDED job counter set by the reducer. Mutates the shared
 * static fields {@code conf}, {@code inputpath}, {@code outputpath} and
 * {@code numExtraIterations}.
 *
 * @param args    command-line arguments; args[1] is the path prefix for
 *                iteration input/output directories
 * @param iterCnt starting iteration number (0 or 1 both read prefix + "0")
 * @return always true, regardless of job success
 */
static boolean MRSpeciesRank(String args[], int iterCnt) {
    long newCounterVal = 0;
    long totalLinks = 1; // Initialize to 1 to prevent divide by zero
    long totalIterations = 0;
    Job theJob = null;

    conf = new JobConf(SpeciesIterDriver2.class);
    conf.setJobName("Species Iter");
    conf.setNumReduceTasks(5);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesIterMapper2.class);
    conf.setReducerClass(SpeciesIterReducer2.class);

    boolean nextIterationNeeded = true;

    while (nextIterationNeeded || numExtraIterations != 0) {
        // NOTE(review): declared but never used in this loop body.
        long iterationNumber = 0;

        // The first two iteration numbers both read the "0" directory.
        if ((iterCnt == 0) || (iterCnt == 1)) {
            inputpath = args[1] + "0";
        } else {
            inputpath = args[1] + iterCnt;
        }
        iterCnt++;

        // Pass the iteration number and link total down to the tasks.
        conf.set("iterationNumber", Integer.toString(iterCnt));
        conf.set("totalLinks", Long.toString(totalLinks));

        outputpath = args[1] + iterCnt;

        FileInputFormat.setInputPaths(conf, new Path(inputpath));
        FileOutputFormat.setOutputPath(conf, new Path(outputpath));

        try {
            theJob = new Job(conf, "SpeciesIter");
        } catch (Exception e) {
            e.printStackTrace();
        }

        try {
            if (theJob != null) {
                theJob.waitForCompletion(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        try {
            // NOTE(review): if Job construction above threw, theJob is null
            // here and this dereference raises an NPE (caught by the broad
            // catch below) — confirm whether that is the intended flow.
            if (theJob.isComplete()) {
                Counters jobCtrs = theJob.getCounters();

                if (jobCtrs != null) {
                    newCounterVal = jobCtrs.findCounter(ITERATION_COUNTER.ITERATIONS_NEEDED).getValue();
                }

                // If reducer recorded change in species rank, repeat iteration.
                if ((newCounterVal > 0) || (iterCnt == 1)) {
                    nextIterationNeeded = true;
                } else {
                    nextIterationNeeded = false;
                    numExtraIterations--; // Do one extra iteration
                }

                totalLinks = jobCtrs.findCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS)
                        .getValue();
            }

            totalIterations += 1;

            // NOTE(review): this only logs — it does not break the loop, so
            // a non-converging job keeps iterating past 200.
            if (totalIterations > 200) {
                System.out.println("too many iterations!!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    System.out.println("Total iterations = " + totalIterations);

    return true;
}
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRSpeciesView(String input, String args[]) { Job theJob = null;// w ww . j a v a 2 s. c o m JobConf conf = new JobConf(SpeciesIterDriver2.class); conf.setJobName("Species Viewer"); conf.setOutputKeyClass(FloatWritable.class); conf.setOutputValueClass(Text.class); inputpath = input; outputpath = args[1] + "FinalRanks"; FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); conf.setMapperClass(SpeciesViewerMapper.class); conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class); try { theJob = new Job(conf, "SpeciesIter"); theJob.waitForCompletion(true); } catch (Exception e) { e.printStackTrace(); } return true; }
From source file:buildtestproject.MyFirstMapReduce.java
public static void main(String[] args) throws Exception { //Configuration conf = new Configuration(); JobConf conf = new JobConf(MyFirstMapReduce.class); //Job job = Job.getInstance(conf, "word-count-one"); conf.setJobName("word-count-one"); conf.setMapperClass(TokenizerMapper.class); conf.setCombinerClass(IntSumReducer.class); conf.setReducerClass(IntSumReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // job.setJarByClass(MyFirstMapReduce.class); // job.setMapperClass(TokenizerMapper.class); // job.setCombinerClass(IntSumReducer.class); // job.setReducerClass(IntSumReducer.class); // //www. j av a2 s . c om // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); // FileInputFormat.addInputPath(job, new Path(args[0])); // FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(conf); // System.exit(job.waitForCompletion(true) ? 0 : 1); }