List of usage examples for org.apache.hadoop.mapred.JobConf.setJobName
public void setJobName(String name)
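Every example below follows the same old-API (org.apache.hadoop.mapred) pattern: build a JobConf, label it with setJobName, configure the input/output and mapper/reducer classes, then submit it with JobClient.runJob. As a baseline, here is a minimal sketch of that pattern; it is illustrative only and not taken from any of the source files below (the JobNameDemo class and the args[0]/args[1] path arguments are placeholders):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class JobNameDemo {
    public static void main(String[] args) throws IOException {
        // Tie the configuration to the job jar via a class it contains.
        JobConf job = new JobConf(JobNameDemo.class);

        // setJobName attaches a human-readable label; it does not affect
        // execution, but it is what the JobTracker UI and logs display.
        job.setJobName("pass-through copy of " + args[0]);

        // With the default IdentityMapper/IdentityReducer, the default
        // TextInputFormat emits <LongWritable, Text>, so declare those
        // as the output types to keep the pass-through job consistent.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job); // blocks until the job finishes
    }
}

The job name is purely a label: it changes nothing about execution, but it is what appears in the JobTracker UI and the job history. That is why several of the examples below embed a timestamp or a dataset name in it.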
From source file: hibench.DataGenerator.java
License: Apache License

public void createHtmlPages(Path dummy, HtmlConf html) throws IOException {
    LOG.info("Creating Html Pages...");

    Path fout = new Path(dummy.getParent(), "tmp");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create html pages to " + fout.getName();
    job.setJobName(jobname);

    html.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(CreateHtmlPagesMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy);

    // first create result files under tmp folder
    FileOutputFormat.setOutputPath(job, fout);

    // begin from dummy file
    job.setInputFormat(NLineInputFormat.class);

    // use MultipleTextOutputFormat to produce three out files defined
    // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE
    job.setOutputFormat(HtmlMultipleTextOutputFormat.class);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + dummy);
    LOG.info("Multiple result Html files as <links, words, urls>");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    // Move result files under tmp into parent path
    // and remove the empty tmp path finally
    DataPaths.moveFilesToParent(fout);
}
From source file: hibench.HiveDataGenerator.java
License: Apache License

private void createRankingsTable() throws IOException {
    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";
    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }
    // job.setNumReduceTasks(options.agents/2);

    /***
     * Join the result with the LINK table so that url ids
     * are replaced with real contents.
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Output file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}
From source file: hibench.HiveDataGenerator.java
License: Apache License

private void createUserVisitsTable() throws IOException, URISyntaxException {
    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /***
     * Set distributed cache files for table generation;
     * the cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */
    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }
    // job.setNumReduceTasks(options.agents/2);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Output file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}
From source file: hibench.PageRankDataGenerator.java
License: Apache License

private void createPageRankNodes() throws IOException {
    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";
    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes, to keep the pagerank
         * benchmark run free of potential data skew.
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        // job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);
        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}
From source file: hibench.PageRankDataGenerator.java
License: Apache License

/***
 * Create pagerank edge table, output link A->B as <A, B> pairs
 * @throws IOException
 */
private void createPageRankLinks() throws IOException {
    LOG.info("Creating PageRank links", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank links";
    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.T_LINK_PAGE));
    job.setInputFormat(TextInputFormat.class);

    job.setMapperClass(OutputLinkEdgesMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.EDGES));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Edges file " + paths.getResult(DataPaths.EDGES) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.EDGES));
}
From source file: hitune.analysis.mapreduce.processor.HadoopMetrics.java
License: Apache License

@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        // The map output key/value types are configured by class name
        // and resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.HistoryLog.java
License: Apache License

public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // The map output key/value types are configured by class name
        // and resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.InstrumentDataflow.java
License: Apache License

@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // The map output key/value types are configured by class name
        // and resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            //FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
            //conf.setNumReduceTasks(1);
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.InstrumentSamplingTop.java
License: Apache License

@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    try {
        JobConf conf = new JobConf(this.conf, InstrumentSamplingTop.class);
        conf.setJobName(this.getClass().getSimpleName() + "_1_" + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentSamplingTop.MapClass.class);
        conf.setReducerClass(InstrumentSamplingTop.ReduceClass.class);

        // The map output key/value types are configured by class name
        // and resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(outputKeyClass);
        conf.setOutputValueClass(outputValueClass);

        conf.setOutputFormat(SequenceFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(outputPaths + "_1_" + timestamp));
            try {
                // first job
                JobClient.runJob(conf);

                JobConf secondconf = new JobConf(this.conf, InstrumentSamplingTop.class);
                secondconf.setJobName(this.getClass().getSimpleName() + "_2_" + timestamp);

                secondconf.setInputFormat(SequenceFileInputFormat.class);
                secondconf.setMapperClass(IdentityMapper.class);
                secondconf.setReducerClass(InstrumentSamplingTop.TopClass.class);

                secondconf.setMapOutputKeyClass(outputKeyClass);
                secondconf.setMapOutputValueClass(outputValueClass);
                secondconf.setOutputKeyClass(Text.class);
                secondconf.setOutputValueClass(TextArrayWritable.class);
                secondconf.setOutputFormat(CSVFileOutputFormat.class);

                FileInputFormat.setInputPaths(secondconf, outputPaths + "_1_" + timestamp);
                FileOutputFormat.setOutputPath(secondconf, new Path(temp_outputPaths));

                // second job to get ranking list
                JobClient.runJob(secondconf);
                moveResults(secondconf, outputPaths, temp_outputPaths);

                // remove the intermediate output of the first job
                Path temp = new Path(outputPaths + "_1_" + timestamp);
                temp.getFileSystem(conf).delete(temp, true);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.SystemLog.java
License: Apache License

@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, SystemLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(SystemLog.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        // The map output key/value types are configured by class name
        // and resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}