Usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
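setMapOutputValueClass sets the value class for the map output data. It is needed whenever the intermediate values emitted by the mappers have a different type than the job's final output values; if it is never called, the framework falls back to the class given to setOutputValueClass. A minimal sketch of the typical pairing follows. WordCountMapper and WordCountReducer are hypothetical placeholder classes, assumed to emit <Text, IntWritable> from the map phase and <Text, Text> from the reduce phase:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class MapOutputValueClassExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(MapOutputValueClassExample.class);
        job.setJobName("setMapOutputValueClass example");

        // Hypothetical mapper/reducer: the mapper emits <Text, IntWritable>,
        // the reducer emits <Text, Text>, so the intermediate value class
        // must be declared explicitly.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class); // intermediate values

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);           // final reduce output

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}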
From source file: hibench.PageRankDataGenerator.java
License: Apache License

private void createPageRankNodes() throws IOException {
    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";
    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /*
         * Balance the output order of nodes to prevent potential
         * data skew when running the PageRank benchmark.
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        // job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);
        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}
From source file: hitune.analysis.mapreduce.processor.HadoopMetrics.java
License: Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.HistoryLog.java
License: Apache License

public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.InstrumentDataflow.java
License: Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            // FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
            // conf.setNumReduceTasks(1);
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.InstrumentSamplingTop.java
License: Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    try {
        JobConf conf = new JobConf(this.conf, InstrumentSamplingTop.class);
        conf.setJobName(this.getClass().getSimpleName() + "_1_" + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentSamplingTop.MapClass.class);
        conf.setReducerClass(InstrumentSamplingTop.ReduceClass.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(outputKeyClass);
        conf.setOutputValueClass(outputValueClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(outputPaths + "_1_" + timestamp));
            try {
                // first job
                JobClient.runJob(conf);

                JobConf secondconf = new JobConf(this.conf, InstrumentSamplingTop.class);
                secondconf.setJobName(this.getClass().getSimpleName() + "_2_" + timestamp);
                secondconf.setInputFormat(SequenceFileInputFormat.class);
                secondconf.setMapperClass(IdentityMapper.class);
                secondconf.setReducerClass(InstrumentSamplingTop.TopClass.class);
                secondconf.setMapOutputKeyClass(outputKeyClass);
                secondconf.setMapOutputValueClass(outputValueClass);
                secondconf.setOutputKeyClass(Text.class);
                secondconf.setOutputValueClass(TextArrayWritable.class);
                secondconf.setOutputFormat(CSVFileOutputFormat.class);
                FileInputFormat.setInputPaths(secondconf, outputPaths + "_1_" + timestamp);
                FileOutputFormat.setOutputPath(secondconf, new Path(temp_outputPaths));

                // second job to get ranking list
                JobClient.runJob(secondconf);

                moveResults(secondconf, outputPaths, temp_outputPaths);
                Path temp = new Path(outputPaths + "_1_" + timestamp);
                temp.getFileSystem(conf).delete(temp);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
From source file: hitune.analysis.mapreduce.processor.SystemLog.java
License: Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, SystemLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(SystemLog.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
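The five HiTune processors above share a pattern worth noting: the intermediate key/value classes are not hard-coded but resolved by name from the job configuration, which lets one job skeleton serve differently typed analyses. A distilled sketch of that pattern, using hypothetical property names in place of the AnalysisProcessorConfiguration constants:

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;

public class ReflectiveMapOutputTypes {
    // Resolve the intermediate key/value classes from configuration at
    // job-setup time. The property names below are hypothetical stand-ins
    // for the AnalysisProcessorConfiguration constants used above.
    static void configure(JobConf conf) throws ClassNotFoundException {
        Class<? extends WritableComparable> keyClass = Class
                .forName(conf.get("example.mapoutput.key.class"))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> valueClass = Class
                .forName(conf.get("example.mapoutput.value.class"))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(keyClass);
        conf.setMapOutputValueClass(valueClass);
    }
}

asSubclass guards the cast: if the configured class name does not implement the required interface, the failure surfaces at job setup rather than at shuffle time.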
From source file: io.aos.t4f.hadoop.mapred.WordCountTest.java
License: Apache License

private JobConf createJobConf() {
    JobConf conf = mrCluster.createJobConf();
    conf.setJobName("wordcount test");

    conf.setMapperClass(WordCountMapper.class);
    conf.setReducerClass(WordCountReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);
    return conf;
}
From source file: io.fluo.stress.trie.Unique.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        log.error("Usage: " + this.getClass().getSimpleName() + " <input dir> { <input dir>}");
        System.exit(-1);
    }

    JobConf job = new JobConf(getConf());
    job.setJobName(Unique.class.getName());
    job.setJarByClass(Unique.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    for (String arg : args) {
        SequenceFileInputFormat.addInputPath(job, new Path(arg));
    }

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(UniqueReducer.class);
    job.setOutputFormat(NullOutputFormat.class);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    numUnique = (int) runningJob.getCounters().getCounter(Stats.UNIQUE);
    log.debug("numUnique : " + numUnique);
    return runningJob.isSuccessful() ? 0 : -1;
}
From source file: ivory.core.preprocess.BuildTargetLangWeightedIntDocVectors.java
License: Apache License

@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    // sLogger.setLevel(Level.DEBUG);
    sLogger.info("PowerTool: GetTargetLangWeightedIntDocVectors");

    JobConf conf = new JobConf(BuildTargetLangWeightedIntDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = getConf().get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String outputPath = env.getWeightedIntDocVectorsDirectory();
    int mapTasks = getConf().getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = getConf().getInt("Ivory.MinSplitSize", 0);
    String collectionName = getConf().get("Ivory.CollectionName");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - MinSplitSize: " + minSplitSize);

    String vocabFile = getConf().get("Ivory.FinalVocab");
    DistributedCache.addCacheFile(new URI(vocabFile), conf);

    Path inputPath = new Path(PwsimEnvironment.getFileNameWithPars(indexPath, "TermDocs"));
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        sLogger.info("Output path already exists!");
        return -1;
    }

    conf.setJobName("GetWeightedIntDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setBoolean("Ivory.Normalize", getConf().getBoolean("Ivory.Normalize", false));

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WeightedIntDocVector.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WeightedIntDocVector.class);
    conf.setMapperClass(MyMapper.class);

    long startTime = System.currentTimeMillis();
    RunningJob rj = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    Counters counters = rj.getCounters();
    long numOfDocs = (long) counters.findCounter(Docs.Total).getCounter();
    return (int) numOfDocs;
}
From source file: ivory.core.preprocess.BuildWeightedIntDocVectors.java
License: Apache License

@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    sLogger.setLevel(Level.WARN);
    sLogger.info("PowerTool: GetWeightedIntDocVectors");

    // Create a new JobConf, inheriting from the configuration of this PowerTool.
    JobConf conf = new JobConf(getConf(), BuildWeightedIntDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String outputPath = env.getWeightedIntDocVectorsDirectory();
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = conf.getInt("Ivory.MinSplitSize", 0);
    String collectionName = conf.get("Ivory.CollectionName");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - MinSplitSize: " + minSplitSize);

    String dfByIntFilePath = env.getDfByIntData();
    String cfByIntFilePath = env.getCfByIntData();

    /* add df table to cache */
    if (!fs.exists(new Path(dfByIntFilePath))) {
        throw new RuntimeException("Error, df data file " + dfByIntFilePath + " doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(dfByIntFilePath), conf);

    /* add cf table to cache */
    if (!fs.exists(new Path(cfByIntFilePath))) {
        throw new RuntimeException("Error, cf data file " + cfByIntFilePath + " doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(cfByIntFilePath), conf);

    /* add dl table to cache */
    Path docLengthFile = env.getDoclengthsData();
    if (!fs.exists(docLengthFile)) {
        throw new RuntimeException("Error, doc-length data file " + docLengthFile + " doesn't exist!");
    }
    DistributedCache.addCacheFile(docLengthFile.toUri(), conf);

    Path inputPath = new Path(env.getIntDocVectorsDirectory());
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        sLogger.info("Output path already exists!");
        return 0;
    }
    // fs.delete(weightedVectorsPath, true);

    conf.setJobName("GetWeightedIntDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WeightedIntDocVector.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WeightedIntDocVector.class);
    conf.setMapperClass(MyMapper.class);
    // conf.setInt("mapred.task.timeout", 3600000);

    long startTime = System.currentTimeMillis();
    RunningJob job = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}