List of usage examples for org.apache.hadoop.mapred.JobConf.setMapperClass
public void setMapperClass(Class<? extends Mapper> theClass)
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java
License:Apache License
@Override public void configure(JobConf job) { String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS); if (extractorConfigurationFiles == null) { extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ','); System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n", SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles); job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles); }/* w w w .ja v a 2s. c om*/ try { String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(","); for (int i = 0; i < extractorConfigurationFilesArr.length; i++) DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job); } catch (IOException e) { e.printStackTrace(); } Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class); job.setMapperClass(JoBimMapper.class); job.setReducerClass(JoBimReducer.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); job.setMemoryForMapTask(4096); job.setMemoryForReduceTask(4096); job.set("mapred.child.java.opts", "-Xmx4096m"); job.setNumReduceTasks(1); // reset to default }
From source file:de.tudarmstadt.lt.n2n.hadoop.RelationToOneHoleTransformerJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), RelationToOneHoleTransformerJob.class); conf.setJobName(RelationToOneHoleTransformerJob.class.getSimpleName()); args = new GenericOptionsParser(conf, args).getRemainingArgs(); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(RelationToOneHoleTransformerJob.Map.class); conf.setNumReduceTasks(0);//from w w w .java2 s . c o m // conf.setReducerClass(IdentityReducer.class); // sort or no sort? - that is here the question conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
From source file:de.tudarmstadt.lt.n2n.hadoop.RemoveExactDuplicatesJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), RemoveExactDuplicatesJob.class); conf.setJobName(RemoveExactDuplicatesJob.class.getSimpleName()); conf.setMapperClass(LineMapper.class); conf.setReducerClass(KeyReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); // delete output path for testing purposes // FileSystem.get(conf).delete(new Path(args[1]), true); JobClient.runJob(conf);/*from www . j av a2 s . co m*/ return 0; }
From source file:de.tudarmstadt.lt.nlkg.ConvertInvertSVO.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), ConvertInvertSVO.class); conf.setJobName(ConvertInvertSVO.class.getSimpleName()); conf.setMapperClass(ConversionMapper.class); conf.setCombinerClass(IdentityReducer.class); conf.setReducerClass(IdentityReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ConvertedWritable.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);// w w w .jav a 2 s. c o m return 0; }
From source file:de.tudarmstadt.lt.nlkg.ConvertSVO.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), ConvertSVO.class); conf.setJobName(ConvertSVO.class.getSimpleName()); conf.setMapperClass(ConversionMapper.class); conf.setCombinerClass(IdentityReducer.class); conf.setReducerClass(IdentityReducer.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ConvertedWritable.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w.j a v a 2 s . co m*/ return 0; }
From source file:dinocode.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception { JobClient client = new JobClient(); JobConf conf = new JobConf(SpeciesDriver.class); conf.setJobName("Page-rank Species Graph Builder"); final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath()); String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt"; String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); //conf.setOutputKeyClass(Text.class); //conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapperd.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); conf.setReducerClass(SpeciesGraphBuilderReducerd.class); //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); //conf.setInputPath(new Path("graph1")); //conf.setOutputPath(new Path("graph2")); // take the input and output from the command line FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); client.setConf(conf);/*ww w. java2s. 
com*/ try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000"; for (int i = 0; i < 500; i++) { client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Iter"); int count = i + 1; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count; conf.setNumReduceTasks(5); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesIterMapper2d.class); conf.setReducerClass(SpeciesIterReducer2d.class); conf.setCombinerClass(SpeciesIterReducer2d.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = outFiles; } //Viewer client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Viewer"); conf.setOutputKeyClass(FloatWritable.class); conf.setOutputValueClass(Text.class); inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000"; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesViewerMapperd.class); conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } }
From source file:drivers.CalculatePageRank.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, this.getClass()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("Fiqie|Calculate"); job.setMapperClass(CalculatePageRank1Mapper.class); job.setReducerClass(CalculatePageRank1Reducer.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); JobClient.runJob(job);/* w w w . j a v a 2 s . com*/ return 0; }
From source file:drivers.FinishPageRank.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, this.getClass()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("Fiqie|Finish"); job.setOutputKeyComparatorClass(DecreasingComparator.class); job.setMapperClass(SortingPageRankMapper.class); job.setReducerClass(SortingPageRankReducer.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); JobClient.runJob(job);//from w w w . j a v a2s .c o m return 0; }
From source file:drivers.InitPageRank.java
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); JobConf job = new JobConf(conf, this.getClass()); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("Fiqie|Init"); job.setMapperClass(InitPageRankMapper.class); job.setReducerClass(InitPageRankReducer.class); job.setInputFormat(KeyValueTextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); JobClient.runJob(job);// w w w . j av a 2 s . co m return 0; }
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public void runCombine() throws Exception { if (this.last_job == null) { throw new NullPointerException("ERROR: Last job is Null"); }/*from w w w .j a v a 2s. c o m*/ JobConf job = new JobConf(this.conf, this.benchmarkClass); job.setJobName((this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName()) + ".combine"); job.setMapperClass(IdentityMapper.class); job.setNumMapTasks(0); job.setReducerClass(IdentityReducer.class); job.setNumReduceTasks(1); // this is needed to get a single output file // Input FileInputFormat.setInputPaths(job, FileOutputFormat.getOutputPath(this.last_job)); job.setInputFormat(KeyValueTextInputFormat.class); // Output FileOutputFormat.setOutputPath(job, new Path(FileOutputFormat.getOutputPath(this.last_job).toString() + "/combine")); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); JobConf real_last_job = this.last_job; this.runJob(job); this.last_job = real_last_job; return; }