Usage examples for org.apache.hadoop.mapred.JobConf.setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
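For orientation before the examples, a minimal, self-contained driver sketch (the class name MinimalDriver and the job name are illustrative, not taken from the sources below). KeyValueTextInputFormat yields Text/Text pairs, so the default identity mapper and reducer are type-compatible with the Text output key declared via setOutputKeyClass:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MinimalDriver.class);
        conf.setJobName("setOutputKeyClass-minimal");

        // KeyValueTextInputFormat produces Text keys and Text values, so the
        // default identity mapper/reducer match the declared output types.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Declares the key type handed to the OutputFormat; it also serves as
        // the map output key type unless setMapOutputKeyClass overrides it.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}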
From source file:de.tudarmstadt.lt.n2n.hadoop.FilterByVocabularyJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), FilterByVocabularyJob.class);
    conf.setJobName(FilterByVocabularyJob.class.getSimpleName());

    conf.setMapperClass(FilterByVocabularyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    String word_list_file = conf.get(SHARED_CONSTANTS.PARAM_WORD_LIST);
    if (word_list_file == null)
        throw new MissingArgumentException(
                "Please specify word list with '-Dnlkg.filterbywordsfile=<path-to-file-in-hdfs>'.");
    DistributedCache.addFileToClassPath(new Path(word_list_file), conf);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
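FilterByVocabularyMapper itself is not shown in this listing. A hedged sketch of how such a mapper might load the cached word list in configure(), assuming the file was shipped via DistributedCache as in the driver above (the class name and field are hypothetical):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Hypothetical sketch, not the real FilterByVocabularyMapper: load the
// cached word list into a set once per task so map() can filter against it.
public abstract class VocabularyLoadingMapperSketch extends MapReduceBase {
    protected final Set<String> vocabulary = new HashSet<String>();

    @Override
    public void configure(JobConf job) {
        try {
            // Files added with addFileToClassPath are also registered as
            // cache files, so they appear in getLocalCacheFiles.
            Path[] cached = DistributedCache.getLocalCacheFiles(job);
            if (cached == null)
                return;
            for (Path p : cached) {
                BufferedReader in = new BufferedReader(new FileReader(p.toString()));
                try {
                    for (String line = in.readLine(); line != null; line = in.readLine())
                        vocabulary.add(line.trim());
                } finally {
                    in.close();
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("could not load cached word list", e);
        }
    }
}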
From source file:de.tudarmstadt.lt.n2n.hadoop.FlipJoBims.java
License: Apache License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FlipJoBims.class);

    /* begin necessary for UKP cluster */
    conf.setMemoryForMapTask(1000L); // 1 GB, necessary for UKP cdh3
    conf.setMemoryForReduceTask(1000L); // 1 GB, necessary for UKP cdh3
    FileOutputFormat.setCompressOutput(conf, true); // compress output
    FileOutputFormat.setOutputCompressorClass(conf,
            org.apache.hadoop.io.compress.BZip2Codec.class); // use the bzip2 codec for compression
    conf.setCompressMapOutput(true); // compress mapper output
    /* end necessary for UKP cluster */

    conf.setJobName(FlipJoBims.class.getSimpleName());
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(FlipJoBims.Map.class);
    conf.setNumReduceTasks(0);
    // conf.setReducerClass(IdentityReducer.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
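In FlipJoBims above, the map output and job output key classes are both set to Text, which is redundant: the map output key class defaults to the job output key class. The two setters only need to diverge when the mapper and the job emit different key types. A hedged fragment illustrating that case (CompositeKeyWritable is a hypothetical custom key; conf is a JobConf as in the examples above):

// Hypothetical: the mapper emits a composite sort key, while the reducer
// collapses it and writes plain Text keys to the OutputFormat.
conf.setMapOutputKeyClass(CompositeKeyWritable.class); // hypothetical class
conf.setMapOutputValueClass(Text.class);
conf.setOutputKeyClass(Text.class);   // what the reducer emits
conf.setOutputValueClass(Text.class);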
From source file:de.tudarmstadt.lt.n2n.hadoop.GoogleSyntacticsJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob.class);
    conf.setJobName(GoogleSyntacticsJob.class.getSimpleName());

    conf.setMapperClass(GoogleSyntacticsJob3Mapper.class);
    conf.setReducerClass(GoogleSyntacticsJob3Reducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(JoBimFormat.class);
    conf.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    JobClient.runJob(conf);
    return 0;
}
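The example above passes the custom JoBimFormat class to setOutputKeyClass. setOutputKeyClass accepts any class at configuration time, but at runtime the key must be a Writable so the framework can serialize it, and a WritableComparable if it passes through the sort phase. JoBimFormat itself is not part of this listing; a minimal hedged sketch of what such a custom key must implement (PairKey is an illustrative name):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// Illustrative stand-in for a custom key class like JoBimFormat; the real
// implementation is not shown in this listing.
public class PairKey implements WritableComparable<PairKey> {
    private String jo = "";
    private String bim = "";

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(jo);
        out.writeUTF(bim);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        jo = in.readUTF();
        bim = in.readUTF();
    }

    @Override
    public int compareTo(PairKey o) {
        int c = jo.compareTo(o.jo);
        return c != 0 ? c : bim.compareTo(o.bim);
    }

    @Override
    public String toString() {
        // TextOutputFormat writes keys via toString()
        return jo + "\t" + bim;
    }
}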
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob2.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob2.class);
    conf.setJobName(GoogleSyntacticsJob2.class.getSimpleName());

    conf.setMapperClass(GoogleSyntacticsJob2Mapper.class);
    conf.setReducerClass(GoogleSyntacticsJob2Reducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // conf.setMapOutputKeyClass(Text.class);
    // conf.setMapOutputValueClass(NullWritable.class);
    conf.setOutputKeyClass(JoBimFormat.class);
    conf.setOutputValueClass(IntWritable.class);

    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    JobClient.runJob(conf);
    return 0;
}
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java
License: Apache License

@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);

    job.setMapperClass(JoBimMapper.class);
    job.setReducerClass(JoBimReducer.class);

    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMemoryForMapTask(4096);
    job.setMemoryForReduceTask(4096);
    job.set("mapred.child.java.opts", "-Xmx4096m");

    job.setNumReduceTasks(1); // reset to default
}
From source file:de.tudarmstadt.lt.n2n.hadoop.RelationToOneHoleTransformerJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), RelationToOneHoleTransformerJob.class);
    conf.setJobName(RelationToOneHoleTransformerJob.class.getSimpleName());
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(RelationToOneHoleTransformerJob.Map.class);
    conf.setNumReduceTasks(0);
    // conf.setReducerClass(IdentityReducer.class);
    // sort or no sort? - that is here the question

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
From source file:de.tudarmstadt.lt.n2n.hadoop.RemoveExactDuplicatesJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), RemoveExactDuplicatesJob.class);
    conf.setJobName(RemoveExactDuplicatesJob.class.getSimpleName());

    conf.setMapperClass(LineMapper.class);
    conf.setReducerClass(KeyReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(NullWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    JobClient.runJob(conf);
    return 0;
}
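LineMapper and KeyReducer are not shown here, but the Text key / NullWritable value combination suggests the classic deduplication pattern: emit each line as a key so the shuffle groups duplicates, then write each distinct key once. A hedged sketch under that assumption (both implementations are hypothetical):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical sketch of the dedup pattern; the real LineMapper/KeyReducer
// implementations are not part of this listing.
public class DedupSketch {
    public static class LineMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        public void map(LongWritable pos, Text line,
                OutputCollector<Text, NullWritable> out, Reporter r) throws IOException {
            // The whole line becomes the key; duplicates collapse in the shuffle.
            out.collect(line, NullWritable.get());
        }
    }

    public static class KeyReducer extends MapReduceBase
            implements Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        public void reduce(Text key, Iterator<NullWritable> vals,
                OutputCollector<Text, NullWritable> out, Reporter r) throws IOException {
            // Write each distinct line exactly once, ignoring the values.
            out.collect(key, NullWritable.get());
        }
    }
}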
From source file:de.tudarmstadt.lt.nlkg.ConvertInvertSVO.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertInvertSVO.class);
    conf.setJobName(ConvertInvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
From source file:de.tudarmstadt.lt.nlkg.ConvertSVO.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), ConvertSVO.class);
    conf.setJobName(ConvertSVO.class.getSimpleName());

    conf.setMapperClass(ConversionMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ConvertedWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}
From source file:dinocode.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");

    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    // conf.setOutputKeyClass(Text.class);
    // conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    // conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    // conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);

    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);
    // conf.setCombinerClass(SpeciesGraphBuilderReducer.class);

    // conf.setInputPath(new Path("graph1"));
    // conf.setOutputPath(new Path("graph2"));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // The builder job runs with the default single reducer, so its whole
    // output is in part-00000.
    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000";
    for (int i = 0; i < 500; i++) {
        client = new JobClient();
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        int count = i + 1;
        outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count;

        conf.setNumReduceTasks(5);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));

        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Feed each iteration's output directory into the next iteration.
        inFiles = outFiles;
    }

    // Viewer (note: the iteration jobs above run 5 reducers, but only
    // part-00000 of the final iteration is read here)
    client = new JobClient();
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");

    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000";
    outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}