List of usage examples for org.apache.hadoop.mapred JobConf setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
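The combiner is an optional, map-side reducer: the class passed to setCombinerClass must implement the old-API Reducer interface, and both its input and output key/value types must match the map output types, because whatever it emits is fed to the real reducer. Below is a minimal sketch of the typical setup, assuming hypothetical WordCountMapper and WordCountReducer classes (not part of this listing); it is an illustration of the common pattern, not the method of any particular example that follows.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class CombinerExample {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(CombinerExample.class);
        job.setJobName("combiner-example");

        // WordCountMapper / WordCountReducer are hypothetical classes implementing the
        // old-API Mapper<LongWritable, Text, Text, IntWritable> and
        // Reducer<Text, IntWritable, Text, IntWritable> interfaces.
        job.setMapperClass(WordCountMapper.class);

        // The combiner must be a Reducer whose input and output types equal the map
        // output types; reusing the reducer as the combiner is only safe when the
        // reduce logic is associative and commutative (e.g. summing counts).
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }
}

Note that Hadoop may run the combiner zero, one, or several times per map output partition, so the logic must tolerate repeated application. Several of the examples below supply a dedicated combiner class instead of reusing the reducer (e.g. GenomixCombiner, ConcatTextCombiner, CreateUserVisitsCombiner).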
From source file:fr.worf.hadoop.scratchpad.Scratch2MapReduce.java
License:Apache License
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(Scratch2MapReduce.class);
    job.setJobName("wordcount");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("/home/slash/test/testfile1.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/home/slash/test/testfile2.txt"));

    RunningJob runJob = JobClient.runJob(job);
}
From source file:gov.nih.ncgc.hadoop.SmartsSearch.java
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), HeavyAtomCount.class);
    jobConf.setJobName("smartsSearch");

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    jobConf.setMapperClass(MoleculeMapper.class);
    jobConf.setCombinerClass(SmartsMatchReducer.class);
    jobConf.setReducerClass(SmartsMatchReducer.class);

    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);

    jobConf.setNumMapTasks(5);

    if (args.length != 4) {
        System.err.println("Usage: ss <in> <out> <pattern> <license file>");
        System.exit(2);
    }

    FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    jobConf.setStrings("pattern", args[2]);

    // make the license file available via the distributed cache
    DistributedCache.addCacheFile(new Path(args[3]).toUri(), jobConf);

    JobClient.runJob(jobConf);
    return 0;
}
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}
From source file:hadoopProcesses.testJob.java
public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map Map = new map();
        conf.setMapperClass(Map.getClass());

        reducer Reduce = new reducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);
        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");
        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
            String line;
            line = br.readLine();
            while (line != null) {
                System.out.println("\t" + line);
                line = br.readLine();
            }
            */
            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {
                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}
From source file:hibench.DataGenerator.java
License:Apache License
public void replaceIds(Path fcontent, Path fids, Path fjoin, ZipfRandom zipf) throws IOException {
    LOG.info("Replace Virtual Zipfian Ids with real Ids...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fcontent.getName() + " JOIN " + fids.getName() + " -> " + fjoin.getName();
    job.setJobName(jobname);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    MultipleInputs.addInputPath(job, fids, TextInputFormat.class, TagRecordsMapper.class);
    MultipleInputs.addInputPath(job, fcontent, TextInputFormat.class, ReverseContentMapper.class);
    job.setOutputFormat(TextOutputFormat.class);

    // use combiner to avoid too many inputs for reducer
    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(JoinContentWithZipfReducer.class);

    if (zipf.reds > 0) {
        job.setNumReduceTasks(zipf.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    FileOutputFormat.setOutputPath(job, fjoin);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Id distribution: " + fids);
    LOG.info("Content file with virtual Ids: " + fcontent);
    LOG.info("Joint result file: " + fjoin);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}
From source file:hibench.HiveDataGenerator.java
License:Apache License
private void createRankingsTable() throws IOException {
    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";
    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }
    // job.setNumReduceTasks(options.agents/2);

    /***
     * need to join the result with the LINK table so as to replace
     * url ids with real contents
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Output file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}
From source file:hibench.HiveDataGenerator.java
License:Apache License
private void createUserVisitsTable() throws IOException, URISyntaxException {
    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /***
     * Set distributed cache files for table generation.
     * Cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */
    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }
    // job.setNumReduceTasks(options.agents/2);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Output file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}
From source file:IndexService.IndexMergeMR.java
License:Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);

        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();
        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }
        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));

        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:IndexService.IndexMR.java
License:Open Source License
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:invertedIndex.startJob.java
public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        lineIndexMapper Map = new lineIndexMapper();
        conf.setMapperClass(Map.getClass());

        lineIndexReducer Reduce = new lineIndexReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}