Usage examples for org.apache.hadoop.mapreduce.Job.setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.twitter.algebra.nmf.Edge2MapDirJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows, int numInputCols, String name) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.set(INDEXNAME, name);// w ww. j a v a 2 s .c o m conf.setInt(ROWS, numInputRows); conf.setInt(COLS, numInputCols); conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t"); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "edge2matrix"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(Edge2MapDirJob.class); job.setJobName(Edge2MapDirJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(KeyValueTextInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "edge2matrix"); job.setNumReduceTasks(numReducers); // ensures total order (when used with {@link MatrixOutputFormat}), RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputRows); job.setCombinerClass(MergeVectorsCombiner.class); job.setReducerClass(MergeVectorsReducer.class); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.nmf.ErrDMJ.java
License:Apache License
public Job run(Configuration conf, Path xPath, Path matrixAInputPath, Path ytPath, Path outPath, int aRows, int ytRows, int ytCols) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.set(MAPDIRMATRIXX, xPath.toString()); conf.set(MAPDIRMATRIXYT, ytPath.toString()); conf.setInt(YTROWS, ytRows);//from w w w.ja v a 2 s. co m conf.setInt(YTCOLS, ytCols); FileSystem fs = FileSystem.get(outPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixAInputPath, "err"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(ErrDMJ.class); job.setJobName(ErrDMJ.class.getSimpleName() + "-" + outPath.getName()); matrixAInputPath = fs.makeQualified(matrixAInputPath); MultipleInputs.addInputPath(job, matrixAInputPath, SequenceFileInputFormat.class); outPath = fs.makeQualified(outPath); FileOutputFormat.setOutputPath(job, outPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); int numReducers = 1; job.setNumReduceTasks(numReducers); job.setCombinerClass(SumVectorsReducer.class); job.setReducerClass(SumVectorsReducer.class); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed! "); return job; }
From source file:com.twitter.algebra.nmf.RowSquareSumJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows) throws IOException, InterruptedException, ClassNotFoundException { @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(RowSquareSumJob.class); job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); int numReducers = 1; job.setNumReduceTasks(numReducers);/*from ww w. j a v a2 s . com*/ job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(SumMapper.class); job.setCombinerClass(MergeVectorsReducer.class); job.setReducerClass(MergeVectorsReducer.class); // RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, // aRows); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.TransposeJob.java
License:Apache License
/**
 * Perform transpose of A, where A refers to the path that contains a matrix
 * in {@link SequenceFileInputFormat}. Blocks until the job completes.
 *
 * <p>Note that {@code NUM_ORIG_ROWS_KEY} and {@code RowPartitioner.TOTAL_KEYS} are set
 * directly on the passed-in {@code conf}; no copy is made first.
 *
 * @param conf
 *          the initial configuration (modified in place, see above)
 * @param matrixInputPath
 *          the path to the input files that we process
 * @param matrixOutputPath
 *          the path of the resulting transpose matrix
 * @param numInputRows
 *          rows
 * @param numInputCols
 *          cols
 * @throws IOException
 *           if a Hadoop call fails or the job reports failure
 * @throws InterruptedException
 *           propagated from the job submission/wait calls
 * @throws ClassNotFoundException
 *           propagated from the job submission/wait calls
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows,
        int numInputCols) throws IOException, InterruptedException, ClassNotFoundException {
    conf.setInt(NUM_ORIG_ROWS_KEY, numInputRows);
    conf.setInt(RowPartitioner.TOTAL_KEYS, numInputCols);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "transpose");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(TransposeJob.class);
    job.setJobName(TransposeJob.class.getSimpleName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(TransposeMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "transpose");
    job.setNumReduceTasks(numReducers);
    // job.setPartitionerClass(RowPartitioner.IntRowPartitioner.class);
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputCols);

    job.setCombinerClass(MergeVectorsCombiner.class);
    job.setReducerClass(MergeVectorsReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res) {
        throw new IOException("Job failed!");
    }
}
From source file:com.twitter.elephanttwin.retrieval.ScanUsingIndexJob.java
License:Apache License
/**
 * Scans the index files for the configured inputs with a single-reducer MapReduce job.
 *
 * <p>The output directory is deleted recursively before the job is set up. Index files are
 * collected from {@code IndexConfig.index.get() + input} for every configured input.
 *
 * @param args not read by this method; settings come from {@code IndexConfig}
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if any Hadoop call fails
 */
@Override
public int run(String[] args) throws Exception {
    params = new IndexConfig();

    LOG.info(" - input: " + Joiner.on(" ").join(params.getInput()));
    LOG.info(" - output: " + IndexConfig.output.get());

    Configuration conf = getConf();
    Path outputDir = new Path(params.getOutput());
    FileSystem fs = outputDir.getFileSystem(conf);
    fs.delete(outputDir, true);

    List<FileStatus> stats = Lists.newArrayList();
    for (String s : params.getInput()) {
        Path spath = new Path(IndexConfig.index.get() + s);
        HdfsUtils.addInputPathRecursively(stats, fs, spath, hiddenDirectoryFilter, indexDataFilter);
    }

    int totalInputFiles = stats.size();
    LOG.info(totalInputFiles + " total index files to be scanned");

    conf.set(IndexScanMapper.searchColumnName, params.getColumnName());
    Job job = new Job(new Configuration(conf));
    job.setJarByClass(getClass());
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(params.getOutput()));

    for (FileStatus file : stats) {
        FileInputFormat.addInputPath(job, file.getPath());
    }

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(1);

    job.setMapperClass(IndexScanMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    job.setJobName("ScanUsingIndexJob:" + IndexConfig.input.get());
    BlockIndexedFileInputFormat.setSearchOptions(job, params.getinputFormat(), params.getValueClass(),
            params.getIndex(), (String) null);

    // Propagate the job outcome instead of unconditionally reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.uc.qiujw.WordCount.java
License:Apache License
/**
 * Entry point for the word-count job with configurable map/reduce sleep settings.
 *
 * <p>Expected arguments after generic Hadoop options:
 * {@code <in> <out> [<map_sleep> [<reduce_sleep>]]}, where {@code <in>} is a comma-separated
 * list of input paths. Both sleep values default to {@code 1}. An existing output path is
 * deleted recursively before the job runs. The JVM exits with {@code 0} on success,
 * {@code 1} on job failure and {@code 2} when too few arguments are supplied.
 *
 * @param args command-line arguments
 * @throws Exception if argument parsing or any Hadoop call fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> <out> <map_sleep> <reduce_sleep>");
        System.exit(2);
    }

    String[] in = otherArgs[0].split(",");
    String out = otherArgs[1];

    int mapSleep = 1;
    int reduceSleep = 1;
    if (otherArgs.length > 2) {
        mapSleep = Integer.parseInt(otherArgs[2]);
    }
    if (otherArgs.length > 3) {
        reduceSleep = Integer.parseInt(otherArgs[3]);
    }

    // These must be set BEFORE the Job is created: Job.getInstance copies the
    // Configuration, so later changes to conf would never reach the tasks.
    conf.set(mapSleepKey, Integer.toString(mapSleep));
    conf.set(reduceSleepKey, Integer.toString(reduceSleep));

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (String inStr : in) {
        FileInputFormat.addInputPath(job, new Path(inStr));
    }
    Path outPath = new Path(out);
    FileOutputFormat.setOutputPath(job, outPath);

    // Remove stale output so the job does not fail on an existing directory.
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.wibidata.wibidota.DotaGatherExampleValues.java
License:Apache License
public final int run(final String[] args) throws Exception { Job job = new Job(super.getConf(), "Dota Gatherer Example Values"); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setMapperClass(EnumGatherMap.class); job.setCombinerClass(AppendText.class); job.setReducerClass(EnumGatherReducer.class); job.setJarByClass(DotaGatherExampleValues.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); if (job.waitForCompletion(true)) { return 0; } else {//from w w w. ja v a2s. com return -1; } }
From source file:com.wibidata.wibidota.dotaloader.DotaValuesCounter.java
License:Apache License
/**
 * Runs the "Dota Value Counter" MapReduce job, using {@code Add} as both combiner and
 * reducer over {@code Text}/{@code LongWritable} map output.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return {@code 0} if the job completed successfully, {@code -1} otherwise
 * @throws Exception if any Hadoop call fails
 */
public final int run(final String[] args) throws Exception {
    final Job counterJob = new Job(super.getConf(), "Dota Value Counter");
    counterJob.setJarByClass(DotaValuesCounter.class);

    // Processing stages.
    counterJob.setMapperClass(Map.class);
    counterJob.setCombinerClass(Add.class);
    counterJob.setReducerClass(Add.class);

    // Intermediate key/value types.
    counterJob.setMapOutputKeyClass(Text.class);
    counterJob.setMapOutputValueClass(LongWritable.class);

    // Plain-text input and output.
    counterJob.setInputFormatClass(TextInputFormat.class);
    counterJob.setOutputFormatClass(TextOutputFormat.class);
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);
    FileInputFormat.addInputPath(counterJob, inputPath);
    FileOutputFormat.setOutputPath(counterJob, outputPath);

    final boolean succeeded = counterJob.waitForCompletion(true);
    return succeeded ? 0 : -1;
}
From source file:com.wibidata.wibidota.DotaMaxAccountId.java
License:Apache License
public final int run(final String[] args) throws Exception { Job job = new Job(super.getConf(), "Dota Max Builder"); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setMapperClass(DotaMaxAccountId.Map.class); job.setCombinerClass(DotaMaxAccountId.TakeMax.class); job.setReducerClass(DotaMaxAccountId.TakeMax.class); job.setJarByClass(DotaMaxAccountId.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); if (job.waitForCompletion(true)) { return 0; } else {// w w w. j a va2s. c o m return -1; } }
From source file:com.wind.mapreduce.WordCount.java
License:Apache License
/**
 * Builds and runs the "word count" job from this tool's configuration.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if any Hadoop call fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf wordCountConf = new JobConf(getConf(), WordCount.class);
    Job wordCountJob = Job.getInstance(wordCountConf, "word count");

    // Processing stages: the reducer doubles as the combiner.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    // Final output types.
    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    FileInputFormat.addInputPath(wordCountJob, inputPath);
    FileOutputFormat.setOutputPath(wordCountJob, outputPath);

    if (wordCountJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}