List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.phantom.hadoop.examples.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();/*from ww w . j a v a2 s . c o m*/ return 2; } Job job = new Job(getConf()); job.setJobName("MultiFileWordCount"); job.setJarByClass(MultiFileWordCount.class); // set the InputFormat of the job to our InputFormat job.setInputFormatClass(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(IntWritable.class); // use the defined mapper job.setMapperClass(MapClass.class); // use the WordCount Reducer job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.phantom.hadoop.examples.WordMean.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmean <in> <out>"); return 0; }//from w ww . j a v a2 s . com Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word mean"); job.setJarByClass(WordMean.class); job.setMapperClass(WordMeanMapper.class); job.setCombinerClass(WordMeanReducer.class); job.setReducerClass(WordMeanReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); mean = readAndCalcMean(outputpath, conf); return (result ? 0 : 1); }
From source file:com.phantom.hadoop.examples.WordMedian.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmedian <in> <out>"); return 0; }//from www. j a v a 2 s . co m setConf(new Configuration()); Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
From source file:com.phantom.hadoop.examples.WordStandardDeviation.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordstddev <in> <out>"); return 0; }//from w ww. j av a2 s. c o m Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word stddev"); job.setJarByClass(WordStandardDeviation.class); job.setMapperClass(WordStandardDeviationMapper.class); job.setCombinerClass(WordStandardDeviationReducer.class); job.setReducerClass(WordStandardDeviationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); // read output and calculate standard deviation stddev = readAndCalcStdDev(outputpath, conf); return (result ? 0 : 1); }
From source file:com.philiphubbard.digraph.MRBuildVertices.java
License:Open Source License
/**
 * Configures {@code job} with this class's nested Mapper and Reducer (the
 * Reducer is also registered as combiner), IntWritable keys and BytesWritable
 * values for both map output and final output, and sequence-file output.
 *
 * @param job        the job to configure
 * @param inputPath  added to the job's input paths
 * @param outputPath set as the job's output path
 * @throws IOException propagated from the input/output path setup
 */
public static void setupJob(Job job, Path inputPath, Path outputPath) throws IOException {
    final Class<IntWritable> keyType = IntWritable.class;
    final Class<BytesWritable> valueType = BytesWritable.class;

    job.setJarByClass(MRBuildVertices.class);

    // Processing classes: the reducer is reused as the combiner.
    job.setMapperClass(MRBuildVertices.Mapper.class);
    job.setCombinerClass(MRBuildVertices.Reducer.class);
    job.setReducerClass(MRBuildVertices.Reducer.class);

    // Intermediate and final records share the same key/value types.
    job.setMapOutputKeyClass(keyType);
    job.setMapOutputValueClass(valueType);
    job.setOutputKeyClass(keyType);
    job.setOutputValueClass(valueType);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
}
From source file:com.philiphubbard.digraph.MRCompressChains.java
License:Open Source License
/**
 * Configures {@code job} for one iteration, using the current value of
 * {@code iter} to pick the paths.
 *
 * <p>The output path is {@code outputPathOrig} with {@code iter} appended to its
 * string form. The input path is {@code inputPathOrig} when {@code iter} is 0,
 * otherwise {@code outputPathOrig} with {@code iter - 1} appended.
 *
 * @param job            the job to configure
 * @param inputPathOrig  input path used when {@code iter} is 0
 * @param outputPathOrig base from which per-iteration paths are derived
 * @throws IOException propagated from the input/output path setup
 */
public static void setupIterationJob(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    job.setJarByClass(MRCompressChains.class);

    // Processing classes: the reducer is reused as the combiner.
    job.setMapperClass(MRCompressChains.Mapper.class);
    job.setCombinerClass(MRCompressChains.Reducer.class);
    job.setReducerClass(MRCompressChains.Reducer.class);

    // Intermediate and final records: IntWritable keys, BytesWritable values.
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    // Sequence files on both ends.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    final String outputBase = outputPathOrig.toString();
    final Path iterationInput = (iter == 0)
            ? inputPathOrig
            : new Path(outputBase + (iter - 1));
    final Path iterationOutput = new Path(outputBase + iter);

    FileInputFormat.setInputPaths(job, iterationInput);
    FileOutputFormat.setOutputPath(job, iterationOutput);
}
From source file:com.philiphubbard.sabe.MRCompressMerChains.java
License:Open Source License
/**
 * Configures {@code job} by delegating to
 * {@code MRCompressChains.setupIterationJob} and then registering this class's
 * nested Mapper and Reducer (the Reducer also as combiner) on the job.
 *
 * @param job            the job to configure
 * @param inputPathOrig  forwarded unchanged to the delegate
 * @param outputPathOrig forwarded unchanged to the delegate
 * @throws IOException propagated from the delegate
 */
public static void setupIterationJob(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    // Shared setup first ...
    MRCompressChains.setupIterationJob(job, inputPathOrig, outputPathOrig);

    // ... then register the classes nested in MRCompressMerChains.
    job.setReducerClass(MRCompressMerChains.Reducer.class);
    job.setCombinerClass(MRCompressMerChains.Reducer.class);
    job.setMapperClass(MRCompressMerChains.Mapper.class);
}
From source file:com.pocketx.gravity.recommender.cf.similarity.job.PreparePreferenceMatrixJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();//w w w . ja va 2 s .c o m addOutputOption(); addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, " + "users with more preferences will be sampled down"); addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER)); addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString()); addOption("ratingShift", "rs", "shift ratings by this value", "0.0"); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser")); boolean booleanData = Boolean.valueOf(getOption("booleanData")); float ratingShift = Float.parseFloat(getOption("ratingShift")); //convert items to an internal index Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class, ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); itemIDIndex.setCombinerClass(ItemIDIndexReducer.class); boolean succeeded = itemIDIndex.waitForCompletion(true); if (!succeeded) { return -1; } //convert user preferences into a vector per user Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData); toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser); toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift)); succeeded = toUserVectors.waitForCompletion(true); if (!succeeded) { return -1; } //we need the number of users later int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS) .getValue(); HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf()); //build the rating matrix Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX), ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class, IntWritable.class, VectorWritable.class); toItemVectors.setCombinerClass(ToItemVectorsReducer.class); /* configure sampling regarding the uservectors */ if (hasOption("maxPrefsPerUser")) { int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser")); toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize); } succeeded = toItemVectors.waitForCompletion(true); if (!succeeded) { return -1; } return 0; }
From source file:com.pocketx.gravity.recommender.cf.similarity.job.RowSimilarityJob.java
License:Apache License
/**
 * Parses the command line and runs up to three jobs in order: norms and
 * transpose, pairwise similarity, and conversion of the result to a matrix.
 * Each job is guarded by its own {@code shouldRunNextPhase} call.
 *
 * @param args command-line arguments, parsed by {@code parseArguments}
 * @return -1 if argument parsing returns {@code null} or an executed job does
 *         not complete successfully; 0 otherwise
 * @throws Exception propagated from option parsing, path cleanup, or job execution
 */
@Override
public int run(String[] args) throws Exception {
    // ---- command-line options ----
    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix", false);
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerRow", "m",
            "Number of maximum similarities per row (default: " + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
    addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?",
            String.valueOf(false));
    addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    // Column count: taken from the CLI when given, otherwise derived from the
    // input via getDimensions.
    final int numberOfColumns = hasOption("numberOfColumns")
            ? Integer.parseInt(getOption("numberOfColumns"))
            : getDimensions(getInputPath());

    // Resolve a predefined similarity name to its class name; anything that is
    // not a predefined name is used verbatim as the class name.
    final String requestedSimilarity = getOption("similarityClassname");
    String similarityClassname;
    try {
        similarityClassname = VectorSimilarityMeasures.valueOf(requestedSimilarity).getClassname();
    } catch (IllegalArgumentException notPredefined) {
        similarityClassname = requestedSimilarity;
    }

    // With the overwrite option set, clear the temp path and then the output path.
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getTempPath());
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    final int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
    final boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
    double threshold;
    if (hasOption("threshold")) {
        threshold = Double.parseDouble(getOption("threshold"));
    } else {
        threshold = NO_THRESHOLD;
    }
    final String thresholdText = String.valueOf(threshold);

    final Path weightsPath = getTempPath("weights");
    final Path normsPath = getTempPath("norms.bin");
    final Path numNonZeroEntriesPath = getTempPath("numNonZeroEntries.bin");
    final Path maxValuesPath = getTempPath("maxValues.bin");
    final Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");

    final AtomicInteger currentPhase = new AtomicInteger();

    // ---- phase: norms and transpose ----
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job normsJob = prepareJob(getInputPath(), weightsPath, VectorNormMapper.class,
                IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
                IntWritable.class, VectorWritable.class);
        normsJob.setCombinerClass(MergeVectorsCombiner.class);

        Configuration normsConf = normsJob.getConfiguration();
        normsConf.set(THRESHOLD, thresholdText);
        normsConf.set(NORMS_PATH, normsPath.toString());
        normsConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        normsConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        normsConf.set(SIMILARITY_CLASSNAME, similarityClassname);

        if (!normsJob.waitForCompletion(true)) {
            return -1;
        }
    }

    // ---- phase: pairwise similarity ----
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job pairwiseJob = prepareJob(weightsPath, pairwiseSimilarityPath, CooccurrencesMapper.class,
                IntWritable.class, VectorWritable.class, SimilarityReducer.class,
                IntWritable.class, VectorWritable.class);
        pairwiseJob.setCombinerClass(VectorSumReducer.class);

        Configuration pairwiseConf = pairwiseJob.getConfiguration();
        pairwiseConf.set(THRESHOLD, thresholdText);
        pairwiseConf.set(NORMS_PATH, normsPath.toString());
        pairwiseConf.set(NUM_NON_ZERO_ENTRIES_PATH, numNonZeroEntriesPath.toString());
        pairwiseConf.set(MAXVALUES_PATH, maxValuesPath.toString());
        pairwiseConf.set(SIMILARITY_CLASSNAME, similarityClassname);
        pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
        pairwiseConf.setBoolean(EXCLUDE_SELF_SIMILARITY, excludeSelfSimilarity);

        if (!pairwiseJob.waitForCompletion(true)) {
            return -1;
        }
    }

    // ---- phase: as matrix ----
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job matrixJob = prepareJob(pairwiseSimilarityPath, getOutputPath(), UnsymmetrifyMapper.class,
                IntWritable.class, VectorWritable.class, MergeToTopKSimilaritiesReducer.class,
                IntWritable.class, VectorWritable.class);
        matrixJob.setCombinerClass(MergeToTopKSimilaritiesReducer.class);
        matrixJob.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);

        if (!matrixJob.waitForCompletion(true)) {
            return -1;
        }
    }

    return 0;
}
From source file:com.rockstor.compact.GenGarbageIndexTool.java
License:Apache License
/**
 * Builds the job named {@code NAME}: a table-mapper job over
 * {@code GarbageChunkDB.TAB_NAME} with a default {@code Scan}, scanner caching
 * set to {@code batchSize}, the GarbageChunk reducer, partitioner and combiner,
 * the reduce-task count reported by {@code Compactor}, and
 * {@code NullOutputFormat} as output format.
 *
 * @param conf configuration the job is created from
 * @return the configured job; this method does not submit it
 * @throws IOException propagated from job creation or table-mapper setup
 */
private Job createSubmittableJob(Configuration conf) throws IOException {
    Job garbageIndexJob = new Job(conf, NAME);
    garbageIndexJob.setJarByClass(GenGarbageIndexTool.class);

    // Map side: scan the garbage-chunk table, emitting ImmutableBytesWritable pairs.
    TableMapReduceUtil.initTableMapperJob(
            GarbageChunkDB.TAB_NAME,
            new Scan(),
            GarbageChunkMapper.class,
            ImmutableBytesWritable.class,
            ImmutableBytesWritable.class,
            garbageIndexJob);
    TableMapReduceUtil.setScannerCaching(garbageIndexJob, batchSize);

    // Reduce side.
    garbageIndexJob.setReducerClass(GarbageChunkReduce.class);
    garbageIndexJob.setPartitionerClass(GarbageChunkPartition.class);
    garbageIndexJob.setCombinerClass(GarbageChunkCombine.class);
    final int reduceTasks = Compactor.getInstance().getReduceNum();
    garbageIndexJob.setNumReduceTasks(reduceTasks);

    garbageIndexJob.setOutputFormatClass(NullOutputFormat.class);

    LOG.info("init job " + NAME + " finished!");
    return garbageIndexJob;
}