List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:org.apache.hadoop.examples.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return 2; }//from ww w. j av a2s . co m Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); Configuration conf = getConf(); conf.set(RegexMapper.PATTERN, args[2]); if (args.length == 4) conf.set(RegexMapper.GROUP, args[3]); Job grepJob = Job.getInstance(conf); try { grepJob.setJobName("grep-search"); grepJob.setJarByClass(Grep.class); FileInputFormat.setInputPaths(grepJob, args[0]); grepJob.setMapperClass(RegexMapper.class); grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); FileOutputFormat.setOutputPath(grepJob, tempDir); grepJob.setOutputFormatClass(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); grepJob.waitForCompletion(true); Job sortJob = Job.getInstance(conf); sortJob.setJobName("grep-sort"); sortJob.setJarByClass(Grep.class); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormatClass(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setSortComparatorClass( // sort by decreasing freq LongWritable.DecreasingComparator.class); sortJob.waitForCompletion(true); } finally { FileSystem.get(conf).delete(tempDir, true); } return 0; }
From source file:org.apache.hadoop.examples.MultiFileWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();/* w w w . j a v a 2 s . c om*/ return 2; } Job job = Job.getInstance(getConf()); job.setJobName("MultiFileWordCount"); job.setJarByClass(MultiFileWordCount.class); //set the InputFormat of the job to our InputFormat job.setInputFormatClass(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(IntWritable.class); //use the defined mapper job.setMapperClass(MapClass.class); //use the WordCount Reducer job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:org.apache.hadoop.examples.WordMean.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmean <in> <out>"); return 0; }//w ww . j av a2s .c om Configuration conf = getConf(); Job job = Job.getInstance(conf, "word mean"); job.setJarByClass(WordMean.class); job.setMapperClass(WordMeanMapper.class); job.setCombinerClass(WordMeanReducer.class); job.setReducerClass(WordMeanReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); mean = readAndCalcMean(outputpath, conf); return (result ? 0 : 1); }
From source file:org.apache.hadoop.examples.WordMedian.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmedian <in> <out>"); return 0; }//from w w w . j av a 2 s.c om setConf(new Configuration()); Configuration conf = getConf(); Job job = Job.getInstance(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
From source file:org.apache.hadoop.examples.WordStandardDeviation.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordstddev <in> <out>"); return 0; }/*from w ww . ja v a 2 s . c om*/ Configuration conf = getConf(); Job job = Job.getInstance(conf, "word stddev"); job.setJarByClass(WordStandardDeviation.class); job.setMapperClass(WordStandardDeviationMapper.class); job.setCombinerClass(WordStandardDeviationReducer.class); job.setReducerClass(WordStandardDeviationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); // read output and calculate standard deviation stddev = readAndCalcStdDev(outputpath, conf); return (result ? 0 : 1); }
From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java
License:Apache License
/** * Tests job counters retrieval./* www . j av a2s .c o m*/ * * @throws Exception If failed. */ public void testJobCounters() throws Exception { IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n"); } Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestCountingMapper.class); job.setReducerClass(TestCountingReducer.class); job.setCombinerClass(TestCountingCombiner.class); FileInputFormat.setInputPaths(job, new Path(PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT)); job.submit(); final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1); assertEquals(0, cntr.getValue()); cntr.increment(10); assertEquals(10, cntr.getValue()); // Transferring to map phase. setupLockFile.delete(); // Transferring to reduce phase. mapLockFile.delete(); job.waitForCompletion(false); assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState()); final Counters counters = job.getCounters(); assertNotNull("counters cannot be null", counters); assertEquals("wrong counters count", 3, counters.countCounters()); assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue()); }
From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java
License:Apache License
/** * Test job submission.//w w w. jav a2 s .c o m * * @param noCombiners Whether there are no combiners. * @param noReducers Whether there are no reducers. * @throws Exception If failed. */ public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception { IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("word"); } Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); job.setJobName(JOB_NAME); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); if (!noCombiners) job.setCombinerClass(TestCombiner.class); if (noReducers) job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TestOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT)); job.submit(); JobID jobId = job.getJobID(); // Setup phase. JobStatus jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f; assert jobStatus.getMapProgress() == 0.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); JobStatus recentJobStatus = job.getStatus(); assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old=" + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress(); // Transferring to map phase. setupLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getSetupProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); // Map phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress(); // Transferring to reduce phase. mapLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getMapProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); if (!noReducers) { // Reduce phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f; // Ensure that reduces progress increases. U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old=" + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress(); reduceLockFile.delete(); } job.waitForCompletion(false); jobStatus = job.getStatus(); checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() == 1.0f; dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT)); }
From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java
License:Apache License
/** * Tests job counters retrieval./*from w ww . j a v a2 s. c o m*/ * * @throws Exception If failed. */ public void testJobCounters() throws Exception { IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n"); } Configuration conf = config(HadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestCountingMapper.class); job.setReducerClass(TestCountingReducer.class); job.setCombinerClass(TestCountingCombiner.class); FileInputFormat.setInputPaths(job, new Path(PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT)); job.submit(); final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1); assertEquals(0, cntr.getValue()); cntr.increment(10); assertEquals(10, cntr.getValue()); // Transferring to map phase. setupLockFile.delete(); // Transferring to reduce phase. mapLockFile.delete(); job.waitForCompletion(false); assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState()); final Counters counters = job.getCounters(); assertNotNull("counters cannot be null", counters); assertEquals("wrong counters count", 3, counters.countCounters()); assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue()); }
From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java
License:Apache License
/** * Test job submission.//from ww w . ja v a 2 s.c o m * * @param noCombiners Whether there are no combiners. * @param noReducers Whether there are no reducers. * @throws Exception If failed. */ public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception { IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("word"); } Configuration conf = config(HadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); job.setJobName(JOB_NAME); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); if (!noCombiners) job.setCombinerClass(TestCombiner.class); if (noReducers) job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TestOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT)); job.submit(); JobID jobId = job.getJobID(); // Setup phase. JobStatus jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f; assert jobStatus.getMapProgress() == 0.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); JobStatus recentJobStatus = job.getStatus(); assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old=" + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress(); // Transferring to map phase. setupLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getSetupProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); // Map phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress(); // Transferring to reduce phase. mapLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getMapProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); if (!noReducers) { // Reduce phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f; // Ensure that reduces progress increases. U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old=" + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress(); reduceLockFile.delete(); } job.waitForCompletion(false); jobStatus = job.getStatus(); checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() == 1.0f; dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT)); }
From source file:org.apache.ignite.internal.processors.hadoop.examples.GridHadoopWordCount2.java
License:Apache License
/** * Sets task classes with related info if needed into configuration object. * * @param job Configuration to change./*from w w w. j av a2 s. co m*/ * @param setMapper Option to set mapper and input format classes. * @param setCombiner Option to set combiner class. * @param setReducer Option to set reducer and output format classes. */ public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) { if (setMapper) { job.setMapperClass(GridHadoopWordCount2Mapper.class); job.setInputFormatClass(TextInputFormat.class); } if (setCombiner) job.setCombinerClass(GridHadoopWordCount2Reducer.class); if (setReducer) { job.setReducerClass(GridHadoopWordCount2Reducer.class); job.setOutputFormatClass(TextOutputFormat.class); } }