List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:org.apache.ignite.internal.processors.hadoop.examples.HadoopWordCount2.java
License:Apache License
/** * Sets task classes with related info if needed into configuration object. * * @param job Configuration to change.//from ww w .ja v a 2 s . c o m * @param setMapper Option to set mapper and input format classes. * @param setCombiner Option to set combiner class. * @param setReducer Option to set reducer and output format classes. */ public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) { if (setMapper) { job.setMapperClass(HadoopWordCount2Mapper.class); job.setInputFormatClass(TextInputFormat.class); } if (setCombiner) job.setCombinerClass(HadoopWordCount2Reducer.class); if (setReducer) { job.setReducerClass(HadoopWordCount2Reducer.class); job.setOutputFormatClass(TextOutputFormat.class); } }
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java
License:Apache License
/** * @param combiner With combiner./* www .j av a2s . co m*/ * @throws Exception If failed. */ public void doTestGrouping(boolean combiner) throws Exception { vals.clear(); Job job = Job.getInstance(); job.setInputFormatClass(InFormat.class); job.setOutputFormatClass(OutFormat.class); job.setOutputKeyClass(YearTemperature.class); job.setOutputValueClass(Text.class); job.setMapperClass(Mapper.class); if (combiner) { job.setCombinerClass(MyReducer.class); job.setNumReduceTasks(0); job.setCombinerKeyGroupingComparatorClass(YearComparator.class); } else { job.setReducerClass(MyReducer.class); job.setNumReduceTasks(4); job.setGroupingComparatorClass(YearComparator.class); } grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration())) .get(30000); assertTrue(vals.isEmpty()); }
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopJobTrackerSelfTest.java
License:Apache License
/** * @throws Exception If failed.//from ww w . jav a 2s . c o m */ public void testTaskWithCombinerPerMap() throws Exception { try { UUID globalId = UUID.randomUUID(); Job job = Job.getInstance(); setupFileSystems(job.getConfiguration()); job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); job.setCombinerClass(TestCombiner.class); job.setInputFormatClass(InFormat.class); FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2")); GridHadoopJobId jobId = new GridHadoopJobId(globalId, 1); grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration())); checkStatus(jobId, false); info("Releasing map latch."); latch.get("mapAwaitLatch").countDown(); checkStatus(jobId, false); // All maps are completed. We have a combiner, so no reducers should be executed // before combiner latch is released. U.sleep(50); assertEquals(0, reduceExecCnt.get()); info("Releasing combiner latch."); latch.get("combineAwaitLatch").countDown(); checkStatus(jobId, false); info("Releasing reduce latch."); latch.get("reduceAwaitLatch").countDown(); checkStatus(jobId, true); assertEquals(10, mapExecCnt.get()); assertEquals(10, combineExecCnt.get()); assertEquals(1, reduceExecCnt.get()); } finally { // Safety. latch.get("mapAwaitLatch").countDown(); latch.get("combineAwaitLatch").countDown(); latch.get("reduceAwaitLatch").countDown(); } }
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java
License:Apache License
/** * @throws Exception If failed.//from www . j ava2 s .co m */ public void testMapCombineRun() throws Exception { int lineCnt = 10001; String fileName = "/testFile"; prepareFile(fileName, lineCnt); totalLineCnt.set(0); taskWorkDirs.clear(); Configuration cfg = new Configuration(); cfg.setStrings("fs.igfs.impl", IgfsHadoopFileSystem.class.getName()); cfg.setBoolean(MAP_WRITE, true); Job job = Job.getInstance(cfg); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestMapper.class); job.setCombinerClass(TestCombiner.class); job.setReducerClass(TestReducer.class); job.setNumReduceTasks(2); job.setInputFormatClass(TextInputFormat.class); FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/")); FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output")); job.setJarByClass(getClass()); GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2); IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration())); fut.get(); assertEquals(lineCnt, totalLineCnt.get()); assertEquals(34, taskWorkDirs.size()); for (int g = 0; g < gridCount(); g++) grid(g).hadoop().finishFuture(jobId).get(); }
From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java
License:Apache License
/** * @param combiner With combiner.//w w w .j a va 2s . co m * @throws Exception If failed. */ public void doTestGrouping(boolean combiner) throws Exception { vals.clear(); Job job = Job.getInstance(); job.setInputFormatClass(InFormat.class); job.setOutputFormatClass(OutFormat.class); job.setOutputKeyClass(YearTemperature.class); job.setOutputValueClass(Text.class); job.setMapperClass(Mapper.class); if (combiner) { job.setCombinerClass(MyReducer.class); job.setNumReduceTasks(0); job.setCombinerKeyGroupingComparatorClass(YearComparator.class); } else { job.setReducerClass(MyReducer.class); job.setNumReduceTasks(4); job.setGroupingComparatorClass(YearComparator.class); } grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration())) .get(30000); assertTrue(vals.isEmpty()); }
From source file:org.apache.ignite.internal.processors.hadoop.HadoopJobTrackerSelfTest.java
License:Apache License
/** * @throws Exception If failed./*from ww w . j av a 2 s.c o m*/ */ public void testTaskWithCombinerPerMap() throws Exception { try { UUID globalId = UUID.randomUUID(); Job job = Job.getInstance(); setupFileSystems(job.getConfiguration()); job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); job.setCombinerClass(TestCombiner.class); job.setInputFormatClass(InFormat.class); FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2")); HadoopJobId jobId = new HadoopJobId(globalId, 1); grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration())); checkStatus(jobId, false); info("Releasing map latch."); latch.get("mapAwaitLatch").countDown(); checkStatus(jobId, false); // All maps are completed. We have a combiner, so no reducers should be executed // before combiner latch is released. U.sleep(50); assertEquals(0, reduceExecCnt.get()); info("Releasing combiner latch."); latch.get("combineAwaitLatch").countDown(); checkStatus(jobId, false); info("Releasing reduce latch."); latch.get("reduceAwaitLatch").countDown(); checkStatus(jobId, true); assertEquals(10, mapExecCnt.get()); assertEquals(10, combineExecCnt.get()); assertEquals(1, reduceExecCnt.get()); } finally { // Safety. latch.get("mapAwaitLatch").countDown(); latch.get("combineAwaitLatch").countDown(); latch.get("reduceAwaitLatch").countDown(); } }
From source file:org.apache.ignite.internal.processors.hadoop.HadoopTaskExecutionSelfTest.java
License:Apache License
/** * @throws Exception If failed.//from w w w. j av a 2 s . c om */ public void testMapCombineRun() throws Exception { int lineCnt = 10001; String fileName = "/testFile"; prepareFile(fileName, lineCnt); totalLineCnt.set(0); taskWorkDirs.clear(); Configuration cfg = new Configuration(); cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName()); cfg.setBoolean(MAP_WRITE, true); Job job = Job.getInstance(cfg); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestMapper.class); job.setCombinerClass(TestCombiner.class); job.setReducerClass(TestReducer.class); job.setNumReduceTasks(2); job.setInputFormatClass(TextInputFormat.class); FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/")); FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output")); job.setJarByClass(getClass()); HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 2); IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration())); fut.get(); assertEquals(lineCnt, totalLineCnt.get()); assertEquals(34, taskWorkDirs.size()); for (int g = 0; g < gridCount(); g++) grid(g).hadoop().finishFuture(jobId).get(); }
From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java
License:Apache License
/** * Tests job counters retrieval./*w ww .j a v a 2 s . c om*/ * * @throws Exception If failed. */ public void testJobCounters() throws Exception { IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n"); } Configuration conf = config(HadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); try { job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestCountingMapper.class); job.setReducerClass(TestCountingReducer.class); job.setCombinerClass(TestCountingCombiner.class); FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT)); job.submit(); final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1); assertEquals(0, cntr.getValue()); cntr.increment(10); assertEquals(10, cntr.getValue()); // Transferring to map phase. setupLockFile.delete(); // Transferring to reduce phase. mapLockFile.delete(); job.waitForCompletion(false); assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState()); final Counters counters = job.getCounters(); assertNotNull("counters cannot be null", counters); assertEquals("wrong counters count", 3, counters.countCounters()); assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue()); assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue()); } catch (Throwable t) { log.error("Unexpected exception", t); } finally { job.getCluster().close(); } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java
License:Apache License
/** * Test job submission.//from www . j av a2 s . c om * * @param noCombiners Whether there are no combiners. * @param noReducers Whether there are no reducers. * @throws Exception If failed. */ public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception { IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName); igfs.mkdirs(new IgfsPath(PATH_INPUT)); try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) { bw.write("word"); } Configuration conf = config(HadoopAbstractSelfTest.REST_PORT); final Job job = Job.getInstance(conf); try { job.setJobName(JOB_NAME); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); if (!noCombiners) job.setCombinerClass(TestCombiner.class); if (noReducers) job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TestOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(PATH_INPUT)); FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT)); job.submit(); JobID jobId = job.getJobID(); // Setup phase. JobStatus jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f; assert jobStatus.getMapProgress() == 0.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); JobStatus recentJobStatus = job.getStatus(); assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old=" + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress(); // Transferring to map phase. setupLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getSetupProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); // Map phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f; assert jobStatus.getReduceProgress() == 0.0f; U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress(); // Transferring to reduce phase. mapLockFile.delete(); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { return F.eq(1.0f, job.getStatus().getMapProgress()); } catch (Exception e) { throw new RuntimeException("Unexpected exception.", e); } } }, 5000L); if (!noReducers) { // Reduce phase. jobStatus = job.getStatus(); checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f; // Ensure that reduces progress increases. U.sleep(2100); recentJobStatus = job.getStatus(); assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old=" + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress(); reduceLockFile.delete(); } job.waitForCompletion(false); jobStatus = job.getStatus(); checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f); assert jobStatus.getSetupProgress() == 1.0f; assert jobStatus.getMapProgress() == 1.0f; assert jobStatus.getReduceProgress() == 1.0f; dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT)); } finally { job.getCluster().close(); } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2.java
License:Apache License
/** * Sets task classes with related info if needed into configuration object. * * @param job Configuration to change./* w w w .j ava2s. com*/ * @param setMapper Option to set mapper and input format classes. * @param setCombiner Option to set combiner class. * @param setReducer Option to set reducer and output format classes. */ public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer, boolean outputCompression) { if (setMapper) { job.setMapperClass(HadoopWordCount2Mapper.class); job.setInputFormatClass(TextInputFormat.class); } if (setCombiner) job.setCombinerClass(HadoopWordCount2Combiner.class); if (setReducer) { job.setReducerClass(HadoopWordCount2Reducer.class); job.setOutputFormatClass(TextOutputFormat.class); } if (outputCompression) { job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); SequenceFileOutputFormat.setCompressOutput(job, true); job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName()); } }