Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setCombinerClass, collected from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job. The combiner pre-aggregates map output before it is transferred to the reducers; the call throws IllegalStateException if the job has already been submitted.
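
Before the project-specific examples below, here is a minimal, self-contained word-count driver showing the call in context. This is an illustrative sketch only, not code from any project on this page; the class names and the argument-based input/output paths are assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountWithCombiner {
    /** Emits (word, 1) for every whitespace-separated token. */
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
            for (String tok : value.toString().split("\\s+")) {
                if (!tok.isEmpty()) {
                    word.set(tok);
                    ctx.write(word, ONE);
                }
            }
        }
    }

    /** Sums counts; usable as both combiner and reducer because summation is commutative and associative. */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> vals, Context ctx) throws IOException, InterruptedException {
            int sum = 0;

            for (IntWritable val : vals)
                sum += val.get();

            ctx.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");

        job.setJarByClass(WordCountWithCombiner.class);
        job.setMapperClass(TokenizerMapper.class);

        // The combiner pre-aggregates map output locally, shrinking the shuffle.
        job.setCombinerClass(IntSumReducer.class);

        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}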

Usage

From source file:org.apache.ignite.internal.processors.hadoop.examples.HadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Reducer.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }
}
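
A design note on this example: the same class, HadoopWordCount2Reducer, is registered as both combiner and reducer. Reusing the reducer is safe here because word-count summation is commutative and associative, and because the class's output key/value types match the map output types, which is what any combiner must emit.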

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}
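
Two details are worth noticing here. First, setCombinerKeyGroupingComparatorClass is the combiner-side counterpart of setGroupingComparatorClass, controlling how keys are grouped during the combine phase. Second, the combiner branch runs with zero reduce tasks; in stock Hadoop a combiner only runs while map output is being spilled for reducers, so this combination exercises Ignite's own map-side combine path.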

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopJobTrackerSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testTaskWithCombinerPerMap() throws Exception {
    try {
        UUID globalId = UUID.randomUUID();

        Job job = Job.getInstance();
        setupFileSystems(job.getConfiguration());

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setCombinerClass(TestCombiner.class);
        job.setInputFormatClass(InFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2"));

        GridHadoopJobId jobId = new GridHadoopJobId(globalId, 1);

        grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        checkStatus(jobId, false);

        info("Releasing map latch.");

        latch.get("mapAwaitLatch").countDown();

        checkStatus(jobId, false);

        // All maps are completed. We have a combiner, so no reducers should be executed
        // before combiner latch is released.

        U.sleep(50);

        assertEquals(0, reduceExecCnt.get());

        info("Releasing combiner latch.");

        latch.get("combineAwaitLatch").countDown();

        checkStatus(jobId, false);

        info("Releasing reduce latch.");

        latch.get("reduceAwaitLatch").countDown();

        checkStatus(jobId, true);

        assertEquals(10, mapExecCnt.get());
        assertEquals(10, combineExecCnt.get());
        assertEquals(1, reduceExecCnt.get());
    } finally {
        // Safety.
        latch.get("mapAwaitLatch").countDown();
        latch.get("combineAwaitLatch").countDown();
        latch.get("reduceAwaitLatch").countDown();
    }
}
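
The closing assertions capture the execution model under test: ten map tasks each trigger one combiner invocation (combineExecCnt equals mapExecCnt, matching the test's name), and the earlier assertEquals(0, reduceExecCnt.get()) verifies that the single reducer cannot start until the combiner latch has been released.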

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgfsHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopJobTrackerSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testTaskWithCombinerPerMap() throws Exception {
    try {
        UUID globalId = UUID.randomUUID();

        Job job = Job.getInstance();
        setupFileSystems(job.getConfiguration());

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setCombinerClass(TestCombiner.class);
        job.setInputFormatClass(InFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2"));

        HadoopJobId jobId = new HadoopJobId(globalId, 1);

        grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        checkStatus(jobId, false);

        info("Releasing map latch.");

        latch.get("mapAwaitLatch").countDown();

        checkStatus(jobId, false);

        // All maps are completed. We have a combiner, so no reducers should be executed
        // before combiner latch is released.

        U.sleep(50);

        assertEquals(0, reduceExecCnt.get());

        info("Releasing combiner latch.");

        latch.get("combineAwaitLatch").countDown();

        checkStatus(jobId, false);

        info("Releasing reduce latch.");

        latch.get("reduceAwaitLatch").countDown();

        checkStatus(jobId, true);

        assertEquals(10, mapExecCnt.get());
        assertEquals(10, combineExecCnt.get());
        assertEquals(1, reduceExecCnt.get());
    } finally {
        // Safety.
        latch.get("mapAwaitLatch").countDown();
        latch.get("combineAwaitLatch").countDown();
        latch.get("reduceAwaitLatch").countDown();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 2);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    try {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TestCountingMapper.class);
        job.setReducerClass(TestCountingReducer.class);
        job.setCombinerClass(TestCountingCombiner.class);

        FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT));

        job.submit();

        final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

        assertEquals(0, cntr.getValue());

        cntr.increment(10);

        assertEquals(10, cntr.getValue());

        // Transferring to map phase.
        setupLockFile.delete();

        // Transferring to reduce phase.
        mapLockFile.delete();

        job.waitForCompletion(false);

        assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

        final Counters counters = job.getCounters();

        assertNotNull("counters cannot be null", counters);
        assertEquals("wrong counters count", 3, counters.countCounters());
        assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
    } catch (Throwable t) {
        log.error("Unexpected exception", t);
    } finally {
        job.getCluster().close();
    }
}
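
The arithmetic of the final counter check is worth spelling out: the client increments COUNTER1 by 10 before the phases run, and the asserted total of 15 shows that increments made inside the remotely executed map, combine, and reduce tasks (a further 5) are merged into the value returned by job.getCounters().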

From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    try {
        job.setJobName(JOB_NAME);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);

        if (!noCombiners)
            job.setCombinerClass(TestCombiner.class);

        if (noReducers)
            job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TestOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

        job.submit();

        JobID jobId = job.getJobID();

        // Setup phase.
        JobStatus jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
        assert jobStatus.getMapProgress() == 0.0f;
        assert jobStatus.getReduceProgress() == 0.0f;

        U.sleep(2100);

        JobStatus recentJobStatus = job.getStatus();

        assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
                + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

        // Transferring to map phase.
        setupLockFile.delete();

        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getSetupProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);

        // Map phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
        assert jobStatus.getReduceProgress() == 0.0f;

        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old="
                + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress();

        // Transferring to reduce phase.
        mapLockFile.delete();

        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getMapProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);

        if (!noReducers) {
            // Reduce phase.
            jobStatus = job.getStatus();
            checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
            assert jobStatus.getSetupProgress() == 1.0f;
            assert jobStatus.getMapProgress() == 1.0f;
            assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

            // Ensure that reduces progress increases.
            U.sleep(2100);

            recentJobStatus = job.getStatus();

            assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                    + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

            reduceLockFile.delete();
        }

        job.waitForCompletion(false);

        jobStatus = job.getStatus();
        checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() == 1.0f;

        dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
    } finally {
        job.getCluster().close();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
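
The example above registers HadoopWordCount2Combiner separately from HadoopWordCount2Reducer. That class's source is not shown on this page; the sketch below is a hypothetical stand-in illustrating the usual shape of a standalone combiner, whose input and output types must both match the mapper's output types.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/** Hypothetical standalone combiner: pre-sums per-word counts on the map side. */
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> vals, Context ctx)
            throws IOException, InterruptedException {
        // Output must stay (Text, IntWritable): reducers consume this stream
        // exactly as if it had come straight from the mapper.
        int sum = 0;

        for (IntWritable val : vals)
            sum += val.get();

        ctx.write(key, new IntWritable(sum));
    }
}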