Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setCombinerClass, collected from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job. The combiner pre-aggregates map output before it is transferred to the reducers; the call throws IllegalStateException if the job has already been submitted.
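
Before the project-specific examples below, here is a minimal, self-contained word-count driver showing the call in context. This is an illustrative sketch only, not code from any project on this page; the class names and the argument-based input/output paths are assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountWithCombiner {
    /** Emits (word, 1) for every whitespace-separated token. */
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
            for (String tok : value.toString().split("\\s+")) {
                if (!tok.isEmpty()) {
                    word.set(tok);
                    ctx.write(word, ONE);
                }
            }
        }
    }

    /** Sums counts; usable as both combiner and reducer because summation is commutative and associative. */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> vals, Context ctx) throws IOException, InterruptedException {
            int sum = 0;

            for (IntWritable val : vals)
                sum += val.get();

            ctx.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");

        job.setJarByClass(WordCountWithCombiner.class);
        job.setMapperClass(TokenizerMapper.class);

        // The combiner pre-aggregates map output locally, shrinking the shuffle.
        job.setCombinerClass(IntSumReducer.class);

        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}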

Usage

From source file:org.apache.ignite.internal.processors.hadoop.examples.HadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Reducer.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }
}
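
A design note on this example: the same class, HadoopWordCount2Reducer, is registered as both combiner and reducer. Reusing the reducer is safe here because word-count summation is commutative and associative, and because the class's output key/value types match the map output types, which is what any combiner must emit.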

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}
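
Two details are worth noticing here. First, setCombinerKeyGroupingComparatorClass is the combiner-side counterpart of setGroupingComparatorClass, controlling how keys are grouped during the combine phase. Second, the combiner branch runs with zero reduce tasks; in stock Hadoop a combiner only runs while map output is being spilled for reducers, so this combination exercises Ignite's own map-side combine path.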

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopJobTrackerSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testTaskWithCombinerPerMap() throws Exception {
    try {
        UUID globalId = UUID.randomUUID();

        Job job = Job.getInstance();
        setupFileSystems(job.getConfiguration());

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setCombinerClass(TestCombiner.class);
        job.setInputFormatClass(InFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2"));

        GridHadoopJobId jobId = new GridHadoopJobId(globalId, 1);

        grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        checkStatus(jobId, false);

        info("Releasing map latch.");

        latch.get("mapAwaitLatch").countDown();

        checkStatus(jobId, false);

        // All maps are completed. We have a combiner, so no reducers should be executed
        // before combiner latch is released.

        U.sleep(50);

        assertEquals(0, reduceExecCnt.get());

        info("Releasing combiner latch.");

        latch.get("combineAwaitLatch").countDown();

        checkStatus(jobId, false);

        info("Releasing reduce latch.");

        latch.get("reduceAwaitLatch").countDown();

        checkStatus(jobId, true);

        assertEquals(10, mapExecCnt.get());
        assertEquals(10, combineExecCnt.get());
        assertEquals(1, reduceExecCnt.get());
    } finally {
        // Safety.
        latch.get("mapAwaitLatch").countDown();
        latch.get("combineAwaitLatch").countDown();
        latch.get("reduceAwaitLatch").countDown();
    }
}
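
The closing assertions capture the execution model under test: ten map tasks each trigger one combiner invocation (combineExecCnt equals mapExecCnt, matching the test's name), and the earlier assertEquals(0, reduceExecCnt.get()) verifies that the single reducer cannot start until the combiner latch has been released.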

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgfsHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopJobTrackerSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testTaskWithCombinerPerMap() throws Exception {
    try {
        UUID globalId = UUID.randomUUID();

        Job job = Job.getInstance();
        setupFileSystems(job.getConfiguration());

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setCombinerClass(TestCombiner.class);
        job.setInputFormatClass(InFormat.class);

        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2"));

        HadoopJobId jobId = new HadoopJobId(globalId, 1);

        grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        checkStatus(jobId, false);

        info("Releasing map latch.");

        latch.get("mapAwaitLatch").countDown();

        checkStatus(jobId, false);

        // All maps are completed. We have a combiner, so no reducers should be executed
        // before combiner latch is released.

        U.sleep(50);

        assertEquals(0, reduceExecCnt.get());

        info("Releasing combiner latch.");

        latch.get("combineAwaitLatch").countDown();

        checkStatus(jobId, false);

        info("Releasing reduce latch.");

        latch.get("reduceAwaitLatch").countDown();

        checkStatus(jobId, true);

        assertEquals(10, mapExecCnt.get());
        assertEquals(10, combineExecCnt.get());
        assertEquals(1, reduceExecCnt.get());
    } finally {
        // Safety.
        latch.get("mapAwaitLatch").countDown();
        latch.get("combineAwaitLatch").countDown();
        latch.get("reduceAwaitLatch").countDown();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 2);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval.
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    try {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TestCountingMapper.class);
        job.setReducerClass(TestCountingReducer.class);
        job.setCombinerClass(TestCountingCombiner.class);

        FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@" + PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@" + PATH_OUTPUT));

        job.submit();

        final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

        assertEquals(0, cntr.getValue());

        cntr.increment(10);

        assertEquals(10, cntr.getValue());

        // Transferring to map phase.
        setupLockFile.delete();

        // Transferring to reduce phase.
        mapLockFile.delete();

        job.waitForCompletion(false);

        assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

        final Counters counters = job.getCounters();

        assertNotNull("counters cannot be null", counters);
        assertEquals("wrong counters count", 3, counters.countCounters());
        assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
        assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
    } catch (Throwable t) {
        log.error("Unexpected exception", t);
    } finally {
        job.getCluster().close();
    }
}
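
The arithmetic of the final counter check is worth spelling out: the client increments COUNTER1 by 10 before the phases run, and the asserted total of 15 shows that increments made inside the remotely executed map, combine, and reduce tasks (a further 5) are merged into the value returned by job.getCounters().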

From source file:org.apache.ignite.internal.processors.hadoop.impl.client.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    try {
        job.setJobName(JOB_NAME);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);

        if (!noCombiners)
            job.setCombinerClass(TestCombiner.class);

        if (noReducers)
            job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TestOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

        job.submit();

        JobID jobId = job.getJobID();

        // Setup phase.
        JobStatus jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
        assert jobStatus.getMapProgress() == 0.0f;
        assert jobStatus.getReduceProgress() == 0.0f;

        U.sleep(2100);

        JobStatus recentJobStatus = job.getStatus();

        assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
                + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

        // Transferring to map phase.
        setupLockFile.delete();

        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getSetupProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);

        // Map phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
        assert jobStatus.getReduceProgress() == 0.0f;

        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old="
                + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress();

        // Transferring to reduce phase.
        mapLockFile.delete();

        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getMapProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);

        if (!noReducers) {
            // Reduce phase.
            jobStatus = job.getStatus();
            checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
            assert jobStatus.getSetupProgress() == 1.0f;
            assert jobStatus.getMapProgress() == 1.0f;
            assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

            // Ensure that reduces progress increases.
            U.sleep(2100);

            recentJobStatus = job.getStatus();

            assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                    + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

            reduceLockFile.delete();
        }

        job.waitForCompletion(false);

        jobStatus = job.getStatus();
        checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() == 1.0f;

        dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
    } finally {
        job.getCluster().close();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
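
The example above registers HadoopWordCount2Combiner separately from HadoopWordCount2Reducer. That class's source is not shown on this page; the sketch below is a hypothetical stand-in illustrating the usual shape of a standalone combiner, whose input and output types must both match the mapper's output types.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/** Hypothetical standalone combiner: pre-sums per-word counts on the map side. */
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> vals, Context ctx)
            throws IOException, InterruptedException {
        // Output must stay (Text, IntWritable): reducers consume this stream
        // exactly as if it had come straight from the mapper.
        int sum = 0;

        for (IntWritable val : vals)
            sum += val.get();

        ctx.write(key, new IntWritable(sum));
    }
}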