Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the combiner class for the job.

Usage

From source file:org.apache.hadoop.examples.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }//from   ww  w. j  av a2s . co m

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");
        sortJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}

From source file:org.apache.hadoop.examples.MultiFileWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();/* w w  w .  j a v a 2  s  . c om*/
        return 2;
    }

    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.apache.hadoop.examples.WordMean.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }//w  ww .  j av  a2s  .c om

    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);

    return (result ? 0 : 1);
}

From source file:org.apache.hadoop.examples.WordMedian.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }//from  w w  w .  j av a 2 s.c om

    setConf(new Configuration());
    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}

From source file:org.apache.hadoop.examples.WordStandardDeviation.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }/*from  w  ww .  ja v  a 2  s .  c om*/

    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);

    return (result ? 0 : 1);
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval./* www . j  av a2s .c  o  m*/
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.//w w w.  jav  a2 s  .c o  m
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setJobName(JOB_NAME);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    if (!noCombiners)
        job.setCombinerClass(TestCombiner.class);

    if (noReducers)
        job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TestOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    JobID jobId = job.getJobID();

    // Setup phase.
    JobStatus jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
    assert jobStatus.getMapProgress() == 0.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    U.sleep(2100);

    JobStatus recentJobStatus = job.getStatus();

    assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
            + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

    // Transferring to map phase.
    setupLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getSetupProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    // Map phase.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    U.sleep(2100);

    recentJobStatus = job.getStatus();

    assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress()
            + ", new=" + recentJobStatus.getMapProgress();

    // Transferring to reduce phase.
    mapLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getMapProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    if (!noReducers) {
        // Reduce phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

        // Ensure that reduces progress increases.
        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

        reduceLockFile.delete();
    }

    job.waitForCompletion(false);

    jobStatus = job.getStatus();
    checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() == 1.0f;
    assert jobStatus.getReduceProgress() == 1.0f;

    dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval./*from w  ww . j  a v a2 s.  c  o  m*/
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.//from   ww w .  ja  v  a 2  s.c  o m
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setJobName(JOB_NAME);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    if (!noCombiners)
        job.setCombinerClass(TestCombiner.class);

    if (noReducers)
        job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TestOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    JobID jobId = job.getJobID();

    // Setup phase.
    JobStatus jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
    assert jobStatus.getMapProgress() == 0.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    U.sleep(2100);

    JobStatus recentJobStatus = job.getStatus();

    assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
            + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

    // Transferring to map phase.
    setupLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getSetupProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    // Map phase.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    U.sleep(2100);

    recentJobStatus = job.getStatus();

    assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress()
            + ", new=" + recentJobStatus.getMapProgress();

    // Transferring to reduce phase.
    mapLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getMapProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    if (!noReducers) {
        // Reduce phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

        // Ensure that reduces progress increases.
        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

        reduceLockFile.delete();
    }

    job.waitForCompletion(false);

    jobStatus = job.getStatus();
    checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() == 1.0f;
    assert jobStatus.getReduceProgress() == 1.0f;

    dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
}

From source file:org.apache.ignite.internal.processors.hadoop.examples.GridHadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change./*from   w  w  w. j  av a2  s. co  m*/
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) {
    if (setMapper) {
        job.setMapperClass(GridHadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(GridHadoopWordCount2Reducer.class);

    if (setReducer) {
        job.setReducerClass(GridHadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }
}