Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the combiner class for the job.

Usage

From source file:org.apache.hadoop.examples.Grep.java

License:Apache License

/**
 * Runs the two-stage grep example.
 *
 * <p>Stage one ("grep-search") uses {@code RegexMapper} with {@code LongSumReducer} as both
 * combiner and reducer and writes a sequence file into a randomly named temporary directory.
 * Stage two ("grep-sort") reads that directory, inverts the pairs with {@code InverseMapper}
 * and sorts them with {@code LongWritable.DecreasingComparator} into a single output file.
 * The temporary directory is always deleted, whether or not the jobs succeed.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 if fewer than three arguments are given, 1 if either job fails, 0 otherwise
 * @throws Exception if job configuration, execution, or temporary-directory cleanup fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure instead.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");
        sortJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        // Runs on every exit path, including the early failure return above.
        FileSystem.get(conf).delete(tempDir, true);
    }
}

From source file:org.apache.hadoop.examples.MultiFileWordCount.java

License:Apache License

/**
 * Configures and executes the "MultiFileWordCount" job.
 *
 * @param args {@code args[0]} holds the input path(s), {@code args[1]} the output directory
 * @return 2 when fewer than two arguments are supplied; otherwise 0 if the job succeeds
 *         and 1 if it does not
 * @throws Exception propagated from job configuration or execution
 */
public int run(String[] args) throws Exception {

    final boolean tooFewArguments = args.length < 2;
    if (tooFewArguments) {
        printUsage();
        return 2;
    }

    final Job wordCount = Job.getInstance(getConf());
    wordCount.setJobName("MultiFileWordCount");
    wordCount.setJarByClass(MultiFileWordCount.class);

    // Input is read through the custom InputFormat.
    wordCount.setInputFormatClass(MyInputFormat.class);

    // Output records: Text keys (the words) paired with IntWritable values (the counts).
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    // Mapper, then the same summing reducer used as both combiner and reducer.
    wordCount.setMapperClass(MapClass.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    final String inputPaths = args[0];
    final Path outputDir = new Path(args[1]);
    FileInputFormat.addInputPaths(wordCount, inputPaths);
    FileOutputFormat.setOutputPath(wordCount, outputDir);

    final boolean succeeded = wordCount.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:org.apache.hadoop.examples.WordMean.java

License:Apache License

/**
 * Runs the "word mean" job and, if it succeeds, stores the mean computed from its output
 * in the {@code mean} field.
 *
 * @param args {@code <in> <out>}: input path and output directory
 * @return 2 if the argument count is not exactly two, 1 if the job fails, 0 on success
 * @throws Exception if job configuration, execution, or reading the output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        // A usage error is a failure; report it with a non-zero exit code.
        return 2;
    }

    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);

    // Only read the output when the job actually produced it.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    mean = readAndCalcMean(outputpath, conf);

    return 0;
}

From source file:org.apache.hadoop.examples.WordMedian.java

License:Apache License

/**
 * Runs the "word median" job and, if it succeeds, stores the median found in its output
 * in the {@code median} field.
 *
 * <p>The total is taken from the job's {@code MAP_OUTPUT_RECORDS} task counter; the two
 * median indices are the ceiling and floor of half that total.
 *
 * @param args {@code <in> <out>}: input path and output directory
 * @return 2 if the argument count is not exactly two, 1 if the job fails, 0 on success
 * @throws Exception if job configuration, execution, or reading the output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        // A usage error is a failure; report it with a non-zero exit code.
        return 2;
    }

    // Keep a configuration that was already installed on this tool; only fall back to a
    // fresh one when none has been set.
    if (getConf() == null) {
        setConf(new Configuration());
    }
    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Counters and output are only meaningful once the job has completed successfully.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    // For an odd total both indices differ by one; for an even total they coincide.
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return 0;
}

From source file:org.apache.hadoop.examples.WordStandardDeviation.java

License:Apache License

/**
 * Runs the "word stddev" job and, if it succeeds, stores the standard deviation computed
 * from its output in the {@code stddev} field.
 *
 * @param args {@code <in> <out>}: input path and output directory
 * @return 2 if the argument count is not exactly two, 1 if the job fails, 0 on success
 * @throws Exception if job configuration, execution, or reading the output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        // A usage error is a failure; report it with a non-zero exit code.
        return 2;
    }

    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);

    // Only read the output when the job actually produced it.
    if (!job.waitForCompletion(true)) {
        return 1;
    }

    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);

    return 0;
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval./* www . j  av a2s .c  o  m*/
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.client.hadoop.GridHadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.
 *
 * <p>Submits a job and walks it through the setup, map and (optionally) reduce phases.
 * In each phase the test checks the reported status, sleeps, checks that the phase's
 * progress has grown, and then deletes the corresponding lock file before moving on.
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFs igfs = grid(0).fileSystem(GridHadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(GridHadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setJobName(JOB_NAME);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    if (!noCombiners)
        job.setCombinerClass(TestCombiner.class);

    if (noReducers)
        job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TestOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    JobID jobId = job.getJobID();

    // Setup phase.
    JobStatus jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
    assert jobStatus.getMapProgress() == 0.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    // Ensure that setup progress increases.
    U.sleep(2100);

    JobStatus recentJobStatus = job.getStatus();

    assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
            + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

    // Transferring to map phase.
    setupLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getSetupProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    // Map phase.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    // Ensure that map progress increases.
    U.sleep(2100);

    recentJobStatus = job.getStatus();

    assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress()
            + ", new=" + recentJobStatus.getMapProgress();

    // Transferring to reduce phase.
    mapLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getMapProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    if (!noReducers) {
        // Reduce phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

        // Ensure that reduces progress increases.
        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

        reduceLockFile.delete();
    }

    job.waitForCompletion(false);

    // Validate the same status snapshot that the progress assertions below inspect,
    // as every earlier phase does.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() == 1.0f;
    assert jobStatus.getReduceProgress() == 1.0f;

    dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Tests job counters retrieval./*from w  ww . j  a v a2 s.  c  o  m*/
 *
 * @throws Exception If failed.
 */
public void testJobCounters() throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n" + "gamma\n" + "alpha\n" + "beta\n"
                + "gamma\n");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestCountingMapper.class);
    job.setReducerClass(TestCountingReducer.class);
    job.setCombinerClass(TestCountingCombiner.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    final Counter cntr = job.getCounters().findCounter(TestCounter.COUNTER1);

    assertEquals(0, cntr.getValue());

    cntr.increment(10);

    assertEquals(10, cntr.getValue());

    // Transferring to map phase.
    setupLockFile.delete();

    // Transferring to reduce phase.
    mapLockFile.delete();

    job.waitForCompletion(false);

    assertEquals("job must end successfully", JobStatus.State.SUCCEEDED, job.getStatus().getState());

    final Counters counters = job.getCounters();

    assertNotNull("counters cannot be null", counters);
    assertEquals("wrong counters count", 3, counters.countCounters());
    assertEquals("wrong counter value", 15, counters.findCounter(TestCounter.COUNTER1).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER2).getValue());
    assertEquals("wrong counter value", 3, counters.findCounter(TestCounter.COUNTER3).getValue());
}

From source file:org.apache.ignite.client.hadoop.HadoopClientProtocolSelfTest.java

License:Apache License

/**
 * Test job submission.
 *
 * <p>Submits a job and walks it through the setup, map and (optionally) reduce phases.
 * In each phase the test checks the reported status, sleeps, checks that the phase's
 * progress has grown, and then deletes the corresponding lock file before moving on.
 *
 * @param noCombiners Whether there are no combiners.
 * @param noReducers Whether there are no reducers.
 * @throws Exception If failed.
 */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);

    igfs.mkdirs(new IgfsPath(PATH_INPUT));

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {

        bw.write("word");
    }

    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);

    final Job job = Job.getInstance(conf);

    job.setJobName(JOB_NAME);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    if (!noCombiners)
        job.setCombinerClass(TestCombiner.class);

    if (noReducers)
        job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TestOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));

    job.submit();

    JobID jobId = job.getJobID();

    // Setup phase.
    JobStatus jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
    assert jobStatus.getMapProgress() == 0.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    // Ensure that setup progress increases.
    U.sleep(2100);

    JobStatus recentJobStatus = job.getStatus();

    assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old="
            + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();

    // Transferring to map phase.
    setupLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getSetupProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    // Map phase.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
    assert jobStatus.getReduceProgress() == 0.0f;

    // Ensure that map progress increases.
    U.sleep(2100);

    recentJobStatus = job.getStatus();

    assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress()
            + ", new=" + recentJobStatus.getMapProgress();

    // Transferring to reduce phase.
    mapLockFile.delete();

    assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override
        public boolean apply() {
            try {
                return F.eq(1.0f, job.getStatus().getMapProgress());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception.", e);
            }
        }
    }, 5000L);

    if (!noReducers) {
        // Reduce phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;

        // Ensure that reduces progress increases.
        U.sleep(2100);

        recentJobStatus = job.getStatus();

        assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old="
                + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();

        reduceLockFile.delete();
    }

    job.waitForCompletion(false);

    // Validate the same status snapshot that the progress assertions below inspect,
    // as every earlier phase does.
    jobStatus = job.getStatus();
    checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
    assert jobStatus.getSetupProgress() == 1.0f;
    assert jobStatus.getMapProgress() == 1.0f;
    assert jobStatus.getReduceProgress() == 1.0f;

    dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
}

From source file:org.apache.ignite.internal.processors.hadoop.examples.GridHadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change./*from   w  w  w. j  av a2  s. co  m*/
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer) {
    if (setMapper) {
        job.setMapperClass(GridHadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(GridHadoopWordCount2Reducer.class);

    if (setReducer) {
        job.setReducerClass(GridHadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }
}