Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

This page collects real-world usage examples of org.apache.hadoop.mapreduce.Job#setCombinerClass from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
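
Before the collected examples, here is a minimal, self-contained word-count driver showing the method in context. This is an illustrative sketch, not taken from any of the projects below; the class names (WordCountDriver, TokenizerMapper, IntSumReducer) and the argument-based paths are assumptions for the example. The key constraint to remember: the combiner must be a Reducer whose input and output key/value types both match the map output types, because its output replaces the raw map output in the shuffle.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    /** Splits each input line into tokens and emits (word, 1) pairs. */
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context ctx)
                throws IOException, InterruptedException {
            StringTokenizer it = new StringTokenizer(value.toString());
            while (it.hasMoreTokens()) {
                word.set(it.nextToken());
                ctx.write(word, ONE);
            }
        }
    }

    /**
     * Used both as combiner and reducer: summing is associative and
     * commutative, and its input and output types match the map output types.
     */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context ctx)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values)
                sum += v.get();
            ctx.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class); // pre-aggregate map output before the shuffle
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}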

Usage

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner Whether to run the job with a combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(HadoopGroupingTestState.values().isEmpty());
}
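
Note the symmetry this test exercises: setCombinerKeyGroupingComparatorClass is the combiner-side counterpart of setGroupingComparatorClass. The former controls how keys are grouped when the combiner runs; the latter controls grouping at the reducer. The same YearComparator serves both paths.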

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapCombineRun() throws Exception {
    int lineCnt = 10001;
    String fileName = "/testFile";

    prepareFile(fileName, lineCnt);

    totalLineCnt.set(0);
    taskWorkDirs.clear();

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    cfg.setBoolean(MAP_WRITE, true);

    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestCombiner.class);
    job.setReducerClass(TestReducer.class);

    job.setNumReduceTasks(2);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));

    job.setJarByClass(getClass());

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 2);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

    fut.get();

    assertEquals(lineCnt, totalLineCnt.get());

    assertEquals(34, taskWorkDirs.size());

    for (int g = 0; g < gridCount(); g++)
        grid(g).hadoop().finishFuture(jobId).get();
}
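
Unlike most of the examples on this page, this test registers a combiner class (TestCombiner) that is distinct from the reducer class (TestReducer). Both arrangements are valid as long as the combiner consumes and produces the map output types, here Text and IntWritable.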

From source file:org.apache.ignite.internal.processors.hadoop.impl.taskexecutor.external.HadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job,
            new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    fut.get();
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.taskexecutor.external.HadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestFailingMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job,
            new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    try {
        fut.get();
    } catch (IgniteCheckedException e) {
        IOException exp = X.cause(e, IOException.class);

        assertNotNull(exp);
        assertEquals("Test failure", exp.getMessage());
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.taskexecutor.external.GridHadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    fut.get();
}

From source file:org.apache.ignite.internal.processors.hadoop.taskexecutor.external.GridHadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestFailingMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    try {
        fut.get();
    } catch (IgniteCheckedException e) {
        IOException exp = X.cause(e, IOException.class);

        assertNotNull(exp);
        assertEquals("Test failure", exp.getMessage());
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.taskexecutor.external.HadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    fut.get();
}

From source file:org.apache.ignite.internal.processors.hadoop.taskexecutor.external.HadoopExternalTaskExecutionSelfTest.java

License:Apache License

/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();

    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestFailingMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestGridName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestGridName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
            createJobInfo(job.getConfiguration()));

    try {
        fut.get();
    } catch (IgniteCheckedException e) {
        IOException exp = X.cause(e, IOException.class);

        assertNotNull(exp);
        assertEquals("Test failure", exp.getMessage());
    }
}

From source file:org.apache.jena.tdbloader4.StatsDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(Constants.NAME_STATS);
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(NQuadsInputFormat.class);

    job.setMapperClass(StatsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(StatsReducer.class);

    job.setReducerClass(StatsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // we use the combiner, 1 reducer here is not a problem
    configuration.set(Constants.OPTION_NUM_REDUCERS, "1");
    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}
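
As the inline comment notes, a single reducer is acceptable here precisely because StatsReducer is also registered as the combiner: most of the aggregation happens map-side, so only pre-combined counts reach the lone reducer.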

From source file:org.apache.mahout.avro.text.mapreduce.AvroDocumentsWordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return -1;
    }

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    Job job = new Job(conf, "word count");
    job.setJarByClass(AvroDocumentsWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(AvroInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    AvroInputFormat.setAvroInputClass(job, AvroDocument.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}
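
Reusing IntSumReducer as the combiner is the classic word-count pattern. It is safe because integer summation is associative and commutative and the class's input and output types are identical; since Hadoop may run a combiner zero, one, or several times over the same map output, only such re-combinable reducers should be passed to setCombinerClass.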