Example usage for org.apache.hadoop.mapreduce.Job.getCombinerClass()

Introduction

This page collects example usages of the getCombinerClass() method of org.apache.hadoop.mapreduce.Job.

Prototype

@SuppressWarnings("unchecked")
public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException

Source Link

Document

Get the combiner class for the job.

Usage

From source file:com.example.Driver.java

License:Open Source License

/**
 * Configures the MapReduce job built around {@code WordcountMapper} and
 * {@code WordcountReducer}, logs every setting as it is applied, and runs the
 * job to completion.
 *
 * @param args command-line arguments; {@code args[0]} is used as the input path
 *             and {@code args[1]} as the output directory
 * @return {@code 0} when the job completes successfully; {@code 1} when fewer
 *         than two arguments are supplied or the job fails
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    // Both the input path and the output directory are mandatory.
    if (args.length < 2) {
        logger.warn("at least 2 parameters are required to run this jar: \"" + job.getJar()
                + " input_files output_directory\"");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // The map output key/value classes are deliberately not set here; the log
    // lines below report whatever the job resolves them to.
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    // The reducer doubles as the combiner.
    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    // When you are not running any reducer, use this instead:
    //      job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Propagate the job outcome to the caller instead of always reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job with a gatherer, a combiner and a reducer, then checks
 * that the resulting Hadoop job reports those classes.
 */
@Test
public void testBuildValid() throws Exception {
    final FijiMapReduceJob builtJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(SimpleGatherer.class)
            .withCombiner(MyCombiner.class)
            .withReducer(MyReducer.class)
            .withOutput(MapReduceJobOutputs.newTextMapReduceJobOutput(new Path("mypath"), 10))
            .build();

    // TODO: Verify that the MR Job was configured correctly.
    final Job hadoopJob = builtJob.getHadoopJob();

    // The gatherer is recorded by class name in the job configuration.
    assertEquals(
            SimpleGatherer.class.getName(),
            hadoopJob.getConfiguration().get(FijiConfKeys.FIJI_GATHERER_CLASS));
    assertEquals(MyCombiner.class, hadoopJob.getCombinerClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job that writes straight to an HFile output (no combiner or
 * reducer requested) and checks the classes and reduce-task count reported by
 * the resulting Hadoop job.
 */
@Test
public void testGatherToHFile() throws Exception {
    final FijiMapReduceJob builtJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(GatherToHFile.class)
            .withOutput(
                    MapReduceJobOutputs.newHFileMapReduceJobOutput(
                            mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = builtJob.getHadoopJob();

    // The gatherer is recorded by class name in the job configuration.
    assertEquals(
            GatherToHFile.class.getName(),
            hadoopJob.getConfiguration().get(FijiConfKeys.FIJI_GATHERER_CLASS));

    // No combiner was requested, so none is expected on the job.
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(IdentityReducer.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    assertEquals(FijiHFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.moz.fiji.mapreduce.TestFijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job with an explicit reducer and an HFile output, then
 * checks the classes and reduce-task count reported by the resulting Hadoop job.
 */
@Test
public void testGatherReducerToHFile() throws Exception {
    final FijiMapReduceJob builtJob = FijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable.getURI())
            .withGatherer(SimpleGatherer.class)
            .withReducer(ReducerToHFile.class)
            .withOutput(
                    MapReduceJobOutputs.newHFileMapReduceJobOutput(
                            mTable.getURI(), getLocalTestPath("hfile"), 10))
            .build();

    final Job hadoopJob = builtJob.getHadoopJob();

    // The gatherer is recorded by class name in the job configuration.
    assertEquals(
            SimpleGatherer.class.getName(),
            hadoopJob.getConfiguration().get(FijiConfKeys.FIJI_GATHERER_CLASS));

    // No combiner was requested, so none is expected on the job.
    assertEquals(null, hadoopJob.getCombinerClass());
    assertEquals(ReducerToHFile.class, hadoopJob.getReducerClass());
    assertEquals(10, hadoopJob.getNumReduceTasks());

    assertEquals(SequenceFileOutputFormat.class, hadoopJob.getOutputFormatClass());
    assertEquals(HFileKeyValue.class, hadoopJob.getOutputKeyClass());
    assertEquals(NullWritable.class, hadoopJob.getOutputValueClass());
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Registers the dependency jars, plus the jars of every class configured on
 * the job, in the job configuration so that JobClient ships them to the
 * cluster and adds them to the DistributedCache.
 *
 * @param job the job whose configured classes are inspected
 * @throws IOException if a configured job class cannot be found; the
 *         {@link ClassNotFoundException} is attached as the cause
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(
                job.getConfiguration(),
                // fixed third-party dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                // classes configured on the job
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License:Open Source License

/**
 * Registers the HBase dependency jars, plus the jars of every class configured
 * on the job, in the job configuration so that JobClient ships them to the
 * cluster and adds them to the DistributedCache.
 *
 * @param job the job whose configured classes are inspected
 * @throws IOException if a configured job class cannot be found; the
 *         {@link ClassNotFoundException} is attached as the cause
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(
                job.getConfiguration(),
                // fixed third-party dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                // classes configured on the job
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License:Apache License

/**
 * Registers the Blur dependency jars, plus the jars of every class configured
 * on the job, in the job configuration so that JobClient ships them to the
 * cluster and adds them to the DistributedCache.
 *
 * @param job the job whose configured classes are inspected
 * @throws IOException if a configured job class cannot be found; the
 *         {@link ClassNotFoundException} is attached as the cause
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(
                job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                // classes configured on the job
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass(),
                DocumentVisibility.class);

        // Runs only after the class-based registration above has succeeded.
        addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License:Apache License

/**
 * Registers the Kudu dependency jars, plus the jars of every class configured
 * on the job, in the job configuration so that JobClient ships them to the
 * cluster and adds them to the DistributedCache.
 *
 * @param job the job whose configured classes are inspected
 * @throws IOException if a configured job class cannot be found; the
 *         {@link ClassNotFoundException} is attached as the cause
 */
public static void addDependencyJars(Job job) throws IOException {
    // Kudu's own jars are registered first, outside the try block.
    addKuduDependencyJars(job.getConfiguration());
    try {
        // when making changes here, consider also mapred.TableMapReduceUtil
        addDependencyJars(
                job.getConfiguration(),
                // pull job classes
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License:Apache License

/**
 * Registers the HBase dependency jars, plus the jars of every class configured
 * on the job, in the job configuration so that JobClient ships them to the
 * cluster and adds them to the DistributedCache.
 *
 * @param job the job whose configured classes are inspected
 * @throws IOException if a configured job class cannot be found; the
 *         {@link ClassNotFoundException} is attached as the cause
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(
                job.getConfiguration(),
                // fixed third-party dependencies
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                // one class from hbase.jar
                org.apache.hadoop.hbase.util.Bytes.class,
                // classes configured on the job
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    }
}

From source file:org.kiji.mapreduce.TestKijiGatherJobBuilder.java

License:Apache License

/**
 * Builds a gather job with a gatherer, a combiner and a reducer, then checks
 * that the resulting Hadoop job reports those classes.
 */
@Test
public void testBuildValid() throws Exception {
    final MapReduceJob builtJob = KijiGatherJobBuilder.create()
            .withConf(getConf())
            .withInputTable(mTable)
            .withGatherer(SimpleGatherer.class)
            .withCombiner(MyCombiner.class)
            .withReducer(MyReducer.class)
            .withOutput(new TextMapReduceJobOutput(new Path("mypath"), 10))
            .build();

    // TODO: Verify that the MR Job was configured correctly.
    final Job hadoopJob = builtJob.getHadoopJob();

    // The gatherer is recorded by class name in the job configuration.
    assertEquals(
            SimpleGatherer.class.getName(),
            hadoopJob.getConfiguration().get(KijiConfKeys.KIJI_GATHERER_CLASS));
    assertEquals(MyCombiner.class, hadoopJob.getCombinerClass());
    assertEquals(MyReducer.class, hadoopJob.getReducerClass());
}