List of usage examples for org.apache.hadoop.mapred JobConf setCombinerClass
public void setCombinerClass(Class<? extends Reducer> theClass)
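A combiner is an optional map-side pre-reducer: it aggregates map output locally before the shuffle, cutting the volume of data sent over the network. Because the framework may run it zero, one, or several times, the operation must be associative and commutative, and the combiner's input and output key/value types must both match the map output types. That is why the examples below typically reuse the reducer class as the combiner. The following is a minimal, self-contained sketch of that pattern; the class names CombinerExample, TokenMap, and SumReduce are illustrative, not taken from any example on this page.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class CombinerExample {

    public static class TokenMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Summing is associative and commutative, so the same class can serve as
    // both combiner and reducer; its input and output types (Text, IntWritable)
    // both match the map output types, as setCombinerClass requires.
    public static class SumReduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(CombinerExample.class);
        conf.setJobName("combiner-example");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(TokenMap.class);
        conf.setCombinerClass(SumReduce.class); // runs on map-side partial output
        conf.setReducerClass(SumReduce.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // input path supplied by caller
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // output path supplied by caller
        JobClient.runJob(conf);
    }
}

For operations that are not associative and commutative, such as computing an average, the reducer cannot be reused this way; a separate combiner that emits partial aggregates (for example, sum and count pairs) would be needed.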
From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");
    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });
    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file:org.pentaho.wordcount.LocalWordCount.java
License:Open Source License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException when there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), LocalWordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (longs)
    conf.setOutputValueClass(LongWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(GenericTransReduce.class);
    conf.setReducerClass(GenericTransReduce.class);

    // these are set so the job runs in the same JVM as the debugger;
    // we are not submitting to an MR node.
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "local");

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:org.sf.xrime.algorithms.layout.radialtree.Raw2OutLabeledAdjVertexTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Raw2OutLabeledAdjVertexTransformer.class);
    jobConf.setJobName("Smth - OutLabeledAdjVertexesList");

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // the keys are author names (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes (Writable)
    jobConf.setOutputValueClass(LabeledAdjVertex.class);

    // makes the file format suitable for machine processing.
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetMinorExpand.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetMinorExpand.class);
    conf.setJobName("EdgeSetMinorExpand");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetMinorJoin.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetMinorJoin.class);
    conf.setJobName("EdgeSetMinorJoin");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    // Combiner is permitted here.
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.bi.EdgeSetSummarize.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetSummarize.class);
    conf.setJobName("EdgeSetSummarize");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.strongly.LabelPropagation.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, LabelPropagation.class);
    conf.setJobName("LabelPropagation");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    conf.setOutputValueClass(LabeledAdjBiSetVertex.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetMinorExpand.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetMinorExpand.class);
    conf.setJobName("VertexSetMinorExpand");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are ultimate vertex set identifiers/labels (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetMinorJoin.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetMinorJoin.class);
    conf.setJobName("VertexSetMinorJoin");

    // the keys are vertex identifiers (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are vertex sets (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    // Combiner is permitted here.
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.partitions.connected.weakly.alg_1.VertexSetSummarize.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, VertexSetSummarize.class);
    conf.setJobName("VertexSetSummarize");

    // the keys are vertex labels (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are vertex sets (Writable)
    conf.setOutputValueClass(VertexSet.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }

    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}