Example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the map output data.
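
A minimal sketch of typical usage (MyJob is a hypothetical placeholder, not taken from the examples below): the map output key class only needs to be declared explicitly when the mapper emits a different key type than the job's final output; otherwise JobConf falls back to the (final) output key class.

JobConf conf = new JobConf(MyJob.class); // MyJob is a placeholder driver class
conf.setOutputKeyClass(Text.class); // final (reducer) output key
conf.setOutputValueClass(LongWritable.class); // final (reducer) output value
// The mapper emits a different intermediate key/value pair, so the
// intermediate classes must be set separately:
conf.setMapOutputKeyClass(IntWritable.class);
conf.setMapOutputValueClass(Text.class);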

Usage

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
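
These assertions pass because the test sets the map output classes explicitly. JobConf also documents a fallback: if setMapOutputKeyClass/setMapOutputValueClass are never called, getMapOutputKeyClass() and getMapOutputValueClass() return the (final) output classes instead. A minimal sketch of that fallback behavior:

JobConf conf = new JobConf();
conf.setOutputKeyClass(Text.class);
// No setMapOutputKeyClass(...) call was made, so the intermediate key
// class falls back to the final output key class:
assertEquals(Text.class, conf.getMapOutputKeyClass());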

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file: org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License: Apache License

@Test
public void testReducerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransReduce reducer = new GenericTransReduce();

    reducer.configure(jobConf);

    assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
    assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
}

From source file: org.pentaho.hadoop.sample.wordcount.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    String hdfsHost = "localhost:9000";
    String jobTrackerHost = "localhost:9001";
    String fsPrefix = "hdfs";

    String dirInput = "/wordcount/input";
    String dirOutput = "/wordcount/output";

    if (args.length == 1 && (args[0].equals("--help") || args[0].equals("-h") || args[0].equals("/?"))) {
        System.out.println("Usage: WordCount <options>");
        System.out.println();
        System.out.println("Options:");
        System.out.println();
        System.out.println("--input=DIR                   The directory containing the input files for the");
        System.out.println("                              WordCount Hadoop job");
        System.out.println("--output=DIR                  The directory where the results of the WordCount");
        System.out.println("                              Hadoop job will be stored");
        System.out.println("--hdfsHost=HOST               The host<:port> of the HDFS service");
        System.out.println("                              e.g.- localhost:9000");
        System.out.println("--jobTrackerHost=HOST         The host<:port> of the job tracker service");
        System.out.println("                              e.g.- localhost:9001");
        System.out.println("--fsPrefix=PREFIX             The prefix to use for for the filesystem");
        System.out.println("                              e.g.- hdfs");
        System.out.println();
        System.out.println();
        System.out.println("If an option is not provided through the command prompt the following defaults");
        System.out.println("will be used:");
        System.out.println("--input='/wordcount/input'");
        System.out.println("--output='/wordcount/output'");
        System.out.println("--hdfsHost=localhost:9000");
        System.out.println("--jobTrackerHost=localhost:9001");
        System.out.println("--fsPrefix=hdfs");

    } else {
        if (args.length > 0) {
            for (String arg : args) {
                if (arg.startsWith("--input=")) {
                    dirInput = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--output=")) {
                    dirOutput = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--hdfsHost=")) {
                    hdfsHost = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--jobTrackerHost=")) {
                    jobTrackerHost = WordCount.getArgValue(arg);
                } else if (arg.startsWith("--fsPrefix=")) {
                    fsPrefix = WordCount.getArgValue(arg);
                }
            }
        }

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");

        String hdfsBaseUrl = fsPrefix + "://" + hdfsHost;
        conf.set("fs.default.name", hdfsBaseUrl + "/");
        if (jobTrackerHost != null && jobTrackerHost.length() > 0) {
            conf.set("mapred.job.tracker", jobTrackerHost);
        }

        FileInputFormat.setInputPaths(conf, new Path[] { new Path(hdfsBaseUrl + dirInput) });
        FileOutputFormat.setOutputPath(conf, new Path(hdfsBaseUrl + dirOutput));

        conf.setMapperClass(WordCountMapper.class);
        conf.setReducerClass(WordCountReducer.class);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        JobClient.runJob(conf);
    }
}
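
The WordCountMapper and WordCountReducer classes referenced above are not part of this listing. A minimal sketch of what such old-API (org.apache.hadoop.mapred) implementations typically look like; these are illustrative stand-ins, not the actual Pentaho sample classes:

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// WordCountMapper.java
public class WordCountMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        // Emit <word, 1> for each token; the emitted types must match the
        // job's setMapOutputKeyClass/setMapOutputValueClass settings.
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            output.collect(word, ONE);
        }
    }
}

// WordCountReducer.java
public class WordCountReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        // Sum the counts for each word.
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}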

From source file: org.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License: Apache License

public static void main(String[] args) throws Exception {

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf(baseConf, MeanChiSquareDistanceCalculation.class);
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    conf.setJobName("mean_chi_square_calculation");

    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name- " + conf.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setOutputFormat(TextOutputFormat.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, args[0]);

    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    JobClient.runJob(conf);
}
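
Two details worth noting here: both sides of the CartesianInputFormat are configured with the same input path (args[0]), so the job pairs every record with every other record (a self cross product) before computing per-pair chi-square distances; and CartesianInputFormat appears to be a custom input format bundled with the project, not part of stock Hadoop. Also, mapreduce.job.maps and setNumMapTasks are hints rather than hard guarantees; the actual number of map tasks depends on the input splits.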

From source file: org.sf.xrime.algorithms.BC.Pajek2LabeledAdjBiSetVertexTransformer.java

License: Apache License

public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Pajek2LabeledAdjBiSetVertexTransformer.class);
    jobConf.setJobName("tansfer_pajek2LabeledAdjBiSetvert");

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(LabeledAdjBiSetVertex.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.BCApproximation.Pajek2LabeledAdjBiSetVertexTransformer.java

License: Apache License

public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, Pajek2LabeledAdjBiSetVertexTransformer.class);
    jobConf.setJobName("tansfer_pajek2LabeledAdjBiSetvert");

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);

    jobConf.setOutputValueClass(LabeledAdjBiSetVertex.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.clique.maximal.AllMaximalCliquesGenerate.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, AllMaximalCliquesGenerate.class);
    conf.setJobName("AllMaximalCliquesGenerate");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(SetOfVertexSets.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(MapClass.class);
    // Combiner is not permitted.
    conf.setReducerClass(ReduceClass.class);
    // Makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    // Enable compression.
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);
    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }
    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
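
Note that compressing the map output (setCompressMapOutput plus setMapOutputCompressorClass) affects only the intermediate shuffle data, which helps here since the intermediate values (SetOfVertexSets) can be large; the final job output is unaffected and would need FileOutputFormat.setCompressOutput to be compressed as well.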

From source file: org.sf.xrime.algorithms.partitions.connected.bi.SpanningTreeRootChoose.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, SpanningTreeRootChoose.class);
    conf.setJobName("SpanningTreeRootChoose");

    // This is necessary.
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(LabeledAdjSetVertex.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(MapClass.class);
    // Since (k2,v2) differs from (k3,v3), no combiner is permitted.
    conf.setReducerClass(ReduceClass.class);
    // The format of input data is generated with WritableSerialization.
    conf.setInputFormat(SequenceFileInputFormat.class);
    try {
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
        throw new ProcessorExecutionException(e1);
    }
    conf.setNumMapTasks(getMapperNum());
    // Only one reducer is permitted, or the largest value will be wrong.
    conf.setNumReduceTasks(1);
    conf.setCompressMapOutput(true);
    conf.setMapOutputCompressorClass(GzipCodec.class);

    try {
        this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
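
The single reducer here is essential, as the in-code comment notes: choosing the spanning tree root amounts to finding a global maximum, and each reducer only sees its own partition of the keys, so with multiple reducers the "largest value" would be per-partition rather than global.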