Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the job output data.

Usage

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));
    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 1 : 0);

}

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // ?hadoop?//from   w w  w . j  av  a  2s  . co m
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // ?
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // ???CombinerCombiner<Text, IntWritable>Reduce<IntPair, IntWritable>?
    //job.setCombinerClass(Reduce.class);

    // Reducer
    job.setReducerClass(Reduce.class);

    // *
    // *group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    //setSortComparatorClass()hadoopkey?(?2.Hadoopkey?)
    //IntPair?compareTo()
    //job.setSortComparatorClass(cls);
    // *
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // map Key
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    // mapValue
    job.setMapOutputValueClass(IntWritable.class);

    // rduceKeyTextOutputFormatClassTextOutputFormat
    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    // rduceValue
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // ??job
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hadoop.mapreduce.examples.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    String ioArgs[] = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);/*from w  w  w.  j a  v a  2 s . c  o m*/
    }
    //job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    //map, combine, reduce
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    //
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java

License:Open Source License

private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }//from   w  ww  . jav  a 2  s.co  m

    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}

From source file:com.hadoop.secondarysort.SecondarySortDESC.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    // System.err.println("Usage: secondarysrot <in> <out>");
    // System.exit(2);
    // }/*from w  w  w.  ja v  a  2 s.  com*/

    // JobConf jobConf = new JobConf();

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath));
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java

@Override
public int run(String[] args) throws Exception {
    Job client = new Job(getConf(), "hbasetest");
    client.setSpeculativeExecution(false);
    client.setMaxMapAttempts(2);/*  ww w  .j a  va2s. c om*/
    client.setJarByClass(Hbase2.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(Text.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/1441737001"));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output4"));

    client.setMapperClass(Hbase2Mapper.class);
    client.setReducerClass(Hbase2Reducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        System.out.println(e);
    }
    return 0;
}

From source file:com.hhscyber.nl.tweets.hbasefill.HbaseFill.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException/* w  ww. java 2s  . co  m*/
 */
public static void main(String[] args) throws IOException {
    conHbase = HBaseConfiguration.create();
    Job client = new Job(conHbase); // new configuration
    client.setJarByClass(HbaseFill.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/" + test));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output3"));

    client.setMapperClass(HbaseFillMapper.class);

    try {
        client.submit();
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }

}

From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java

/**
 * @param args the command line arguments
 *///  www.  j  a va2 s  . c  o m
public static void main(String[] args) throws IOException {

    Job client = new Job(new Configuration());
    client.setJarByClass(ProcessTweets.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input_concat"));//
    TextOutputFormat.setOutputPath(client, new Path("output2"));

    client.setMapperClass(ProcessTweetsMapper.class);
    client.setReducerClass(ProcessTweetsReducer.class);
    client.setCombinerClass(ProcessTweetsReducer.class);

    try {
        client.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // ?hadoop?/*w ww.  j  a va 2  s  .c  om*/
    Configuration conf = new Configuration();
    // ?
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // 
    job.setPartitionerClass(FirstPartitioner.class);
    // 
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // map Key
    job.setMapOutputKeyClass(IntPair.class);
    // mapValue
    job.setMapOutputValueClass(IntWritable.class);

    // rduceKeyTextOutputFormatClassTextOutputFormat
    job.setOutputKeyClass(Text.class);
    // rduceValue
    job.setOutputValueClass(IntWritable.class);

    /**
     * ?????splites???RecordReder
     * ??RecordReder?keyvalue
     * Map<LongWritable, Text>
     * Mapmap<LongWritable, Text>Mapmap
     * ?List<IntPair, IntWritable>
     * map?job.setPartitionerClassList?reducer
     */
    job.setInputFormatClass(TextInputFormat.class);
    // ??RecordWriter?
    job.setOutputFormatClass(TextOutputFormat.class);

    // hdfs
    FileInputFormat.addInputPath(job, new Path("hdfs://192.1168.1.12:9000/input/input/soso.txt"));
    // hdfs
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.1168.1.12:9000/output/sort/"));
    // ??job
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hortonworks.mapreduce.URLCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
    Job job = Job.getInstance(conf, "URLCount");
    job.setJarByClass(getClass());//from   w  w  w  . j  ava  2s .c o  m
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(URLCountM.class);
    job.setReducerClass(URLCountR.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return (job.waitForCompletion(true) == true ? 0 : -1);
}