Example usage for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Document

Set the Mapper for the job.
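The method throws IllegalStateException if the job has already been submitted. Below is a minimal, self-contained driver sketch; SetMapperDemo and LineMapper are illustrative names, not taken from the source files that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetMapperDemo {

    // A trivial Mapper: emits each input line with a count of 1.
    public static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, ONE);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setMapperClass demo");
        job.setJarByClass(SetMapperDemo.class);
        // Must be called while the job is still being defined; calling it
        // after submission throws IllegalStateException.
        job.setMapperClass(LineMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}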

Usage

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));
    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

}
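Note that this job registers the base Mapper class itself: the default Mapper#map is the identity function, so every key/value pair read from the SequenceFile input is passed through unchanged, and all of the cosine-similarity work happens in CosineSimilarityReducer.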

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // Read the Hadoop configuration
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // Create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // No Combiner is used: the Reduce class consumes <IntPair, IntWritable> but
    // emits <Text, IntWritable>, so it cannot double as a Combiner here.
    //job.setCombinerClass(Reduce.class);

    // Reducer
    job.setReducerClass(Reduce.class);

    // Group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // If no sort comparator is set via setSortComparatorClass(), Hadoop sorts
    // map output keys with the key class's own compareTo(), here IntPair's.
    //job.setSortComparatorClass(cls);
    // Values whose keys share the same first int go to the same reduce() call
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is <Text, IntWritable>, written by the default TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // Submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
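MapClass itself is not shown in this snippet. As a rough sketch, a secondary-sort mapper modeled on the canonical Hadoop SecondarySort example parses two ints per line into the composite IntPair key; the parsing details below are an assumption, not this project's actual code (imports of java.util.StringTokenizer and the Hadoop types are omitted to match the snippet style):

public static class MapClass extends Mapper<LongWritable, Text, IntPair, IntWritable> {
    private final IntPair key = new IntPair();
    private final IntWritable value = new IntWritable();

    @Override
    public void map(LongWritable inKey, Text inValue, Context context)
            throws IOException, InterruptedException {
        // Parse two ints per input line into the composite key
        StringTokenizer itr = new StringTokenizer(inValue.toString());
        int left = 0;
        int right = 0;
        if (itr.hasMoreTokens()) {
            left = Integer.parseInt(itr.nextToken());
            if (itr.hasMoreTokens()) {
                right = Integer.parseInt(itr.nextToken());
            }
            key.set(left, right);
            value.set(right);
            context.write(key, value);
        }
    }
}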

From source file:com.hadoop.mapreduce.examples.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // Hard-coded input/output paths are parsed instead of the real command-line args
    String[] ioArgs = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    // Create the job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    // Set the map, combine, and reduce classes
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // Set the final output key/value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
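TokenizerMapper is not included in the snippet. Assuming it matches the canonical Hadoop WordCount example, it splits each line into tokens and emits each token with a count of 1 (imports omitted to match the snippet style):

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}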

From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java

License:Open Source License

private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }

    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}

From source file:com.hadoop.secondarysort.SecondarySortDESC.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    // System.err.println("Usage: secondarysrot <in> <out>");
    // System.exit(2);
    // }

    // JobConf jobConf = new JobConf();

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath), true); // recursive delete; the single-argument overload is deprecated
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java

@Override
public int run(String[] args) throws Exception {
    Job client = new Job(getConf(), "hbasetest");
    client.setSpeculativeExecution(false);
    client.setMaxMapAttempts(2);
    client.setJarByClass(Hbase2.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(Text.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/1441737001"));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output4"));

    client.setMapperClass(Hbase2Mapper.class);
    client.setReducerClass(Hbase2Reducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        System.out.println(e);
    }
    return 0;
}

From source file:com.hhscyber.nl.tweets.hbasefill.HbaseFill.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
    conHbase = HBaseConfiguration.create();
    Job client = new Job(conHbase); // new configuration
    client.setJarByClass(HbaseFill.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/" + test));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output3"));

    client.setMapperClass(HbaseFillMapper.class);

    try {
        client.submit();
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }

}
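Unlike the earlier drivers, this one calls Job#submit(), which starts the job and returns immediately; waitForCompletion(true) in the previous examples blocks until the job finishes and reports its progress.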

From source file:com.hhscyber.nl.tweets.processtweets.ProcessTweets.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException {

    Job client = new Job(new Configuration());
    client.setJarByClass(ProcessTweets.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(IntWritable.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input_concat"));//
    TextOutputFormat.setOutputPath(client, new Path("output2"));

    client.setMapperClass(ProcessTweetsMapper.class);
    client.setReducerClass(ProcessTweetsReducer.class);
    client.setCombinerClass(ProcessTweetsReducer.class);

    try {
        client.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:com.hhscyber.nl.tweets.svm.train.Train.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {

    Conf conf = new Conf(args, "");
    FileSystem hdfs = FileSystem.get(conf);

    hdfs.delete(new Path("trainer"), true);

    Job client = new HBJob(conf, "SVMTrainer");

    client.setJarByClass(Train.class);
    client.setMapOutputKeyClass(Text.class);
    client.setMapOutputValueClass(Text.class);

    client.setInputFormatClass(TextInputFormat.class);

    TextInputFormat.addInputPath(client, new Path("svmclass"));
    client.setNumReduceTasks(1);

    client.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(client, new Path("trainer"));

    client.setMapperClass(TrainMapper.class);
    client.setReducerClass(TrainReducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace(); // report the failure rather than swallowing it silently
    }
}

From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // Read the Hadoop configuration
    Configuration conf = new Configuration();
    // Create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // Partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // Group values by the first int in the pair
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is <Text, IntWritable>, written by TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    /**
     * The InputFormat divides the input into splits, and each split gets a
     * RecordReader that turns its contents into key/value pairs.
     * TextInputFormat hands the Mapper <LongWritable, Text> records; the map
     * phase emits a list of <IntPair, IntWritable> pairs, which the
     * Partitioner set via job.setPartitionerClass() routes to the reducers.
     */
    job.setInputFormatClass(TextInputFormat.class);
    // The OutputFormat supplies the RecordWriter that writes the reduce output
    job.setOutputFormatClass(TextOutputFormat.class);

    // HDFS input path
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.12:9000/input/input/soso.txt"));
    // HDFS output path
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.12:9000/output/sort/"));
    // Submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}