Example usage for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException

Source Link

Document

Set the Mapper for the job.

Usage

From source file:clustering.tf_idf.DocCntDriver.java

License:Apache License

Job configJob(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir pre_step_output_dir\n", getClass().getSimpleName());
        System.exit(1);//w  w  w .  j  a  v a 2s  .c om
    }

    Configuration conf = getConf();
    conf = initConf(conf);

    Job job = Job.getInstance(conf, "tf idf pre job");
    job.setJarByClass(WorkflowDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(DocCntMapper.class);
    job.setCombinerClass(DocCntReducer.class);

    job.setReducerClass(DocCntReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    return job;
}

From source file:clustering.tf_idf.TermCntDriver.java

License:Apache License

Job configJob(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simhash_result_dir step_1_output_dir\n", getClass().getSimpleName());
        System.exit(1);/*  ww w  .j av a  2s . c  o  m*/
    }

    Configuration conf = getConf();
    conf = initConf(conf);

    Job job = Job.getInstance(conf, "tf idf step1 job");
    job.setJarByClass(TermCntDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapperClass(TermCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setCombinerClass(TermCountReducer.class);

    job.setReducerClass(TermCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}

From source file:cn.chinahadoop.hive.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);// www .  j a  va2s.  com
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.chinahadoop.practise.TestCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);//from   w w  w .  j a  va2s  . com
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(TestCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.edu.bjut.hadoop.WcAndIk.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);//from  w  ww .  j  a va 2s . c  o  m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WcAndIk.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java

public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);

    if (fs.exists(output)) {
        fs.delete(output);/* w  w w  . jav a 2s .com*/
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);

    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java

public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {

    Job job = new Job(conf);
    job.setJobName(jobName + "  " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }//w  ww  .j av  a2  s.  c  o  m

    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.waitForCompletion(true);

}

From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Merge.java

public static Job createJob(Configuration conf, Path crawldb) throws IOException {

    Job job = new Job(conf);
    //job.setJarByClass(Merge.class);
    job.getConfiguration().set("mapred",
            "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path newdb = new Path(crawldb, "new");
    Path currentdb = new Path(crawldb, "current");

    FileSystem fs = crawldb.getFileSystem(CrawlerConfiguration.create());
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }//from   ww w  .j a va 2  s .co  m

    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }

    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job;
}

From source file:cn.itcast.hadoop.mr.wordcount.DBCountPageView.java

License:Apache License

@Override
//Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    //?MySql/*from www  .  j  a  v a 2  s . co  m*/
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL; //??

    //????
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    //driverClassNameurl??
    initialize(driverClassName, url);

    //hdfs?
    Configuration conf = getConf();

    //??
    DBConfiguration.configureDB(conf, driverClassName, url); //???

    //job
    Job job = Job.getInstance(conf);

    //job??
    job.setJobName("Count Pageviews of URLs");

    //job
    job.setJarByClass(DBCountPageView.class);

    //Map
    job.setMapperClass(PageviewMapper.class);

    //Combiner
    job.setCombinerClass(LongSumReducer.class);

    //reduce
    job.setReducerClass(PageviewReducer.class);

    //DB?
    //   setInput(Job job, Class<? extends DBWritable> inputClass, String tableName, String conditions, String orderBy, String... fieldNames)
    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames); //?

    //FileOutputFormat.setoutput ?
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);//

    //Mapkey?
    job.setMapOutputKeyClass(Text.class);

    //MapValue?
    job.setMapOutputValueClass(LongWritable.class);

    //Reducekey?
    job.setOutputKeyClass(PageviewRecord.class);

    //Reducevalue?
    job.setOutputValueClass(NullWritable.class);

    int ret;//job?????
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;

        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java

License:Apache License

/** Create and setup a job */
@SuppressWarnings("deprecation")
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);/* w w w . j  a  v  a  2  s.  co  m*/

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}