Example usage for org.apache.hadoop.mapreduce.Job#setMapperClass

Introduction

This page shows example usages of the setMapperClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException

Source Link

Document

Set the Mapper for the job.

Usage

From source file:com.elixir.hadoop.Word.WordCount.java

License:Apache License

/**
 * Configures and runs the "word count" job.
 *
 * <p>The arguments left over after {@code GenericOptionsParser} has processed {@code args}
 * are treated as one or more input paths followed by a single output path. The process
 * exits with status 2 when fewer than two such arguments remain, otherwise with 0 or 1
 * depending on the value returned by {@code waitForCompletion}.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Index of the last remaining argument, which is the output path.
    final int outputIndex = remaining.length - 1;
    if (outputIndex < 1) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    final Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    // The same class is registered as both combiner and reducer.
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setCombinerClass(IntSumReducer.class);

    job.setOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);

    // Every argument before the last one is an input path.
    for (int idx = 0; idx < outputIndex; idx++) {
        FileInputFormat.addInputPath(job, new Path(remaining[idx]));
    }
    FileOutputFormat.setOutputPath(job, new Path(remaining[outputIndex]));

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.elixir.hadoop.WordCount.java

License:Apache License

/**
 * Entry point: builds the "word count" job from the command line and runs it.
 *
 * <p>After generic option parsing, all remaining arguments except the last are added as
 * input paths and the last one is used as the output path. Exits with 2 on a usage
 * error, 0 when {@code waitForCompletion} returns true, and 1 otherwise.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (paths.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job wordCountJob = Job.getInstance(configuration, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // Processing classes.
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    // Output types.
    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    // Inputs are paths[0 .. lastIndex - 1]; the output is paths[lastIndex].
    int lastIndex = paths.length - 1;
    int next = 0;
    while (next < lastIndex) {
        FileInputFormat.addInputPath(wordCountJob, new Path(paths[next]));
        next++;
    }
    FileOutputFormat.setOutputPath(wordCountJob, new Path(paths[lastIndex]));

    int exitCode = wordCountJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}

From source file:com.ema.hadoop.bestclient.BestClient.java

/**
 * Configures and runs the "Best client job".
 *
 * <p>Requires exactly four arguments: input path, output path, start date and end date.
 * The two dates are stored in the job configuration under the key {@code "dates"}.
 * Exits with -1 on a usage error, 0 when {@code waitForCompletion} returns true, and 1
 * otherwise.
 *
 * @param args {@code <input path> <output path> <date start> <date end>}
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    // setStrings is declared on Configuration, so the value returned by getConfiguration()
    // can be used directly; no downcast to JobConf is needed.
    job.getConfiguration().setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount.java

/**
 * Configures and runs the "Word count job".
 *
 * <p>Requires exactly two arguments: the input path and the output path. Exits with -1
 * on a usage error, 0 when {@code waitForCompletion} returns true, and 1 otherwise.
 *
 * @param args {@code <input path> <output path>}
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 2;
    if (wrongArgCount) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    final Job wcJob = Job.getInstance();
    wcJob.setJobName("Word count job");
    wcJob.setJarByClass(WordCount.class);

    // Processing classes and output types.
    wcJob.setMapperClass(WCMapper.class);
    wcJob.setReducerClass(WCReducer.class);
    wcJob.setOutputKeyClass(Text.class);
    wcJob.setOutputValueClass(IntWritable.class);

    // args[0] is the input location, args[1] the output location.
    final Path input = new Path(args[0]);
    final Path output = new Path(args[1]);
    FileInputFormat.setInputPaths(wcJob, input);
    FileOutputFormat.setOutputPath(wcJob, output);

    final boolean succeeded = wcJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.ema.hadoop.wordcount.WordCount_cache.java

/**
 * Writes a small stop-word list to HDFS, registers it as a cache file and runs the
 * "Word count job".
 *
 * <p>Requires exactly two arguments: the input path and the output path. Exits with -1
 * on a usage error, 0 when {@code waitForCompletion} returns true, and 1 otherwise.
 *
 * @param args {@code <input path> <output path>}
 * @throws Exception propagated from HDFS access, job setup or execution
 */
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list
    // it could also be a file manually loaded into HDFS

    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    Path stopWordsFile = new Path("hdfs://localhost:9000/user/student/stop_words.txt");

    // try-with-resources closes the writer, the stream and the file system (in that
    // order) even when a write fails part-way through.
    try (FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration)) {
        // Replace any existing copy of the stop-word file.
        if (hdfs.exists(stopWordsFile)) {
            hdfs.delete(stopWordsFile, true);
        }
        try (OutputStream os = hdfs.create(stopWordsFile, new Progressable() {
            @Override
            public void progress() {
                out.println("...bytes written");
            }
        }); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"))) {
            // One stop word per line.
            for (String w : stopwords) {
                writer.write(w + "\n");
            }
        }
    }

    Job job = Job.getInstance();
    // Register the file written above as a cache file for the job.
    job.addCacheFile(stopWordsFile.toUri());

    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java

License:Apache License

/**
 * Validates the HBase input settings held in the job's configuration and configures the
 * job to read from HBase.
 *
 * <p>A missing input table name, or a family-column entry that does not split into
 * exactly two parts on {@code ":"}, is logged as an error and raised as an exception.
 * Missing start row, stop row, timestamp, filters, family columns or families only
 * produce a warning.
 *
 * <p>NOTE(review): the log/exception message literals appear to have lost non-ASCII
 * characters (they contain {@code ?} placeholders). They are left unchanged here;
 * restore them from the original source if it is available.
 *
 * @param conf the job to configure
 * @throws Exception if the table name is missing or a family-column entry is malformed;
 *         the validation helpers called here may presumably throw as well
 */
@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hconf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_INPUT);

    // The input table name is mandatory.
    String tableName = hconf.getInputTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]HBase??<" + HbaseConfiguration.INPUT_TABLE + ">?.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Check the configured input field names.
    String[] inputFieldName = hconf.getInputFieldNames();
    this.vParamSrcTargetFieldNames(hconf, inputFieldName);

    // Register the combiner only when the configuration asks for one.
    if (hconf.getInputIsCombiner()) {
        conf.setCombinerClass(DBGroupReducer.class);
    }

    // Check the query time range.
    String[] timerange = hconf.getInputHBaseQueryTimerange();
    this.vParamQueryTimeRange(timerange);

    // Start row is optional: warn when it is not set.
    String startrow = hconf.getInputHBaseQueryStartRow();
    if (null == startrow || startrow.trim().length() <= 0) {
        MRLog.warn(LOG,
                "[MR WARN]?startrow<" + HbaseConfiguration.INPUT_QUERY_STARTROW + ">.");
    }

    // Stop row is optional: warn when it is not set.
    String stoprow = hconf.getInputHBaseQueryStopRow();
    if (null == stoprow || stoprow.trim().length() <= 0) {
        MRLog.warn(LOG,
                "[MR WARN]?stoprow<" + HbaseConfiguration.INPUT_QUERY_STOPROW + ">.");
    }

    // Timestamp is optional: a value of -1 or below only triggers a warning.
    long timestamp = hconf.getInputHBaseQueryTimestamp();
    if (timestamp <= -1) {
        MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_TIMESTAMP
                + ">.");
    }

    // Filters are optional: warn when none are set.
    String filters = hconf.getInputHBaseQueryFilters();
    if (null == filters || filters.length() <= 0) {
        MRLog.warn(LOG, "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FILTER
                + ">.");
    }

    // Family columns are optional: warn when none are set.
    String[] familyColumns = hconf.getInputHBaseQueryFamilyColumns();
    if (null == familyColumns || familyColumns.length <= 0) {
        MRLog.warn(LOG,
                "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + ">.");
    }

    // Each configured entry must split into exactly two parts on ':'.
    if (null != familyColumns) {
        for (String tmp : familyColumns) {
            if (tmp.split(":").length != 2) {
                String meg = "[MR ERROR]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS
                        + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }
    }

    // Families are optional: warn when none are set.
    String[] familys = hconf.getInputHBaseQueryFamilys();
    if (null == familys || familys.length <= 0) {
        MRLog.warn(LOG,
                "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FAMILYS + ">.");
    }

    conf.setInputFormatClass(HbaseInputFormat.class);
    hconf.setInputClass(DBRecord.class);

    // The map task count comes from getTableHRegionInfoCount for the start/stop row range.
    // The reduce task count matches it unless the configuration says the job ends at the map stage.
    int taskNumber = HbaseInputFormat.getTableHRegionInfoCount(conf.getConfiguration(), startrow, stoprow);
    int reduceTasks = taskNumber;
    if (hconf.getInputMapEnd()) {
        reduceTasks = 0;
    }

    // Apply the task counts and the mapper settings. The input class and the combiner
    // were already set above, so they are not set a second time here.
    hconf.setNumMapTasks(taskNumber);
    hconf.setNumReduceTasks(reduceTasks);
    conf.setMapperClass(DBMapper.class);
    conf.setMapOutputKeyClass(DBRecord.class);
    conf.setMapOutputValueClass(DBRecord.class);
}

From source file:com.example.bigtable.sample.WordCountHBase.java

License:Apache License

/**
 * Configures and runs a "word count" job whose reducer is set up through
 * {@code TableMapReduceUtil.initTableReducerJob}.
 *
 * <p>After generic option parsing, all remaining arguments except the last are input
 * paths and the last one is the table name. Exits with 2 on a usage error, 0 when
 * {@code waitForCompletion} returns true, and 1 otherwise.
 *
 * @param args command-line arguments
 * @throws Exception propagated from job setup or execution
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = HBaseConfiguration.create();
    final String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
        System.exit(2);
    }

    final Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountHBase.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Everything before the final argument is an input path.
    final int tableArgIndex = remaining.length - 1;
    for (int idx = 0; idx < tableArgIndex; idx++) {
        FileInputFormat.addInputPath(job, new Path(remaining[idx]));
    }

    // Table creation failures are logged and the job is still attempted.
    final TableName tableName = TableName.valueOf(remaining[tableArgIndex]);
    try {
        CreateTable.createTable(tableName, conf, Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
    } catch (Exception ex) {
        LOG.error("Could not create the table.", ex);
    }

    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.example.Driver.java

License:Open Source License

/**
 * Configures the word-count job, logging each setting as it is applied, and runs it.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output directory
 * @return 0 when the job completes successfully; 1 when fewer than two arguments are
 *         given or the job fails
 * @throws Exception propagated from job setup or execution
 */
public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    //When you are not running any Reducer
    //OR    job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Report the job's outcome to the caller: a failed job must not return 0.
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.examples.ch03.ParseWeblogs_Ex_1.java

/**
 * Configures and runs the map-only "Weblog Transformer" job.
 *
 * <p>Reads from the fixed path {@code apache_clf.txt} and writes to {@code output};
 * {@code args} is not used.
 *
 * @param args command-line arguments (ignored)
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
public int run(String[] args) throws Exception {
    final Job weblogJob = Job.getInstance(getConf());
    weblogJob.setJarByClass(getClass());
    weblogJob.setJobName("Weblog Transformer");

    // Zero reduce tasks: the job stops after the map stage.
    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(CLFMapper_Ex_1.class);

    // Map output and job output both use Text keys and values.
    weblogJob.setMapOutputKeyClass(Text.class);
    weblogJob.setMapOutputValueClass(Text.class);
    weblogJob.setOutputKeyClass(Text.class);
    weblogJob.setOutputValueClass(Text.class);

    weblogJob.setInputFormatClass(TextInputFormat.class);
    weblogJob.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(weblogJob, new Path("apache_clf.txt"));
    FileOutputFormat.setOutputPath(weblogJob, new Path("output"));

    return weblogJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java

License:Apache License

/**
 * Prepares the configuration, then builds, submits and waits for the map-only
 * "hive-io-writing" job.
 *
 * @param args command-line arguments, passed to {@code handleCommandLine}
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from configuration, job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    // Configuration is prepared before the Job is created from it.
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    final Job job = new Job(conf, "hive-io-writing");
    // Only set the jar from this class when none has been set already.
    final boolean jarMissing = job.getJar() == null;
    if (jarMissing) {
        job.setJarByClass(getClass());
    }

    // Zero reduce tasks: the job stops after the map stage.
    job.setNumReduceTasks(0);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setOutputFormatClass(SampleOutputFormat.class);
    job.setMapperClass(SampleMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);

    job.submit();
    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}