Example usage for org.apache.hadoop.mapreduce Job setMapperClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Source Link

Document

Set the Mapper for the job.

Usage

From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTemp.java

License:Apache License

/**
 * Configures and runs the "Max temperature" MapReduce job.
 *
 * @param args exactly two elements: the input path followed by the output path
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = new Job();
    job.setJarByClass(MaxTemp.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTempMapper.class);
    job.setReducerClass(MaxTempReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Submit the job and block until it finishes; without this call the
    // configured job is never executed. The exit code reflects job success.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.inmobi.conduit.distcp.tools.DistCp.java

License:Apache License

/**
 * Builds the distcp job and applies all copy-related configuration to it.
 * The job is only configured here; it is not submitted.
 *
 * @return the configured job
 * @throws IOException if the job cannot be created or configured
 */
protected Job createJob() throws IOException {
    final String baseName = "distcp";
    // A user-supplied job name, when present, is appended to the base name.
    String requestedName = getConf().get("mapred.job.name");
    String jobName = (requestedName == null) ? baseName : baseName + ": " + requestedName;

    Job job = new Job(getConf(), jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);

    // Map side.
    job.setMapperClass(CopyMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Reduce side and final output.
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);

    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_NUM_MAPS,
            String.valueOf(inputOptions.getMaxMaps()));

    boolean sslRequested = inputOptions.getSslConfigurationFile() != null;
    if (sslRequested) {
        setupSSLConfig(job.getConfiguration());
    }

    inputOptions.appendToConf(job.getConfiguration());
    return job;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Builds the MapReduce job for a local-stream run. The job is only
 * configured here; it is not submitted.
 *
 * @param inputPath path of the listing file handed to the input format
 * @param totalSize total number of bytes to be copied; also used to derive the
 *                  number of map tasks
 * @return the configured job
 * @throws IOException if the job cannot be created or configured
 */
protected Job createJob(Path inputPath, long totalSize) throws IOException {
    String jobName = getName();
    Configuration conf = currentCluster.getHadoopConf();
    // Configuration.set rejects null values, so the audit flag is forwarded
    // only when the system property is actually defined.
    String auditEnabled = System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY);
    if (auditEnabled != null) {
        conf.set(ConduitConstants.AUDIT_ENABLED_KEY, auditEnabled);
    }
    Job job = new Job(conf);
    job.setJobName(jobName);
    // The jars are registered through the "tmpjars" property directly rather
    // than through DistributedCache.addFileToClassPath.
    job.getConfiguration().set("tmpjars",
            inputFormatJarDestPath.toString() + "," + auditUtilJarDestPath.toString());
    LOG.debug("Adding file [" + inputFormatJarDestPath + "] to distributed cache");
    job.setInputFormatClass(UniformSizeInputFormat.class);
    Class<? extends Mapper<Text, FileStatus, NullWritable, Text>> mapperClass = getMapperClass();
    job.setJarByClass(mapperClass);

    job.setMapperClass(mapperClass);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // setting identity reducer
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, tmpCounterOutputPath);
    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
    job.getConfiguration().set(LOCALSTREAM_TMP_PATH, tmpPath.toString());
    job.getConfiguration().set(SRC_FS_DEFAULT_NAME_KEY, srcCluster.getHadoopConf().get(FS_DEFAULT_NAME_KEY));

    // set configurations needed for UniformSizeInputFormat
    int numMaps = getNumMapsForJob(totalSize);
    job.getConfiguration().setInt(DistCpConstants.CONF_LABEL_NUM_MAPS, numMaps);
    job.getConfiguration().setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalSize);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, inputPath.toString());
    LOG.info("Expected number of maps [" + numMaps + "] Total data size [" + totalSize + "]");

    return job;
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Builds the map-only "localstream" job that reads key/value text input
 * from the given path. The job is only configured here; it is not submitted.
 *
 * @param inputPath input path registered with the key/value text input format
 * @return the configured job
 * @throws IOException if the job cannot be created or configured
 */
private Job createJob(Path inputPath) throws IOException {
    Job job = new Job(cluster.getHadoopConf());
    job.setJobName("localstream");

    // Input side.
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    KeyValueTextInputFormat.setInputPaths(job, inputPath);

    // Mapper only; no reduce phase.
    job.setMapperClass(CopyMapper.class);
    job.setJarByClass(CopyMapper.class);
    job.setNumReduceTasks(0);

    // The job emits nothing through the output format.
    job.setOutputFormatClass(NullOutputFormat.class);

    job.getConfiguration().set("localstream.tmp.path", tmpPath.toString());
    job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");

    return job;
}

From source file:com.intel.hadoop.hbase.dot.KEY.java

License:Apache License

/**
 * Runs a map-only job that generates data for the given table and waits for
 * it to finish.
 *
 * <p>The job writes to the fixed path {@code /tmp/tempout}, which is deleted
 * recursively before the job is started.
 *
 * @param inputFormatClass input format used by the job
 * @param mapperClass      mapper used by the job
 * @param mrTableName      table name stored under {@code KEY.INPUT_TABLE} and
 *                         used in the job name
 * @throws IOException            if the job cannot be set up, or if it completes
 *                                unsuccessfully
 * @throws ClassNotFoundException propagated from job submission
 * @throws InterruptedException   if interrupted while waiting for the job
 */
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // Clear any output left behind by a previous run.
    FileSystem fs = FileSystem.get(this.conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);

    // Surface job failure to the caller instead of silently ignoring it.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed: " + job.getJobName());
    }
}

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

/**
 * Sets up the actual job./*from w  w w. ja v  a  2 s .  co  m*/
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}

From source file:com.intel.hibench.DFSIOWriter.java

License:Apache License

/**
 * Records the start time and configures the Hadoop job as a map-only
 * generator before it is submitted.
 *
 * <p>If the runtime argument {@code "size"} is present it is read as a number
 * of megabytes, converted to bytes, stored in the job configuration under
 * {@code BENCH_SIZE} and recorded in the benchmark data.
 *
 * @param context the MapReduce context supplying the job and runtime arguments
 * @throws Exception if configuring the job fails
 */
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    startTime = System.currentTimeMillis();
    benchData.put(new Put(ONE, ONE, startTime));

    Job hadoopJob = context.getHadoopJob();
    hadoopJob.setInputFormatClass(RandomInputFormat.class);
    hadoopJob.setMapperClass(Generator.class);
    hadoopJob.setMapOutputKeyClass(Text.class);
    hadoopJob.setMapOutputValueClass(Text.class);
    hadoopJob.setNumReduceTasks(0);

    String requestedSize = context.getRuntimeArguments().get("size");
    if (requestedSize == null) {
        // No size requested: nothing further to configure.
        return;
    }

    LOG.info("size we get in config is : " + requestedSize);
    final long bytesPerMegabyte = 1024L * 1024L;
    long totalBytes = Long.parseLong(requestedSize) * bytesPerMegabyte;
    hadoopJob.getConfiguration().setLong(BENCH_SIZE, totalBytes);
    benchData.put(new Put(ONE, THREE, totalBytes));
}

From source file:com.intel.hibench.WordCount.java

License:Apache License

/**
 * Records the start time and wires the tokenizer mapper and counter reducer
 * into the Hadoop job, using a single reduce task.
 *
 * @param context the MapReduce context supplying the Hadoop job
 * @throws Exception if configuring the job fails
 */
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    // Capture the start time first so it is recorded before any job setup.
    startTime = System.currentTimeMillis();
    benchData.put(new Put(ONE, ONE, startTime));

    Job hadoopJob = context.getHadoopJob();
    hadoopJob.setNumReduceTasks(1);
    hadoopJob.setReducerClass(Counter.class);
    hadoopJob.setMapperClass(Tokenizer.class);
}

From source file:com.j.distributed.counter.CounterJob.java

/**
 * Configures the counter job, runs it and waits for completion.
 *
 * @param options {@code options[0]} is the input path, {@code options[1]} is
 *                the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String... options) throws Exception {
    Job counterJob = Job.getInstance(getConf(), getClass().toString());
    counterJob.setJarByClass(getClass());

    // The reducer doubles as the combiner.
    counterJob.setMapperClass(CounterMapper.class);
    counterJob.setCombinerClass(CounterReducer.class);
    counterJob.setReducerClass(CounterReducer.class);

    counterJob.setOutputKeyClass(Text.class);
    counterJob.setOutputValueClass(IntWritable.class);

    Path inputPath = new Path(options[0]);
    FileInputFormat.addInputPath(counterJob, inputPath);
    Path outputPath = new Path(options[1]);
    FileOutputFormat.setOutputPath(counterJob, outputPath);

    boolean succeeded = counterJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:com.j.distributed.sorter.SorterJob.java

/**
 * Configures the sorter job, runs it and waits for completion. Keys are
 * ordered with {@code LongWritable.DecreasingComparator}.
 *
 * @param options {@code options[1]} is the input path, {@code options[2]} is
 *                the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String... options) throws Exception {
    Job sorterJob = Job.getInstance(getConf(), getClass().toString());
    sorterJob.setJarByClass(getClass());

    // The reducer doubles as the combiner.
    sorterJob.setMapperClass(SorterMapper.class);
    sorterJob.setCombinerClass(SorterReducer.class);
    sorterJob.setReducerClass(SorterReducer.class);

    sorterJob.setOutputKeyClass(LongWritable.class);
    sorterJob.setOutputValueClass(Text.class);
    sorterJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // NOTE(review): options[0] is not read here; the paths come from
    // options[1] and options[2] — confirm this matches the caller.
    Path inputPath = new Path(options[1]);
    FileInputFormat.addInputPath(sorterJob, inputPath);
    Path outputPath = new Path(options[2]);
    FileOutputFormat.setOutputPath(sorterJob, outputPath);

    boolean succeeded = sorterJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}