Example usage for org.apache.hadoop.mapreduce Job setMapperClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Source Link

Document

Set the Mapper for the job.

Usage

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Schema outputKeySchema, Class<? extends Writable> outputValue,
        Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }/*w ww . j a  va  2 s  .  c o  m*/
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setReducerClass(reducer);
    AvroJob.setOutputKeySchema(job, outputKeySchema);
    job.setOutputValueClass(outputValue);

    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }

    return job;

}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Class<? extends Writable> outputKey,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }//from   www . ja  v a  2 s  . c o m
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);

    //        FileOutputFormat.setCompressOutput(job, true);
    //        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    job.setReducerClass(reducer);
    job.setOutputKeyClass(outputKey);
    job.setOutputValueClass(outputValue);

    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }

    return job;

}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareJob(String inputPath, String outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    //    Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }/*w w  w .j  a va  2s  .c o m*/
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath);

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath);

    return job;
}

From source file:com.github.milind.GlobalNumberAddition.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Global Addition of Numbers");
    job.setJarByClass(GlobalNumberAddition.class);
    job.setMapperClass(GlobalNumberAdditionMapper.class);
    job.setNumReduceTasks(0);//from w  w  w.jav  a 2  s  .  co  m
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.github.milind.GlobalNumberAverage.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Global Average of Numbers");
    job.setJarByClass(GlobalNumberAverage.class);
    job.setMapperClass(GlobalNumberAverageMapper.class);
    job.setNumReduceTasks(0);/* w ww  . j  a  va2  s .c om*/
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.github.milind.NumberAdditionPerLine.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Addition of Numbers Per Line");
    job.setJarByClass(NumberAdditionPerLine.class);
    job.setMapperClass(NumberAdditionPerLineMapper.class);
    job.setNumReduceTasks(0);/*from w  w  w .  j  av a2  s. c  o m*/
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.github.sakserv.minicluster.mapreduce.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("usage: [input] [output]");
        System.exit(-1);//from  w  w w  . j  a  v a 2s.c om
    }

    if (null == configuration) {
        configuration = new Configuration();
    }

    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(WordMapper.class);
    job.setReducerClass(SumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(Driver.class);

    job.waitForCompletion(true);

}

From source file:com.github.sample.mapreduce.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    System.out.println("coder");
    //conf.addResource("etc/hadoop/hadoop-local.xml");
    //conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    //conf.setClass("mapreduce.output.fileoutputformat.compress.codec", GzipCodec.class, CompressionCodec.class);
    conf.set("fs.default.name", "hdfs://localhost:9000");
    /*/*w  w  w.j  a  v a2 s.c  o m*/
      conf.set("fs.hdfs.impl",
    org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()
    );
    conf.set("fs.file.impl",
    org.apache.hadoop.fs.LocalFileSystem.class.getName()
    );
    */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.github.sandgorgon.parmr.Main.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: parmr <input file> <output path>");
        return -1;
    }//from ww w.ja v  a  2 s.  c  o  m

    Configuration conf = super.getConf();
    conf.set("mapreduce.job.queuename", "prod");

    Job job = Job.getInstance(conf);
    job.setJobName(jobName);
    job.setJarByClass(Main.class);

    // Parquet Schema
    // Read from the input file itself the schema that we will be assuming
    Path infile = new Path(args[0]);
    List<Footer> footers = ParquetFileReader.readFooters(conf, infile.getFileSystem(conf).getFileStatus(infile),
            true);
    MessageType schema = footers.get(0).getParquetMetadata().getFileMetaData().getSchema();

    // Avro Schema
    // Convert the Parquet schema to an Avro schema
    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter();
    Schema avroSchema = avroSchemaConverter.convert(schema);

    // Set the Mapper
    job.setMapperClass(UserMapper.class);

    // This works for predicate pushdown on record assembly read.
    AvroParquetInputFormat.setUnboundRecordFilter(job, UserRecordFilter.class);

    AvroParquetInputFormat.addInputPath(job, new Path(args[0]));
    AvroParquetInputFormat.setAvroReadSchema(job, avroSchema);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // If you needed to return an avro object from the mapper, refer to this...
    //job.setMapOutputValueClass(AvroValue.class);
    //AvroJob.setMapOutputValueSchema(job, avroSchema);

    // Reducer
    job.setReducerClass(UserReducer.class);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // If we need to return an avro class again, refer to this...
    //job.setOutputFormatClass(AvroParquetOutputFormat.class);
    //AvroParquetOutputFormat.setOutputPath(job, new Path(args[1]));
    //AvroParquetOutputFormat.setSchema(job, avroSchema);
    //job.setOutputKeyClass(Void.class);
    //job.setOutputValueClass(GenericRecord.class);

    // Rough way of testing the projection side of things.
    AvroParquetInputFormat.setRequestedProjection(job,
            Schema.parse("{\"namespace\": \"com.github.sandgorgon.parmr.avro\",\n" + " \"type\": \"record\",\n"
                    + " \"name\": \"User\",\n" + " \"fields\": [\n"
                    + "     {\"name\": \"name\", \"type\": \"string\"},\n"
                    + "     {\"name\": \"favorite_number\",  \"type\": [\"int\", \"null\"]}\n" +
                    //                "     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" +
                    " ]\n" + "}\n" + ""));

    // Do the deed!
    int completion = job.waitForCompletion(true) ? 0 : 1;

    return completion;
}

From source file:com.github.ygf.pagerank.InLinks.java

License:Apache License

private void computeInLinks(Configuration conf, Path linksFile, Path outputDir) throws Exception {

    // This job computes the number of in-links for every page. The
    // implementation is very similar to the classic word count example.

    Job job = Job.getInstance(conf, "InLinks:Computation");

    job.setJarByClass(InLinks.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(InLinksMapper.class);
    job.setCombinerClass(InLinksReducer.class);
    job.setReducerClass(InLinksReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, linksFile);
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks"));

    job.waitForCompletion(true);/*from   w  ww.ja va 2s.  com*/
}