Example usage for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException

Source Link

Document

Set the Mapper for the job.

Usage

From source file:com.conversantmedia.mapreduce.example.WordCount.java

License:Apache License

public static void main(String[] args) {

    try {//from w w  w  . j av a2 s .c  o  m
        Job job = Job.getInstance(new Configuration(), "WordCount v2");

        job.setInputFormatClass(FileInputFormat.class);
        job.setOutputFormatClass(FileOutputFormat.class);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setCombinerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);

    } catch (IOException | ClassNotFoundException | InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MapperInfoHandler.java

License:Apache License

@Override
@SuppressWarnings("rawtypes")
public void process(Annotation annotation, Job job, Object target) {
    MapperInfo map = (MapperInfo) annotation;
    if (map.value() != org.apache.hadoop.mapreduce.Mapper.class) {
        Class<? extends Mapper> mapperClass = map.value();
        job.setMapperClass(mapperClass);

        // Is this a map-only job?
        Field jobField = (Field) target;
        boolean isMapOnly = isMapOnlyJob(job, jobField);

        configureOutputKeyValue(job, mapperClass, map, isMapOnly);
    }// w w  w .  j  ava 2  s.  c  o  m
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MultiInputAnnotationHandler.java

License:Apache License

@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    for (Input input : ((MultiInput) annotation).value()) {
        Path path = getInputAsPath(input.path());
        if (input.mapper() == Mapper.class) {
            MultipleInputs.addInputPath(job, path, input.format());
        } else {//from  w w w .j  a va2s . co  m
            MultipleInputs.addInputPath(job, path, input.format(), input.mapper());
            // Need to call again here so the call is captured by our aspect which
            // will replace it with the annotated delegating mapper class for resource
            // injection if required.
            job.setMapperClass(DelegatingMapper.class);
        }
    }
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a single file//from   w  w  w  . ja v  a  2s.c om
 * 
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testSingle() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testSingle()                    ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a Path containing files that will cause the Job to fail
 * /*from  w ww . java2 s.  co m*/
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultiple()                  ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test is identical to testMultiple() however the ZipFileInputFormat is set to
 * be lenient, errors that cause testMultiple() to fail will be quietly ignored here.
 * // w w w . j  a v a 2  s  . c o  m
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultipleLenient()           ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setLenient(true);
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * ZipInputStream doesn't support encrypted entries thus this will fail.
 * //from   w  w  w .j av a  2  s .co  m
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testEncryptedZip()              ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test explicitly tries to read a file containing random noise as a ZIP file,
 * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is
 * encountered.//  w  w  w  .j  a  va  2  s .c o  m
 * 
 * Expected result: (quiet) failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testNonZipData()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption
 * the Job will fail and no output will be written through the Reducer.
 * /*from  www.  j  av a2  s. co m*/
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testCorruptZip() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testCorruptZip()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Corrupt"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption
 * the Mapper will ignore the corrupt entry and close the ZIP file. All previous
 * output will be treated as normal and passed through the Reducer. 
 * //  w w  w .  java2s . c  o m
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testCorruptZipLenient() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testCorruptZipLenient()         ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setLenient(true);
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_CorruptLenient"));

    //
    assertTrue(job.waitForCompletion(true));
}