Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the key class for the job output data.

Usage

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Create a map and reduce Hadoop job.  Does not set the name on the job.
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
 *                  this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
 *                    this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 *///w ww  . j  a  v a  2s  . co m
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    //    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.conversantmedia.mapreduce.example.WordCount.java

License:Apache License

public static void main(String[] args) {

    try {/*from   www  .j  a  v  a 2 s  .  c om*/
        Job job = Job.getInstance(new Configuration(), "WordCount v2");

        job.setInputFormatClass(FileInputFormat.class);
        job.setOutputFormatClass(FileOutputFormat.class);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setCombinerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);

    } catch (IOException | ClassNotFoundException | InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MapperInfoHandler.java

License:Apache License

@SuppressWarnings("rawtypes")
public void configureOutputKeyValue(Job job, Class<? extends Mapper> mapperClass, MapperInfo map,
        boolean isMapOnly) {
    MaraAnnotationUtil util = MaraAnnotationUtil.INSTANCE;
    // Try and work it out from the generics
    Type[] params = util.getGenericTypeParams(mapperClass, Mapper.class);

    int length = 4;
    int keyIdx = 2;
    int valueIdx = 3;

    // Special case for TableMapper - assume if there are only two, they are the output key/value
    // TODO resolve this hack - force explicit?
    if (params != null && params.length == 2) {
        length = 2;// ww  w  . j a v a  2s . co m
        keyIdx = 0;
        valueIdx = 1;
    }

    if (map != null && map.output().key() != void.class) {
        job.setMapOutputKeyClass(map.output().key());
        if (isMapOnly) {
            job.setOutputKeyClass(map.output().key());
        }
    } else if (params != null && params.length == length) {
        job.setMapOutputKeyClass((Class<?>) params[keyIdx]);
        if (isMapOnly) {
            job.setOutputKeyClass((Class<?>) params[keyIdx]);
        }
    }

    if (map != null && map.output().value() != void.class) {
        job.setMapOutputValueClass(map.output().value());
        if (isMapOnly) {
            job.setOutputValueClass(map.output().value());
        }
    } else if (params != null && params.length == length) {
        job.setMapOutputValueClass((Class<?>) params[valueIdx]);
        if (isMapOnly) {
            job.setOutputValueClass((Class<?>) params[valueIdx]);
        }
    }
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.ReducerInfoHandler.java

License:Apache License

@SuppressWarnings("rawtypes")
protected void configureOutputKeyValue(Job job, Class<? extends Reducer> reducerClass, ReducerInfo reducer) {
    MaraAnnotationUtil util = MaraAnnotationUtil.INSTANCE;
    // Try and work it out from the generics
    Type[] params = util.getGenericTypeParams(reducerClass, Reducer.class);

    if (reducer != null && reducer.output().key() != void.class) {
        job.setOutputKeyClass(reducer.output().key());
    } else if (params != null && params.length == 4) {
        job.setOutputKeyClass((Class<?>) params[2]);
    }//w w  w.j a v  a  2  s .  c  o m

    if (reducer != null && reducer.output().value() != void.class) {
        job.setOutputValueClass(reducer.output().value());
    } else if (params != null && params.length == 4) {
        job.setOutputValueClass((Class<?>) params[3]);
    }
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a single file/*  w ww.jav  a 2 s  . c  om*/
 * 
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testSingle() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testSingle()                    ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a Path containing files that will cause the Job to fail
 * //from w w  w  .  j a  v a2s  . co  m
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultiple()                  ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test is identical to testMultiple() however the ZipFileInputFormat is set to
 * be lenient, errors that cause testMultiple() to fail will be quietly ignored here.
 * /*from   ww  w  .ja  v  a2 s  .c o m*/
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultipleLenient()           ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setLenient(true);
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * ZipInputStream doesn't support encrypted entries thus this will fail.
 * /* w w w .j a va  2  s .  c  o  m*/
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testEncryptedZip()              ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test explicitly tries to read a file containing random noise as a ZIP file,
 * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is
 * encountered./*from w  ww  .j  a v  a  2  s . c  o  m*/
 * 
 * Expected result: (quiet) failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testNonZipData()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test refers to a corrupt (truncated) ZIP file, upon reaching the corruption
 * the Job will fail and no output will be written through the Reducer.
 * //from  w  w w . ja v a  2 s  .  c o m
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testCorruptZip() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testCorruptZip()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "corrupt.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Corrupt"));

    //
    assertFalse(job.waitForCompletion(true));
}