Example usage for org.apache.hadoop.mapreduce Job setOutputValueClass

Introduction

This page collects example usages of the setOutputValueClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setOutputValueClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the value class for job outputs.

Usage

From source file:com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java

License:Apache License

/**
 * Configures the mapper side of the job: installs the mapper implementation
 * returned by {@code getMapperClass()} and declares the job's output types
 * ({@code String} keys, {@code NullWritable} values).
 *
 * @param job the job to configure
 * @param tableName not consulted by this implementation
 * @param tableClassName not consulted by this implementation
 * @throws ClassNotFoundException declared by the signature; not thrown directly here
 * @throws IOException declared by the signature; not thrown directly here
 */
protected void configureMapper(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {
    job.setMapperClass(getMapperClass());

    // Output key/value types for the job.
    final Class<?> outputKeyType = String.class;
    final Class<?> outputValueType = NullWritable.class;
    job.setOutputKeyClass(outputKeyType);
    job.setOutputValueClass(outputValueType);
}

From source file:com.cloudera.test.UseHCat.java

License:Apache License

/**
 * Configures and runs the "UseHCat" job, which reads from one HCatalog table
 * and writes HCatalog records to another.
 *
 * @param args command-line arguments; once the generic Hadoop options have been
 *             stripped, the first remaining argument is the input table name and
 *             the second is the output table name
 * @return 0 if the job completed successfully, 1 if it failed, or 2 if fewer
 *         than two table names were supplied
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
    // when a table name is missing.
    if (args.length < 2) {
        System.err.println("Usage: UseHCat [generic options] <input table> <output table>");
        return 2;
    }

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    // Read the output table's schema back and set it explicitly for writing.
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Create a map-only Hadoop job (zero reduce tasks).  Does not set the name on the job.
 * The mapper's key/value classes are used for both the map output and the job output.
 *
 * @param inputPath The input path, stored under {@code mapred.input.dir}
 * @param outputPath The output path, stored under {@code mapred.output.dir}
 * @param inputFormat The input format class
 * @param mapper The mapper class; also used to locate the job jar
 * @param mapperKey The key class emitted by the mapper
 * @param mapperValue The value class emitted by the mapper
 * @param outputFormat The output format class
 * @param conf The configuration to copy for the new job
 * @return The configured job.
 * @throws IOException if the job cannot be created.
 * @throws IllegalStateException if {@code mapper} is the base {@code Mapper} class itself
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    final Job mapOnlyJob = new Job(new Configuration(conf));
    final Configuration settings = mapOnlyJob.getConfiguration();

    // The base Mapper class is rejected as a jar anchor (see the exception message).
    final boolean isBaseMapper = mapper.equals(Mapper.class);
    if (isBaseMapper) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapOnlyJob.setJarByClass(mapper);

    // Input side.
    mapOnlyJob.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side: with no reducers, the job output types equal the map output types.
    mapOnlyJob.setMapperClass(mapper);
    mapOnlyJob.setMapOutputKeyClass(mapperKey);
    mapOnlyJob.setMapOutputValueClass(mapperValue);
    mapOnlyJob.setOutputKeyClass(mapperKey);
    mapOnlyJob.setOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);
    mapOnlyJob.setNumReduceTasks(0);

    // Output side.
    mapOnlyJob.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return mapOnlyJob;
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Build a Hadoop job with both a map and a reduce phase.  The job name is left unset.
 * The job jar is located from the reducer class, unless the reducer is the base
 * {@link org.apache.hadoop.mapreduce.Reducer}, in which case the mapper class is used.
 *
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class emitted by the mapper.
 *                  May be null (e.g. for a no-op Mapper), in which case it is not set.
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class emitted by the mapper.
 *                    May be null (e.g. for a no-op Mapper), in which case it is not set.
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 * @throws IllegalStateException if both the mapper and the reducer are the base Hadoop classes
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    final Job mapReduceJob = new Job(conf);
    final Configuration settings = mapReduceJob.getConfiguration();

    // Pick the class used to locate the job jar: the reducer first, then the mapper.
    final Class<?> jarAnchor;
    if (!reducer.equals(Reducer.class)) {
        jarAnchor = reducer;
    } else if (!mapper.equals(Mapper.class)) {
        jarAnchor = mapper;
    } else {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapReduceJob.setJarByClass(jarAnchor);

    // Input side.
    mapReduceJob.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side: the intermediate types are optional.
    mapReduceJob.setMapperClass(mapper);
    if (null != mapperKey) {
        mapReduceJob.setMapOutputKeyClass(mapperKey);
    }
    if (null != mapperValue) {
        mapReduceJob.setMapOutputValueClass(mapperValue);
    }

    // Note: map output compression ("mapred.compress.map.output") is not enabled here.

    // Reduce side.
    mapReduceJob.setReducerClass(reducer);
    mapReduceJob.setOutputKeyClass(reducerKey);
    mapReduceJob.setOutputValueClass(reducerValue);

    // Output side.
    mapReduceJob.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return mapReduceJob;
}

From source file:com.conversantmedia.mapreduce.example.WordCount.java

License:Apache License

/**
 * Command-line entry point for the word-count job.
 *
 * <p>Exits with status 0 if the job succeeds, 1 if it fails or an exception is
 * raised while setting it up or running it, and 2 if the input/output paths
 * are missing from the command line.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 */
public static void main(String[] args) {
    // Both paths are required; report misuse instead of failing with an index error.
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(2);
        return;
    }

    boolean succeeded = false;
    try {
        Job job = Job.getInstance(new Configuration(), "WordCount v2");

        // NOTE(review): FileInputFormat/FileOutputFormat look like the abstract Hadoop
        // base classes; a concrete format (e.g. TextInputFormat/TextOutputFormat) is
        // probably intended here - confirm before relying on this example.
        job.setInputFormatClass(FileInputFormat.class);
        job.setOutputFormatClass(FileOutputFormat.class);

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setCombinerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Keep the job outcome so it can be reflected in the exit status.
        succeeded = job.waitForCompletion(true);

    } catch (InterruptedException e) {
        // Restore the interrupt flag so it is not lost.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (IOException | ClassNotFoundException e) {
        e.printStackTrace();
    }

    System.exit(succeeded ? 0 : 1);
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MapperInfoHandler.java

License:Apache License

/**
 * Sets the map output key/value classes on the job and, for map-only jobs,
 * the job output key/value classes as well.
 *
 * <p>For the key and the value independently: the type declared on the
 * {@code MapperInfo} annotation wins when it is present and not
 * {@code void.class}; otherwise the type is taken from the mapper's generic
 * type parameters when the expected number of them is available. If neither
 * source applies, nothing is set.
 *
 * @param job the job to configure
 * @param mapperClass the mapper whose generic type parameters are inspected
 * @param map the mapper annotation; may be null
 * @param isMapOnly whether the job output classes should mirror the map output classes
 */
@SuppressWarnings("rawtypes")
public void configureOutputKeyValue(Job job, Class<? extends Mapper> mapperClass, MapperInfo map,
        boolean isMapOnly) {
    // Try and work it out from the generics
    final Type[] params = MaraAnnotationUtil.INSTANCE.getGenericTypeParams(mapperClass, Mapper.class);

    // Special case for TableMapper - assume if there are only two, they are the output key/value
    // TODO resolve this hack - force explicit?
    final boolean outputOnlyParams = params != null && params.length == 2;
    final int expectedCount = outputOnlyParams ? 2 : 4;
    final int keyIdx = outputOnlyParams ? 0 : 2;
    final int valueIdx = outputOnlyParams ? 1 : 3;
    final boolean genericsUsable = params != null && params.length == expectedCount;

    // Output key: annotation first, generics as fallback.
    if (map == null || map.output().key() == void.class) {
        if (genericsUsable) {
            final Class<?> inferredKey = (Class<?>) params[keyIdx];
            job.setMapOutputKeyClass(inferredKey);
            if (isMapOnly) {
                job.setOutputKeyClass(inferredKey);
            }
        }
    } else {
        job.setMapOutputKeyClass(map.output().key());
        if (isMapOnly) {
            job.setOutputKeyClass(map.output().key());
        }
    }

    // Output value: same precedence as the key.
    if (map == null || map.output().value() == void.class) {
        if (genericsUsable) {
            final Class<?> inferredValue = (Class<?>) params[valueIdx];
            job.setMapOutputValueClass(inferredValue);
            if (isMapOnly) {
                job.setOutputValueClass(inferredValue);
            }
        }
    } else {
        job.setMapOutputValueClass(map.output().value());
        if (isMapOnly) {
            job.setOutputValueClass(map.output().value());
        }
    }
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.ReducerInfoHandler.java

License:Apache License

/**
 * Sets the job's output key and value classes for the reduce phase.
 *
 * <p>For the key and the value independently: the type declared on the
 * {@code ReducerInfo} annotation wins when it is present and not
 * {@code void.class}; otherwise the third and fourth generic type parameters
 * of the reducer are used, provided exactly four are available. If neither
 * source applies, nothing is set.
 *
 * @param job the job to configure
 * @param reducerClass the reducer whose generic type parameters are inspected
 * @param reducer the reducer annotation; may be null
 */
@SuppressWarnings("rawtypes")
protected void configureOutputKeyValue(Job job, Class<? extends Reducer> reducerClass, ReducerInfo reducer) {
    // Try and work it out from the generics
    final Type[] params = MaraAnnotationUtil.INSTANCE.getGenericTypeParams(reducerClass, Reducer.class);
    final boolean genericsUsable = params != null && params.length == 4;

    // Output key: annotation first, generics as fallback.
    if (reducer == null || reducer.output().key() == void.class) {
        if (genericsUsable) {
            job.setOutputKeyClass((Class<?>) params[2]);
        }
    } else {
        job.setOutputKeyClass(reducer.output().key());
    }

    // Output value: same precedence as the key.
    if (reducer == null || reducer.output().value() == void.class) {
        if (genericsUsable) {
            job.setOutputValueClass((Class<?>) params[3]);
        }
    } else {
        job.setOutputValueClass(reducer.output().value());
    }
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a single file/*from  w  ww .ja va  2 s  .c  om*/
 * 
 * Expected result: success
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testSingle() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testSingle()                    ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single"));

    //
    assertTrue(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a Path containing files that will cause the Job to fail.
 *
 * Lenient mode is switched off explicitly so that the outcome does not depend
 * on whether another test enabled it earlier.
 *
 * Expected result: failure
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testMultiple()                  ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // ZIP input, plain-text output
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // setLenient takes no job argument, so the flag is presumably process-wide;
    // force strict mode here because this test relies on bad files failing the job.
    ZipFileInputFormat.setLenient(false);
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple"));

    // The job is expected to fail
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test is identical to testMultiple() however the ZipFileInputFormat is set to
 * be lenient, errors that cause testMultiple() to fail will be quietly ignored here.
 *
 * Lenient mode is switched back off when the test finishes, so that it does not
 * leak into tests that run afterwards.
 *
 * Expected result: success
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testMultipleLenient()           ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // ZIP input, plain-text output
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // setLenient takes no job argument, so the flag is presumably process-wide;
    // enable it only for the duration of this test.
    ZipFileInputFormat.setLenient(true);
    try {
        ZipFileInputFormat.setInputPaths(job, inputPath);
        TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient"));

        // With lenient mode on, the job is expected to succeed
        assertTrue(job.waitForCompletion(true));
    } finally {
        // Restore strict mode (presumably the default - confirm) for later tests.
        ZipFileInputFormat.setLenient(false);
    }
}