Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length != 2) {
        printUsage();//from  w  w w  .  j a  v  a2s.co m
    }

    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);
    Job job = Job.getInstance(super.getConf(), "AvroWordCount");

    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("AvroWordCount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 1 : 0;
}

From source file:com.fanlehai.hadoop.serialize.avro.MapReduceColorCount.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length != 2) {
        printUsage();/*from   www  .ja  v  a 2  s.  c  om*/
    }

    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);
    Job job = Job.getInstance(super.getConf(), "MapReduceAvroWordCount");

    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return job.waitForCompletion(true) ? 1 : 0;
}

From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java

License:Apache License

private Job createJob(Configuration configuration) throws Exception {

    System.out.println("Initializing BlueShift v 2.0...");
    System.out.println("Configuration: " + dcmConfig.toString());

    Job job = Job.getInstance(configuration, "BlueShift v 2.0 - " + dcmConfig.getBatchName());

    job.setJarByClass(MirrorDistCPDriver.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MirrorMapper.class);
    job.setReducerClass(MirrorReducer.class);

    job.setInputFormatClass(MirrorFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, stateManager.getReportPath());

    job.setNumReduceTasks(configuration.getInt("mapreduce.reduce.tasks", 1));

    System.out.println("Job Initialization Complete, The status of the Mirror job will be written to: "
            + stateManager.getReportPath());
    return job;//from  w ww.  j  av  a 2 s  .c o  m
}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

/**
 * Create a map-only Hadoop Job out of the passed in parameters.  Does not set the
 * Job name./*from   ww  w  .ja  v  a2 s  .co  m*/
 *
 * @see #getCustomJobName(String, JobContext, Class, Class)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    //    Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

/**
 *
 * @param inputPaths//from  w w  w  . jav a 2 s.  c  om
 * @param outputPath
 * @param inputFormat
 * @param inputKey
 * @param inputValue
 * @param mapper
 * @param mapperKey
 * @param mapperValue
 * @param combiner
 * @param reducer
 * @param outputKey
 * @param outputValue
 * @param outputFormat
 * @param conf
 * @param overwrite
 * @param isCompress
 * @return
 * @throws IOException
 */
public static Job prepareAvroJob(String inputPaths, String outputPath, Class<? extends InputFormat> inputFormat,
        Object inputKey, Object inputValue, Class<? extends Mapper> mapper, Object mapperKey,
        Object mapperValue, Class<? extends Reducer> combiner, Class<? extends Reducer> reducer,
        Object outputKey, Object outputValue, Class<? extends OutputFormat> outputFormat, Configuration conf,
        boolean overwrite, boolean isCompress) throws IOException {

    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (inputKey instanceof Schema) {

        if (inputValue instanceof Schema) {
            inputFormat = inputFormat == null ? AvroKeyValueInputFormat.class : inputFormat;
        }
        inputFormat = inputFormat == null ? AvroKeyInputFormat.class : inputFormat;

    }
    if (inputFormat != null) {
        job.setInputFormatClass(inputFormat);
    }

    if (inputKey instanceof Schema) {
        AvroJob.setInputKeySchema(job, (Schema) inputKey);
    }

    if (inputValue instanceof Schema) {
        AvroJob.setInputValueSchema(job, (Schema) inputValue);
    }

    if (outputKey instanceof Schema) {

        if (outputValue instanceof Schema) {
            outputFormat = outputFormat == null ? AvroKeyValueOutputFormat.class : outputFormat;
        }
        outputFormat = outputFormat == null ? AvroKeyOutputFormat.class : outputFormat;

    }
    if (outputFormat != null) {
        job.setOutputFormatClass(outputFormat);
    }

    if (outputKey instanceof Schema) {
        AvroJob.setOutputKeySchema(job, (Schema) outputKey);
    } else if (outputKey instanceof Class) {
        job.setOutputKeyClass((Class) outputKey);
    }

    if (outputValue instanceof Schema) {
        AvroJob.setOutputValueSchema(job, (Schema) outputValue);
    } else if (outputValue instanceof Class) {
        job.setOutputValueClass((Class) outputValue);
    }

    if (reducer == null) {
        job.setNumReduceTasks(0);

        if (mapperKey instanceof Schema) {
            AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
        } else if (mapperKey instanceof Class) {
            job.setOutputKeyClass((Class) mapperKey);
        }

        if (mapperValue instanceof Schema) {
            AvroJob.setOutputValueSchema(job, (Schema) mapperValue);
        } else if (mapperKey instanceof Class) {
            job.setOutputValueClass((Class) mapperValue);
        }
        job.setJarByClass(mapper);

    } else if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);

    } else {
        job.setJarByClass(reducer);

    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (isCompress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    }

    job.setMapperClass(mapper);
    if (mapperKey instanceof Schema) {
        AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputKeyClass((Class) mapperKey);
    }

    if (mapperValue instanceof Schema) {
        AvroJob.setMapOutputValueSchema(job, (Schema) mapperValue);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputValueClass((Class) mapperValue);
    }

    if (reducer != null) {
        job.setReducerClass(reducer);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    if (overwrite) {
        HadoopUtils.delete(jobConf, new Path(outputPath));
    }

    return job;

}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Schema outputKeySchema, Class<? extends Writable> outputValue,
        Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }//from ww w.j  ava 2  s. c o m
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setReducerClass(reducer);
    AvroJob.setOutputKeySchema(job, outputKeySchema);
    job.setOutputValueClass(outputValue);

    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }

    return job;

}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Class<? extends Writable> outputKey,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }//from   ww  w. j  ava 2  s  . c  om
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);

    //        FileOutputFormat.setCompressOutput(job, true);
    //        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    job.setReducerClass(reducer);
    job.setOutputKeyClass(outputKey);
    job.setOutputValueClass(outputValue);

    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }

    return job;

}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static Job prepareJob(String inputPath, String outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    //    Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }/* w  w w  .ja v  a 2 s  .  c o  m*/
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath);

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath);

    return job;
}

From source file:com.github.sandgorgon.parmr.Main.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: parmr <input file> <output path>");
        return -1;
    }/* w  ww  .ja  v a2s  .c o m*/

    Configuration conf = super.getConf();
    conf.set("mapreduce.job.queuename", "prod");

    Job job = Job.getInstance(conf);
    job.setJobName(jobName);
    job.setJarByClass(Main.class);

    // Parquet Schema
    // Read from the input file itself the schema that we will be assuming
    Path infile = new Path(args[0]);
    List<Footer> footers = ParquetFileReader.readFooters(conf, infile.getFileSystem(conf).getFileStatus(infile),
            true);
    MessageType schema = footers.get(0).getParquetMetadata().getFileMetaData().getSchema();

    // Avro Schema
    // Convert the Parquet schema to an Avro schema
    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter();
    Schema avroSchema = avroSchemaConverter.convert(schema);

    // Set the Mapper
    job.setMapperClass(UserMapper.class);

    // This works for predicate pushdown on record assembly read.
    AvroParquetInputFormat.setUnboundRecordFilter(job, UserRecordFilter.class);

    AvroParquetInputFormat.addInputPath(job, new Path(args[0]));
    AvroParquetInputFormat.setAvroReadSchema(job, avroSchema);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // If you needed to return an avro object from the mapper, refer to this...
    //job.setMapOutputValueClass(AvroValue.class);
    //AvroJob.setMapOutputValueSchema(job, avroSchema);

    // Reducer
    job.setReducerClass(UserReducer.class);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // If we need to return an avro class again, refer to this...
    //job.setOutputFormatClass(AvroParquetOutputFormat.class);
    //AvroParquetOutputFormat.setOutputPath(job, new Path(args[1]));
    //AvroParquetOutputFormat.setSchema(job, avroSchema);
    //job.setOutputKeyClass(Void.class);
    //job.setOutputValueClass(GenericRecord.class);

    // Rough way of testing the projection side of things.
    AvroParquetInputFormat.setRequestedProjection(job,
            Schema.parse("{\"namespace\": \"com.github.sandgorgon.parmr.avro\",\n" + " \"type\": \"record\",\n"
                    + " \"name\": \"User\",\n" + " \"fields\": [\n"
                    + "     {\"name\": \"name\", \"type\": \"string\"},\n"
                    + "     {\"name\": \"favorite_number\",  \"type\": [\"int\", \"null\"]}\n" +
                    //                "     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" +
                    " ]\n" + "}\n" + ""));

    // Do the deed!
    int completion = job.waitForCompletion(true) ? 0 : 1;

    return completion;
}

From source file:com.github.ygf.pagerank.InLinks.java

License:Apache License

private void summarizeResults(Configuration conf, Path outputDir) throws Exception {

    int topResults = Integer.parseInt(conf.get("inlinks.top_results"));

    Job job = Job.getInstance(conf, "InLinks:TopN");

    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(InLinksTopNMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(InLinksTopNReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(outputDir, "inlinks"));
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks-top" + topResults));

    job.setNumReduceTasks(1);//from  ww w  .  j a va 2s.  c o m
    job.waitForCompletion(true);
}