Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java

/**
 * @param args the command line arguments
 *//*from ww w. j  ava  2 s .co m*/
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address By Hour");
    job.setJarByClass(Binning_IPAddress_By_DayDriver.class);

    job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // set num of reduce tasks to 0
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println(// w  w  w.j  a  v  a 2 s.c o m
                "USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
        return;
    }
    System.out.println(Arrays.toString(args));
    Configuration conf = new Configuration();
    conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
    conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
    conf.setInt("number.edges", Integer.parseInt(args[1]));
    Job job = new Job(conf);

    Path generated = new Path(new Path(args[3]).getParent(), "generated");
    FileOutputFormat.setOutputPath(job, generated);
    FileSystem.get(conf).delete(generated, true);

    job.setJobName("RangeWriter");

    job.setJarByClass(SortGenMapper.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(RangeInputFormat.class);

    job.waitForCompletion(true);
    conf.setInt("max.id", Integer.valueOf(args[0]));
    job = new Job(conf);

    FileOutputFormat.setOutputPath(job, new Path(args[3]));
    FileSystem.get(conf).delete(new Path(args[3]), true);

    job.setJobName("Random Vertex Writer");

    FileInputFormat.addInputPath(job, generated);

    job.setJarByClass(RandomMapper.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
    job.setPartitionerClass(HashPartitioner.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.waitForCompletion(true);
}

From source file:AverageProj.AveragePrice.java

public int run(String[] args) throws Exception, ClassNotFoundException {
    Configuration conf = getConf();
    Job job = new Job(conf, "Avg");
    job.setJarByClass(AveragePrice.class);
    //final File f = new File(AveragePrice.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(AvMapper.class);
    job.setMapOutputKeyClass(YearPrice.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setGroupingComparatorClass(YearSymComparator.class);
    // job.setCombinerClass(AvReducer.class);
    job.setReducerClass(AvReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(YearPrice.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setPartitionerClass(AvgPartitioner.class);
    job.setNumReduceTasks(7);//  w w  w .j a  v  a 2 s . co  m
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}

From source file:averagerating_youtube.AverageRating_Youtube.java

/**
 * @param args the command line arguments
 *///from  w w w . ja  v a  2 s .co  m
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("AverageRating_Youtube");

    job.setJarByClass(AverageRating_Youtube.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(AvgRating_CommCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageRating_CommentCountTuple.class);
    job.setCombinerClass(AvgRating_CommCountCombiner.class);
    job.setReducerClass(AvgRating_CommCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AverageRating_CommentCountTuple.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:avro.mr.MapReduceAvroWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }/*from  ww  w  .ja  v  a  2s.com*/

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("wordcount");

    // We call setOutputSchema first so we can override the configuration
    // parameters it sets
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

    return 0;
}

From source file:avro.mr.MapReduceColorCount.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }/*from w  ww  . j  a  v a  2 s  . c  om*/

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:avro.mr.MapReduceColorCountSeqFile.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }/*ww  w  .j a v  a 2s. c  o m*/

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MapReduceColorCountSeqFile.class);
    job.setJobName("Color Count 2");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroSequenceFileInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG));
    AvroJob.setInputValueSchema(job, User.getClassSchema());

    job.setMapperClass(ColorCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroSequenceFileOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:azkaban.jobtype.examples.java.WordCount2.java

License:Apache License

@Override
public void run() throws Exception {
    logger.info(String.format("Starting %s", getClass().getSimpleName()));
    //getConf() and set
    Job job = createJob(MapperClass.class, ReducerClass.class);
    job.setJarByClass(WordCount2.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    super.run();//from  w ww . j  av a2 s. c o m

}

From source file:basic.PartitionGraph.java

License:Apache License

/**
 * Runs this tool./*  w  w  w .  j  av  a  2s .  c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

@SuppressWarnings("rawtypes")
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration());

    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }//from ww w.j  a  v a  2 s.  co  m
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}