Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the job output data.

Usage

From source file:com.juniarto.secondsorter.SsJob.java

public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);//from  w  w w. j  a va 2  s  . c om

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.submit();

    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);
    return 0;

}

From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    int numberOfReducers = 1;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 1;//from   w  ww . j a  va 2 s  .  c o  m
        }
    }

    System.out.println("Kissmetrics Json Schema Extrator");

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToSchemaDriver.class);
    job.setJobName("Kissmetrics Json Schema Extrator");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.jyz.study.hadoop.hbase.mapreduce.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This//from   w  ww  .jav a2  s .c o  m
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the
 * DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of
 * regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's
 * requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to
    // properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}

From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceMultipleInputs.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: TestMapreduceMultipleInputs <in1> <in2> <out>");
        System.exit(2);/* w  ww.j a  v a 2s .c  o  m*/
    }
    Job job = new Job(conf, "TestMapreduceMultipleInputs");
    job.setJarByClass(TestMapreduceMultipleInputs.class);// ?
    // job.setMapperClass(Mapper1.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, Mapper1.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), SequenceFileInputFormat.class, Mapper2.class);

    //FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceSequenceInputFormat.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceSequenceInputFormat <in> <out>");
        System.exit(2);// w  w w. ja v a  2  s  . c o m
    }
    Job job = new Job(conf, "TestMapreduceSequenceInputFormat");
    job.setJarByClass(TestMapreduceSequenceInputFormat.class);//?
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class); // SequenceFileInputFormat
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceTextInputFormat.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceTextInputFormat <in> <out>");
        System.exit(2);/*from  ww w  .j av  a 2s.  c  o  m*/
    }
    Job job = new Job(conf, "TestMapreduceTextInputFormat");
    job.setJarByClass(TestMapreduceTextInputFormat.class);//?
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }/* w w  w .  ja  v  a 2  s .c o m*/

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.kit.hadoop.example.WordCount1.java

License:Apache License

public static void main(String[] args) throws Exception {

    //     File jarFile = WordCount1.createTempJar( "bin" );
    ////from w  ww. j a  v a  2 s.co  m
    //     WordCount1.addClasspath( "/usr/hadoop/conf" );
    //
    //     ClassLoader classLoader = WordCount1.getClassLoader();
    //
    //     Thread.currentThread().setContextClassLoader(classLoader);
    //
    //
    //     ((JobConf) job.getConfiguration()).setJar(jarFile.toString()); 

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //    if (otherArgs.length < 2) {
    //      System.err.println("Usage: wordcount <in> [<in>...] <out>");
    //      System.exit(2);
    //    }
    Job job = new Job(conf, "wordcount1_local");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //    for (int i = 0; i < otherArgs.length - 1; ++i) {
    //      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    //    }
    //    FileOutputFormat.setOutputPath(job,
    //      new Path(otherArgs[otherArgs.length - 1]));

    FileInputFormat.addInputPath(job, new Path("hdfs://172.16.19.158:8020/guohan"));

    FileOutputFormat.setOutputPath(job, new Path("hdfs://172.16.19.158:8020/guohan/output2"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    //    D:\kit\hadoop-2.6.0.tar\hadoop-2.6.0
}

From source file:com.kk.hadoop.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);/* www .j a va 2s. co  m*/
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(2);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.knewton.mapreduce.example.SSTableMRExample.java

License:Apache License

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException, ParseException {

    long startTime = System.currentTimeMillis();
    Options options = buildOptions();/*w ww  . ja  v  a 2s .c om*/

    CommandLineParser cliParser = new BasicParser();
    CommandLine cli = cliParser.parse(options, args);
    if (cli.getArgs().length < 2 || cli.hasOption('h')) {
        printUsage(options);
    }
    Job job = getJobConf(cli);

    job.setJarByClass(SSTableMRExample.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(StudentEventWritable.class);

    job.setMapperClass(StudentEventMapper.class);
    job.setReducerClass(StudentEventReducer.class);

    job.setInputFormatClass(SSTableColumnInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // input arg
    String inputPaths = cli.getArgs()[0];
    LOG.info("Setting initial input paths to {}", inputPaths);
    SSTableInputFormat.addInputPaths(job, inputPaths);
    // output arg
    FileOutputFormat.setOutputPath(job, new Path(cli.getArgs()[1]));
    if (cli.hasOption('c')) {
        LOG.info("Using compression for output.");
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setCompressOutput(job, true);
    }
    job.waitForCompletion(true);
    LOG.info("Total runtime: {}s", (System.currentTimeMillis() - startTime) / 1000);
}