Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the key class for the map output data.

Usage

From source file:edu.indiana.soic.ts.mapreduce.BulkDataLoader.java

License:Apache License

public static Job configureInsertAllJob(Configuration configuration, TSConfiguration tsConfiguration)
        throws IOException {
    Job job = new Job(configuration, "Bulk Import data");
    job.setJarByClass(InsertAllMapper.class);

    job.setMapperClass(InsertAllMapper.class);
    TableMapReduceUtil.initTableReducerJob(Constants.STOCK_TABLE_NAME, InsertReducer.class, job);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(tsConfiguration.getInputDir()));
    FileOutputFormat.setOutputPath(job, new Path(Constants.HDFS_OUTPUT_PATH));
    return job;/*from   w w  w  .j  a v a2 s .  co  m*/
}

From source file:edu.indiana.soic.ts.mapreduce.DateLoader.java

License:Apache License

public static Job configureInsertAllJob(Configuration configuration, TSConfiguration tsConfiguration)
        throws IOException {
    Job job = new Job(configuration, "HBase Date Table");
    job.setJarByClass(InsertDateMapper.class);

    job.setMapperClass(InsertDateMapper.class);
    TableMapReduceUtil.initTableReducerJob(Constants.STOCK_DATES_TABLE, InsertDateReducer.class, job);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(tsConfiguration.getInputDir()));
    FileOutputFormat.setOutputPath(job, new Path(Constants.HDFS_OUTPUT_PATH));
    return job;// w  w  w  .j a  v a2 s. c  o  m
}

From source file:edu.isi.mavuno.app.ie.ExtractRelations.java

License:Apache License

public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String typesPath = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.TypesPath", conf);
    String primaryTypes = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.PrimaryTypes", conf);
    String patternsPath = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.PatternsPath", conf);
    String instancesPath = MavunoUtils.getOptionalParam("Mavuno.ExtractRelations.InstancesPath", conf);
    String plaintextPath = MavunoUtils.getOptionalParam("Mavuno.ExtractRelations.PlaintextPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.CorpusPath", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.ExtractorArgs", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractRelations.OutputPath", conf);

    sLogger.info("Tool name: ExtractRelations");
    sLogger.info(" - Types path: " + typesPath);
    sLogger.info(" - Primary types: " + primaryTypes);
    sLogger.info(" - Patterns path: " + patternsPath);
    if (instancesPath != null) {
        sLogger.info(" - Instances path: " + instancesPath);
    }//from  w  w w . j  a  va  2 s.  c  o m
    if (plaintextPath != null) {
        sLogger.info(" - Plaintext path: " + plaintextPath);
    }
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor arguments: " + extractorArgs);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("ExtractRelations");

    FileInputFormat.addInputPath(job, new Path(corpusPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);
    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestSAPInstances.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.CorpusClass", conf);
    int minMatches = Integer
            .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.MinMatches", conf));
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSAPInstances.OutputPath", conf);

    sLogger.info("Tool name: HarvestSAPInstances");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Minimum matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestSAPInstances");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);//from  w  ww  . ja  v a 2 s.  c  o m
    return 0;
}

From source file:edu.isi.mavuno.app.ie.HarvestUDAPInstances.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestUDAPInstances.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestUDAPInstances.CorpusClass", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestUDAPInstances.OutputPath", conf);

    sLogger.info("Tool name: HarvestUDAPInstances");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestUDAPInstances");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);/*from   w w  w  . j a va 2 s . c o  m*/
    return 0;
}

From source file:edu.isi.mavuno.app.mine.HarvestContextPatternPairs.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.ExtractorClass",
            conf);//from w  w w  . j a  v a2  s  .co  m
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.ExtractorArgs",
            conf);
    String minMatches = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.MinMatches", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestContextPatternPairs.OutputPath", conf);

    sLogger.info("Tool name: HarvestContextPatternPairs");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Min matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestContextPatternPairs");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    return 0;
}

From source file:edu.isi.mavuno.app.mine.HarvestParaphraseCandidates.java

License:Apache License

public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.ExtractorClass",
            conf);//from ww w  . ja v a2 s . co m
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.ExtractorArgs",
            conf);
    String numResults = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.NumResults", conf);
    String minMatches = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.MinMatches", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParaphraseCandidates.OutputPath", conf);

    MavunoUtils.createDirectory(conf, outputPath);

    sLogger.info("Tool name: HarvestParaphraseCandidates");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Extractor class: " + extractorClass);
    sLogger.info(" - Extractor args: " + extractorArgs);
    sLogger.info(" - Min matches: " + minMatches);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestParaphraseCandidates");

    // harvest all (context, pattern) triples
    conf.set("Mavuno.HarvestContextPatternPairs.CorpusPath", corpusPath);
    conf.set("Mavuno.HarvestContextPatternPairs.CorpusClass", corpusClass);
    conf.set("Mavuno.HarvestContextPatternPairs.ExtractorClass", extractorClass);
    conf.set("Mavuno.HarvestContextPatternPairs.ExtractorArgs", extractorArgs);
    conf.set("Mavuno.HarvestContextPatternPairs.MinMatches", minMatches);
    conf.set("Mavuno.HarvestContextPatternPairs.OutputPath", outputPath + "/triples");
    new HarvestContextPatternPairs(conf).run();

    FileInputFormat.addInputPath(job, new Path(outputPath + "/triples"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath + "/patterns-all"));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setPartitionerClass(ContextPatternWritable.IdContextPartitioner.class);
    job.setMapOutputValueClass(TextLongPairWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    // combine scores
    //      conf.set("Mavuno.CombineScores.InputPath", outputPath + "/patterns-all");
    //      conf.set("Mavuno.CombineScores.OutputPath", outputPath + "/patterns");
    //      new CombineScores(conf).run();
    //            
    // only retain the top paraphrases
    conf.set("Mavuno.GetTopResults.InputPath", outputPath + "/patterns-all");
    conf.set("Mavuno.GetTopResults.OutputPath", outputPath + "/top-k");
    conf.set("Mavuno.GetTopResults.NumResults", numResults);
    conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", false);
    new GetTopResults(conf).run();

    MavunoUtils.removeDirectory(conf, outputPath + "/patterns-all");

    return 0;
}

From source file:edu.isi.mavuno.app.mine.HarvestSentences.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String patternPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.PatternPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.CorpusClass", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.OutputPath", conf);

    sLogger.info("Tool name: HarvestSentences");
    sLogger.info(" - Pattern file: " + patternPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestSentences");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);// www.j a  v  a  2s  . co  m

    job.waitForCompletion(true);

    return 0;
}

From source file:edu.isi.mavuno.app.nlp.HarvestParseGraph.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParseGraph.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestParseGraph.CorpusClass", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestParseGraph.OutputPath", conf);

    sLogger.info("Tool name: HarvestParseGraph");
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestParseGraph");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);//from w  w w  . j  a  v a2  s.  c  om

    return 0;
}

From source file:edu.isi.mavuno.app.nlp.ProcessStanfordNLP.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    // required parameters
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.CorpusClass", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.OutputPath", conf);

    // optional parameters
    String suTime = MavunoUtils.getOptionalParam("Mavuno.ProcessStanfordNLP.UseSUTime", conf);
    String textOutput = MavunoUtils.getOptionalParam("Mavuno.ProcessStanfordNLP.TextOutputFormat", conf);

    sLogger.info("Tool name: ProcessStanfordNLP");
    sLogger.info(" - Input path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Output path: " + outputPath);

    if (suTime != null && Boolean.parseBoolean(suTime)) {
        sLogger.info("- SUTime enabled");
    }/*from  w  w  w . j  a va 2  s  .c  om*/

    boolean textOutputFormat = false;
    if (textOutput != null && Boolean.parseBoolean(textOutput)) {
        sLogger.info("- Text output format enabled");
        textOutputFormat = true;
    }

    Job job = new Job(conf);
    job.setJobName("ProcessStanfordNLP");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));

    // output format -- either plain text or sequencefile (default)
    if (textOutputFormat) {
        job.setOutputFormatClass(TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    }

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StanfordParsedDocument.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StanfordParsedDocument.class);

    job.setMapperClass(MyMapper.class);

    job.setJarByClass(ProcessStanfordNLP.class);

    // no reducers needed
    job.setNumReduceTasks(0);

    // run job
    job.waitForCompletion(true);

    // print job statistics
    Counters counters = job.getCounters();
    sLogger.info(" - Total documents: " + counters.findCounter(MyCounters.TOTAL_DOCUMENTS).getValue());
    sLogger.info(" - Total sentences: " + counters.findCounter(MyCounters.TOTAL_SENTENCES).getValue());
    sLogger.info(" - Total tokens: " + counters.findCounter(MyCounters.TOTAL_TOKENS).getValue());

    return 0;
}