Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf#setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Document

Set the value class for job outputs.
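
Before the examples, here is a minimal, self-contained sketch of where setOutputValueClass fits into a typical old-API (org.apache.hadoop.mapred) job setup. It uses the identity mapper and reducer from org.apache.hadoop.mapred.lib; the class name, job name, and the args[0]/args[1] input and output paths are only illustrative:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class PassThroughJob {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(PassThroughJob.class);
        conf.setJobName("pass-through");

        conf.setInputFormat(TextInputFormat.class); // keys: byte offsets, values: lines of text
        conf.setOutputFormat(TextOutputFormat.class);

        // Declare the key/value classes of the job output; unless
        // setMapOutputKeyClass/setMapOutputValueClass are also called,
        // they describe the map output types as well.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(IdentityMapper.class);   // pass records through unchanged
        conf.setReducerClass(IdentityReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}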

Usage

From source file: crunch.MaxTemperature.java

License: Apache License

/**
     * Creates a JobConf for a Job that will sum up the TrackStatistics per track.
     * @param inputDir The path to the folder containing the raw input data files.
     * @return The sum JobConf.
     */
    private JobConf getSumJobConf(Path inputDir) {
        log.info("Creating configuration for sum job");
        // output results to a temporary intermediate folder; it is deleted by the start() method
        Path playsOutput = new Path("sum");

        JobConf conf = new JobConf(TrackStatisticsProgram.class);
        conf.setOutputKeyClass(IntWritable.class); // track id
        conf.setOutputValueClass(TrackStats.class); // statistics for a track
        conf.setInputFormat(TextInputFormat.class); // raw listening data
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setMapperClass(SumMapper.class);
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        FileInputFormat.addInputPath(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, playsOutput);
        conf.setJobName("sum");
        return conf;
    }

From source file: crunch.MaxTemperature.java

License: Apache License

/**
     * Creates a JobConf for a Job that will merge the unique listeners and track statistics.
     * @param outputPath The path for the results to be output to.
     * @param sumInputDir The path containing the data from the sum Job.
     * @param listenersInputDir The path containing the data from the unique listeners job.
     * @return The merge JobConf.
     */
    private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) {
        log.info("Creating configuration for merge job");
        JobConf conf = new JobConf(TrackStatisticsProgram.class);
        conf.setOutputKeyClass(IntWritable.class); // track id
        conf.setOutputValueClass(TrackStats.class); // overall track statistics
        conf.setCombinerClass(SumReducer.class); // safe to re-use reducer as a combiner here
        conf.setReducerClass(SumReducer.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileOutputFormat.setOutputPath(conf, outputPath);

        MultipleInputs.addInputPath(conf, sumInputDir, SequenceFileInputFormat.class, IdentityMapper.class);
        MultipleInputs.addInputPath(conf, listenersInputDir, SequenceFileInputFormat.class,
                MergeListenersMapper.class);
        conf.setJobName("merge");
        return conf;
    }

From source file: de.tudarmstadt.lt.n2n.hadoop.FilterByCountJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), FilterByCountJob.class);
    conf.setJobName(FilterByCountJob.class.getSimpleName());

    conf.setMapperClass(FilterByCountMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.FilterByVocabularyJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), FilterByVocabularyJob.class);
    conf.setJobName(FilterByVocabularyJob.class.getSimpleName());

    conf.setMapperClass(FilterByVocabularyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    String word_list_file = conf.get(SHARED_CONSTANTS.PARAM_WORD_LIST);
    if (word_list_file == null)
        throw new MissingArgumentException(
                "Please specify word list with '-Dnlkg.filterbywordsfile=<path-to-file-in-hdfs>'.");

    DistributedCache.addFileToClassPath(new Path(word_list_file), conf);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.FlipJoBims.java

License: Apache License

public static void main(String[] args) throws Exception {

    JobConf conf = new JobConf(FlipJoBims.class);

    /* begin necessary for UKP cluster */
    conf.setMemoryForMapTask(1000L); // 1 GB /* necessary for UKP cdh3 */
    conf.setMemoryForReduceTask(1000L); // 1 GB /* necessary for UKP cdh3 */
    FileOutputFormat.setCompressOutput(conf, true); // compress output
    FileOutputFormat.setOutputCompressorClass(conf,
            org.apache.hadoop.io.compress.BZip2Codec.class); /* use the bzip2 codec for compression */
    conf.setCompressMapOutput(true); // compress mapper output
    /* end necessary for UKP cluster */

    conf.setJobName(FlipJoBims.class.getSimpleName());
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(FlipJoBims.Map.class);
    conf.setNumReduceTasks(0);
    // conf.setReducerClass(IdentityReducer.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setOutputKeyClass(Text.class);

    conf.setMapOutputValueClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file: de.tudarmstadt.lt.n2n.hadoop.GoogleSyntacticsJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob.class);
    conf.setJobName(GoogleSyntacticsJob.class.getSimpleName());

    conf.setMapperClass(GoogleSyntacticsJob3Mapper.class);
    conf.setReducerClass(GoogleSyntacticsJob3Reducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(JoBimFormat.class);
    conf.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob2.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob2.class);
    conf.setJobName(GoogleSyntacticsJob2.class.getSimpleName());

    conf.setMapperClass(GoogleSyntacticsJob2Mapper.class);
    conf.setReducerClass(GoogleSyntacticsJob2Reducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // conf.setMapOutputKeyClass(Text.class);
    // conf.setMapOutputValueClass(NullWritable.class);

    conf.setOutputKeyClass(JoBimFormat.class);
    conf.setOutputValueClass(IntWritable.class);

    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java

License: Apache License

@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }
    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);
    job.setMapperClass(JoBimMapper.class);
    job.setReducerClass(JoBimReducer.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMemoryForMapTask(4096);
    job.setMemoryForReduceTask(4096);
    job.set("mapred.child.java.opts", "-Xmx4096m");
    job.setNumReduceTasks(1); // reset to default
}

From source file: de.tudarmstadt.lt.n2n.hadoop.RelationToOneHoleTransformerJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), RelationToOneHoleTransformerJob.class);

    conf.setJobName(RelationToOneHoleTransformerJob.class.getSimpleName());
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(RelationToOneHoleTransformerJob.Map.class);
    conf.setNumReduceTasks(0);
    // conf.setReducerClass(IdentityReducer.class); // to sort or not to sort, that is the question here

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);

    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.RemoveExactDuplicatesJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), RemoveExactDuplicatesJob.class);
    conf.setJobName(RemoveExactDuplicatesJob.class.getSimpleName());

    conf.setMapperClass(LineMapper.class);
    conf.setReducerClass(KeyReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(NullWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    JobClient.runJob(conf);
    return 0;
}