Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Source Link

Document

Set the OutputFormat implementation for the map-reduce job.

Usage

From source file:com.liveramp.hank.cascading.DomainBuilderTap.java

License:Apache License

public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) {
    super.sinkConfInit(process, conf);
    // Output Format
    conf.setOutputFormat(this.outputFormatClass);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Set this tap's Domain name locally in the conf
    if (conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) {
        throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName
                + " but it was previously set to "
                + conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME));
    } else {//from   w  w w .  ja  v  a 2 s.co m
        conf.set(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName);
    }
}

From source file:com.liveramp.hank.hadoop.AbstractHadoopDomainBuilder.java

License:Apache License

private void configureJobCommon(DomainBuilderProperties properties, int versionNumber, int numPartitions,
        JobConf conf) throws IOException {
    // Hank specific configuration
    properties.setJobConfProperties(conf, versionNumber);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Output path (set to tmp output path)
    FileOutputFormat.setOutputPath(conf, new Path(properties.getTmpOutputPath(versionNumber)));
    // Output format
    conf.setOutputFormat(properties.getOutputFormatClass());
    // Num reduce tasks
    conf.setNumReduceTasks(numPartitions);
}

From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];//from ww  w .j  ava 2 s  .c  om
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<JobConf> process,
        Tap<JobConf, RecordReader<AvroWrapper<Record>, Writable>, OutputCollector<AvroWrapper<Record>, Writable>> tap,
        JobConf conf) {
    conf.set(AvroJob.OUTPUT_SCHEMA, dataSchema.toString());
    conf.setOutputFormat(AvroOutputFormat.class);
    conf.setOutputKeyClass(AvroWrapper.class);

    // set compression
    AvroOutputFormat.setDeflateLevel(conf, 6);
    AvroJob.setOutputCodec(conf, DataFileConstants.DEFLATE_CODEC);
    AvroOutputFormat.setSyncInterval(conf, 1048576);
}

From source file:com.mh2c.WikipediaDumpLoaderDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP dump loader");

    // Set the mapper class, but skip the reduce phase
    conf.setMapperClass(WikipediaDumpLoaderMapper.class);
    conf.setNumReduceTasks(0);//from   ww  w .ja va  2s . co m
    // The object key/value pairs are text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Stream XML into the job
    conf.setInputFormat(StreamInputFormat.class);
    StreamInputFormat.addInputPath(conf, new Path(args[0]));
    // Use the XML record reader, with each page as one record
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");
    // Emit sequence files
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.mh2c.WikipediaWordCountDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP word count");

    // Set the mapper and reducer classes, and use the reducer as a combiner
    conf.setMapperClass(WikipediaWordCountMapper.class);
    conf.setReducerClass(WikipediaWordCountReducer.class);
    conf.setCombinerClass(WikipediaWordCountReducer.class);
    // The object key/value pairs are text words and integer counts
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Read in sequence files
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(args[0]));
    // Emit ordinary text files
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);/* w w w.  j a  va2 s  . com*/
    return 0;
}

From source file:com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfigV2.java

License:Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    final JobConf job = new JobConf(conf);
    job.setReducerClass(TreasuryYieldReducerV2.class);
    job.setMapperClass(TreasuryYieldMapperV2.class);
    job.setOutputFormat(MongoOutputFormat.class);
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setMapOutputKeyClass(MongoConfigUtil.getMapperOutputKey(conf));
    job.setMapOutputValueClass(MongoConfigUtil.getMapperOutputValue(conf));
    job.setInputFormat(MongoInputFormat.class);
    JobClient.runJob(job);/*from ww  w.  ja v a  2s .c  o m*/
    return 0;
}

From source file:com.mongodb.hadoop.util.MongoTool.java

License:Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**//from w ww .  j  av a2  s . com
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that -D<space> is important - no space will
     * not work as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}

From source file:com.mycompany.mavenproject1.App.java

public static void main(String[] args) throws IOException {

    // give time to attach debugger
    try {/*from   w  w  w .j  a  v a  2 s.c om*/
        Thread.sleep(8000);
    } catch (InterruptedException ex) {
        Logger.getLogger(App.class.getName()).log(Level.SEVERE, null, ex);
    }

    JobConf conf = new JobConf(App.class);

    // purge existing output file
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true); // delete file, true for recursive 

    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(WholeFileInputFormat.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

From source file:com.mycompany.MyHadoopSamples1.TransposeJob.java

License:Apache License

public static Configuration buildTransposeJobConf(Configuration initialConf, Path matrixInputPath,
        Path matrixOutputPath, int numInputRows) throws IOException {
    JobConf conf = new JobConf(initialConf, TransposeJob.class);
    conf.setJobName("TransposeJob: " + matrixInputPath + " transpose -> " + matrixOutputPath);
    FileSystem fs = FileSystem.get(conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    conf.setInt(NUM_ROWS_KEY, numInputRows);

    FileInputFormat.addInputPath(conf, matrixInputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(conf, matrixOutputPath);
    System.out.println("OUTPUT --> " + matrixOutputPath.toString());
    conf.setMapperClass(TransposeMapper.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(VectorWritable.class);
    conf.setCombinerClass(MergeVectorsCombiner.class);
    conf.setReducerClass(MergeVectorsReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(VectorWritable.class);
    return conf;//from  w  w  w  .ja  v  a  2 s  .com
}