Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf#setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Document

Set the value class for job outputs.
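Before the real-world usages below, here is a minimal, self-contained sketch of where the call fits in a classic mapred job. The class name, identity mapper/reducer, and argument-derived paths are illustrative placeholders, not taken from any of the source files listed under Usage.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class OutputValueClassExample {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(OutputValueClassExample.class);
        conf.setJobName("setOutputValueClass example");

        // Declare the key/value types the job emits; the framework uses these
        // to check and serialize the final output records. Because no separate
        // map output classes are set, these also apply to the map outputs.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Identity mapper/reducer pass TextInputFormat's LongWritable/Text
        // records straight through, so the declared classes match.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}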

Usage

From source file: com.intel.hadoop.graphbuilder.partition.mapreduce.vrecord.VrecordIngressMR.java

License: Open Source License

public void run(int numProcs, String inputpath, String outputpath) throws IOException {

    JobConf conf = new JobConf(VrecordIngressMR.class);
    conf.setJobName("Vrecord Mapreduce");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(VrecordIngressMapper.class);
    conf.setReducerClass(VrecordIngressReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(MultiDirOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    if (gzip) {
        TextOutputFormat.setCompressOutput(conf, true);
        TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    }

    LOG.info("====== Job: Distributed Vertex Records to partitions =========");
    LOG.info("input: " + inputpath);
    LOG.info("output: " + outputpath);
    LOG.info("numProc = " + numProcs);
    LOG.info("gzip = " + Boolean.toString(gzip));
    LOG.info("==============================================================");

    JobClient.runJob(conf);
    LOG.info("==========================Done===============================");
}
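Note that the map output classes (IntWritable/Text) differ here from the final job output classes (Text/Text), so both pairs must be declared: when the map output classes are not set explicitly, they default to the job output classes.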

From source file: com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java

License: Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file: com.kadwa.hadoop.DistExec.java

License: Open Source License

private static JobConf createJobConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, DistExec.class);
    jobconf.setJobName(NAME);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setMapSpeculativeExecution(false);

    jobconf.setInputFormat(ExecInputFormat.class);
    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);

    jobconf.setMapperClass(ExecFilesMapper.class);
    jobconf.setNumReduceTasks(0);
    // TODO implement singleOut by setting single reducer and prepending file name to output
    return jobconf;
}
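Because this is a map-only job (setNumReduceTasks(0)), the key/value classes set here describe what the mapper writes directly to the output format.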

From source file: com.liveramp.hank.hadoop.HadoopDomainBuilder.java

License: Apache License

@Override
protected void configureJob(JobConf conf) {
    // Input specification
    conf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(conf, inputPath);
    // Mapper class and key/value classes
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    conf.setMapOutputValueClass(ValueWritable.class);
    // Reducer class and key/value classes
    conf.setReducerClass(DomainBuilderReducer.class);
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
    // Partitioner
    conf.setPartitionerClass(DomainBuilderPartitioner.class);
}

From source file: com.liveramp.hank.hadoop.HadoopDomainCompactor.java

License: Apache License

@Override
protected void configureJob(JobConf conf) {
    // Input format
    conf.setInputFormat(HadoopDomainCompactorInputFormat.class);

    // Mappers
    conf.setMapperClass(HadoopDomainCompactorMapper.class);
    conf.setMapOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setMapOutputValueClass(ValueWritable.class);

    // No reducers
    conf.setNumReduceTasks(0);

    // Output
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
}

From source file: com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License: Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true); // recursive delete; the single-argument overload is deprecated
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
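Beyond the output key/value setup, this variant adds a composite-key shuffle: a custom partitioner, sort comparator, and grouping comparator operate on CompositeKey, while the final output classes remain Text and the value class selected from args[7].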

From source file: com.mh2c.WikipediaDumpLoaderDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP dump loader");

    // Set the mapper class, but skip the reduce phase
    conf.setMapperClass(WikipediaDumpLoaderMapper.class);
    conf.setNumReduceTasks(0);
    // The object key/value pairs are text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Stream XML into the job
    conf.setInputFormat(StreamInputFormat.class);
    StreamInputFormat.addInputPath(conf, new Path(args[0]));
    // Use the XML record reader, with each page as one record
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");
    // Emit sequence files
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file: com.mh2c.WikipediaWordCountDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP word count");

    // Set the mapper and reducer classes, and use the reducer as a combiner
    conf.setMapperClass(WikipediaWordCountMapper.class);
    conf.setReducerClass(WikipediaWordCountReducer.class);
    conf.setCombinerClass(WikipediaWordCountReducer.class);
    // The object key/value pairs are text words and integer counts
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Read in sequence files
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(args[0]));
    // Emit ordinary text files
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file: com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfigV2.java

License: Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    final JobConf job = new JobConf(conf);
    job.setReducerClass(TreasuryYieldReducerV2.class);
    job.setMapperClass(TreasuryYieldMapperV2.class);
    job.setOutputFormat(MongoOutputFormat.class);
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setMapOutputKeyClass(MongoConfigUtil.getMapperOutputKey(conf));
    job.setMapOutputValueClass(MongoConfigUtil.getMapperOutputValue(conf));
    job.setInputFormat(MongoInputFormat.class);
    JobClient.runJob(job);
    return 0;
}
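Here none of the key/value classes are hardcoded: all four are resolved from the job configuration through MongoConfigUtil, so the same driver can serve different output types.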

From source file: com.mongodb.hadoop.util.MongoTool.java

License: Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /*
     * Any arguments specified with -D <property>=<value> on the CLI will be
     * picked up and set here, overriding any XML-level values. Note that the
     * space after -D matters: without it, the option is consumed as a JVM
     * system property by Java itself instead of being passed to the job.
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /*
     * Determines whether the job runs verbosely, e.g. prints debug output;
     * this only applies to foreground jobs.
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /*
     * Run the job in the foreground (wait for completion) or in the background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}