Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass)

Source Link

Document

Set the value class for job outputs.

Usage

From source file:com.intel.hadoop.graphbuilder.partition.mapreduce.vrecord.VrecordIngressMR.java

License:Open Source License

public void run(int numProcs, String inputpath, String outputpath) throws IOException {

    JobConf conf = new JobConf(VrecordIngressMR.class);
    conf.setJobName("Vrecord Mapreduce");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(VrecordIngressMapper.class);
    conf.setReducerClass(VrecordIngressReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(MultiDirOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    if (gzip) {/* ww w  .j  a  va 2  s. com*/
        TextOutputFormat.setCompressOutput(conf, true);
        TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    }

    LOG.info("====== Job: Distributed Vertex Records to partitions =========");
    LOG.info("input: " + inputpath);
    LOG.info("output: " + outputpath);
    LOG.info("numProc = " + numProcs);
    LOG.info("gzip = " + Boolean.toString(gzip));
    LOG.info("==============================================================");

    JobClient.runJob(conf);
    LOG.info("==========================Done===============================");
}

From source file:com.jyz.study.hadoop.mapreduce.datajoin.DataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];//from  ww  w. ja  v  a  2 s  . c o m
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

private static JobConf createJobConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, DistExec.class);
    jobconf.setJobName(NAME);//from   w w w  .j av a 2 s.c om

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setMapSpeculativeExecution(false);

    jobconf.setInputFormat(ExecInputFormat.class);
    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);

    jobconf.setMapperClass(ExecFilesMapper.class);
    jobconf.setNumReduceTasks(0);
    // TODO implement singleOut by setting single reducer and prepending file name to output
    return jobconf;
}

From source file:com.liveramp.hank.hadoop.HadoopDomainBuilder.java

License:Apache License

@Override
protected void configureJob(JobConf conf) {
    // Input specification
    conf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(conf, inputPath);
    // Mapper class and key/value classes
    conf.setMapperClass(mapperClass);//  www .j  a  v  a  2  s.co  m
    conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    conf.setMapOutputValueClass(ValueWritable.class);
    // Reducer class and key/value classes
    conf.setReducerClass(DomainBuilderReducer.class);
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
    // Partitioner
    conf.setPartitionerClass(DomainBuilderPartitioner.class);
}

From source file:com.liveramp.hank.hadoop.HadoopDomainCompactor.java

License:Apache License

@Override
protected void configureJob(JobConf conf) {
    // Input format
    conf.setInputFormat(HadoopDomainCompactorInputFormat.class);

    // Mappers//from w w  w .  ja  va2 s  .  co  m
    conf.setMapperClass(HadoopDomainCompactorMapper.class);
    conf.setMapOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setMapOutputValueClass(ValueWritable.class);

    // No reducers
    conf.setNumReduceTasks(0);

    // Output
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
}

From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];/*from  ww w.java 2  s .  com*/
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}

From source file:com.mh2c.WikipediaDumpLoaderDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP dump loader");

    // Set the mapper class, but skip the reduce phase
    conf.setMapperClass(WikipediaDumpLoaderMapper.class);
    conf.setNumReduceTasks(0);// w  w  w  .ja  v a 2  s .c  o  m
    // The object key/value pairs are text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Stream XML into the job
    conf.setInputFormat(StreamInputFormat.class);
    StreamInputFormat.addInputPath(conf, new Path(args[0]));
    // Use the XML record reader, with each page as one record
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");
    // Emit sequence files
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

From source file:com.mh2c.WikipediaWordCountDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // arg checks

    JobConf conf = new JobConf(getClass());
    conf.setJobName("WP word count");

    // Set the mapper and reducer classes, and use the reducer as a combiner
    conf.setMapperClass(WikipediaWordCountMapper.class);
    conf.setReducerClass(WikipediaWordCountReducer.class);
    conf.setCombinerClass(WikipediaWordCountReducer.class);
    // The object key/value pairs are text words and integer counts
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Read in sequence files
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(args[0]));
    // Emit ordinary text files
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);/*from   w w w .jav a 2s  .co  m*/
    return 0;
}

From source file:com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfigV2.java

License:Apache License

public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    final JobConf job = new JobConf(conf);
    job.setReducerClass(TreasuryYieldReducerV2.class);
    job.setMapperClass(TreasuryYieldMapperV2.class);
    job.setOutputFormat(MongoOutputFormat.class);
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setMapOutputKeyClass(MongoConfigUtil.getMapperOutputKey(conf));
    job.setMapOutputValueClass(MongoConfigUtil.getMapperOutputValue(conf));
    job.setInputFormat(MongoInputFormat.class);
    JobClient.runJob(job);//from  w  w  w .jav  a  2s.co  m
    return 0;
}

From source file:com.mongodb.hadoop.util.MongoTool.java

License:Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**// w ww. j a  v a 2 s . c o m
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that -D<space> is important - no space will
     * not work as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}