Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Source Link

Document

Set the OutputFormat implementation for the map-reduce job.

Usage

From source file:com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java

License:Apache License

/**
 * Runs phase 4 of the anchor-text extraction as a MapReduce job with a single reduce task.
 * Maps from (targetID, (anchor, count)) to (anchor, (targetID, count)).
 *
 * <p>Input is read as sequence files from the {@code data} entry of every {@code part-}
 * directory under {@code inputPath}; output is written with {@code MapFileOutputFormat}.
 *
 * @param inputPath  base directory of the previous phase's output
 * @param outputPath directory this phase writes to
 * @throws IOException if the job cannot be run
 */
private void task4(String inputPath, String outputPath) throws IOException {
    LOG.info("Extracting anchor text (phase 4)...");
    LOG.info(" - input:   " + inputPath);
    LOG.info(" - output:  " + outputPath);

    final JobConf job = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    final String jobName = String.format("ExtractWikipediaAnchorText:phase4[input: %s, output: %s]",
            inputPath, outputPath);
    job.setJobName(jobName);
    job.setNumReduceTasks(1);

    // Where the data comes from and where it goes.
    final Path inputGlob = new Path(inputPath + "/part-*/data");
    FileInputFormat.addInputPath(job, inputGlob);
    final Path outputDir = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, outputDir);

    // Processing classes.
    job.setMapperClass(MyMapper4.class);
    job.setReducerClass(MyReducer4.class);

    // On-disk formats.
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(MapFileOutputFormat.class);

    // Key/value types: identical for the intermediate and the final output.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    JobClient.runJob(job);
}

From source file:com.zfylin.demo.bigdata.hadoop.mr.WordCount2.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hdfs");

    //?     ???hadoop?
    String input = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student";
    /**//from  www . j  a v a 2s.  c o  m
     * HDFSout
     * ???
     */
    String output = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student/output/";

    JobConf conf = new JobConf(WordCount2.class);
    /**
     * ERROR: Exception message: /bin/bash: line 0: fg: no job control
       */
    conf.set("mapreduce.app-submission.cross-platform", "true");

    conf.setJobName("WordCount");
    //        conf.addResource("classpath:/hadoop/core-site.xml");
    //        conf.addResource("classpath:/hadoop/hdfs-site.xml");
    //        conf.addResource("classpath:/hadoop/mapred-site.xml");
    //??
    conf.setOutputKeyClass(Text.class);
    //?? int
    conf.setOutputValueClass(IntWritable.class);
    //mapper
    conf.setMapperClass(WordCountMapper.class);
    /**
     * ??Reducer
     * ???mapreduce??
     * ????
     * ????
     * ?
     * ???
     * ?????
     * ?
     */
    conf.setCombinerClass(WordCountReducer.class);
    //reduce
    conf.setReducerClass(WordCountReducer.class);
    /**
     * ?TextInputFormat?
     * ????
     * LongWritable????
     * Text
     */
    conf.setInputFormat(TextInputFormat.class);
    /**
     * ?TextOutpuTFormat?
     * ????toString()
     * 
     */
    conf.setOutputFormat(TextOutputFormat.class);
    //?
    FileInputFormat.setInputPaths(conf, new Path(input));
    //???
    FileOutputFormat.setOutputPath(conf, new Path(output));
    //?mapreduce
    JobClient.runJob(conf);
    System.exit(0);
}

From source file:com.zjy.mongo.util.MongoTool.java

License:Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**//from w  w w . j a  v  a  2s.co  m
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that -D<space> is important - no space will
     * not work as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}

From source file:combiner.CombinerDriver.java

/**
 * Configures and runs the "BookCrossing1.0" job: text input, text output, with the same
 * reducer class used as combiner and reducer.
 *
 * <p>If the job throws, the stack trace is printed and the JVM exits with status 1 so that a
 * failure is visible to whatever launched the driver.
 *
 * @param args {@code args[1]} is the input directory and {@code args[2]} the output directory;
 *             {@code args[0]} is not read by this method
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 */
public static void main(String[] args) {
    // Fail with a readable message instead of a bare ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException(
                "Expected at least 3 arguments (input directory at index 1, output directory at index 2) but got "
                        + (args == null ? 0 : args.length));
    }

    // Configurations for Job set in this variable
    JobConf conf = new JobConf(combiner.CombinerDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(combiner.CombinerMapper.class);
    conf.setCombinerClass(combiner.CombinerReducer.class);
    conf.setReducerClass(combiner.CombinerReducer.class);

    // Formats of the Data Type of Input and output
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    try {
        // Running the job with Configurations set in the conf.
        // The static runJob needs no JobClient instance, so none is created here.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
        // Do not report success to the caller when the job did not run to completion.
        System.exit(1);
    }
}

From source file:contrail.stages.GraphToFasta.java

License:Open Source License

/**
 * Builds the GraphToFasta job from the "inputpath" and "outputpath" stage options and then
 * either writes out its configuration or runs it.
 *
 * <p>The job is map-only (zero reduce tasks), reads Avro input using the
 * {@code GraphNodeData} schema and writes text output. When it is run, an already existing
 * output path is deleted first and the elapsed wall-clock time is printed to standard output.
 *
 * @return the handle returned by {@code JobClient.runJob}, or {@code null} when the
 *         "writeconfig" stage option is present
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
@Override
public RunningJob runJob() throws Exception {
    final String inputPath = (String) stage_options.get("inputpath");
    final String outputPath = (String) stage_options.get("outputpath");

    sLogger.info(" - inputpath: " + inputPath);
    sLogger.info(" - outputpath: " + outputPath);

    final JobConf job = new JobConf(GraphToFasta.class);
    AvroJob.setInputSchema(job, GraphNodeData.SCHEMA$);
    initializeJobConfiguration(job);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setInputFormat(new AvroInputFormat<GraphNodeData>().getClass());
    job.setOutputFormat(TextOutputFormat.class);

    // Map-only job: a mapper and no reduce tasks.
    job.setMapperClass(GraphToFastqMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    if (stage_options.containsKey("writeconfig")) {
        // Only the configuration was requested; nothing is executed.
        writeJobConfig(job);
        return null;
    }

    // Delete the output directory if it exists already
    final Path outputDir = new Path(outputPath);
    final FileSystem fs = FileSystem.get(job);
    if (fs.exists(outputDir)) {
        // TODO(jlewi): We should only delete an existing directory
        // if explicitly told to do so.
        sLogger.info("Deleting output path: " + outputDir + " because it already exists.");
        fs.delete(outputDir, true);
    }

    final long startMillis = System.currentTimeMillis();
    final RunningJob result = JobClient.runJob(job);
    final long endMillis = System.currentTimeMillis();

    final float elapsedSeconds = (float) ((endMillis - startMillis) / 1000.0);
    System.out.println("Runtime: " + elapsedSeconds + " s");
    return result;
}

From source file:Corrector.Correction.java

License:Apache License

/**
 * Configures and runs the "Correction" job: text in, text out, with {@code Text} keys and
 * values for both the map output and the final output.
 *
 * <p>The output path is deleted recursively before the job is started.
 *
 * @param inputPath  input path string handed to {@code FileInputFormat.addInputPaths}
 * @param outputPath directory the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Correction [0/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf job = new JobConf(Correction.class);
    job.setJobName("Correction " + inputPath + " " + Config.K);

    Config.initializeConfiguration(job);

    // Processing classes.
    job.setMapperClass(CorrectionMapper.class);
    job.setReducerClass(CorrectionReducer.class);

    // Formats and key/value types.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.addInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Clear out any previous output before running.
    final Path staleOutput = new Path(outputPath);
    FileSystem.get(job).delete(staleOutput, true);

    return JobClient.runJob(job);
}

From source file:Corrector.FindError.java

License:Apache License

/**
 * Configures and runs the "FindError" job: text in, text out, with {@code Text} keys and
 * values for both the map output and the final output.
 *
 * <p>The output path is deleted recursively before the job is started.
 *
 * @param inputPath  directory the job reads from
 * @param outputPath directory the job writes to
 * @param idx        value stored in the job configuration under the key {@code "IDX"}
 * @param hkmerlist  URI string of a file registered with the distributed cache
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
public RunningJob run(String inputPath, String outputPath, int idx, String hkmerlist) throws Exception {
    sLogger.info("Tool name: FindError");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf job = new JobConf(FindError.class);
    job.setLong("IDX", idx);
    job.setJobName("FindError " + inputPath + " " + Config.K);

    // Ship the k-mer list to the tasks through the distributed cache.
    final URI kmerListUri = new URI(hkmerlist);
    DistributedCache.addCacheFile(kmerListUri, job);

    Config.initializeConfiguration(job);

    // Processing classes.
    job.setMapperClass(FindErrorMapper.class);
    job.setReducerClass(FindErrorReducer.class);

    // Formats and key/value types.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Clear out any previous output before running.
    final Path staleOutput = new Path(outputPath);
    FileSystem.get(job).delete(staleOutput, true);

    return JobClient.runJob(job);
}

From source file:Corrector.Graph2Fasta.java

License:Apache License

/**
 * Configures and runs the "Graph2Fasta" job as a map-only job (zero reduce tasks) with text
 * input and text output.
 *
 * <p>The output path is deleted recursively before the job is started.
 *
 * @param inputPath  directory the job reads from
 * @param outputPath directory the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Fasta [7/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf job = new JobConf(Graph2Fasta.class);
    job.setJobName("Graph2Fasta " + inputPath);

    // Mapper only; no reducer class is configured.
    job.setMapperClass(Graph2FastaMapper.class);

    // Formats and key/value types.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input and output locations.
    final Path outputDir = new Path(outputPath);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);

    // The reduce-task count is set after the shared configuration has been applied.
    Config.initializeConfiguration(job);
    job.setNumReduceTasks(0);

    // Clear out any previous output before running.
    FileSystem.get(job).delete(new Path(outputPath), true);

    return JobClient.runJob(job);
}

From source file:Corrector.Graph2Sfa.java

License:Apache License

/**
 * Configures and runs the "Graph2Sfa" job as a map-only job (zero reduce tasks) with text
 * input and text output.
 *
 * <p>The output path is deleted recursively before the job is started.
 *
 * @param inputPath  directory the job reads from
 * @param outputPath directory the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Sfa [0/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf job = new JobConf(Graph2Sfa.class);
    job.setJobName("Graph2Sfa " + inputPath);

    // Mapper only; no reducer class is configured.
    job.setMapperClass(Graph2SfaMapper.class);

    // Formats and key/value types.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input and output locations.
    final Path outputDir = new Path(outputPath);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);

    // The reduce-task count is set after the shared configuration has been applied.
    Config.initializeConfiguration(job);
    job.setNumReduceTasks(0);

    // Clear out any previous output before running.
    FileSystem.get(job).delete(new Path(outputPath), true);

    return JobClient.runJob(job);
}

From source file:Corrector.IdentifyTrustedReads.java

License:Apache License

/**
 * Configures and runs the "IdentifyTrustedReads" job: text in, text out, with {@code Text}
 * keys and {@code IntWritable} values for both the map output and the final output.
 *
 * <p>The output path is deleted recursively before the job is started.
 *
 * @param inputPath      directory the job reads from
 * @param outputPath     directory the job writes to
 * @param kmer_threshold value stored in the job configuration under the key
 *                       {@code "KmerThreshold"}
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if configuring, cleaning up for, or running the job fails
 */
public RunningJob run(String inputPath, String outputPath, long kmer_threshold) throws Exception {
    sLogger.info("Tool name: IdentifyTrustedReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf job = new JobConf(IdentifyTrustedReads.class);
    job.setLong("KmerThreshold", kmer_threshold);
    job.setJobName("IdentifyTrustedReads " + inputPath + " " + Config.K);

    Config.initializeConfiguration(job);

    // Processing classes, both nested in IdentifyTrustedReads.
    job.setMapperClass(IdentifyTrustedReads.IdentifyTrustedReadsMapper.class);
    job.setReducerClass(IdentifyTrustedReads.IdentifyTrustedReadsReducer.class);

    // Formats and key/value types.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Clear out any previous output before running.
    final Path staleOutput = new Path(outputPath);
    FileSystem.get(job).delete(staleOutput, true);

    return JobClient.runJob(job);
}