Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n) 

Source Link

Document

Set the requisite number of reduce tasks for this job.

Usage

From source file:contrail.correct.InvokeFlash.java

License:Apache License

/**
 * Configures and runs the map-only "Flash invocation" job.
 *
 * <p>Reads the "inputpath", "outputpath" and "flash_binary" entries from
 * {@code stage_options}, registers the Flash binary with the distributed cache,
 * removes an already existing output directory, runs the job and prints the
 * elapsed wall-clock time to standard output.
 *
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the Flash binary location is missing or empty; exceptions
 *     raised by the file-system and job calls are propagated as well
 */
public RunningJob runJob() throws Exception {
    String inputPath = (String) stage_options.get("inputpath");
    String outputPath = (String) stage_options.get("outputpath");
    String flashPath = (String) stage_options.get("flash_binary");

    // Treat an absent option like an empty one so the caller gets the descriptive
    // error instead of a NullPointerException from length().
    if (flashPath == null || flashPath.length() == 0) {
        throw new Exception("Flash binary location required");
    }

    JobConf conf = new JobConf(InvokeFlash.class);
    conf.setJobName("Flash invocation");
    DistributedCache.addCacheFile(new Path(flashPath).toUri(), conf);

    // Sets the parameters in JobConf
    initializeJobConfiguration(conf);
    AvroJob.setMapperClass(conf, RunFlashMapper.class);

    Path out_path = new Path(outputPath);
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, out_path);

    // Input is MatePair records, output is fastqrecord records.
    AvroJob.setInputSchema(conf, new MatePair().getSchema());
    AvroJob.setOutputSchema(conf, new fastqrecord().getSchema());

    // Map-only job: no reduce phase.
    conf.setNumReduceTasks(0);

    // Delete the output directory if it exists already
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out_path)) {
        fs.delete(out_path, true);
    }

    long starttime = System.currentTimeMillis();
    RunningJob result = JobClient.runJob(conf);
    long endtime = System.currentTimeMillis();

    // Divide in double precision first; casting the long to float before the
    // division drops low-order digits on long runs.
    float diff = (float) ((endtime - starttime) / 1000.0);
    System.out.println("Runtime: " + diff + " s");
    return result;
}

From source file:contrail.correct.InvokeQuakeForMatePairs.java

License:Apache License

/**
 * Configures and runs the map-only "Quake Paired invocation" job.
 *
 * <p>Reads "inputpath", "outputpath", "quake_binary" and "bithashpath" from
 * {@code stage_options}, registers the Quake binary and the bithash file with the
 * distributed cache, removes an already existing output directory, runs the job
 * and prints the elapsed wall-clock time to standard output.
 *
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the Quake binary path or the bithash path is missing or
 *     empty; exceptions raised by the file-system and job calls are propagated
 */
public RunningJob runJob() throws Exception {
    String inputPath = (String) stage_options.get("inputpath");
    String outputPath = (String) stage_options.get("outputpath");
    String quakePath = (String) stage_options.get("quake_binary");
    String bithashPath = (String) stage_options.get("bithashpath");

    // Validate both locations up front. An absent option is handled like an empty
    // one so the caller sees the descriptive message rather than an NPE.
    if (quakePath == null || quakePath.length() == 0) {
        throw new Exception("Please specify Quake binary path");
    }
    if (bithashPath == null || bithashPath.length() == 0) {
        throw new Exception("Please specify bithash path");
    }

    JobConf conf = new JobConf(InvokeQuakeForMatePairs.class);
    conf.setJobName("Quake Paired invocation");
    DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf);
    DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf);

    // Sets the parameters in JobConf
    initializeJobConfiguration(conf);
    AvroJob.setMapperClass(conf, RunQuakeMapper.class);

    Path out_path = new Path(outputPath);
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, out_path);

    // Input is MatePair records, output is fastqrecord records.
    AvroJob.setInputSchema(conf, new MatePair().getSchema());
    AvroJob.setOutputSchema(conf, new fastqrecord().getSchema());

    // Map-only job: no reduce phase.
    conf.setNumReduceTasks(0);

    // Delete the output directory if it exists already
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out_path)) {
        fs.delete(out_path, true);
    }

    long starttime = System.currentTimeMillis();
    RunningJob result = JobClient.runJob(conf);
    long endtime = System.currentTimeMillis();

    // Divide in double precision first; casting the long to float before the
    // division drops low-order digits on long runs.
    float diff = (float) ((endtime - starttime) / 1000.0);
    System.out.println("Runtime: " + diff + " s");
    return result;
}

From source file:contrail.correct.InvokeQuakeForSingles.java

License:Apache License

/**
 * Configures and runs the map-only "Quake Singles invocation" job.
 *
 * <p>Reads "inputpath", "outputpath", "quake_binary" and "bithashpath" from
 * {@code stage_options}, registers the Quake binary and the bithash file with the
 * distributed cache, removes an already existing output directory, runs the job
 * and prints the elapsed wall-clock time to standard output.
 *
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception if the Quake path or the bithash path is missing or empty;
 *     exceptions raised by the file-system and job calls are propagated
 */
public RunningJob runJob() throws Exception {
    String inputPath = (String) stage_options.get("inputpath");
    String outputPath = (String) stage_options.get("outputpath");
    String quakePath = (String) stage_options.get("quake_binary");
    String bithashPath = (String) stage_options.get("bithashpath");

    // Validate both locations up front. An absent option is handled like an empty
    // one so the caller sees the descriptive message rather than an NPE.
    if (quakePath == null || quakePath.length() == 0) {
        throw new Exception("Please specify Quake path");
    }
    if (bithashPath == null || bithashPath.length() == 0) {
        throw new Exception("Please specify bithash path");
    }

    JobConf conf = new JobConf(InvokeQuakeForSingles.class);
    conf.setJobName("Quake Singles invocation");
    DistributedCache.addCacheFile(new Path(quakePath).toUri(), conf);
    DistributedCache.addCacheFile(new Path(bithashPath).toUri(), conf);

    // Sets the parameters in JobConf
    initializeJobConfiguration(conf);
    AvroJob.setMapperClass(conf, RunQuakeMapper.class);

    Path out_path = new Path(outputPath);
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, out_path);

    // Both input and output are fastqrecord records.
    AvroJob.setInputSchema(conf, new fastqrecord().getSchema());
    AvroJob.setOutputSchema(conf, new fastqrecord().getSchema());

    // Map-only job: no reduce phase.
    conf.setNumReduceTasks(0);

    // Delete the output directory if it exists already
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out_path)) {
        fs.delete(out_path, true);
    }

    long starttime = System.currentTimeMillis();
    RunningJob result = JobClient.runJob(conf);
    long endtime = System.currentTimeMillis();

    // Divide in double precision first; casting the long to float before the
    // division drops low-order digits on long runs.
    float diff = (float) ((endtime - starttime) / 1000.0);
    System.out.println("Runtime: " + diff + " s");
    return result;
}

From source file:contrail.stages.GraphStats.java

License:Open Source License

/**
 * Runs the GraphStats job and post-processes its output.
 *
 * <p>Requires the "inputpath" and "outputpath" options. When the "writeconfig"
 * option is present the job configuration is only written out and no job is run.
 * Otherwise an existing output directory is deleted, the job is run with a single
 * reducer, the N50 statistics are computed from the job output and written to a
 * file, and, if "topn_contigs" is set to a positive value, the lengths of the top
 * N contigs are written as well.
 *
 * @return the handle returned by {@code JobClient.runJob}, or {@code null} when
 *     only the job configuration was written
 * @throws Exception propagated from the file-system, job and helper calls
 */
@Override
public RunningJob runJob() throws Exception {
    String[] required_args = { "inputpath", "outputpath" };
    checkHasParametersOrDie(required_args);

    String inputPath = (String) stage_options.get("inputpath");
    String outputPath = (String) stage_options.get("outputpath");

    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    // NOTE(review): the job is built with PairMergeAvro.class rather than this
    // stage's own class -- confirm that this is intended.
    Configuration base_conf = getConf();
    JobConf conf = (base_conf != null)
            ? new JobConf(base_conf, PairMergeAvro.class)
            : new JobConf(PairMergeAvro.class);

    conf.setJobName("GraphStats " + inputPath);

    initializeJobConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    // Instances are created only to obtain their Avro schemas.
    Pair<Integer, GraphStatsData> mapOutput = new Pair<Integer, GraphStatsData>(0, new GraphStatsData());
    GraphNodeData nodeData = new GraphNodeData();
    AvroJob.setInputSchema(conf, nodeData.getSchema());
    AvroJob.setMapOutputSchema(conf, mapOutput.getSchema());
    AvroJob.setOutputSchema(conf, mapOutput.value().getSchema());

    AvroJob.setMapperClass(conf, GraphStatsMapper.class);
    AvroJob.setCombinerClass(conf, GraphStatsCombiner.class);
    AvroJob.setReducerClass(conf, GraphStatsReducer.class);

    // Use a single reducer task so that all the stats are accumulated in one
    // reducer.
    conf.setNumReduceTasks(1);

    if (stage_options.containsKey("writeconfig")) {
        // Only dump the configuration; no job is submitted.
        writeJobConfig(conf);
        return null;
    }

    // Delete the output directory if it exists already
    Path out_path = new Path(outputPath);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out_path)) {
        // TODO(jlewi): We should only delete an existing directory
        // if explicitly told to do so.
        sLogger.info("Deleting output path: " + out_path.toString() + " " + "because it already exists.");
        fs.delete(out_path, true);
    }

    long starttime = System.currentTimeMillis();
    RunningJob job = JobClient.runJob(conf);
    long endtime = System.currentTimeMillis();

    float diff = (float) ((endtime - starttime) / 1000.0);
    System.out.println("Runtime: " + diff + " s");

    // Create iterators to read the output
    Iterator<GraphStatsData> binsIterator = createOutputIterator();

    // Compute the N50 stats for each bin.
    ArrayList<GraphN50StatsData> N50Stats = computeN50Stats(binsIterator);

    // Write the N50 stats to a file.
    writeN50StatsToFile(N50Stats);

    // "topn_contigs" is not among the required arguments, so it may be absent;
    // unboxing a null Integer would throw after the job has already completed.
    Integer topn_contigs = (Integer) stage_options.get("topn_contigs");
    if (topn_contigs != null && topn_contigs > 0) {
        // Get the lengths of the n contigs. A fresh iterator is needed because
        // the previous one was consumed by computeN50Stats.
        binsIterator = createOutputIterator();
        List<Integer> topN = topNContigs(binsIterator, topn_contigs);
        writeTopNContigs(topN);
    }
    return job;
}

From source file:contrail.stages.GraphToFasta.java

License:Open Source License

/**
 * Runs the mapper-only job that reads GraphNodeData Avro records and writes text
 * output.
 *
 * <p>When the "writeconfig" option is present, the job configuration is written
 * out and no job is run. Otherwise an existing output directory is deleted, the
 * job is run and the elapsed wall-clock time is printed.
 *
 * @return the handle returned by {@code JobClient.runJob}, or {@code null} when
 *     only the job configuration was written
 * @throws Exception propagated from the file-system, job and helper calls
 */
@Override
public RunningJob runJob() throws Exception {
    final String inputPath = (String) stage_options.get("inputpath");
    final String outputPath = (String) stage_options.get("outputpath");
    sLogger.info(" - inputpath: " + inputPath);
    sLogger.info(" - outputpath: " + outputPath);

    final JobConf jobConf = new JobConf(GraphToFasta.class);
    AvroJob.setInputSchema(jobConf, GraphNodeData.SCHEMA$);
    initializeJobConfiguration(jobConf);

    // Where the job reads from and writes to.
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));

    // Avro records in, plain text out.
    jobConf.setInputFormat(new AvroInputFormat<GraphNodeData>().getClass());
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);

    // Mapper only: zero reduce tasks.
    jobConf.setMapperClass(GraphToFastqMapper.class);
    jobConf.setNumReduceTasks(0);

    if (stage_options.containsKey("writeconfig")) {
        writeJobConfig(jobConf);
        return null;
    }

    // Clear out a pre-existing output directory.
    // TODO(jlewi): We should only delete an existing directory
    // if explicitly told to do so.
    final Path existingOutput = new Path(outputPath);
    if (FileSystem.get(jobConf).exists(existingOutput)) {
        sLogger.info("Deleting output path: " + existingOutput.toString() + " because it already exists.");
        FileSystem.get(jobConf).delete(existingOutput, true);
    }

    final long startedAt = System.currentTimeMillis();
    final RunningJob finishedJob = JobClient.runJob(jobConf);
    final long elapsedMillis = System.currentTimeMillis() - startedAt;

    final float seconds = (float) (elapsedMillis / 1000.0);
    System.out.println("Runtime: " + seconds + " s");
    return finishedJob;
}

From source file:Corrector.Config.java

License:Apache License

/**
 * Copies the static settings of this class into the given job configuration.
 *
 * <p>{@code validateConfiguration()} is invoked first; afterwards the Hadoop task
 * settings and the algorithm parameters are stored in {@code conf}.
 *
 * @param conf the job configuration to populate
 */
public static void initializeConfiguration(JobConf conf) {
    validateConfiguration();

    // Hadoop execution settings.
    conf.setNumMapTasks(HADOOP_MAPPERS);
    conf.setNumReduceTasks(HADOOP_REDUCERS);
    conf.set("mapred.child.java.opts", HADOOP_JAVAOPTS);
    conf.setLong("mapred.task.timeout", HADOOP_TIMEOUT);
    conf.setLong("LOCALNODES", HADOOP_LOCALNODES);

    // Algorithm parameters, stored under custom keys.
    conf.setLong("K", K);
    conf.setLong("LOW_KMER", LOW_KMER);
    conf.setLong("UP_KMER", UP_KMER);
    conf.setLong("READLENGTH", READLEN);
    conf.setLong("RANDOM_PASS", RANDOM_PASS);
}

From source file:Corrector.Graph2Fasta.java

License:Apache License

/**
 * Runs the map-only Graph2Fasta job with text input and text output.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 *
 * @param inputPath path the job reads from
 * @param outputPath path the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception propagated from the file-system and job calls
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Fasta [7/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf jobConf = new JobConf(Graph2Fasta.class);
    jobConf.setJobName("Graph2Fasta " + inputPath);

    final Path target = new Path(outputPath);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    FileOutputFormat.setOutputPath(jobConf, target);

    // Text in, text out; keys and values are Text throughout.
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    jobConf.setMapperClass(Graph2FastaMapper.class);

    // Apply the shared settings, then override the reducer count: this job
    // has no reduce phase.
    Config.initializeConfiguration(jobConf);
    jobConf.setNumReduceTasks(0);

    // Remove output left over from an earlier run.
    FileSystem.get(jobConf).delete(target, true);

    return JobClient.runJob(jobConf);
}

From source file:Corrector.Graph2Sfa.java

License:Apache License

/**
 * Runs the map-only Graph2Sfa job with text input and text output.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 *
 * @param inputPath path the job reads from
 * @param outputPath path the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception propagated from the file-system and job calls
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Sfa [0/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf jobConf = new JobConf(Graph2Sfa.class);
    jobConf.setJobName("Graph2Sfa " + inputPath);

    final Path target = new Path(outputPath);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    FileOutputFormat.setOutputPath(jobConf, target);

    // Text in, text out; keys and values are Text throughout.
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    jobConf.setMapperClass(Graph2SfaMapper.class);

    // Apply the shared settings, then override the reducer count: this job
    // has no reduce phase.
    Config.initializeConfiguration(jobConf);
    jobConf.setNumReduceTasks(0);

    // Remove output left over from an earlier run.
    FileSystem.get(jobConf).delete(target, true);

    return JobClient.runJob(jobConf);
}

From source file:Corrector.PreProcessReads.java

License:Apache License

/**
 * Runs the map-only PreProcessReads job with text input and text output.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 *
 * @param inputPath path the job reads from
 * @param outputPath path the job writes to
 * @return the handle returned by {@code JobClient.runJob}
 * @throws Exception propagated from the file-system and job calls
 */
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: PreProcessReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    final JobConf jobConf = new JobConf(PreProcessReads.class);
    jobConf.setJobName("PreProcessReads " + inputPath + " " + Config.K);

    // Shared settings go in first so the reducer count below overrides them.
    Config.initializeConfiguration(jobConf);

    final Path target = new Path(outputPath);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    FileOutputFormat.setOutputPath(jobConf, target);

    // Text in, text out; keys and values are Text throughout.
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    // Mapper only: no reducer class is registered and the reduce count is zero.
    jobConf.setMapperClass(PreProcessReadsMapper.class);
    jobConf.setNumReduceTasks(0);

    // Remove output left over from an earlier run.
    FileSystem.get(jobConf).delete(target, true);

    return JobClient.runJob(jobConf);
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Runs a map-only Avro job that uses one schema, parsed from a file, as the
 * input, map-output and output schema.
 *
 * @param args exactly three arguments: input path, output path and schema file
 * @return 0 after the job has run, or -1 when the argument count is wrong
 * @throws Exception propagated from schema parsing and job execution
 */
@Override
public int run(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 3;
    if (wrongArgCount) {
        System.err.printf("Usage: %s [generic options] <input> <output> <schema-file>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    final String inputDir = args[0];
    final String outputDir = args[1];
    final String schemaLocation = args[2];

    final JobConf jobConf = new JobConf(getConf(), getClass());
    jobConf.setJobName("Avro projection");

    FileInputFormat.addInputPath(jobConf, new Path(inputDir));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));

    // The same schema is applied at every stage of the job.
    final Schema projection = new Schema.Parser().parse(new File(schemaLocation));
    AvroJob.setInputSchema(jobConf, projection);
    AvroJob.setMapOutputSchema(jobConf, projection);
    AvroJob.setOutputSchema(jobConf, projection);

    // No reduce phase.
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);
    return 0;
}