List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
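Before the file-by-file examples, here is a minimal self-contained driver sketch showing where the call usually sits (the class name and the use of the identity Mapper/Reducer are illustrative assumptions, not taken from the examples below). setNumReduceTasks must be called before the job is submitted, otherwise it throws IllegalStateException; 0 makes the job map-only, and 1 funnels all map output through a single reducer, as several of the examples below rely on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setNumReduceTasks example");
        job.setJarByClass(SetNumReduceTasksExample.class);
        // Identity mapper/reducer: TextInputFormat emits <LongWritable, Text>
        // pairs and they are passed through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Must be set before submission; 0 would make the job map-only.
        job.setNumReduceTasks(2);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}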
From source file:bb.BranchAndBound.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: branchandbound <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "branch and bound");
    job.setJarByClass(BranchAndBound.class);
    job.setMapperClass(BBMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);*/
    int n;
    String[] inputargs = new GenericOptionsParser(new Configuration(), args).getRemainingArgs();
    if (inputargs.length != 2) {
        System.err.println("Usage: branchandbound <data directory> <n>");
        System.exit(2);
    }
    n = Integer.parseInt(inputargs[1]);
    String dataDir = inputargs[0];
    String prev_output = dataDir + "/input";
    /*
    for (int i = 1; i <= n; i++) {
        for (int j = 0; j < 2; j++) {
            String input = prev_output;
            String output = inputargs[1] + "/iteration" + i + "_" + j;
            Job job = getJob(input, output, i, j);
            job.waitForCompletion(true); // if failed ????
            prev_output = output;
        }
    }
    */
    //prev_output = dataDir + "/output" + "/iteration" + 17;
    long totalNodes = 0;
    long searchedNodes = 0;
    long cutbyDEE = 0;
    int mapTotal = 768;
    for (int i = 0; i <= n; i++) {
        iterRound = i;
        String input = prev_output;
        String output = dataDir + "/output" + "/iteration" + i;
        Job job = getJob(input, output, dataDir, i);
        if (i == n) {
            numReduceTasks = 1;
        }
        //job.setNumMapTasks(200);
        if (numOutput > mapTotal) {
            FileInputFormat.setMaxInputSplitSize(job, 10 * (8 * n + 10) + numOutput * (8 * n + 10) / 3000);
            FileInputFormat.setMinInputSplitSize(job, Math.max((8 * n + 10), numOutput * (8 * n + 10) / 5000));
        } else {
            FileInputFormat.setMaxInputSplitSize(job, (8 * n + 10));
        }
        /*
        if (i == 0) {
            job.setNumReduceTasks(1);
        } else {
            job.setNumReduceTasks(0);
        }
        */
        job.setNumReduceTasks(0);
        job.waitForCompletion(true); // if failed ????
        prev_output = output;
        Counters counters = job.getCounters();
        Counter counter = counters.findCounter("MyCounter", "Map Output Counter");
        numOutput = counter.getValue();
        totalNodes += numOutput;
        cutbyDEE += counters.findCounter("MyCounter", "Cut By DEE").getValue();
        searchedNodes += totalNodes + cutbyDEE + counters.findCounter("MyCounter", "Cut By Bound").getValue();
        System.out.println(numOutput + " " + (8 * n + 10) + " " + (numOutput * (8 * n + 10) / 768));
    }
    System.out.println("searchedNodes " + searchedNodes);
    System.out.println(totalNodes);
    System.out.println("cut by dee " + cutbyDEE);
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;
    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);
    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);
    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }
    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }
    runJob(job, info);
    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);
        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);
        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);
        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());
        runJob(job, "Prefix Creation");
    }
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);
    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);
    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }
    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));
    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the first MapReduce cycle. First, the transaction file is partitioned into a number of chunks that are
 * given to different mappers. Each mapper reads a chunk and returns the items together with their partial
 * tid-lists. The reducer attaches the partial tid-lists to each other, then discards the infrequent ones, sorts
 * the frequent ones by ascending frequency and divides the singletons among the available mappers.
 *
 * This method generates three files: the frequent singletons (OSingletonsTids), the order file for singletons
 * based on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 *
 * @param outputFile
 * @param opt
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);
    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);
    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);
    runJob(job, "Item Reading");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building
 * X-FIs. Each mapper uses Eclat to quickly compute the list of X-FIs. The total set of X-FIs is again obtained
 * by the reducer, which then gets divided into independent sets. All sets that have been computed from level 1
 * to X are already reported. The distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin,
 * Lowest-Frequency, ...
 *
 * This method generates three files: the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 *
 * @param inputDir
 * @param outputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;
    System.out.println("[PrefixComputation]: input: " + inputFile);
    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);
    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());
    runJob(job, "Partition Prefixes");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the
 * collection of closed sets. All information is reported to the reducer, which finally writes the output to
 * disk.
 *
 * @param inputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);
    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);
    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }
    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }
    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));
    runJob(job, "Mining");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);
        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);
        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);
    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }
    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }
    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);
    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}