List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
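Before the file-by-file examples, here is a minimal self-contained driver sketch showing where the call usually sits (the class name and the use of the identity Mapper/Reducer are illustrative assumptions, not taken from the examples below). setNumReduceTasks must be called before the job is submitted, otherwise it throws IllegalStateException; 0 makes the job map-only, and 1 funnels all map output through a single reducer, as several of the examples below rely on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setNumReduceTasks example");
        job.setJarByClass(SetNumReduceTasksExample.class);
        // Identity mapper/reducer: TextInputFormat emits <LongWritable, Text>
        // pairs and they are passed through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Must be set before submission; 0 would make the job map-only.
        job.setNumReduceTasks(2);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}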
From source file:bb.BranchAndBound.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: branchandbound <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "branch and bound");
    job.setJarByClass(BranchAndBound.class);
    job.setMapperClass(BBMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);*/
    int n;
    String[] inputargs = new GenericOptionsParser(new Configuration(), args).getRemainingArgs();
    if (inputargs.length != 2) {
        System.err.println("Usage: branchandbound <data directory> <n>");
        System.exit(2);
    }
    n = Integer.parseInt(inputargs[1]);
    String dataDir = inputargs[0];
    String prev_output = dataDir + "/input";
    /*
    for (int i = 1; i <= n; i++) {
        for (int j = 0; j < 2; j++) {
            String input = prev_output;
            String output = inputargs[1] + "/iteration" + i + "_" + j;
            Job job = getJob(input, output, i, j);
            job.waitForCompletion(true); // if failed ????
            prev_output = output;
        }
    }
    */
    //prev_output = dataDir + "/output" + "/iteration" + 17;
    long totalNodes = 0;
    long searchedNodes = 0;
    long cutbyDEE = 0;
    int mapTotal = 768;
    for (int i = 0; i <= n; i++) {
        iterRound = i;
        String input = prev_output;
        String output = dataDir + "/output" + "/iteration" + i;
        Job job = getJob(input, output, dataDir, i);
        if (i == n) {
            numReduceTasks = 1;
        }
        //job.setNumMapTasks(200);
        if (numOutput > mapTotal) {
            FileInputFormat.setMaxInputSplitSize(job, 10 * (8 * n + 10) + numOutput * (8 * n + 10) / 3000);
            FileInputFormat.setMinInputSplitSize(job, Math.max((8 * n + 10), numOutput * (8 * n + 10) / 5000));
        } else {
            FileInputFormat.setMaxInputSplitSize(job, (8 * n + 10));
        }
        /*
        if (i == 0) {
            job.setNumReduceTasks(1);
        } else {
            job.setNumReduceTasks(0);
        }
        */
        job.setNumReduceTasks(0);
        job.waitForCompletion(true); // if failed ????
        prev_output = output;
        Counters counters = job.getCounters();
        Counter counter = counters.findCounter("MyCounter", "Map Output Counter");
        numOutput = counter.getValue();
        totalNodes += numOutput;
        cutbyDEE += counters.findCounter("MyCounter", "Cut By DEE").getValue();
        searchedNodes += totalNodes + cutbyDEE + counters.findCounter("MyCounter", "Cut By Bound").getValue();
        System.out.println(numOutput + " " + (8 * n + 10) + " " + (numOutput * (8 * n + 10) / 768));
    }
    System.out.println("searchedNodes " + searchedNodes);
    System.out.println(totalNodes);
    System.out.println("cut by dee " + cutbyDEE);
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;
    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);
    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);
    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }
    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }
    runJob(job, info);
    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;
        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);
        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);
        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);
        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());
        runJob(job, "Prefix Creation");
    }
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);
    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);
    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }
    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));
    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the first MapReduce cycle. First, the transaction file is partitioned into a number of chunks that are
 * given to different mappers. Each mapper reads a chunk and returns the items together with their partial
 * tid-lists. The reducer attaches the partial tid-lists to each other, then discards the infrequent ones, sorts
 * the frequent ones by ascending frequency and divides the singletons among the available mappers.
 *
 * This method generates three files: the frequent singletons (OSingletonsTids), the order file for singletons
 * based on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 *
 * @param outputFile
 * @param opt
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);
    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);
    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);
    runJob(job, "Item Reading");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building
 * X-FIs. Each mapper uses Eclat to quickly compute the list of X-FIs. The total set of X-FIs is again obtained
 * by the reducer, which then gets divided into independent sets. All sets that have been computed from level 1
 * to X are already reported. The distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin,
 * Lowest-Frequency, ...
 *
 * This method generates three files: the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 *
 * @param inputDir
 * @param outputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;
    System.out.println("[PrefixComputation]: input: " + inputFile);
    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);
    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());
    runJob(job, "Partition Prefixes");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the
 * collection of closed sets. All information is reported to the reducer, which finally writes the output to
 * disk.
 *
 * @param inputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);
    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);
    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);
    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }
    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }
    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));
    runJob(job, "Mining");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);
        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);
        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false,
                halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);
    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }
    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }
    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);
    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}