List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
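Before the third-party examples below, here is a minimal, self-contained sketch of where setJarByClass(Class<?>) fits in a typical driver: it tells Hadoop which jar to ship to the cluster by locating the jar that contains the given class. All names in this sketch (WordCountDriver, TokenizerMapper, SumReducer, the argument paths) are hypothetical placeholders and are not taken from any example on this page.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");

        // setJarByClass makes Hadoop ship the jar containing this class to the cluster;
        // without it, map/reduce tasks on remote nodes can fail with ClassNotFoundException.
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that any class packaged in the job jar works as the argument; the examples below variously pass the driver class, a mapper, or a reducer, and the effect is the same as long as the class lives in the jar that should be distributed.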
From source file:basic.PartitionGraph.java
License:Apache License
/** Runs this tool. */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(RANGE, "use range partitioner"));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions")
            .create(NUM_PARTITIONS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(NUM_PARTITIONS)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inPath = cmdline.getOptionValue(INPUT);
    String outPath = cmdline.getOptionValue(OUTPUT);
    int nodeCount = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int numParts = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS));
    boolean useRange = cmdline.hasOption(RANGE);

    LOG.info("Tool name: " + PartitionGraph.class.getSimpleName());
    LOG.info(" - input dir: " + inPath);
    LOG.info(" - output dir: " + outPath);
    LOG.info(" - num partitions: " + numParts);
    LOG.info(" - node cnt: " + nodeCount);
    LOG.info(" - use range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.setInt("NodeCount", nodeCount);

    Job job = Job.getInstance(conf);
    job.setJobName(PartitionGraph.class.getSimpleName() + ":" + inPath);
    job.setJarByClass(PartitionGraph.class);

    job.setNumReduceTasks(numParts);

    FileInputFormat.setInputPaths(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    FileSystem.get(conf).delete(new Path(outPath), true);
    job.waitForCompletion(true);

    return 0;
}
From source file:bb.BranchAndBound.java
License:Apache License
static Job getJob(String input, String output, String dataDir, int iteration) throws Exception {
    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    // Remove empty input files before submitting the job.
    FileStatus[] fileStatus = hdfs.listStatus(new Path(input));
    for (int i = 0; i < fileStatus.length; ++i) {
        if (fileStatus[i].getLen() == 0) {
            hdfs.delete(fileStatus[i].getPath());
        }
    }

    DistributedCache.addCacheFile(new URI(dataDir + "/data"), conf);

    Job ret = new Job(conf, dataDir + "_iteration_" + iteration);
    ret.setJarByClass(BranchAndBound.class);
    ret.setMapperClass(BBMapper1.class);
    ret.setReducerClass(BBReducer.class);
    //ret.setReducerClass(MergeReducer.class);

    FileInputFormat.setInputPaths(ret, new Path(input));
    //if( iteration > 7 ) FileInputFormat.setMinInputSplitSize(ret, 67108864);
    FileOutputFormat.setOutputPath(ret, new Path(output));

    ret.setOutputKeyClass(NullWritable.class);
    ret.setOutputValueClass(Text.class);

    return ret;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;

    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startCreatePrefixGroups(FIMOptions opt, int phase)
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Path path = new Path(opt.outputDir + separator + "tg" + phase);
    for (FileStatus status : path.getFileSystem(new Configuration()).listStatus(path)) {
        String cacheFile = status.getPath().toString();
        String trieGroupCount = cacheFile.substring(cacheFile.lastIndexOf('/'), cacheFile.length());
        trieGroupCount = trieGroupCount.split("-")[1];
        String outputFile = opt.outputDir + separator + "pg-trieGroup" + trieGroupCount;

        System.out.println("[CreatePrefixGroups]: input: " + opt.inputFile + ", output: " + opt.outputDir
                + ", cache: " + cacheFile);

        Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
                ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ComputeTidListReducer.class,
                IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

        job.setJobName("Create Prefix Groups");
        job.setJarByClass(BigFIMDriver.class);
        job.setNumReduceTasks(1);

        Configuration conf = job.getConfiguration();
        setConfigurationValues(conf, opt);
        conf.setInt(PREFIX_LENGTH_KEY, phase);

        addCacheFile(new URI(cacheFile.replace(" ", "%20")), job.getConfiguration());

        runJob(job, "Prefix Creation");
    }
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private void startMining(FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException {
    String inputFilesDir = opt.outputDir + separator + "pg" + separator;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the first MapReduce cycle. The transaction file is partitioned into a number of chunks that are handed
 * to different mappers. Each mapper reads a chunk and emits the items together with their partial tid-lists. The
 * reducer concatenates the partial tid-lists, discards the infrequent items, sorts the frequent ones by ascending
 * frequency and divides the singletons among the available mappers.
 *
 * This method generates three files: the frequent singletons (OSingletonsTids), the order file for singletons
 * based on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution).
 *
 * @param outputFile
 * @param opt
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void readHorizontalDb(String outputFile, FIMOptions opt)
        throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class,
            ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class,
            IntWritable.class, Writable.class, TextOutputFormat.class);

    job.setJobName("Read Singletons");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class);
    addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class,
            IntMatrixWritable.class);

    runJob(job, "Item Reading");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building
 * X-FIs, and uses Eclat to quickly compute the list of X-FIs. The complete set of X-FIs is collected by the
 * reducer, which then divides it into independent sets. All sets that have been computed from level 1 to X are
 * already reported. The distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin,
 * Lowest-Frequency, ...
 *
 * This method generates three files: the frequent itemsets from level 1 to X (OFises), the prefix groups
 * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution).
 *
 * @param inputDir
 * @param outputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFile = inputDir + separator + OSingletonsDistribution + rExt;
    String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt;
    String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt;

    System.out.println("[PrefixComputation]: input: " + inputFile);

    Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class,
            PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class,
            IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class);

    job.setJobName("Compute Prefixes");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration());
    addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration());

    runJob(job, "Partition Prefixes");
}
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the
 * collection of closed sets. All information is reported to the reducer, which finally writes the output to
 * disk.
 *
 * @param inputDir
 * @param opt
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 * @throws URISyntaxException
 */
private void startMining(String inputDir, FIMOptions opt)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

    String inputFilesDir = inputDir;
    String outputFile = opt.outputDir + separator + OFis;
    System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile);

    Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class,
            EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class,
            TextOutputFormat.class);

    job.setJobName("Start Mining");
    job.setJarByClass(DistEclatDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);

    List<Path> inputPaths = new ArrayList<Path>();
    FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*"));
    for (FileStatus fstat : listStatus) {
        inputPaths.add(fstat.getPath());
    }

    if (inputPaths.isEmpty()) {
        System.out.println("[StartMining]: No prefixes to extend further");
        return;
    }

    setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()]));

    runJob(job, "Mining");
}
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
@SuppressWarnings("rawtypes") public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { Job job = new Job(new Configuration()); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from ww w . j a v a 2 s. c o m job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, true,
            halvadeOpts.useBamInput);

    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);

    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1) {
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        } else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }

    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);

        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);

        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else {
        return 0;
    }
}