List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/** * Starts the first MapReduce cycle. First, the transaction file is partitioned into a number of chunks that is given * to different mappers. Each mapper reads a chunk and return the items together with their partial tid-lists. The * reducer attaches the partial tid-lists to each other, then discards the infrequent ones and sorts the frequent one * based on ascending frequency and divides the singletons among available mappers. * /*from w w w.j av a 2 s . c o m*/ * This method generates three files, the frequent singletons (OSingletonsTids), the order file for singletons based * on ascending frequency (OSingletonsOrder) and the singletons distribution file (OSingletonsDistribution). * * @param outputFile * @param opt * @throws IOException * @throws ClassNotFoundException * @throws InterruptedException */ private void readHorizontalDb(String outputFile, FIMOptions opt) throws IOException, ClassNotFoundException, InterruptedException { System.out.println("[ItemReading]: input: " + opt.inputFile + ", output: " + outputFile); Job job = prepareJob(new Path(opt.inputFile), new Path(outputFile), SplitByKTextInputFormat.class, ComputeTidListMapper.class, Text.class, IntArrayWritable.class, ItemReaderReducer.class, IntWritable.class, Writable.class, TextOutputFormat.class); job.setJobName("Read Singletons"); job.setJarByClass(DistEclatDriver.class); job.setNumReduceTasks(1); Configuration conf = job.getConfiguration(); setConfigurationValues(conf, opt); addNamedOutput(job, OSingletonsDistribution, TextOutputFormat.class, Text.class, Text.class); addNamedOutput(job, OSingletonsOrder, TextOutputFormat.class, Text.class, Text.class); addNamedOutput(job, OSingletonsTids, SequenceFileOutputFormat.class, IntWritable.class, IntMatrixWritable.class); runJob(job, "Item Reading"); }
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/** * Starts the second MapReduce cycle. Each mapper gets a list of singletons from which it should start building X-FIs. * Each mapper uses Eclat to quickly compute the list of X-FIs. The total set of X-FIs is again obtained by the * reducer, which then gets divided into independent sets. All sets that have been computed from level 1 to X are * already reported. The distribution of seeds is obtained by some allocation scheme, e.g., Round-Robin, * Lowest-Frequency, .../*from ww w . j a v a 2 s . co m*/ * * This method generates three files, the frequent itemsets from level 1 to X (OFises), the prefix groups * (OPrefixGroups) and the prefix distribution file (OPrefixDistribution). * * @param inputDir * @param outputDir * @param opt * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException * @throws URISyntaxException */ private void startPrefixComputation(String inputDir, String outputDir, FIMOptions opt) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { String inputFile = inputDir + separator + OSingletonsDistribution + rExt; String singletonsOrderFile = inputDir + separator + OSingletonsOrder + rExt; String singletonsTidsFile = inputDir + separator + OSingletonsTids + rExt; System.out.println("[PrefixComputation]: input: " + inputFile); Job job = prepareJob(new Path(inputFile), new Path(outputDir), NLineInputFormat.class, PrefixComputerMapper.class, Text.class, IntMatrixWritable.class, PrefixComputerReducer.class, IntArrayWritable.class, IntMatrixWritable.class, SequenceFileOutputFormat.class); job.setJobName("Compute Prefixes"); job.setJarByClass(DistEclatDriver.class); job.setNumReduceTasks(1); Configuration conf = job.getConfiguration(); setConfigurationValues(conf, opt); addCacheFile(new URI(singletonsOrderFile.replace(" ", "%20")), job.getConfiguration()); addCacheFile(new URI(singletonsTidsFile.replace(" ", "%20")), job.getConfiguration()); runJob(job, "Partition Prefixes"); }
From source file:be.uantwerpen.adrem.disteclat.DistEclatDriver.java
License:Apache License
/** * Starts the third MapReduce cycle. Each mapper reads the prefix groups assigned to it and computes the collection of * closed sets. All information is reported to the reducer which finally writes the output to disk. * /*from w w w.j a v a 2 s.c o m*/ * * @param inputDir * @param config * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException * @throws URISyntaxException */ private void startMining(String inputDir, FIMOptions opt) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { String inputFilesDir = inputDir; String outputFile = opt.outputDir + separator + OFis; System.out.println("[StartMining]: input: " + inputFilesDir + ", output: " + outputFile); Job job = prepareJob(new Path(inputFilesDir), new Path(outputFile), NoSplitSequenceFileInputFormat.class, EclatMinerMapper.class, Text.class, Text.class, EclatMinerReducer.class, Text.class, Text.class, TextOutputFormat.class); job.setJobName("Start Mining"); job.setJarByClass(DistEclatDriver.class); job.setNumReduceTasks(1); Configuration conf = job.getConfiguration(); setConfigurationValues(conf, opt); List<Path> inputPaths = new ArrayList<Path>(); FileStatus[] listStatus = FileSystem.get(conf).globStatus(new Path(inputFilesDir + "bucket*")); for (FileStatus fstat : listStatus) { inputPaths.add(fstat.getPath()); } if (inputPaths.isEmpty()) { System.out.println("[StartMining]: No prefixes to extend further"); return; } setInputPaths(job, inputPaths.toArray(new Path[inputPaths.size()])); runJob(job, "Mining"); }
From source file:biglayer.AutoCoder.java
License:Apache License
/** * Runs this tool.//from w ww . j a va 2 s.com */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; }*/ //String inputPath = cmdline.getOptionValue(INPUT); //String outputPath = cmdline.getOptionValue(OUTPUT); String inputPath = "qiwang321/MNIST-mingled-key/part*"; String outputPath = "shangfu/layeroutput"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); conf.setInt("num_reduce_task", reduceTasks); conf.set("sidepath", outputPath + "_side/"); Job job0 = Job.getInstance(conf); job0.setJobName(AutoCoder.class.getSimpleName()); job0.setJarByClass(AutoCoder.class); job0.setNumReduceTasks(reduceTasks); job0.getConfiguration().setInt("layer_ind", 0); FileInputFormat.setInputPaths(job0, new Path(inputPath)); FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0")); job0.setInputFormatClass(KeyValueTextInputFormat.class); job0.setOutputFormatClass(SequenceFileOutputFormat.class); job0.setMapOutputKeyClass(PairOfInts.class); job0.setMapOutputValueClass(ModelNode.class); job0.setOutputKeyClass(PairOfInts.class); job0.setOutputValueClass(ModelNode.class); job0.setMapperClass(MyMapper0.class); job0.setReducerClass(MyReducer0.class); job0.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath + "_0"); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); long codeStart = System.currentTimeMillis(); double codeTimeSum = 0; job0.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) { Job job1 = Job.getInstance(conf); job1.setJobName(AutoCoder.class.getSimpleName()); job1.setJarByClass(AutoCoder.class); job1.setNumReduceTasks(reduceTasks); job1.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1))); FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train")); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + (iterations - 1)); LOG.info(" - output path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - number of reducers: " + reduceTasks); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); job1.setMapOutputKeyClass(PairOfInts.class); job1.setMapOutputValueClass(ModelNode.class); job1.setOutputKeyClass(PairOfInts.class); job1.setOutputValueClass(ModelNode.class); job1.setMapperClass(MyMapper.class); job1.setReducerClass(MyReducer_Train.class); job1.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations + "_train"); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job1.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Job job2 = Job.getInstance(conf); job2.setJobName(AutoCoder.class.getSimpleName()); job2.setJarByClass(AutoCoder.class); job2.setNumReduceTasks(reduceTasks); job2.getConfiguration().setInt("layer_ind", iterations); FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train"))); FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations)); LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "_" + iterations + "_train"); LOG.info(" - output path: " + outputPath + "_" + iterations); LOG.info(" - number of reducers: " + reduceTasks); job2.setInputFormatClass(SequenceFileInputFormat.class); job2.setOutputFormatClass(SequenceFileOutputFormat.class); job2.setMapOutputKeyClass(PairOfInts.class); job2.setMapOutputValueClass(ModelNode.class); job2.setOutputKeyClass(PairOfInts.class); job2.setOutputValueClass(ModelNode.class); job2.setMapperClass(MyMapper.class); job2.setReducerClass(MyReducer_GenData.class); job2.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "_" + iterations); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job2.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; } LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER); LOG.info(" - output path: " + outputPath); reduceTasks = 1; LOG.info(" - number of reducers: " + reduceTasks); Job job_super = Job.getInstance(conf); job_super.setJobName(AutoCoder.class.getSimpleName()); job_super.setJarByClass(AutoCoder.class); job_super.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER)); FileOutputFormat.setOutputPath(job_super, new Path(outputPath)); job_super.setInputFormatClass(SequenceFileInputFormat.class); job_super.setOutputFormatClass(SequenceFileOutputFormat.class); job_super.setMapOutputKeyClass(PairOfInts.class); job_super.setMapOutputValueClass(ModelNode.class); job_super.setOutputKeyClass(NullWritable.class); job_super.setOutputValueClass(NullWritable.class); job_super.setMapperClass(MyMapper_Super.class); job_super.setReducerClass(MyReducer_Super.class); job_super.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job_super.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + codeTimeSum + " seconds."); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }
From source file:bigmodel.AutoCoderLocal.java
License:Apache License
/** * Runs this tool.//from www . j a va 2 s . co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT) + "/part-r-00000"; String outputPath = cmdline.getOptionValue(OUTPUT); String dataPath = cmdline.getOptionValue(INPUT) + "/common"; //String inputPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/part*"; //String outputPath = "output"; //String dataPath = "/home/qiwang321/mapreduce-data/data/in-mingled1-5/common"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoderLocal.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); initialParameters(conf); conf.set("dataPath", dataPath); Job job = Job.getInstance(conf); job.setJobName(AutoCoderLocal.class.getSimpleName()); job.setJarByClass(AutoCoderLocal.class); // set the path of the information of k clusters in this iteration job.getConfiguration().set("sidepath", inputPath + "/side_output"); job.setNumReduceTasks(reduceTasks); dataShuffle(); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024); FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ModelNode.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(SuperModel.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }
From source file:bigsidemodel.AutoCoder.java
License:Apache License
/** * Runs this tool./*from ww w .ja va 2 s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; }*/ //String inputPath = cmdline.getOptionValue(INPUT); //String outputPath = cmdline.getOptionValue(OUTPUT); String inputPath = "qiwang321/best5-mingled-key-56x56/part*"; String outputPath = "shangfu/bigoutput"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath + "0"); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); conf.setInt("num_reduce_task", reduceTasks); conf.set("sidepath", outputPath + "_side/"); Job job0 = Job.getInstance(conf); job0.setJobName(AutoCoder.class.getSimpleName()); job0.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job0.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job0, new Path(inputPath)); FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0")); job0.setInputFormatClass(KeyValueTextInputFormat.class); job0.setOutputFormatClass(SequenceFileOutputFormat.class); job0.setMapOutputKeyClass(PairOfInts.class); job0.setMapOutputValueClass(DataNode.class); job0.setOutputKeyClass(PairOfInts.class); job0.setOutputValueClass(DataNode.class); job0.setMapperClass(MyMapper0.class); job0.setReducerClass(MyReducer0.class); job0.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath + "0"); FileSystem.get(getConf()).delete(outputDir, true); long codeStart = System.currentTimeMillis(); double jobTimeSum = 0; long startTime = System.currentTimeMillis(); job0.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; //======= Job 1 LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + outputPath + "0"); LOG.info(" - output path: " + outputPath + "1"); LOG.info(" - number of reducers: " + 1); int nModel = reduceTasks; reduceTasks = 1; Job job1 = Job.getInstance(conf); job1.setJobName(AutoCoder.class.getSimpleName()); job1.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job1.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job1, new Path(outputPath + "0")); FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1")); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); job1.setMapOutputKeyClass(PairOfInts.class); job1.setMapOutputValueClass(DataNode.class); job1.setOutputKeyClass(NullWritable.class); job1.setOutputValueClass(NullWritable.class); job1.setMapperClass(MyMapper1.class); job1.setReducerClass(MyReducer1.class); job1.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. outputDir = new Path(outputPath + "1"); FileSystem.get(getConf()).delete(outputDir, true); startTime = System.currentTimeMillis(); job1.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0; LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + jobTimeSum + " seconds."); return 0; }
From source file:boostingPL.driver.AdaBoostPLDriver.java
License:Open Source License
@Override public int run(String[] args) throws Exception { int status = commandAnalysis(args); if (status != 0) { return status; }// w w w . ja v a 2 s.c o m @SuppressWarnings("deprecation") Job job = new Job(getConf()); job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " " + numLinesPerMap + " " + numIterations); job.setJarByClass(AdaBoostPLDriver.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, dataPath); NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap); if (runModel.equals("train")) { job.setMapperClass(AdaBoostPLMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ClassifierWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(ClassifierWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, modelPath); } else { job.setMapperClass(AdaBoostPLTestMapper.class); job.setReducerClass(AdaBoostPLTestReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); } Configuration conf = job.getConfiguration(); conf.set("BoostingPL.boostingName", "AdaBoost"); conf.set("BoostingPL.numIterations", String.valueOf(numIterations)); conf.set("BoostingPL.modelPath", modelPath.toString()); if (metadataPath == null) { conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata"); } else { conf.set("BoostingPL.metadata", metadataPath.toString()); } if (outputFolder != null) { conf.set("BoostingPL.outputFolder", outputFolder.toString()); } LOG.info(StringUtils.arrayToString(args)); return job.waitForCompletion(true) == true ? 0 : -1; }
From source file:boostingPL.driver.SAMMEPLDriver.java
License:Open Source License
@Override public int run(String[] args) throws Exception { int status = commandAnalysis(args); if (status != 0) { return status; }//from w ww .j ava2s . co m @SuppressWarnings("deprecation") Job job = new Job(getConf()); job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " " + numLinesPerMap + " " + numIterations); job.setJarByClass(SAMMEPLDriver.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, dataPath); NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap); FileSystem fs = modelPath.getFileSystem(getConf()); if (fs.exists(modelPath)) { fs.delete(modelPath, true); } job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, modelPath); if (runModel.equals("train")) { job.setMapperClass(AdaBoostPLMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ClassifierWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(ClassifierWritable.class); } else { job.setMapperClass(AdaBoostPLTestMapper.class); job.setReducerClass(AdaBoostPLTestReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); } Configuration conf = job.getConfiguration(); conf.set("BoostingPL.boostingName", "SAMME"); conf.set("BoostingPL.numIterations", String.valueOf(numIterations)); conf.set("BoostingPL.modelPath", modelPath.toString()); if (metadataPath == null) { conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata"); } else { conf.set("BoostingPL.metadata", metadataPath.toString()); } if (outputFolder != null) { conf.set("BoostingPL.outputFolder", outputFolder.toString()); } LOG.info(StringUtils.arrayToString(args)); return job.waitForCompletion(true) == true ? 0 : -1; }
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java
License:Apache License
/** * Runs this tool.//w w w. j av a2s. c o m */ @Override public int run(String[] argv) throws Exception { final Args args = new Args(); CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100)); try { parser.parseArgument(argv); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); return -1; } LOG.info("Tool: " + PerfectX.class.getSimpleName()); LOG.info(" - input path: " + args.input); LOG.info(" - output path: " + args.output); LOG.info(" - number of reducers: " + args.numReducers); LOG.info(" - use in-mapper combining: " + args.imc); Configuration conf = getConf(); Job job = Job.getInstance(conf); job.setJobName(PerfectX.class.getSimpleName()); job.setJarByClass(PerfectX.class); job.setNumReduceTasks(args.numReducers); FileInputFormat.setInputPaths(job, new Path(args.input)); FileOutputFormat.setOutputPath(job, new Path(args.output)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(args.output); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.WordCount.java
License:Apache License
/** * Runs this tool./*from www. j a v a 2 s .c om*/ */ @Override public int run(String[] argv) throws Exception { final Args args = new Args(); CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100)); try { parser.parseArgument(argv); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); return -1; } LOG.info("Tool: " + WordCount.class.getSimpleName()); LOG.info(" - input path: " + args.input); LOG.info(" - output path: " + args.output); LOG.info(" - number of reducers: " + args.numReducers); LOG.info(" - use in-mapper combining: " + args.imc); Configuration conf = getConf(); Job job = Job.getInstance(conf); job.setJobName(WordCount.class.getSimpleName()); job.setJarByClass(WordCount.class); job.setNumReduceTasks(args.numReducers); FileInputFormat.setInputPaths(job, new Path(args.input)); FileOutputFormat.setOutputPath(job, new Path(args.output)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(args.output); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }