List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
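setJobName assigns the human-readable name shown in the JobTracker/ResourceManager UI; it must be called before the job is submitted, and throws IllegalStateException once the job is in the RUNNING state. Before the examples below, here is a minimal self-contained sketch of typical usage (the class name, paths, and job name are illustrative placeholders, not taken from any example on this page; with no mapper or reducer set, the job runs as an identity copy):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Must be called while the job is still in the DEFINE state;
        // after submission, setJobName throws IllegalStateException.
        job.setJobName("IdentityCopy");
        job.setJarByClass(SetJobNameExample.class);
        // Defaults: identity Mapper/Reducer over TextInputFormat records.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}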
From source file:hu.sztaki.ilab.bigdata.common.tools.hbase.PerformanceEvaluation.java
License:Apache License
private void doMapReduce(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(this.conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(PeInputFormat.class);
    PeInputFormat.setInputPaths(job, inputDir);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    job.waitForCompletion(true);
}
From source file:info.halo9pan.word2vec.hadoop.mr.WordSort.java
License:Apache License
public int run(String[] args) throws Exception {
    logger.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    SortInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("WordSort");
    job.setJarByClass(WordSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, SortInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + SortInputFormat.PARTITION_FILENAME);
        try {
            SortInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            logger.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    SortOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    logger.info("done");
    return ret;
}
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraGen.java
License:Apache License
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraValidate.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.aos.mapreduce.count.WordCountTool.java
License:Apache License
public int run(String[] args) throws Exception {
    // Exactly two arguments are required; the original range check
    // (args.length > 0 && args.length < 3) also accepted a single
    // argument and then failed on args[1].
    if (args.length != 2) {
        System.out.println("WordCount <inDir> <outDir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("WordCount_" + inPath.getName());
    job.setJar("./target/datalayer-hadoop-mapreduce-1.0.0-SNAPSHOT.jar");
    // job.setJarByClass(WordCountTool.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:io.aos.mapreduce.grep.GrepTool.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }
    try {
        Job grepJob = Job.getInstance(conf);
        grepJob.setJobName("GrepSearch");
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("GrepSort");
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // Write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } catch (Exception e) {
        return 2;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
From source file:io.aos.t4f.hadoop.mapreduce.WordCountMapReduceTest2.java
License:Apache License
public static void main(String... args) throws Exception {
    // Get the default configuration object
    Configuration conf = new Configuration();
    // Add resources
    conf.addResource("hdfs-default.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("mapred-default.xml");
    conf.addResource("mapred-site.xml");
    Job job = new Job(conf);
    job.setJobName("WordCount");
    List<String> other_args = parseArguments(args, job);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(MapClass.class);
    job.setCombinerClass(ReduceClass.class);
    job.setReducerClass(ReduceClass.class);
    // Set the input format class
    job.setInputFormatClass(TextInputFormat.class);
    // Set the output format class
    job.setOutputFormatClass(TextOutputFormat.class);
    // Set the input path
    TextInputFormat.setInputPaths(job, other_args.get(0));
    // Set the output path
    TextOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    /*
     * Set the minimum and maximum split sizes. This parameter helps to
     * specify the number of map tasks. For each input split, there will be
     * a separate map task. In this example each split is of size 32 MB.
     */
    TextInputFormat.setMinInputSplitSize(job, 32 * MEGABYTES);
    TextInputFormat.setMaxInputSplitSize(job, 32 * MEGABYTES);
    // Set the jar file to run
    job.setJarByClass(WordCountMapReduceTest2.class);
    // Submit the job
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    if (exitCode == 0) {
        Date endTime = new Date();
        System.out.println("Job ended: " + endTime);
        System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    } else {
        System.out.println("Job failed.");
    }
    System.exit(exitCode);
}
From source file:io.aos.t4f.hadoop.mapreduce.WordCountMapReduceTest3.java
License:Apache License
public void testMapReduce() throws Exception {
    String inputPath = "/docs/ChangesFancyStyle.css";
    String outputPath = "/out";
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", "hdfs://ppc006:54310");
    configuration.set("mapred.job.tracker", "ppc006:54311");
    // The two settings below override the cluster addresses above,
    // so this test actually runs with the local job runner.
    configuration.set("mapred.job.tracker", "local");
    configuration.set("fs.default.name", "local");
    DistributedCache.addArchiveToClassPath(new Path("/jar/t4f-nosql-hadoop-1.0-SNAPSHOT.jar"), configuration);
    Job job = new Job(configuration);
    // job.setJarByClass(Driver.class);
    job.setJobName("TestJob");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReduceClass.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
From source file:io.bfscan.clueweb12.BuildDictionary.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    // The original logged ComputeTermStatistics here, apparently a
    // copy-paste slip; this tool is BuildDictionary.
    LOG.info("Tool name: " + BuildDictionary.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();
    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "4096");
    conf.set("mapreduce.map.java.opts", "-Xmx4096m");
    conf.set("mapreduce.reduce.memory.mb", "4096");
    conf.set("mapreduce.reduce.java.opts", "-Xmx4096m");

    Job job = Job.getInstance(conf);
    job.setJobName(BuildDictionary.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildDictionary.class);
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfIntLong.class);
    job.setOutputKeyClass(Text.class);

    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:io.bfscan.clueweb12.BuildPForDocVectors.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(REDUCERS_OPTION));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildPForDocVectors.class.getSimpleName() + ":" + input);
    job.setJarByClass(BuildPForDocVectors.class);

    LOG.info("Tool name: " + BuildPForDocVectors.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - preprocessing: " + preprocessing);

    if (cmdline.hasOption(REDUCERS_OPTION)) {
        int numReducers = Integer.parseInt(cmdline.getOptionValue(REDUCERS_OPTION));
        LOG.info(" - reducers: " + numReducers);
        job.setNumReduceTasks(numReducers);
    } else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.getConfiguration().set(DICTIONARY_OPTION, dictionary);
    job.getConfiguration().set(PREPROCESSING, preprocessing);

    job.setInputFormatClass(ClueWeb12InputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntArrayWritable.class);

    job.setMapperClass(MyMapper.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}