List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath
public static void setOutputPath(Job job, Path outputDir)
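Sets the directory where the job writes its output files. The directory must not exist when the job is submitted; FileOutputFormat's output check rejects the job otherwise, which is why several of the examples below delete the path first. A minimal driver sketch, with illustrative class and path names (not taken from any of the sources below), relying on Hadoop's default identity mapper and reducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example");
        job.setJarByClass(ExampleDriver.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // The output directory must not already exist; FileOutputFormat
        // fails the job at submission time if it does.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}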
From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexJob.java
License:Apache License
private void setupReduceOutput(Path output, short sharding) throws IOException {
    job.setReducerClass(InvertedIndexReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setNumReduceTasks(sharding);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.OUTPUT_PATH, output.toString());

    deletePath(job.getConfiguration(), output);
}
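Note that deletePath and BatchConstants are Kylin project helpers, not Hadoop APIs. A minimal sketch of what such a delete helper typically looks like, assuming the standard org.apache.hadoop.fs.FileSystem API (the project's actual implementation may differ):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical stand-in for the project's deletePath helper: recursively
// remove the output directory so a re-run does not fail because the path
// already exists.
public static void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        fs.delete(path, true); // true = recursive delete
    }
}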
From source file:com.kylinolap.job.hadoop.invertedindex.RandomKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_KEY_CLASS);
        options.addOption(OPTION_REGION_MB);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());
        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        String keyClass = getOptionValue(OPTION_KEY_CLASS);
        Class<?> keyClz = Class.forName(keyClass);
        int regionMB = Integer.parseInt(getOptionValue(OPTION_REGION_MB));

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RandomKeyDistributionMapper.class);
        job.setMapOutputKeyClass(keyClz);
        job.setMapOutputValueClass(NullWritable.class);

        // Reducer - only one
        job.setReducerClass(RandomKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(keyClz);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        // total map input MB
        double totalMapInputMB = this.getTotalMapInputMB();
        int regionCount = Math.max(1, (int) (totalMapInputMB / regionMB));
        int mapSampleNumber = 1000;
        System.out.println("Total Map Input MB: " + totalMapInputMB);
        System.out.println("Region Count: " + regionCount);

        // set job configuration
        job.getConfiguration().set(BatchConstants.MAPPER_SAMPLE_NUMBER, String.valueOf(mapSampleNumber));
        job.getConfiguration().set(BatchConstants.REGION_NUMBER, String.valueOf(regionCount));

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    // First job: word count.
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Second job: reads the first job's output directory as its input.
    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    // Chain the jobs: JobControl runs job2 only after job succeeds.
    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);

    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);
    Thread jcThread = new Thread(jc);
    jcThread.start();

    // Poll until all jobs finish or any job fails.
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}
From source file:com.lakhani.anchorgraph.anchorgraph.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // args[1] is the output directory; args[2] is already used above as the
    // cache file URI and cannot double as the output path.
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(anchorgraph.class);
    job.submit();
    // waitForCompletion returns true on success; 0 is the success exit code.
    int rc = job.waitForCompletion(true) ? 0 : 1;
    return rc;
}
From source file:com.lakhani.anchorgraph.testCache.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "testCache");
    job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(testCache.class);
    job.submit();
    // waitForCompletion returns true on success; 0 is the success exit code.
    int rc = job.waitForCompletion(true) ? 0 : 1;
    return rc;
}
From source file:com.lakhani.anchorgraph.wordcount.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setJarByClass(wordcount.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from ww w . ja va 2 s .co m }
From source file:com.leon.hadoop.loganalyse.DistributedGrep.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.leon.hadoop.loganalyse.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.BlockHasher.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: BlockHasher <imageID> <image> <output>");
        return 2;
    }

    final String imageID = args[0];
    final String image = args[1];
    final String output = args[2];

    Configuration conf = getConf();
    final Job job = SKJobFactory.createJobFromConf(imageID, image, "BlockHasher", conf);
    job.setJarByClass(BlockHasher.class);
    job.setMapperClass(BlockHashMapper.class);
    // job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(MD5Hash.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}