List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName) throws Exception { Job job = new Job(conf); job.setJobName(jobName + " " + crawlPath.toString()); job.setJarByClass(Merge.class); // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar"); Path crawldbPath = new Path(crawlPath, "crawldb"); Path newdb = new Path(crawldbPath, "new"); Path currentdb = new Path(crawldbPath, "current"); FileSystem fs = FileSystem.get(conf); if (fs.exists(currentdb)) { FileInputFormat.addInputPath(job, currentdb); }/* w w w. j a v a 2s .c o m*/ if (fs.exists(newdb)) { fs.delete(newdb); } for (Path mergePath : mergePaths) { FileInputFormat.addInputPath(job, mergePath); } FileOutputFormat.setOutputPath(job, newdb); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); job.setMapperClass(MergeMap.class); job.setReducerClass(MergeReduce.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(CrawlDatum.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.waitForCompletion(true); }
From source file:cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java
public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception { Path segmentPath = new Path(crawlPath, "segments/" + segmentName); Path generatePath = new Path(segmentPath, "generate"); Job job = new Job(conf); job.setJobName("fetch " + crawlPath.toString()); job.setJarByClass(Fetcher.class); job.setReducerClass(FetcherReducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(FetcherOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(CrawlDatum.class); FileInputFormat.addInputPath(job, generatePath); FileOutputFormat.setOutputPath(job, segmentPath); job.waitForCompletion(true);/*ww w . j av a2 s .c o m*/ }
From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Merge.java
public static Job createJob(Configuration conf, Path crawldb) throws IOException { Job job = new Job(conf); //job.setJarByClass(Merge.class); job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar"); Path newdb = new Path(crawldb, "new"); Path currentdb = new Path(crawldb, "current"); FileSystem fs = crawldb.getFileSystem(CrawlerConfiguration.create()); if (fs.exists(currentdb)) { FileInputFormat.addInputPath(job, currentdb); }/*from w w w. ja va2s . c om*/ if (fs.exists(newdb)) { fs.delete(newdb); } FileOutputFormat.setOutputPath(job, newdb); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); job.setMapperClass(MergeMap.class); job.setReducerClass(MergeReduce.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(CrawlDatum.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); return job; }
From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License:Apache License
/** Run a map/reduce job to compute Pi. */ private static void compute(int startDigit, int nDigits, int nMaps, String workingDir, Configuration conf, PrintStream out) throws IOException { final String name = startDigit + "_" + nDigits; // setup wroking directory out.println("Working Directory = " + workingDir); out.println();/*from w w w . java2 s . c om*/ // final FileSystem fs = FileSystem.get(conf);// ? final FileSystem fs = new Path(workingDir, "part-r-00000").getFileSystem(conf);// ? final Path dir = fs.makeQualified(new Path(workingDir)); if (fs.exists(dir)) { throw new IOException("Working directory " + dir + " already exists. Please remove it first."); } else if (!fs.mkdirs(dir)) { throw new IOException("Cannot create working directory " + dir); } out.println("Start Digit = " + startDigit); out.println("Number of Digits = " + nDigits); out.println("Number of Maps = " + nMaps); // setup a job final Job job = createJob(name, conf); final Path hexfile = new Path(dir, "pi_" + name + ".hex"); FileOutputFormat.setOutputPath(job, new Path(dir, "out")); // setup custom properties job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString()); job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString()); job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit); job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits); job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps); // start a map/reduce job out.println("\nStarting Job ..."); final long startTime = System.currentTimeMillis(); try { if (!job.waitForCompletion(true)) { out.println("Job failed."); System.exit(1); } } catch (Exception e) { throw new RuntimeException(e); } finally { final double duration = (System.currentTimeMillis() - startTime) / 1000.0; out.println("Duration is " + duration + " seconds."); } out.println("Output file: " + hexfile); }
From source file:cn.jpush.hdfs.mr.example.WordMedian.java
License:Apache License
public int run(String[] args) throws Exception { long random = new Random().nextLong(); log.info("random -> " + random); args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"), String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random) }; setConf(new Configuration()); Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
From source file:cn.lhfei.hadoop.ch02.MaxTemperature.java
License:Apache License
public static void main(String[] args) { log.debug("Logging ... "); if (args.length != 2) { System.err.println("Usage: MaxTemperature <input path> <output path>"); System.exit(-1);/*from w w w . jav a2s . c om*/ } try { Job job = new Job(); job.setJarByClass(MaxTemperature.class); job.setJobName("Max temperature"); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); /*FileInputFormat.addInputPath(job, new Path(INPUT)); FileOutputFormat.setOutputPath(job, new Path(OUTPUT));*/ job.setMapperClass(MaxTemperatureMapper.class); job.setReducerClass(MaxTemperatureReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); System.exit(job.waitForCompletion(true) ? 0 : 1); } catch (IllegalStateException e) { log.error(e.getMessage(), e); } catch (IllegalArgumentException e) { log.error(e.getMessage(), e); } catch (ClassNotFoundException e) { log.error(e.getMessage(), e); } catch (IOException e) { log.error(e.getMessage(), e); } catch (InterruptedException e) { log.error(e.getMessage(), e); } }
From source file:cn.lhfei.hadoop.ch04.MaxTemperatureWithCompression.java
License:Apache License
public static void main(String[] args) { if (args.length != 2) { System.err.println("Usage: MaxTemperatureWithCompression <input path> " + "<output path>"); System.exit(-1);/*from w w w . j av a2 s. c o m*/ } try { Job job = new Job(); job.setJarByClass(MaxTemperatureWithCompression.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); job.setMapperClass(MaxTemperatureMapper.class); job.setCombinerClass(MaxTemperatureReducer.class); job.setReducerClass(MaxTemperatureReducer.class); System.exit(job.waitForCompletion(true) ? 0 : 1); } catch (IOException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:cn.lhfei.hadoop.ch05.v2.MaxTemperatureDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }/*from w w w .j a v a2 s. com*/ Job job = new Job(getConf(), "Max temperature"); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(MaxTemperatureMapper.class); job.setCombinerClass(MaxTemperatureReducer.class); job.setReducerClass(MaxTemperatureReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName) throws IOException { Job job = new Job(context.getConfiguration()); FileOutputFormat.setOutputName(job, newOutputName); // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat. if (isAvroOutputFormat(getFileOutputFormat(context))) { job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName); }/*from w w w.j a v a 2 s . c o m*/ Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context); FileOutputFormat.setOutputPath(job, jobOutputPath); return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID()); }
From source file:co.cask.cdap.internal.app.runtime.batch.WordCount.java
License:Apache License
public static void configureJob(Job job, String inputPath, String outputPath) throws IOException { job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); }