List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters()
public Counters getCounters() throws IOException
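All of the examples below follow the same pattern: configure a Job, wait for it to complete, then read aggregate statistics through getCounters()/findCounter(). Here is a minimal self-contained sketch of that pattern. The counter enum MyCounters and the mapper MyMapper are hypothetical placeholders (they do not come from any example on this page); input and output paths are taken from args.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CounterUsageSketch {

  // Hypothetical application-defined counter.
  enum MyCounters { RECORDS_PROCESSED }

  // Hypothetical identity mapper that increments the counter once per input record.
  public static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      context.getCounter(MyCounters.RECORDS_PROCESSED).increment(1);
      context.write(key, value);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "counter-usage-sketch");
    job.setJarByClass(CounterUsageSketch.class);
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0); // map-only job
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

    // getCounters() aggregates counters across all tasks. Read it only after
    // the job has finished; for a retired job whose history is unavailable it
    // may return null, and it throws IOException on communication failures.
    Counters counters = job.getCounters();
    if (counters != null) {
      long processed = counters.findCounter(MyCounters.RECORDS_PROCESSED).getValue();
      System.out.println("Records processed: " + processed);
    }
  }
}

The examples that follow use either the framework's built-in counters (e.g. Task.Counter.MAP_OUTPUT_RECORDS) or application-defined enums of this kind.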
From source file:edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(
      OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
      .create(OUTPUT_FILE_OPTION));
  options.addOption(OptionBuilder
      .withArgName("en|sv|nl|de|fr|ru|it|es|vi|pl|ja|pt|zh|uk|ca|fa|no|fi|id|ar|sr|ko|hi|zh_yue|cs|tr")
      .hasArg().withDescription("two-letter or six-letter language code").create(LANGUAGE_OPTION));
  options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String language = null;
  if (cmdline.hasOption(LANGUAGE_OPTION)) {
    language = cmdline.getOptionValue(LANGUAGE_OPTION);
    // The length check for 6 includes languages like zh_yue.
    if (!(language.length() == 2 || language.length() == 6)) {
      System.err.println("Error: \"" + language + "\" unknown language!");
      return -1;
    }
  }

  String inputPath = cmdline.getOptionValue(INPUT_OPTION);
  String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
  boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

  String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

  LOG.info("Tool name: " + this.getClass().getName());
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output file: " + outputFile);
  LOG.info(" - keep all pages: " + keepAll);
  LOG.info(" - language: " + language);

  Job job = Job.getInstance(getConf());
  job.setJarByClass(WikipediaDocnoMappingBuilder.class);
  job.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
      OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

  job.getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
  if (language != null) {
    job.getConfiguration().set("wiki.language", language);
  }
  job.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(tmpPath));
  FileOutputFormat.setCompressOutput(job, false);

  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(WikipediaPageInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  // Delete the output directory if it exists already.
  FileSystem.get(getConf()).delete(new Path(tmpPath), true);

  job.waitForCompletion(true);

  long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
      : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();

  WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-r-00000", (int) cnt,
      outputFile);

  FileSystem.get(getConf()).delete(new Path(tmpPath), true);

  return 0;
}
From source file:edu.umd.cloud9.example.pagerank.BasicMonteCarloPPR.java
License:Apache License
private long phase1(int iterNum, String basePath, String inputFile)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), "Phase 1: Initialize the " + iterNum + " random walks");
  job.setJarByClass(BasicMonteCarloPPR.class);

  String in = basePath + "/input/" + inputFile;
  String out = basePath + "/tmp/iter0";
  int reduceNo = iterNum;

  // Disable speculative execution: duplicate task attempts would duplicate
  // the (non-deterministic) random walks.
  job.getConfiguration().setInt("RandomWalks", iterNum);
  job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
  job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
  job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
  job.setNumReduceTasks(reduceNo);

  FileInputFormat.setInputPaths(job, new Path(in));
  FileOutputFormat.setOutputPath(job, new Path(out));

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FingerPrint.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(FingerPrint.class);

  job.setMapperClass(RandomWalkInitMapClass.class);
  job.setReducerClass(RandomWalkReduceClass.class);

  job.waitForCompletion(true);

  // Report how many random walks were initialized.
  Counters counters = job.getCounters();
  return counters.findCounter(COUNTERS.TOTAL_RW).getValue();
}
From source file:edu.umd.cloud9.example.pagerank.BasicMonteCarloPPR.java
License:Apache License
private long phase2(int reduceNo, int iterCnt, String basePath, float jumpFactor)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), "Phase 2: Processing the " + (iterCnt + 1)
      + "-th iterations of random walk Monte Carlo approximation");
  job.setJarByClass(BasicMonteCarloPPR.class);

  String in = basePath + "/tmp/iter" + iterCnt;
  String out = basePath + "/tmp/iter" + (iterCnt + 1);

  job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
  job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
  job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
  job.setNumReduceTasks(reduceNo);

  FileInputFormat.setInputPaths(job, new Path(in));
  FileOutputFormat.setOutputPath(job, new Path(out));

  job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapOutputKeyClass(IntSetWritable.class);
  job.setMapOutputValueClass(FingerPrint.class);
  job.setOutputKeyClass(IntSetWritable.class);
  job.setOutputValueClass(FingerPrint.class);

  job.setReducerClass(RandomWalkReduceClass.class);

  job.waitForCompletion(true);

  Counters counters = job.getCounters();
  return counters.findCounter(COUNTERS.FINISHED_RW).getValue();
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  GenericOptionsParser parser = new GenericOptionsParser(args);
  OperationsParams params = new OperationsParams(parser);
  if (!params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }

  Path[] inFiles = params.getInputPaths();
  Path outFile = params.getOutputPath();

  long t1 = System.currentTimeMillis();
  Job job = delaunay(inFiles, outFile, params);
  long t2 = System.currentTimeMillis();
  System.out.println("Total time: " + (t2 - t1) + " millis");

  if (job != null) {
    System.out.println("Map final sites: "
        + job.getCounters().findCounter(DelaunayCounters.MAP_FINAL_SITES).getValue());
    System.out.println("Map non-final sites: "
        + job.getCounters().findCounter(DelaunayCounters.MAP_NONFINAL_SITES).getValue());
    System.out.println("Reduce final sites: "
        + job.getCounters().findCounter(DelaunayCounters.REDUCE_FINAL_SITES).getValue());
    System.out.println("Reduce non-final sites: "
        + job.getCounters().findCounter(DelaunayCounters.REDUCE_NONFINAL_SITES).getValue());
  }
}
From source file:edu.umn.cs.spatialHadoop.nasa.HDFToText.java
License:Open Source License
/**
 * Performs an HDF to text operation as a MapReduce job and returns the total
 * number of points generated.
 * @param inPath
 * @param outPath
 * @param datasetName
 * @param skipFillValue
 * @return
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(Path inPath, Path outPath, String datasetName, boolean skipFillValue,
    OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(params, "HDFToText");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(HDFToText.class);
  job.setJobName("HDFToText");

  // Set Map function details
  job.setMapperClass(HDFToTextMap.class);
  job.setNumReduceTasks(0);

  // Set input information
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inPath);
  if (conf.get("shape") == null)
    conf.setClass("shape", NASAPoint.class, Shape.class);
  conf.set("dataset", datasetName);
  conf.setBoolean("skipfillvalue", skipFillValue);

  // Set output information
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outPath);

  // Run the job
  boolean verbose = conf.getBoolean("verbose", false);
  job.waitForCompletion(verbose);
  Counters counters = job.getCounters();
  Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
  final long resultCount = outputRecordCounter.getValue();
  return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License:Open Source License
/**
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  GenericOptionsParser parser = new GenericOptionsParser(args);
  OperationsParams params = new OperationsParams(parser);
  if (!params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }

  Path[] inFiles = params.getInputPaths();
  Path outPath = params.getOutputPath();

  long t1 = System.currentTimeMillis();
  Job job = closestPair(inFiles, outPath, params);
  long t2 = System.currentTimeMillis();
  System.out.println("Total time: " + (t2 - t1) + " millis");

  if (job != null) {
    System.out.println("Input points: "
        + job.getCounters().findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue());
    System.out.println("Map output points: "
        + job.getCounters().findCounter(Task.Counter.MAP_OUTPUT_RECORDS).getValue());
    System.out.println("Reduce output points: "
        + job.getCounters().findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS).getValue());
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.FarthestPair.java
License:Open Source License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  GenericOptionsParser parser = new GenericOptionsParser(args);
  OperationsParams params = new OperationsParams(parser);
  if (!params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }

  Path[] inFiles = params.getInputPaths();
  Path outPath = params.getOutputPath();

  long t1 = System.currentTimeMillis();
  Job job = farthestPair(inFiles, outPath, params);
  long t2 = System.currentTimeMillis();
  System.out.println("Total time: " + (t2 - t1) + " millis");

  if (job != null) {
    Counter processedPairs = job.getCounters().findCounter(FarthestPairCounters.FP_ProcessedPairs);
    System.out.println("Processed " + processedPairs.getValue() + " pairs");
  }
}
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
/**
 * A MapReduce version of KNN query.
 * @param fs
 * @param inputPath
 * @param queryPoint
 * @param shape
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(params, "KNN");
  job.setJarByClass(KNN.class);

  FileSystem inFs = inputPath.getFileSystem(params);
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inputPath);

  job.setMapperClass(KNNMap.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TextWithDistance.class);

  job.setReducerClass(KNNReduce.class);
  job.setNumReduceTasks(1);

  job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
  final Point queryPoint = (Point) params.getShape("point");
  final int k = params.getInt("k", 1);

  final IntWritable additional_blocks_2b_processed = new IntWritable(0);
  long resultCount;
  int iterations = 0;

  Path outputPath = userOutputPath;
  if (outputPath == null) {
    do {
      outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
    } while (inFs.exists(outputPath));
  }
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outputPath);

  GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
  Configuration templateConf = job.getConfiguration();

  FileSystem outFs = outputPath.getFileSystem(params);
  // Start with the query point to select all partitions overlapping with it
  Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

  do {
    job = new Job(templateConf);
    // Delete results of last iteration if not first iteration
    if (outputPath != null)
      outFs.delete(outputPath, true);

    LOG.info("Running iteration: " + (++iterations));
    // Set query range for the SpatialInputFormat
    OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

    // Submit the job
    if (params.getBoolean("background", false)) {
      // XXX this is incorrect because if the job needs multiple iterations,
      // it will run only the first one
      job.waitForCompletion(false);
      return job;
    }
    job.waitForCompletion(false);

    // Retrieve answers for this iteration
    Counters counters = job.getCounters();
    Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    resultCount = resultSizeCounter.getValue();

    if (globalIndex != null) {
      Circle range_for_next_iteration;
      if (resultCount < k) {
        LOG.info("Found only " + resultCount + " results");
        // Did not find enough results in the query space.
        // Increase the distance by doubling the maximum distance among all
        // partitions that were processed
        final DoubleWritable maximum_distance = new DoubleWritable(0);
        int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
            new ResultCollector<Partition>() {
              @Override
              public void collect(Partition p) {
                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                if (distance > maximum_distance.get())
                  maximum_distance.set(distance);
              }
            });
        if (matched_partitions == 0) {
          // The query point is outside the search space.
          // Set the range to include the closest partition
          globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
            @Override
            public void collect(Partition r, Double s) {
              maximum_distance.set(s);
            }
          });
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
        LOG.info("Expanding to " + maximum_distance.get() * 2);
      } else {
        // Calculate the new test range, which is a circle centered at the
        // query point with the distance to the k-th neighbor as its radius.
        // Get distance to the kth neighbor
        final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
        FileStatus[] results = outFs.listStatus(outputPath);
        for (FileStatus result_file : results) {
          if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
            // Read the last line (kth neighbor)
            Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                new ResultCollector<TextWithDistance>() {
                  @Override
                  public void collect(TextWithDistance r) {
                    distance_to_kth_neighbor.set(r.distance);
                  }
                });
          }
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
        LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
      }

      // Calculate the number of blocks to be processed to check the
      // terminating condition
      additional_blocks_2b_processed.set(0);
      final Shape temp = range_for_this_iteration;
      globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
        @Override
        public void collect(Partition p) {
          if (!(p.isIntersected(temp))) {
            additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
          }
        }
      });
      range_for_this_iteration = range_for_next_iteration;
    }
  } while (additional_blocks_2b_processed.get() > 0);

  // If output file is not set by user, delete it
  if (userOutputPath == null)
    outFs.delete(outputPath, true);
  TotalIterations.addAndGet(iterations);

  return job;
}
From source file:edu.umn.cs.spatialHadoop.operations.RangeQuery.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
  final Path[] paths = params.getPaths();
  if (paths.length <= 1 && !params.checkInput()) {
    printUsage();
    System.exit(1);
  }
  if (paths.length >= 2 && !params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }
  if (params.get("rect") == null) {
    System.err.println("You must provide a query range");
    printUsage();
    System.exit(1);
  }
  final Path inPath = params.getInputPath();
  final Path outPath = params.getOutputPath();
  final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

  // All running jobs
  final Vector<Long> resultsCounts = new Vector<Long>();
  Vector<Job> jobs = new Vector<Job>();
  Vector<Thread> threads = new Vector<Thread>();

  long t1 = System.currentTimeMillis();
  for (int i = 0; i < queryRanges.length; i++) {
    final OperationsParams queryParams = new OperationsParams(params);
    OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
    if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
      // Run in local mode
      final Rectangle queryRange = queryRanges[i];
      final Shape shape = queryParams.getShape("shape");
      final Path output = outPath == null ? null
          : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
      Thread thread = new Thread() {
        @Override
        public void run() {
          FSDataOutputStream outFile = null;
          final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
          try {
            ResultCollector<Shape> collector = null;
            if (output != null) {
              FileSystem outFS = output.getFileSystem(queryParams);
              final FSDataOutputStream foutFile = outFile = outFS.create(output);
              collector = new ResultCollector<Shape>() {
                final Text tempText = new Text2();

                @Override
                public synchronized void collect(Shape r) {
                  try {
                    tempText.clear();
                    r.toText(tempText);
                    foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                    foutFile.write(newLine);
                  } catch (IOException e) {
                    e.printStackTrace();
                  }
                }
              };
            } else {
              outFile = null;
            }
            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
            resultsCounts.add(resultCount);
          } catch (IOException e) {
            e.printStackTrace();
          } catch (InterruptedException e) {
            e.printStackTrace();
          } finally {
            try {
              if (outFile != null)
                outFile.close();
            } catch (IOException e) {
              e.printStackTrace();
            }
          }
        }
      };
      thread.start();
      threads.add(thread);
    } else {
      // Run in MapReduce mode
      queryParams.setBoolean("background", true);
      Job job = rangeQueryMapReduce(inPath, outPath, queryParams);
      jobs.add(job);
    }
  }

  while (!jobs.isEmpty()) {
    Job firstJob = jobs.firstElement();
    firstJob.waitForCompletion(false);
    if (!firstJob.isSuccessful()) {
      System.err.println("Error running job " + firstJob);
      System.err.println("Killing all remaining jobs");
      for (int j = 1; j < jobs.size(); j++)
        jobs.get(j).killJob();
      System.exit(1);
    }
    Counters counters = firstJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    resultsCounts.add(outputRecordCounter.getValue());
    jobs.remove(0);
  }
  while (!threads.isEmpty()) {
    try {
      Thread thread = threads.firstElement();
      thread.join();
      threads.remove(0);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  long t2 = System.currentTimeMillis();
  System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
  System.out.println("Results counts: " + resultsCounts);
}
From source file:edu.umn.cs.sthadoop.hdfs.KNNJoin.java
License:Open Source License
static void knnJoinMapReduce(OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Path[] inputPaths = params.getInputPaths();
  Path outputPath = params.getOutputPath();
  // final int k = params.getInt("k", 1);
  KNNJRecordReader.params = params;
  // System.out.println(params.getInputPaths().length);

  long t1 = System.currentTimeMillis();

  // phase 1
  params.set("type", "phase1");
  Job job = Job.getInstance(params, "KNNJoin Phase1");
  job.setJarByClass(KNNJoin.class);
  job.setInputFormatClass(KNNJInputFormat.class);
  KNNJInputFormat.setInputPaths(job, inputPaths[0], inputPaths[1]);
  job.setMapperClass(KNNJMap.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outputPath);
  MultipleOutputs.addNamedOutput(job, "phase2", TextOutputFormat3.class, Text.class, Text.class);

  // Submit the job
  if (job.waitForCompletion(true)) {
    LOG.info("[stat:job[0]");
  } else {
    LOG.info("[stat:job[1]");
    return;
  }
  long t2 = System.currentTimeMillis() - t1;
  t1 = System.currentTimeMillis();

  Counters counters = job.getCounters();
  long refSplits = counters.findCounter(KNNJMap.Stats.refSplits).getValue();
  long qSplits = counters.findCounter(KNNJMap.Stats.qSplits).getValue();
  long numRefRecs = counters.findCounter(KNNJMap.Stats.numRefRecs).getValue();
  long numQRecs = counters.findCounter(KNNJMap.Stats.numQRecs).getValue();
  long numP2Recs = counters.findCounter(KNNJMap.Stats.phase2Recs).getValue();
  String str = String.format(
      "stat:counters[refSplits=%s;qSplits=%s;numRefRecs=%s;" + "numQRecs=%s;numP2Recs=%s;t1=%s]",
      refSplits, qSplits, numRefRecs, numQRecs, numP2Recs, t2);
  LOG.info(str);
  // LOG.info("[stat:counter:refSplits=" + refSplits + "]");
  // LOG.info("[stat:counter:qSplits=" + qSplits + "]");
  // LOG.info("[stat:counter:numRefRecs=" + numRefRecs + "]");
  // LOG.info("[stat:counter:numQRecs=" + numQRecs + "]");
  // LOG.info("[stat:counter:numP2Recs=" + numP2Recs + "]");
  /*
  for (Iterator<String> iterator = counters.getGroupNames().iterator(); iterator.hasNext();) {
    String str = (String) iterator.next();
    LOG.info("[stat:counter=" + str + "]");
  }
  */
  // end of phase 1

  // Phases 2 and 3 are commented out in the original source:
  /*
  // phase 2
  params.set("type", "phase2");
  Job job2 = Job.getInstance(params, "KNNJoin Phase2");
  job2.setJarByClass(KNNJoin.class);
  job2.setMapperClass(TokenizerMapper.class);
  job2.setReducerClass(GroupingReducer.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(Text.class);
  FileSystem outputFS = outputPath.getFileSystem(params);
  Path p2OutPath;
  do {
    p2OutPath = new Path(outputPath.getParent(),
        outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
  } while (outputFS.exists(p2OutPath));
  FileSystem p2OutPathFS = FileSystem.get(p2OutPath.toUri(), params);
  job2.setInputFormatClass(KNNJInputFormatPhase2.class);
  KNNJInputFormatPhase2.setInputPaths(job2, outputPath);
  job2.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job2, p2OutPath);
  MultipleOutputs.addNamedOutput(job2, "phase3", TextOutputFormat3.class, NullWritable.class, Text.class);
  // Submit the job
  if (job2.waitForCompletion(true)) {
    LOG.info("Job2 succeeded.");
  } else {
    LOG.info("Job2 failed.");
    return;
  }
  // end of phase 2
  t2 = System.currentTimeMillis() - t1;
  LOG.info("[stat:time:2=" + t2 + "]");
  t1 = System.currentTimeMillis();

  // phase 3
  params.set("type", "phase3");
  Job job3 = Job.getInstance(params, "KNNJoin Phase3");
  job3.setJarByClass(KNNJoin.class);
  job3.setMapperClass(KNNJMapPhase3.class);
  job3.setOutputKeyClass(NullWritable.class);
  job3.setOutputValueClass(Text.class);
  job3.setNumReduceTasks(0);
  Path p3OutPath;
  do {
    p3OutPath = new Path(outputPath.getParent(),
        outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
  } while (outputFS.exists(p3OutPath));
  FileSystem p3OutPathFS = FileSystem.get(p3OutPath.toUri(), params);
  job3.setInputFormatClass(KNNJInputFormatPhase3.class);
  KNNJInputFormatPhase3.setInputPaths(job3, p2OutPath, inputPaths[1]);
  job3.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job3, p3OutPath);
  // Submit the job
  if (job3.waitForCompletion(true)) {
    LOG.info("Job3 succeeded.");
  } else {
    LOG.info("Job3 failed.");
    return;
  }
  // end of phase 3

  // cleaning temporary dirs and files
  p2OutPathFS.delete(p2OutPath, true);
  p3OutPathFS.delete(p3OutPath, true);
  t2 = System.currentTimeMillis() - t1;
  LOG.info("[stat:time:3=" + t2 + "]");
  */
}