Usage examples for org.apache.hadoop.mapreduce.Job#getCounters()
public Counters getCounters() throws IOException
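All of the examples below share one pattern: run the job to completion, call getCounters(), then look up individual Counter values either through a counter enum or through a group/name pair. As a minimal, self-contained sketch of that pattern (the job here is a placeholder identity pass-through from args[0] to args[1]; only the getCounters() call is the point):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetCountersExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "getCounters example");
    job.setJarByClass(GetCountersExample.class);
    // No mapper/reducer set: the default identity classes simply pass
    // records through. The job itself is a placeholder.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);

    // getCounters() returns the counters aggregated across all tasks.
    // It throws IOException on communication failure and may return null
    // when a retired job's history is no longer available.
    Counters counters = job.getCounters();
    if (counters != null) {
      long mapOut = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
      System.out.println("Map output records: " + mapOut);
    }
  }
}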
From source file: edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License: Open Source License
/**
 * A MapReduce version of KNN query.
 * @param inputPath
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(params, "PKNN");
  job.setJarByClass(HSPKNNQ.class);
  FileSystem inFs = inputPath.getFileSystem(params);
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inputPath);
  job.setMapperClass(KNNMap.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TextWithDistance.class);
  job.setReducerClass(KNNReduce.class);
  job.setNumReduceTasks(1);
  job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
  final Point queryPoint = (Point) params.getShape("point");
  final int k = params.getInt("k", 1);

  final IntWritable additional_blocks_2b_processed = new IntWritable(0);
  long resultCount;
  int iterations = 0;

  Path outputPath = userOutputPath;
  if (outputPath == null) {
    do {
      outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
    } while (inFs.exists(outputPath));
  }
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outputPath);

  GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
  Configuration templateConf = job.getConfiguration();

  FileSystem outFs = outputPath.getFileSystem(params);
  // Start with the query point to select all partitions overlapping with it
  Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

  do {
    job = new Job(templateConf);
    // Delete results of last iteration if not first iteration
    if (outputPath != null)
      outFs.delete(outputPath, true);

    LOG.info("Running iteration: " + (++iterations));
    // Set query range for the SpatialInputFormat
    OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

    // Submit the job
    if (params.getBoolean("background", false)) {
      // XXX this is incorrect because if the job needs multiple iterations,
      // it will run only the first one
      job.waitForCompletion(false);
      return job;
    }
    job.waitForCompletion(false);

    // Retrieve answers for this iteration
    Counters counters = job.getCounters();
    Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    resultCount = resultSizeCounter.getValue();

    if (globalIndex != null) {
      Circle range_for_next_iteration;
      if (resultCount < k) {
        LOG.info("Found only " + resultCount + " results");
        // Did not find enough results in the query space
        // Increase the distance by doubling the maximum distance among all
        // partitions that were processed
        final DoubleWritable maximum_distance = new DoubleWritable(0);
        int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
            new ResultCollector<Partition>() {
              @Override
              public void collect(Partition p) {
                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                if (distance > maximum_distance.get())
                  maximum_distance.set(distance);
              }
            });
        if (matched_partitions == 0) {
          // The query point is outside the search space
          // Set the range to include the closest partition
          globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
            @Override
            public void collect(Partition r, Double s) {
              maximum_distance.set(s);
            }
          });
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
        LOG.info("Expanding to " + maximum_distance.get() * 2);
      } else {
        // Calculate the new test range, which is a circle centered at the
        // query point with radius equal to the distance to the k-th neighbor
        // Get distance to the kth neighbor
        final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
        FileStatus[] results = outFs.listStatus(outputPath);
        for (FileStatus result_file : results) {
          if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
            // Read the last line (kth neighbor)
            Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                new ResultCollector<TextWithDistance>() {
                  @Override
                  public void collect(TextWithDistance r) {
                    distance_to_kth_neighbor.set(r.distance);
                  }
                });
          }
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
        LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
      }

      // Calculate the number of blocks to be processed to check the
      // terminating condition
      additional_blocks_2b_processed.set(0);
      final Shape temp = range_for_this_iteration;
      globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
        @Override
        public void collect(Partition p) {
          if (!(p.isIntersected(temp))) {
            additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
          }
        }
      });
      range_for_this_iteration = range_for_next_iteration;
    }
  } while (additional_blocks_2b_processed.get() > 0);

  // If output file is not set by user, delete it
  if (userOutputPath == null)
    outFs.delete(outputPath, true);
  TotalIterations.addAndGet(iterations);
  return job;
}
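Note that the lookup above goes through org.apache.hadoop.mapred.Task.Counter, an enum that has long been deprecated. Assuming Hadoop 2.x or later, the same built-in counter can be read through the org.apache.hadoop.mapreduce.TaskCounter enum; a sketch of an equivalent helper:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

// Modern replacement for counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS).
static long reduceOutputRecords(Job job) throws IOException {
  Counters counters = job.getCounters();
  return counters.findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
}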
From source file: edu.umn.cs.sthadoop.operations.STJoins.java
License: Open Source License
static void JoinMapReduce(OperationsParams params)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Path[] inputPaths = params.getInputPaths();
  Path outputPath = params.getOutputPath();
  //final int k = params.getInt("k", 1);
  HdfsRecordReader.params = params;
  //System.out.println(params.getInputPaths().length);
  long t1 = System.currentTimeMillis();

  // phase 1
  params.set("type", "phase1");
  Job job = Job.getInstance(params, "ST-Join Phase1");
  job.setJarByClass(STJoinsMapper.class);
  job.setInputFormatClass(HdfsInputFormat.class);
  HdfsInputFormat.setInputPaths(job, inputPaths[0], inputPaths[1]);
  job.setMapperClass(STJoinsMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outputPath);
  MultipleOutputs.addNamedOutput(job, "phase2", TextOutputFormat3.class, Text.class, Text.class);

  // Submit the job
  if (job.waitForCompletion(true)) {
    LOG.info("[stat:job[0]");
  } else {
    LOG.info("[stat:job[1]");
    return;
  }
  long t2 = System.currentTimeMillis() - t1;
  t1 = System.currentTimeMillis();

  Counters counters = job.getCounters();
  long refSplits = counters.findCounter(STJoinsMapper.Stats.refSplits).getValue();
  long qSplits = counters.findCounter(STJoinsMapper.Stats.qSplits).getValue();
  long numRefRecs = counters.findCounter(STJoinsMapper.Stats.numRefRecs).getValue();
  long numQRecs = counters.findCounter(STJoinsMapper.Stats.numQRecs).getValue();
  long numP2Recs = counters.findCounter(STJoinsMapper.Stats.phase2Recs).getValue();
  String str = String.format(
      "stat:counters[refSplits=%s;qSplits=%s;numRefRecs=%s;numQRecs=%s;numP2Recs=%s;t1=%s]",
      refSplits, qSplits, numRefRecs, numQRecs, numP2Recs, t2);
  LOG.info(str);
  // LOG.info("[stat:counter:refSplits="+refSplits+"]");
  // LOG.info("[stat:counter:qSplits="+qSplits+"]");
  // LOG.info("[stat:counter:numRefRecs="+numRefRecs+"]");
  // LOG.info("[stat:counter:numQRecs="+numQRecs+"]");
  // LOG.info("[stat:counter:numP2Recs="+numP2Recs+"]");
  /*
   * for (Iterator<String> iterator = counters.getGroupNames().iterator(); iterator.hasNext();) {
   *   String str = (String) iterator.next();
   *   LOG.info("[stat:counter=" + str + "]");
   * }
   */
  // end of phase 1

  // phase 2
  /*
  params.set("type", "phase2");
  Job job2 = Job.getInstance(params, "KNNJoin Phase2");
  job2.setJarByClass(KNNJoin.class);
  job2.setMapperClass(TokenizerMapper.class);
  job2.setReducerClass(GroupingReducer.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(Text.class);
  FileSystem outputFS = outputPath.getFileSystem(params);
  Path p2OutPath;
  do {
    p2OutPath = new Path(outputPath.getParent(),
        outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
  } while (outputFS.exists(p2OutPath));
  FileSystem p2OutPathFS = FileSystem.get(p2OutPath.toUri(), params);
  job2.setInputFormatClass(KNNJInputFormatPhase2.class);
  KNNJInputFormatPhase2.setInputPaths(job2, outputPath);
  job2.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job2, p2OutPath);
  MultipleOutputs.addNamedOutput(job2, "phase3", TextOutputFormat3.class, NullWritable.class, Text.class);
  // Submit the job
  if (job2.waitForCompletion(true)) {
    LOG.info("Job2 succeeded.");
  } else {
    LOG.info("Job2 failed.");
    return;
  }
  // end of phase 2
  t2 = System.currentTimeMillis() - t1;
  LOG.info("[stat:time:2=" + t2 + "]");
  t1 = System.currentTimeMillis();

  // phase 3
  params.set("type", "phase3");
  Job job3 = Job.getInstance(params, "KNNJoin Phase3");
  job3.setJarByClass(KNNJoin.class);
  job3.setMapperClass(STJoinsMapperPhase3.class);
  job3.setOutputKeyClass(NullWritable.class);
  job3.setOutputValueClass(Text.class);
  job3.setNumReduceTasks(0);
  Path p3OutPath;
  do {
    p3OutPath = new Path(outputPath.getParent(),
        outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
  } while (outputFS.exists(p3OutPath));
  FileSystem p3OutPathFS = FileSystem.get(p3OutPath.toUri(), params);
  job3.setInputFormatClass(KNNJInputFormatPhase3.class);
  KNNJInputFormatPhase3.setInputPaths(job3, p2OutPath, inputPaths[1]);
  job3.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job3, p3OutPath);
  // Submit the job
  if (job3.waitForCompletion(true)) {
    LOG.info("Job3 succeeded.");
  } else {
    LOG.info("Job3 failed.");
    return;
  }
  // end of phase 3

  // cleaning temporary dirs and files
  p2OutPathFS.delete(p2OutPath, true);
  p3OutPathFS.delete(p3OutPath, true);
  t2 = System.currentTimeMillis() - t1;
  LOG.info("[stat:time:3=" + t2 + "]");
  */
}
From source file: edu.umn.cs.sthadoop.operations.STRangeQuery.java
License: Open Source License
public static void rangeQueryOperation(OperationsParams parameters) throws Exception {
  final OperationsParams params = parameters;

  final Path[] paths = params.getPaths();
  if (paths.length <= 1 && !params.checkInput()) {
    printUsage();
    System.exit(1);
  }
  if (paths.length >= 2 && !params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }
  if (params.get("rect") == null) {
    String x1 = "-" + Double.toString(Double.MAX_VALUE);
    String y1 = "-" + Double.toString(Double.MAX_VALUE);
    String x2 = Double.toString(Double.MAX_VALUE);
    String y2 = Double.toString(Double.MAX_VALUE);
    System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
    params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
    // System.err.println("You must provide a query range");
    // printUsage();
    // System.exit(1);
  }
  if (params.get("interval") == null) {
    System.err.println("Temporal range missing");
    printUsage();
    System.exit(1);
  }
  TextSerializable inObj = params.getShape("shape");
  if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
    LOG.error("Shape is not instance of STPoint or STRectangle");
    printUsage();
    System.exit(1);
  }
  // Get spatio-temporal slices.
  List<Path> STPaths = getIndexedSlices(params);
  final Path outPath = params.getOutputPath();
  final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

  // All running jobs
  final Vector<Long> resultsCounts = new Vector<Long>();
  Vector<Job> jobs = new Vector<Job>();
  Vector<Thread> threads = new Vector<Thread>();

  long t1 = System.currentTimeMillis();
  for (Path stPath : STPaths) {
    final Path inPath = stPath;
    for (int i = 0; i < queryRanges.length; i++) {
      final OperationsParams queryParams = new OperationsParams(params);
      OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
      if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
        // Run in local mode
        final Rectangle queryRange = queryRanges[i];
        final Shape shape = queryParams.getShape("shape");
        final Path output = outPath == null ? null
            : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
        Thread thread = new Thread() {
          @Override
          public void run() {
            FSDataOutputStream outFile = null;
            final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
            try {
              ResultCollector<Shape> collector = null;
              if (output != null) {
                FileSystem outFS = output.getFileSystem(queryParams);
                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                collector = new ResultCollector<Shape>() {
                  final Text tempText = new Text2();

                  @Override
                  public synchronized void collect(Shape r) {
                    try {
                      tempText.clear();
                      r.toText(tempText);
                      foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                      foutFile.write(newLine);
                    } catch (IOException e) {
                      e.printStackTrace();
                    }
                  }
                };
              } else {
                outFile = null;
              }
              long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
              resultsCounts.add(resultCount);
            } catch (IOException e) {
              e.printStackTrace();
            } catch (InterruptedException e) {
              e.printStackTrace();
            } finally {
              try {
                if (outFile != null)
                  outFile.close();
              } catch (IOException e) {
                e.printStackTrace();
              }
            }
          }
        };
        thread.start();
        threads.add(thread);
      } else {
        // Run in MapReduce mode
        Path outTempPath = outPath == null ? null
            : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
        queryParams.setBoolean("background", true);
        Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
        jobs.add(job);
      }
    }
  }

  while (!jobs.isEmpty()) {
    Job firstJob = jobs.firstElement();
    firstJob.waitForCompletion(false);
    if (!firstJob.isSuccessful()) {
      System.err.println("Error running job " + firstJob);
      System.err.println("Killing all remaining jobs");
      for (int j = 1; j < jobs.size(); j++)
        jobs.get(j).killJob();
      System.exit(1);
    }
    Counters counters = firstJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    resultsCounts.add(outputRecordCounter.getValue());
    jobs.remove(0);
  }
  while (!threads.isEmpty()) {
    try {
      Thread thread = threads.firstElement();
      thread.join();
      threads.remove(0);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  long t2 = System.currentTimeMillis();
  System.out.println("QueryPlan:");
  for (Path stPath : STPaths) {
    System.out.println(stPath.getName());
  }
  System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
  System.out.println("Results counts: " + resultsCounts);
}
From source file: edu.umn.cs.sthadoop.operations.STRangeQuery.java
License: Open Source License
public static void main(String[] args) throws Exception {
  // args = new String[7];
  // args[0] = "/home/louai/nyc-taxi/yellowIndex";
  // args[1] = "/home/louai/nyc-taxi/resultSTRQ";
  // args[2] = "shape:edu.umn.cs.sthadoop.core.STPoint";
  // args[3] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
  // args[4] = "interval:2015-01-01,2015-01-02";
  // args[5] = "-overwrite";
  // args[6] = "-no-local";

  // Query for test with output
  // args = new String[6];
  // args[0] = "/home/louai/nyc-taxi/yellowIndex";
  // args[1] = "shape:edu.umn.cs.sthadoop.core.STPoint";
  // args[2] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
  // args[3] = "interval:2015-01-01,2015-01-03";
  // args[4] = "-overwrite";
  // args[5] = "-no-local";

  final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
  final Path[] paths = params.getPaths();
  if (paths.length <= 1 && !params.checkInput()) {
    printUsage();
    System.exit(1);
  }
  if (paths.length >= 2 && !params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }
  if (params.get("rect") == null) {
    String x1 = "-" + Double.toString(Double.MAX_VALUE);
    String y1 = "-" + Double.toString(Double.MAX_VALUE);
    String x2 = Double.toString(Double.MAX_VALUE);
    String y2 = Double.toString(Double.MAX_VALUE);
    System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
    params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
    // System.err.println("You must provide a query range");
    // printUsage();
    // System.exit(1);
  }
  if (params.get("interval") == null) {
    System.err.println("Temporal range missing");
    printUsage();
    System.exit(1);
  }
  TextSerializable inObj = params.getShape("shape");
  if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
    LOG.error("Shape is not instance of STPoint or STRectangle");
    printUsage();
    System.exit(1);
  }
  // Get spatio-temporal slices.
  List<Path> STPaths = getIndexedSlices(params);
  final Path outPath = params.getOutputPath();
  final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

  // All running jobs
  final Vector<Long> resultsCounts = new Vector<Long>();
  Vector<Job> jobs = new Vector<Job>();
  Vector<Thread> threads = new Vector<Thread>();

  long t1 = System.currentTimeMillis();
  for (Path stPath : STPaths) {
    final Path inPath = stPath;
    for (int i = 0; i < queryRanges.length; i++) {
      final OperationsParams queryParams = new OperationsParams(params);
      OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
      if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
        // Run in local mode
        final Rectangle queryRange = queryRanges[i];
        final Shape shape = queryParams.getShape("shape");
        final Path output = outPath == null ? null
            : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
        Thread thread = new Thread() {
          @Override
          public void run() {
            FSDataOutputStream outFile = null;
            final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
            try {
              ResultCollector<Shape> collector = null;
              if (output != null) {
                FileSystem outFS = output.getFileSystem(queryParams);
                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                collector = new ResultCollector<Shape>() {
                  final Text tempText = new Text2();

                  @Override
                  public synchronized void collect(Shape r) {
                    try {
                      tempText.clear();
                      r.toText(tempText);
                      foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                      foutFile.write(newLine);
                    } catch (IOException e) {
                      e.printStackTrace();
                    }
                  }
                };
              } else {
                outFile = null;
              }
              long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
              resultsCounts.add(resultCount);
            } catch (IOException e) {
              e.printStackTrace();
            } catch (InterruptedException e) {
              e.printStackTrace();
            } finally {
              try {
                if (outFile != null)
                  outFile.close();
              } catch (IOException e) {
                e.printStackTrace();
              }
            }
          }
        };
        thread.start();
        threads.add(thread);
      } else {
        // Run in MapReduce mode
        Path outTempPath = outPath == null ? null
            : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
        queryParams.setBoolean("background", true);
        Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
        jobs.add(job);
      }
    }
  }

  while (!jobs.isEmpty()) {
    Job firstJob = jobs.firstElement();
    firstJob.waitForCompletion(false);
    if (!firstJob.isSuccessful()) {
      System.err.println("Error running job " + firstJob);
      System.err.println("Killing all remaining jobs");
      for (int j = 1; j < jobs.size(); j++)
        jobs.get(j).killJob();
      System.exit(1);
    }
    Counters counters = firstJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    resultsCounts.add(outputRecordCounter.getValue());
    jobs.remove(0);
  }
  while (!threads.isEmpty()) {
    try {
      Thread thread = threads.firstElement();
      thread.join();
      threads.remove(0);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  long t2 = System.currentTimeMillis();
  System.out.println("QueryPlan:");
  for (Path stPath : STPaths) {
    System.out.println(stPath.getName());
  }
  System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
  System.out.println("Results counts: " + resultsCounts);
}
From source file: edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License: Open Source License
/**
 * A MapReduce version of KNN query.
 * @param inputPath
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(params, "KNN");
  job.setJarByClass(KNNDTW.class);
  FileSystem inFs = inputPath.getFileSystem(params);
  job.setInputFormatClass(SpatialInputFormat3.class);
  SpatialInputFormat3.setInputPaths(job, inputPath);
  job.setMapperClass(KNNMap.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TextWithDistance.class);
  job.setReducerClass(KNNReduce.class);
  job.setNumReduceTasks(1);
  job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
  final Point queryPoint = (Point) params.getShape("point");
  final int k = params.getInt("k", 1);

  final IntWritable additional_blocks_2b_processed = new IntWritable(0);
  long resultCount;
  int iterations = 0;

  Path outputPath = userOutputPath;
  if (outputPath == null) {
    do {
      outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
    } while (inFs.exists(outputPath));
  }
  job.setOutputFormatClass(TextOutputFormat3.class);
  TextOutputFormat3.setOutputPath(job, outputPath);

  GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
  Configuration templateConf = job.getConfiguration();

  FileSystem outFs = outputPath.getFileSystem(params);
  // Start with the query point to select all partitions overlapping with it
  Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

  do {
    job = new Job(templateConf);
    // Delete results of last iteration if not first iteration
    if (outputPath != null)
      outFs.delete(outputPath, true);

    LOG.info("Running iteration: " + (++iterations));
    // Set query range for the SpatialInputFormat
    OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

    // Submit the job
    if (params.getBoolean("background", false)) {
      // XXX this is incorrect because if the job needs multiple iterations,
      // it will run only the first one
      job.waitForCompletion(false);
      return job;
    }
    job.waitForCompletion(false);

    // Retrieve answers for this iteration
    Counters counters = job.getCounters();
    Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
    resultCount = resultSizeCounter.getValue();

    if (globalIndex != null) {
      Circle range_for_next_iteration;
      if (resultCount < k) {
        LOG.info("Found only " + resultCount + " results");
        // Did not find enough results in the query space
        // Increase the distance by doubling the maximum distance among all
        // partitions that were processed
        final DoubleWritable maximum_distance = new DoubleWritable(0);
        int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
            new ResultCollector<Partition>() {
              @Override
              public void collect(Partition p) {
                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                if (distance > maximum_distance.get())
                  maximum_distance.set(distance);
              }
            });
        if (matched_partitions == 0) {
          // The query point is outside the search space
          // Set the range to include the closest partition
          globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
            @Override
            public void collect(Partition r, Double s) {
              maximum_distance.set(s);
            }
          });
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
        LOG.info("Expanding to " + maximum_distance.get() * 2);
      } else {
        // Calculate the new test range, which is a circle centered at the
        // query point with radius equal to the distance to the k-th neighbor
        // Get distance to the kth neighbor
        final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
        FileStatus[] results = outFs.listStatus(outputPath);
        for (FileStatus result_file : results) {
          if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
            // Read the last line (kth neighbor)
            Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                new ResultCollector<TextWithDistance>() {
                  @Override
                  public void collect(TextWithDistance r) {
                    distance_to_kth_neighbor.set(r.distance);
                  }
                });
          }
        }
        range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
        LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
      }

      // Calculate the number of blocks to be processed to check the
      // terminating condition
      additional_blocks_2b_processed.set(0);
      final Shape temp = range_for_this_iteration;
      globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
        @Override
        public void collect(Partition p) {
          if (!(p.isIntersected(temp))) {
            additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
          }
        }
      });
      range_for_this_iteration = range_for_next_iteration;
    }
  } while (additional_blocks_2b_processed.get() > 0);

  // If output file is not set by user, delete it
  if (userOutputPath == null)
    outFs.delete(outputPath, true);
  TotalIterations.addAndGet(iterations);
  return job;
}
From source file: edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java
License: Open Source License
public static void main(String[] args) throws Exception {
  // args = new String[8];
  // args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife";
  // args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result";
  // args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory";
  // args[3] = "interval:2008-05-01,2008-05-30";
  // args[4] = "time:month";
  // args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883";
  // args[6] = "-overwrite";
  // args[7] = "-local"; // "-no-local"

  final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
  final Path[] paths = params.getPaths();
  if (paths.length <= 1 && !params.checkInput()) {
    printUsage();
    System.exit(1);
  }
  if (paths.length >= 2 && !params.checkInputOutput()) {
    printUsage();
    System.exit(1);
  }
  if (params.get("traj") == null) {
    System.err.println("Trajectory query is missing");
    printUsage();
    System.exit(1);
  }
  // Invoke method to compute the trajectory MBR.
  String rectangle = getTrajectoryRectangle(params.get("traj"));
  params.set("rect", rectangle);
  if (params.get("rect") == null) {
    System.err.println("You must provide a Trajectory Query");
    printUsage();
    System.exit(1);
  }
  if (params.get("interval") == null) {
    System.err.println("Temporal range missing");
    printUsage();
    System.exit(1);
  }
  TextSerializable inObj = params.getShape("shape");
  if (!(inObj instanceof STPoint)) {
    LOG.error("Shape is not instance of STPoint");
    printUsage();
    System.exit(1);
  }
  // Get spatio-temporal slices.
  List<Path> STPaths = getIndexedSlices(params);
  final Path outPath = params.getOutputPath();
  final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

  // All running jobs
  final Vector<Long> resultsCounts = new Vector<Long>();
  Vector<Job> jobs = new Vector<Job>();
  Vector<Thread> threads = new Vector<Thread>();

  long t1 = System.currentTimeMillis();
  for (Path stPath : STPaths) {
    final Path inPath = stPath;
    for (int i = 0; i < queryRanges.length; i++) {
      final OperationsParams queryParams = new OperationsParams(params);
      OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
      if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
        // Run in local mode
        final Rectangle queryRange = queryRanges[i];
        final Shape shape = queryParams.getShape("shape");
        final Path output = outPath == null ? null
            : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
        Thread thread = new Thread() {
          @Override
          public void run() {
            FSDataOutputStream outFile = null;
            final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
            try {
              ResultCollector<Shape> collector = null;
              if (output != null) {
                FileSystem outFS = output.getFileSystem(queryParams);
                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                collector = new ResultCollector<Shape>() {
                  final Text tempText = new Text2();

                  @Override
                  public synchronized void collect(Shape r) {
                    try {
                      tempText.clear();
                      r.toText(tempText);
                      foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                      foutFile.write(newLine);
                    } catch (IOException e) {
                      e.printStackTrace();
                    }
                  }
                };
              } else {
                outFile = null;
              }
              long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
              resultsCounts.add(resultCount);
            } catch (IOException e) {
              e.printStackTrace();
            } catch (InterruptedException e) {
              e.printStackTrace();
            } finally {
              try {
                if (outFile != null)
                  outFile.close();
              } catch (IOException e) {
                e.printStackTrace();
              }
            }
          }
        };
        thread.start();
        threads.add(thread);
      } else {
        // Run in MapReduce mode
        Path outTempPath = outPath == null ? null
            : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
        queryParams.setBoolean("background", true);
        Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
        jobs.add(job);
      }
    }
  }

  while (!jobs.isEmpty()) {
    Job firstJob = jobs.firstElement();
    firstJob.waitForCompletion(false);
    if (!firstJob.isSuccessful()) {
      System.err.println("Error running job " + firstJob);
      System.err.println("Killing all remaining jobs");
      for (int j = 1; j < jobs.size(); j++)
        jobs.get(j).killJob();
      System.exit(1);
    }
    Counters counters = firstJob.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    resultsCounts.add(outputRecordCounter.getValue());
    jobs.remove(0);
  }
  while (!threads.isEmpty()) {
    try {
      Thread thread = threads.firstElement();
      thread.join();
      threads.remove(0);
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }
  long t2 = System.currentTimeMillis();
  System.out.println("QueryPlan:");
  for (Path stPath : STPaths) {
    System.out.println(stPath.getName());
  }
  System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
  System.out.println("Results counts: " + resultsCounts);
}
From source file: fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java
License: LGPL
/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time between 2 checks of the completion of jobs
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting or waiting for the end of the job
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis,
    final TaskStatus status, final String counterGroup) throws EoulsanException {

  if (job == null) {
    throw new NullPointerException("The job is null");
  }
  if (jobDescription == null) {
    throw new NullPointerException("The jobDescription is null");
  }

  try {
    // Set the description of the context
    status.setDescription(job.getJobName());

    // Submit the job
    job.submit();

    // Add the Hadoop job to the list of jobs to kill if the workflow fails
    HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

    // Wait for the completion of the job (non-verbose mode)
    job.waitForCompletion(false);

    // Remove the Hadoop job from the list of jobs to kill if the workflow fails
    HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

    // Check if the job has been successfully executed
    if (!job.isSuccessful()) {
      status.setProgressMessage("FAILED");
      throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
    }

    // Set the counters
    status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);
  } catch (ClassNotFoundException | InterruptedException | IOException e) {
    throw new EoulsanException(e);
  }
}
From source file: gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java
License: Apache License
@Override
public int run(String[] args) throws Exception {

  if (args.length < 5) {
    System.err.println("Usage: " + this.getClass().getName()
        + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers>"
        + " <resulting_split_file> <input_path1>...");
    return 1;
  }

  // Parse arguments
  Path outputPath = new Path(args[0]);
  float proportionToSample = Float.parseFloat(args[1]);
  int numberTabletServers = Integer.parseInt(args[2]);
  Path resultingSplitsFile = new Path(args[3]);
  Path[] inputPaths = new Path[args.length - 4];
  for (int i = 0; i < inputPaths.length; i++) {
    inputPaths[i] = new Path(args[i + 4]);
  }

  // Conf and job
  Configuration conf = getConf();
  conf.setFloat("proportion_to_sample", proportionToSample);
  String jobName = "Estimate split points: input = ";
  for (int i = 0; i < inputPaths.length; i++) {
    jobName += inputPaths[i] + ", ";
  }
  jobName += "output = " + outputPath;
  Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(getClass());

  // Input
  job.setInputFormatClass(SequenceFileInputFormat.class);
  for (int i = 0; i < inputPaths.length; i++) {
    SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
  }

  // Mapper
  job.setMapperClass(EstimateSplitPointsMapper.class);
  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(Value.class);

  // Reducer
  job.setReducerClass(EstimateSplitPointsReducer.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(Value.class);
  job.setNumReduceTasks(1);

  // Output
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  // Run job
  job.waitForCompletion(true);

  // Successful?
  if (!job.isSuccessful()) {
    System.err.println("Error running job");
    return 1;
  }

  // Number of records output
  // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
  // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
  // versions of Hadoop.
  @SuppressWarnings("deprecation")
  Counter counter = job.getCounters()
      .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
  long recordsOutput = counter.getValue();
  System.out.println("Number of records output = " + recordsOutput);

  // Work out when to output a split point. The number of split points
  // needed is the number of tablet servers minus 1 (because you don't
  // have to output the start of the first tablet or the end of the
  // last tablet).
  long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

  // Read through the resulting file, pick out the split points and write them to file.
  FileSystem fs = FileSystem.get(conf);
  Path resultsFile = new Path(outputPath, "part-r-00000");
  @SuppressWarnings("deprecation")
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
  PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
  Key key = new Key();
  Value value = new Value();
  long count = 0;
  int numberSplitPointsOutput = 0;
  while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
    count++;
    if (count % outputEveryNthRecord == 0) {
      numberSplitPointsOutput++;
      splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
      System.out.println("Written split point: " + key.getRow());
    }
  }
  reader.close();
  splitsWriter.close();
  System.out.println("Number of split points output = " + numberSplitPointsOutput);
  return 0;
}
From source file: gr.ntua.h2rdf.sampler.TotalOrderPrep.java
License: Open Source License
public int run(String[] args) throws Exception {
  Job job = createSubmittableJob(args);
  job.waitForCompletion(true);
  Counters counters = job.getCounters();
  regions = counters.getGroup("org.apache.hadoop.mapred.Task$Counter")
      .findCounter("REDUCE_OUTPUT_RECORDS").getValue() + 1;
  return 0;
}
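The group name "org.apache.hadoop.mapred.Task$Counter" used here is the binary class name of the old counter enum. When the exact group or counter names are uncertain, Counters is Iterable<CounterGroup>, so all of them can be listed at runtime; a small sketch of such a dump utility (the class name is illustrative):

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;

public final class CounterDump {
  // Print every counter group and counter a finished job reported,
  // e.g. CounterDump.dump(job.getCounters()).
  public static void dump(Counters counters) {
    for (CounterGroup group : counters) {
      System.out.println(group.getName() + " (" + group.getDisplayName() + ")");
      for (Counter counter : group) {
        System.out.println("  " + counter.getName() + " = " + counter.getValue());
      }
    }
  }
}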
From source file: hadoop.examples.WordMedian.java
License: Apache License
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmedian <in> <out>");
    return 0;
  }

  setConf(new Configuration());
  Configuration conf = getConf();

  @SuppressWarnings("deprecation")
  Job job = new Job(conf, "word median");
  job.setJarByClass(WordMedian.class);
  job.setMapperClass(WordMedianMapper.class);
  job.setCombinerClass(WordMedianReducer.class);
  job.setReducerClass(WordMedianReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean result = job.waitForCompletion(true);

  // Wait for JOB 1 -- get middle value to check for Median
  long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
      .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
  int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
  int medianIndex2 = (int) Math.floor((totalWords / 2.0));

  median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

  return (result ? 0 : 1);
}
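The group-plus-name lookup above resolves to the same counter as a direct enum lookup, since findCounter(Enum) uses the enum's class name as the group and the constant's name as the counter name. Assuming the TaskCounter import already present in WordMedian, the string literals can be avoided with:

// Equivalent, enum-based lookup (avoids the string group/counter names):
long totalWords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();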