Example usage for org.apache.hadoop.mapreduce Counter getValue

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Counter.getValue(), collected from open-source projects.

Prototype

long getValue();

Document

What is the current value of this counter?
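
Before the full examples below, here is a minimal sketch of the typical call pattern: run a job to completion, then read a counter back from the job's Counters object. This sketch assumes the Hadoop 2.x mapreduce API and uses TaskCounter.MAP_OUTPUT_RECORDS, one of Hadoop's built-in task counters.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

/** Prints one built-in counter of a completed job. */
public static void printMapOutputRecords(Job job) throws IOException {
    Counters counters = job.getCounters();
    Counter mapOutputs = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS);
    System.out.println("Map output records: " + mapOutputs.getValue());
}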

Usage

From source file: datafu.hourglass.jobs.StagedOutputJob.java

License: Apache License

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 * @param fs the file system to write the counters file to
 * @throws IOException if the counters file cannot be written
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
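
The statistics above are gathered through the older org.apache.hadoop.mapred.JobClient API. On Hadoop 2.x the same task reports can be fetched from the mapreduce-API Job directly; the following is a minimal sketch under that assumption, not part of the original job.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

/** Sums the wall-clock time of all map tasks of a completed job, in milliseconds. */
public static long totalMapTimeMs(Job job) throws IOException, InterruptedException {
    long total = 0;
    for (TaskReport report : job.getTaskReports(TaskType.MAP)) {
        // Skip reports with missing timestamps, as the example above does.
        if (report.getStartTime() == 0 || report.getFinishTime() == 0)
            continue;
        total += report.getFinishTime() - report.getStartTime();
    }
    return total;
}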

From source file: dz.lab.mapred.counter.StartsWithCountJob_PrintCounters.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // the following property will enable mapreduce to use its packaged local job runner
    //conf.set("mapreduce.framework.name", "local");

    Job job = Job.getInstance(conf, "StartsWithCountJob");
    job.setJarByClass(getClass());

    // configure output and input source
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(StartsWithCountMapper.class);
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    int resultCode = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job is complete! Printing Counters:");
    Counters counters = job.getCounters();

    for (String groupName : counters.getGroupNames()) {
        CounterGroup group = counters.getGroup(groupName);
        System.out.println(group.getDisplayName());

        for (Counter counter : group.getUnderlyingGroup()) {
            System.out.println(" " + counter.getDisplayName() + "=" + counter.getValue());
        }
    }
    return resultCode;
}
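
Counters created on the fly by tasks, such as one counter per starting letter in this job, can also be looked up individually by group and counter name. A short sketch follows; the group name here is hypothetical and must match whatever the mapper actually passed to context.getCounter(group, name).

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

/** Reads one dynamically created counter by group and name. */
public static long countForLetter(Job job, String letter) throws IOException {
    // "StartsWithCounts" is a placeholder group name, not necessarily the
    // group this job's mapper actually uses.
    return job.getCounters().findCounter("StartsWithCounts", letter).getValue();
}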

From source file: edu.umn.cs.spatialHadoop.nasa.HDFToText.java

License: Open Source License

/**
 * Performs an HDF to text operation as a MapReduce job and returns the total
 * number of points generated.
 * @param inPath path of the input HDF file
 * @param outPath path of the output text file
 * @param datasetName name of the dataset to extract from the HDF file
 * @param skipFillValue whether points matching the fill value should be skipped
 * @param params additional operation parameters
 * @return the total number of points generated
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static long HDFToTextMapReduce(Path inPath, Path outPath, String datasetName, boolean skipFillValue,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "HDFToText");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(HDFToText.class);
    job.setJobName("HDFToText");

    // Set Map function details
    job.setMapperClass(HDFToTextMap.class);
    job.setNumReduceTasks(0);

    // Set input information
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    if (conf.get("shape") == null)
        conf.setClass("shape", NASAPoint.class, Shape.class);
    conf.set("dataset", datasetName);
    conf.setBoolean("skipfillvalue", skipFillValue);

    // Set output information
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Run the job
    boolean verbose = conf.getBoolean("verbose", false);
    job.waitForCompletion(verbose);
    Counters counters = job.getCounters();
    Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
    final long resultCount = outputRecordCounter.getValue();

    return resultCount;
}

From source file: edu.umn.cs.spatialHadoop.operations.FarthestPair.java

License: Open Source License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    GenericOptionsParser parser = new GenericOptionsParser(args);
    OperationsParams params = new OperationsParams(parser);

    if (!params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    Path[] inFiles = params.getInputPaths();
    Path outPath = params.getOutputPath();

    long t1 = System.currentTimeMillis();
    Job job = farthestPair(inFiles, outPath, params);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time: " + (t2 - t1) + " millis");
    if (job != null) {
        Counter processedPairs = job.getCounters().findCounter(FarthestPairCounters.FP_ProcessedPairs);
        System.out.println("Processed " + processedPairs.getValue() + " pairs");
    }
}
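
FarthestPairCounters.FP_ProcessedPairs is a counter enum defined by the job itself. The general pattern is to declare an enum, increment it from the task through the task context, and read it back in the driver as shown above. A minimal sketch with hypothetical names (not SpatialHadoop's actual classes):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class PairMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Any enum can serve as a counter; Hadoop creates it on first use.
    public enum PairCounters { PROCESSED_PAIRS }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // ... process one candidate pair, then record it:
        context.getCounter(PairCounters.PROCESSED_PAIRS).increment(1);
    }
}

After waitForCompletion returns, the driver reads the value back with job.getCounters().findCounter(PairCounters.PROCESSED_PAIRS).getValue().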

From source file: edu.umn.cs.spatialHadoop.operations.KNN.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the input file
 * @param userOutputPath path of the output file, or null to create a temporary one
 * @param params operation parameters, including the query point and k
 * @return the job that computed the final answer
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition.
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.RangeQuery.java

License: Open Source License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        System.err.println("You must provide a query range");
        printUsage();
        System.exit(1);
    }
    final Path inPath = params.getInputPath();
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (int i = 0; i < queryRanges.length; i++) {
        final OperationsParams queryParams = new OperationsParams(params);
        OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
        if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
            // Run in local mode
            final Rectangle queryRange = queryRanges[i];
            final Shape shape = queryParams.getShape("shape");
            final Path output = outPath == null ? null
                    : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
            Thread thread = new Thread() {
                @Override
                public void run() {
                    FSDataOutputStream outFile = null;
                    final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                    try {
                        ResultCollector<Shape> collector = null;
                        if (output != null) {
                            FileSystem outFS = output.getFileSystem(queryParams);
                            final FSDataOutputStream foutFile = outFile = outFS.create(output);
                            collector = new ResultCollector<Shape>() {
                                final Text tempText = new Text2();

                                @Override
                                public synchronized void collect(Shape r) {
                                    try {
                                        tempText.clear();
                                        r.toText(tempText);
                                        foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                        foutFile.write(newLine);
                                    } catch (IOException e) {
                                        e.printStackTrace();
                                    }
                                }
                            };
                        } else {
                            outFile = null;
                        }
                        long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
                        resultsCounts.add(resultCount);
                    } catch (IOException e) {
                        e.printStackTrace();
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    } finally {
                        try {
                            if (outFile != null)
                                outFile.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                }
            };
            thread.start();
            threads.add(thread);
        } else {
            // Run in MapReduce mode
            queryParams.setBoolean("background", true);
            Job job = rangeQueryMapReduce(inPath, outPath, queryParams);
            jobs.add(job);
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();

    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}
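
One caveat when harvesting counters from many jobs this way: Job.getCounters() may return null once a job has been retired from the completed-job store, so defensive code guards the lookup. A minimal sketch, using TaskCounter, the mapreduce-era replacement for the deprecated Task.Counter enum used above:

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

/** Returns the map output record count, or -1 if counters are unavailable. */
public static long mapOutputRecordsOrDefault(Job job) throws IOException {
    Counters counters = job.getCounters();
    if (counters == null) {
        // The job may have been retired; its counters are no longer available.
        return -1;
    }
    return counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
}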

From source file: edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the input file
 * @param userOutputPath path of the output file, or null to create a temporary one
 * @param params operation parameters, including the query point and k
 * @return the job that computed the final answer
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "PKNN");
    job.setJarByClass(HSPKNNQ.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition.
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file: edu.umn.cs.sthadoop.operations.STRangeQuery.java

License: Open Source License

public static void rangeQueryOperation(OperationsParams parameters) throws Exception {
    final OperationsParams params = parameters;

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file: edu.umn.cs.sthadoop.operations.STRangeQuery.java

License: Open Source License

public static void main(String[] args) throws Exception {
    //      args = new String[7];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "/home/louai/nyc-taxi/resultSTRQ";
    //      args[2] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[3] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[4] = "interval:2015-01-01,2015-01-02";
    //      args[5] = "-overwrite";
    //      args[6] = "-no-local";

    // Query for test with output
    //      args = new String[6];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[2] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[3] = "interval:2015-01-01,2015-01-03";
    //      args[4] = "-overwrite";
    //      args[5] = "-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file: edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the input file
 * @param userOutputPath path of the output file, or null to create a temporary one
 * @param params operation parameters, including the query point and k
 * @return the job that computed the final answer
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with
    // it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple
            // iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance
                // among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered
                // at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition.
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}