Example usage for org.apache.hadoop.mapreduce Job getCounters

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.Job method getCounters(), drawn from open-source projects.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
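
Before the project examples below, here is a minimal sketch of the typical call pattern (an illustration, not taken from any source file on this page): run the job to completion, then query getCounters() for an aggregate statistic. The job name and counter choice are assumptions for the sketch.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

// Minimal sketch: run a job to completion, then read a built-in counter.
static void runAndReport(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "example");
    // ... set mapper, reducer, input and output paths here ...
    job.waitForCompletion(true);

    // getCounters() may return null for a retired job with no available history.
    Counters counters = job.getCounters();
    if (counters != null) {
        long mapOutputRecords = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
        System.out.println("Map output records = " + mapOutputRecords);
    }
}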

Usage

From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License:Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the input file
 * @param userOutputPath path of the output, or null to use a temporary path
 * @param params operation parameters, including the query point and k
 * @return the last job that was run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "PKNN");
    job.setJarByClass(HSPKNNQ.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition;
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}
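
One detail of the example above is worth isolating: each iteration re-runs the job from a template configuration, and the REDUCE_OUTPUT_RECORDS counter decides whether the search range must grow. A stripped-down sketch of that control flow follows; configureRange and expandRange are hypothetical placeholders (not SpatialHadoop API), and the non-deprecated TaskCounter enum stands in for the older Task.Counter used in the source.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

// Stripped-down sketch of the iterate-until-k-results pattern above.
// configureRange and expandRange are hypothetical helpers, not real API.
static void runUntilKResults(Configuration templateConf, int k)
        throws IOException, InterruptedException, ClassNotFoundException {
    long resultCount;
    do {
        Job job = Job.getInstance(templateConf);  // fresh Job per iteration
        configureRange(job);                      // hypothetical: set the query range
        job.waitForCompletion(false);

        // The reduce output counter says how many results this round produced.
        resultCount = job.getCounters()
                .findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();

        if (resultCount < k) {
            expandRange();                        // hypothetical: grow the search circle
        }
    } while (resultCount < k);
}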

From source file:edu.umn.cs.sthadoop.operations.STJoins.java

License:Open Source License

static void JoinMapReduce(OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path[] inputPaths = params.getInputPaths();
    Path outputPath = params.getOutputPath();
    //final int k = params.getInt("k", 1);
    HdfsRecordReader.params = params;
    //System.out.println(params.getInputPaths().length);

    long t1 = System.currentTimeMillis();
    // phase 1
    params.set("type", "phase1");
    Job job = Job.getInstance(params, "ST-Join Phase1");
    job.setJarByClass(STJoinsMapper.class);
    job.setInputFormatClass(HdfsInputFormat.class);
    HdfsInputFormat.setInputPaths(job, inputPaths[0], inputPaths[1]);
    job.setMapperClass(STJoinsMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(job, "phase2", TextOutputFormat3.class, Text.class, Text.class);

    // Submit the job
    if (job.waitForCompletion(true)) {
        LOG.info("[stat:job[0]");
    } else {
        LOG.info("[stat:job[1]");
        return;
    }
    long t2 = System.currentTimeMillis() - t1;
    t1 = System.currentTimeMillis();
    Counters counters = job.getCounters();
    long refSplits = counters.findCounter(STJoinsMapper.Stats.refSplits).getValue();
    long qSplits = counters.findCounter(STJoinsMapper.Stats.qSplits).getValue();
    long numRefRecs = counters.findCounter(STJoinsMapper.Stats.numRefRecs).getValue();
    long numQRecs = counters.findCounter(STJoinsMapper.Stats.numQRecs).getValue();
    long numP2Recs = counters.findCounter(STJoinsMapper.Stats.phase2Recs).getValue();
    String str = String.format(
            "stat:counters[refSplits=%s;qSplits=%s;numRefRecs=%s;" + "numQRecs=%s;numP2Recs=%s;t1=%s]",
            refSplits, qSplits, numRefRecs, numQRecs, numP2Recs, t2);
    LOG.info(str);
    // LOG.info("[stat:counter:refSplits="+refSplits+"]");
    // LOG.info("[stat:counter:qSplits="+qSplits+"]");
    // LOG.info("[stat:counter:numRefRecs="+numRefRecs+"]");
    // LOG.info("[stat:counter:numQRecs="+numQRecs+"]");
    // LOG.info("[stat:counter:numP2Recs="+numP2Recs+"]");
    /*
     * for (Iterator<String> iterator = counters.getGroupNames().iterator();
     * iterator.hasNext();) {
     * String str = (String) iterator.next();
     * LOG.info("[stat:counter="+str+"]");
     * }
     */
    // end of phase 1

    // phase 2
    /*params.set("type", "phase2");
    Job job2 = Job.getInstance(params, "KNNJoin Phase2");
    job2.setJarByClass(KNNJoin.class);
    job2.setMapperClass(TokenizerMapper.class);
    job2.setReducerClass(GroupingReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
            
    FileSystem outputFS = outputPath.getFileSystem(params);
    Path p2OutPath;
    do {
       p2OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p2OutPath));
    FileSystem p2OutPathFS = FileSystem.get(p2OutPath.toUri(), params);
            
    job2.setInputFormatClass(KNNJInputFormatPhase2.class);
    KNNJInputFormatPhase2.setInputPaths(job2, outputPath);
    job2.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job2, p2OutPath);
    MultipleOutputs.addNamedOutput(job2, "phase3", TextOutputFormat3.class, NullWritable.class, Text.class);
            
    // Submit the job
            
     * if (job2.waitForCompletion(true)) {
     * LOG.info("Job2 succeeded.");
     * } else {
     * LOG.info("Job2 failed.");
     * return;
     * }
             
    // end of phase 2
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:2=" + t2 + "]");
    t1 = System.currentTimeMillis();
            
    // phase 3
    params.set("type", "phase3");
    Job job3 = Job.getInstance(params, "KNNJoin Phase3");
    job3.setJarByClass(KNNJoin.class);
            
    job3.setMapperClass( STJoinsMapperPhase3.class);
    job3.setOutputKeyClass(NullWritable.class);
    job3.setOutputValueClass(Text.class);
    job3.setNumReduceTasks(0);
            
    Path p3OutPath;
    do {
       p3OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p3OutPath));
    FileSystem p3OutPathFS = FileSystem.get(p3OutPath.toUri(), params);
            
    job3.setInputFormatClass(KNNJInputFormatPhase3.class);
    KNNJInputFormatPhase3.setInputPaths(job3, p2OutPath, inputPaths[1]);
    job3.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job3, p3OutPath);
            
    // Submit the job
            
     * if (job3.waitForCompletion(true)) {
     * LOG.info("Job3 succeeded.");
     * } else {
     * LOG.info("Job3 failed.");
     * return;
     * }
             
    // end of phase 3
            
    // cleaning temporary dirs and files
    p2OutPathFS.delete(p2OutPath, true);
    p3OutPathFS.delete(p3OutPath, true);
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:3=" + t2 + "]");*/
}
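
The counters read after phase 1 above (refSplits, qSplits, and so on) are custom counters declared as an enum inside STJoinsMapper. For readers unfamiliar with the mechanism, a minimal sketch with assumed class and field names:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch of a mapper that publishes custom enum counters, mirroring the
// STJoinsMapper.Stats pattern above. Class and field names are assumptions.
public class StatsMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

    public enum Stats { REF_RECORDS, QUERY_RECORDS }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Increment a counter; the framework aggregates values across all tasks.
        context.getCounter(Stats.REF_RECORDS).increment(1);
        context.write(NullWritable.get(), value);
    }
}

After the job completes, the driver reads the aggregated value with job.getCounters().findCounter(StatsMapper.Stats.REF_RECORDS).getValue(), exactly as the phase-1 code above does with its own Stats enum.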

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

public static void rangeQueryOperation(OperationsParams parameters) throws Exception {
    final OperationsParams params = parameters;

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}
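
Note the ordering in the drain loop above: getCounters() is called only after waitForCompletion() returns and isSuccessful() has been checked, since the counters of a failed job are not meaningful results. A condensed sketch of the same loop:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

// Condensed sketch of the drain loop above: wait, verify success, then read counters.
static long drainJobs(List<Job> jobs)
        throws IOException, InterruptedException, ClassNotFoundException {
    long totalResults = 0;
    while (!jobs.isEmpty()) {
        Job job = jobs.remove(0);
        job.waitForCompletion(false);    // block until this job finishes
        if (!job.isSuccessful()) {
            throw new IOException("Job failed: " + job.getJobName());
        }
        // Counters are only meaningful after successful completion.
        totalResults += job.getCounters()
                .findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
    }
    return totalResults;
}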

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

public static void main(String[] args) throws Exception {
    //      args = new String[7];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "/home/louai/nyc-taxi/resultSTRQ";
    //      args[2] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[3] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[4] = "interval:2015-01-01,2015-01-02";
    //      args[5] = "-overwrite";
    //      args[6] = "-no-local";

    // Query for test with output
    //      args = new String[6];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[2] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[3] = "interval:2015-01-01,2015-01-03";
    //      args[4] = "-overwrite";
    //      args[5   ] = "-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the input file
 * @param userOutputPath path of the output, or null to use a temporary path
 * @param params operation parameters, including the query point and k
 * @return the last job that was run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with
    // it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple
            // iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance
                // among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered
                // at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition;
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java

License:Open Source License

public static void main(String[] args) throws Exception {

    //      args = new String[8];
    //      args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife";
    //      args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result";
    //      args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory";
    //      args[3] = "interval:2008-05-01,2008-05-30";
    //      args[4] = "time:month";
    //      args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883";
    //      args[6] = "-overwrite";
    //      args[7] = "-local";//"-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    if (params.get("traj") == null) {
        System.err.println("Trajectory query is missing");
        printUsage();
        System.exit(1);
    }

    // Invoke method to compute the trajectory MBR. 
    String rectangle = getTrajectoryRectangle(params.get("traj"));
    params.set("rect", rectangle);

    if (params.get("rect") == null) {
        System.err.println("You must provide a Trajectory Query");
        printUsage();
        System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint)) {
        LOG.error("Shape is not instance of STPoint");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java

License:LGPL

/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time in milliseconds between two checks of
 *          job completion
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting the job or waiting for it to finish
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis,
        final TaskStatus status, final String counterGroup) throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }

    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {

        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non-verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {

            status.setProgressMessage("FAILED");

            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);

    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}
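
HadoopReporter is Eoulsan's own adapter around the Counters object. The same object can also be walked directly for logging, since a Counters instance is iterable over its CounterGroups and each group over its Counters; a generic dump might look like this sketch:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

// Sketch: print every counter of a finished job.
static void dumpCounters(Job job) throws IOException {
    Counters counters = job.getCounters();
    if (counters == null) {
        return; // job not initialized, or retired with no history
    }
    for (CounterGroup group : counters) {
        System.out.println(group.getDisplayName());
        for (Counter counter : group) {
            System.out.println("  " + counter.getDisplayName() + " = " + counter.getValue());
        }
    }
}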

From source file:gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length < 5) {
        System.err.println("Usage: " + this.getClass().getName()
                + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers> <resulting_split_file> <input_path1>...");
        return 1;
    }

    // Parse arguments
    Path outputPath = new Path(args[0]);
    float proportionToSample = Float.parseFloat(args[1]);
    int numberTabletServers = Integer.parseInt(args[2]);
    Path resultingSplitsFile = new Path(args[3]);
    Path[] inputPaths = new Path[args.length - 4];
    for (int i = 0; i < inputPaths.length; i++) {
        inputPaths[i] = new Path(args[i + 4]);
    }

    // Conf and job
    Configuration conf = getConf();
    conf.setFloat("proportion_to_sample", proportionToSample);
    String jobName = "Estimate split points: input = ";
    for (int i = 0; i < inputPaths.length; i++) {
        jobName += inputPaths[i] + ", ";
    }
    jobName += "output = " + outputPath;
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(getClass());

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    for (int i = 0; i < inputPaths.length; i++) {
        SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
    }

    // Mapper
    job.setMapperClass(EstimateSplitPointsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Reducer
    job.setReducerClass(EstimateSplitPointsReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    @SuppressWarnings("deprecation")
    Counter counter = job.getCounters()
            .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    long recordsOutput = counter.getValue();
    System.out.println("Number of records output = " + recordsOutput);

    // Work out when to output a split point. The number of split points
    // needed is the number of tablet servers minus 1 (because you don't
    // have to output the start of the first tablet or the end of the
    // last tablet).
    long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to
    // file.
    FileSystem fs = FileSystem.get(conf);
    Path resultsFile = new Path(outputPath, "part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
    PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
        count++;
        if (count % outputEveryNthRecord == 0) {
            numberSplitPointsOutput++;
            splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
            System.out.println("Written split point: " + key.getRow());
        }
    }
    reader.close();
    splitsWriter.close();
    System.out.println("Number of split points output = " + numberSplitPointsOutput);
    return 0;
}
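
As the comment in this example explains, the deprecated org.apache.hadoop.mapred.Task.Counter enum is used for compatibility with earlier Hadoop versions. On Hadoop 2.x and later, a non-deprecated equivalent would be the following sketch:

// Non-deprecated equivalent on Hadoop 2.x and later (sketch, not from the source):
long recordsOutput = job.getCounters()
        .findCounter(org.apache.hadoop.mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS)
        .getValue();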

From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java

License:Open Source License

public int run(String[] args) throws Exception {

    Job job = createSubmittableJob(args);
    job.waitForCompletion(true);
    Counters counters = job.getCounters();
    regions = counters.getGroup("org.apache.hadoop.mapred.Task$Counter").findCounter("REDUCE_OUTPUT_RECORDS")
            .getValue() + 1;

    return 0;
}

From source file:hadoop.examples.WordMedian.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}