Example usage for org.apache.hadoop.util PriorityQueue insert

List of usage examples for org.apache.hadoop.util PriorityQueue insert

Introduction

In this page you can find the example usage for org.apache.hadoop.util PriorityQueue insert.

Prototype

public boolean insert(T element) 

Source Link

Document

Adds element to the PriorityQueue in log(size) time if either the PriorityQueue is not full, or not lessThan(element, top()).

Usage

From source file:edu.umn.cs.spatialHadoop.operations.KNN.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNN.ShapeWithDistance<Partition>>() {
            {//from   ww  w .j ava  2s .c o m
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() {
            {//  w  ww.j  av  a  2 s.  c  o m
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() {
            {//from w w w  .  jav  a  2  s. c o  m
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:org.apache.mahout.classifier.bayes.BayesClassifier.java

License:Apache License

/**
 * Classify the document and return the top <code>numResults</code>
 *
 * @param model           The model/* w w w . j  a v a  2  s. c  om*/
 * @param document        The document to classify
 * @param defaultCategory The default category to assign
 * @param numResults      The maximum number of results to return, ranked by score. Ties are broken by comparing the
 *                        category
 * @return A Collection of {@link ClassifierResult}s.
 */
@Override
public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory,
        int numResults) {
    Collection<String> categories = model.getLabels();

    PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
    ClassifierResult tmp;
    for (String category : categories) {
        double prob = documentWeight(model, category, document);
        if (prob > 0.0) {
            tmp = new ClassifierResult(category, prob);
            pq.insert(tmp);
        }
    }

    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
    while ((tmp = pq.pop()) != null) {
        result.addLast(tmp);
    }
    if (result.isEmpty()) {
        result.add(new ClassifierResult(defaultCategory, 0));
    }
    return result;
}

From source file:org.apache.mahout.classifier.cbayes.CBayesClassifier.java

License:Apache License

/**
 * Classify the document and return the top <code>numResults</code>
 *
 * @param model           The model/*from   w w  w  .j a  va 2s  . c o  m*/
 * @param document        The document to classify
 * @param defaultCategory The default category to assign
 * @param numResults      The maximum number of results to return, ranked by score. Ties are broken by comparing the
 *                        category
 * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
 */
@Override
public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory,
        int numResults) {
    Collection<String> categories = model.getLabels();
    PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
    ClassifierResult tmp;
    for (String category : categories) {
        double prob = documentWeight(model, category, document);
        if (prob < 0.0) {
            tmp = new ClassifierResult(category, prob);
            pq.insert(tmp);
        }
    }

    Deque<ClassifierResult> result = new LinkedList<ClassifierResult>();
    while ((tmp = pq.pop()) != null) {
        result.addLast(tmp);
    }
    if (result.isEmpty()) {
        result.add(new ClassifierResult(defaultCategory, 0));
    }
    return result;
}