List of usage examples for org.apache.hadoop.util PriorityQueue insert
public boolean insert(T element)
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNN.ShapeWithDistance<Partition>>() { {//from ww w .j ava 2s .c o m initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for (ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() { {// w ww.j av a 2 s. c o m initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for (ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() { {//from w w w . jav a 2 s. c o m initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for (ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:org.apache.mahout.classifier.bayes.BayesClassifier.java
License:Apache License
/** * Classify the document and return the top <code>numResults</code> * * @param model The model/* w w w . j a v a 2 s. c om*/ * @param document The document to classify * @param defaultCategory The default category to assign * @param numResults The maximum number of results to return, ranked by score. Ties are broken by comparing the * category * @return A Collection of {@link ClassifierResult}s. */ @Override public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) { Collection<String> categories = model.getLabels(); PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults); ClassifierResult tmp; for (String category : categories) { double prob = documentWeight(model, category, document); if (prob > 0.0) { tmp = new ClassifierResult(category, prob); pq.insert(tmp); } } Deque<ClassifierResult> result = new LinkedList<ClassifierResult>(); while ((tmp = pq.pop()) != null) { result.addLast(tmp); } if (result.isEmpty()) { result.add(new ClassifierResult(defaultCategory, 0)); } return result; }
From source file:org.apache.mahout.classifier.cbayes.CBayesClassifier.java
License:Apache License
/** * Classify the document and return the top <code>numResults</code> * * @param model The model/*from w w w .j a va 2s . c o m*/ * @param document The document to classify * @param defaultCategory The default category to assign * @param numResults The maximum number of results to return, ranked by score. Ties are broken by comparing the * category * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s. */ @Override public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) { Collection<String> categories = model.getLabels(); PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults); ClassifierResult tmp; for (String category : categories) { double prob = documentWeight(model, category, document); if (prob < 0.0) { tmp = new ClassifierResult(category, prob); pq.insert(tmp); } } Deque<ClassifierResult> result = new LinkedList<ClassifierResult>(); while ((tmp = pq.pop()) != null) { result.addLast(tmp); } if (result.isEmpty()) { result.add(new ClassifierResult(defaultCategory, 0)); } return result; }