List of usage examples for org.apache.hadoop.util.PriorityQueue#pop()
public final T pop()
From source file:com.ricemap.spateDB.core.RTree.java
License:Apache License
/** * Performs a spatial join between records in two R-trees * //from ww w.j av a 2 s .c om * @param R * @param S * @param output * @return * @throws IOException */ protected static <S1 extends Shape, S2 extends Shape> int spatialJoinDisk(final RTree<S1> R, final RTree<S2> S, final ResultCollector2<S1, S2> output) throws IOException { // Reserve locations for nodes MBRs and data offset [start, end) final Prism[] r_nodes = new Prism[R.degree]; for (int i = 0; i < r_nodes.length; i++) r_nodes[i] = new Prism(); final int[] r_data_offset = new int[R.degree + 1]; final Prism[] s_nodes = new Prism[S.degree]; for (int i = 0; i < s_nodes.length; i++) s_nodes[i] = new Prism(); final int[] s_data_offset = new int[S.degree + 1]; PriorityQueue<Long> nodesToJoin = new PriorityQueue<Long>() { { initialize(R.leafNodeCount + S.leafNodeCount); } @Override protected boolean lessThan(Object a, Object b) { return ((Long) a) < ((Long) b); } }; nodesToJoin.put(0L); LruCache<Integer, Shape[]> r_records_cache = new LruCache<Integer, Shape[]>(R.degree * 2); LruCache<Integer, Shape[]> s_records_cache = new LruCache<Integer, Shape[]>(S.degree * R.degree * 4); Text line = new Text2(); int result_count = 0; LineReader r_lr = null, s_lr = null; // Last offset read from r and s int r_last_offset = 0; int s_last_offset = 0; while (nodesToJoin.size() > 0) { long nodes_to_join = nodesToJoin.pop(); int r_node = (int) (nodes_to_join >>> 32); int s_node = (int) (nodes_to_join & 0xFFFFFFFF); // Read all R nodes int r_mbrsToTest = r_node == 0 ? 1 : R.degree; boolean r_leaf = r_node * R.degree + 1 >= R.nodeCount; long nodeOffset = NodeSize * r_node; R.structure.seek(nodeOffset); for (int i = 0; i < r_mbrsToTest; i++) { r_data_offset[i] = R.structure.readInt(); r_nodes[i].readFields(R.structure); } r_data_offset[r_mbrsToTest] = (r_node + r_mbrsToTest) == R.nodeCount ? R.treeSize : R.structure.readInt(); // Read all S nodes int s_mbrsToTest = s_node == 0 ? 
1 : S.degree; boolean s_leaf = s_node * S.degree + 1 >= S.nodeCount; if (r_leaf != s_leaf) { // This case happens when the two trees are of different heights if (r_leaf) r_mbrsToTest = 1; else s_mbrsToTest = 1; } nodeOffset = NodeSize * s_node; S.structure.seek(nodeOffset); for (int i = 0; i < s_mbrsToTest; i++) { s_data_offset[i] = S.structure.readInt(); s_nodes[i].readFields(S.structure); } s_data_offset[s_mbrsToTest] = (s_node + s_mbrsToTest) == S.nodeCount ? S.treeSize : S.structure.readInt(); // Find overlapping nodes by Cartesian product for (int i = 0; i < r_mbrsToTest; i++) { for (int j = 0; j < s_mbrsToTest; j++) { if (r_nodes[i].isIntersected(s_nodes[j])) { if (r_leaf && s_leaf) { // Reached leaf nodes in both trees. Start comparing // records int r_start_offset = r_data_offset[i]; int r_end_offset = r_data_offset[i + 1]; int s_start_offset = s_data_offset[j]; int s_end_offset = s_data_offset[j + 1]; // ///////////////////////////////////////////////////////////////// // Read or retrieve r_records Shape[] r_records = r_records_cache.get(r_start_offset); if (r_records == null) { int cache_key = r_start_offset; r_records = r_records_cache.popUnusedEntry(); if (r_records == null) { r_records = new Shape[R.degree * 2]; } // Need to read it from stream if (r_last_offset != r_start_offset) { long seekTo = r_start_offset + R.treeStartOffset; R.data.seek(seekTo); r_lr = new LineReader(R.data); } int record_i = 0; while (r_start_offset < r_end_offset) { r_start_offset += r_lr.readLine(line); if (r_records[record_i] == null) r_records[record_i] = R.stockObject.clone(); r_records[record_i].fromText(line); record_i++; } r_last_offset = r_start_offset; // Nullify other records while (record_i < r_records.length) r_records[record_i++] = null; r_records_cache.put(cache_key, r_records); } // Read or retrieve s_records Shape[] s_records = s_records_cache.get(s_start_offset); if (s_records == null) { int cache_key = s_start_offset; // Need to read it from stream if (s_lr 
== null || s_last_offset != s_start_offset) { // Need to reposition s_lr (LineReader of S) long seekTo = s_start_offset + S.treeStartOffset; S.data.seek(seekTo); s_lr = new LineReader(S.data); } s_records = s_records_cache.popUnusedEntry(); if (s_records == null) { s_records = new Shape[S.degree * 2]; } int record_i = 0; while (s_start_offset < s_end_offset) { s_start_offset += s_lr.readLine(line); if (s_records[record_i] == null) s_records[record_i] = S.stockObject.clone(); s_records[record_i].fromText(line); record_i++; } // Nullify other records while (record_i < s_records.length) s_records[record_i++] = null; // Put in cache s_records_cache.put(cache_key, s_records); s_last_offset = s_start_offset; } // Do Cartesian product between records to find // overlapping pairs for (int i_r = 0; i_r < r_records.length && r_records[i_r] != null; i_r++) { for (int i_s = 0; i_s < s_records.length && s_records[i_s] != null; i_s++) { if (r_records[i_r].isIntersected(s_records[i_s])) { result_count++; if (output != null) { output.collect((S1) r_records[i_r], (S2) s_records[i_s]); } } } } // ///////////////////////////////////////////////////////////////// } else { // Add a new pair to node pairs to be tested // Go down one level if possible int new_r_node, new_s_node; if (!r_leaf) { new_r_node = (r_node + i) * R.degree + 1; } else { new_r_node = r_node + i; } if (!s_leaf) { new_s_node = (s_node + j) * S.degree + 1; } else { new_s_node = s_node + j; } long new_pair = (((long) new_r_node) << 32) | new_s_node; nodesToJoin.put(new_pair); } } } } } return result_count; }
From source file:edu.umn.cs.spatialHadoop.core.RTree.java
License:Open Source License
/** * Performs a spatial join between records in two R-trees * @param R//from w w w. ja v a 2 s .c o m * @param S * @param output * @return * @throws IOException * SuppresWarnings("resource") is used because we create LineReaders on the * internal data stream of both R and S. We do not want to close the * LineReader because it will subsequently close the internal data stream * of R and S which is something we want to avoid because both R and S are * not created by this function and it should not free these resources. */ protected static <S1 extends Shape, S2 extends Shape> int spatialJoinDisk(final RTree<S1> R, final RTree<S2> S, final ResultCollector2<S1, S2> output, final Reporter reporter) throws IOException { // Reserve locations for nodes MBRs and data offset [start, end) final Rectangle[] r_nodes = new Rectangle[R.degree]; for (int i = 0; i < r_nodes.length; i++) r_nodes[i] = new Rectangle(); final int[] r_data_offset = new int[R.degree + 1]; final Rectangle[] s_nodes = new Rectangle[S.degree]; for (int i = 0; i < s_nodes.length; i++) s_nodes[i] = new Rectangle(); final int[] s_data_offset = new int[S.degree + 1]; PriorityQueue<Long> nodesToJoin = new PriorityQueue<Long>() { { initialize(R.leafNodeCount + S.leafNodeCount); } @Override protected boolean lessThan(Object a, Object b) { return ((Long) a) < ((Long) b); } }; nodesToJoin.put(0L); LruCache<Integer, Shape[]> r_records_cache = new LruCache<Integer, Shape[]>(R.degree * 2); LruCache<Integer, Shape[]> s_records_cache = new LruCache<Integer, Shape[]>(S.degree * R.degree * 4); Text line = new Text2(); int result_count = 0; LineReader r_lr = null, s_lr = null; // Last offset read from r and s int r_last_offset = 0; int s_last_offset = 0; while (nodesToJoin.size() > 0) { long nodes_to_join = nodesToJoin.pop(); int r_node = (int) (nodes_to_join >>> 32); int s_node = (int) (nodes_to_join & 0xFFFFFFFF); // Read all R nodes int r_mbrsToTest = r_node == 0 ? 
1 : R.degree; boolean r_leaf = r_node * R.degree + 1 >= R.nodeCount; long nodeOffset = NodeSize * r_node; R.structure.seek(nodeOffset); for (int i = 0; i < r_mbrsToTest; i++) { r_data_offset[i] = R.structure.readInt(); r_nodes[i].readFields(R.structure); } r_data_offset[r_mbrsToTest] = (r_node + r_mbrsToTest) == R.nodeCount ? R.treeSize : R.structure.readInt(); // Read all S nodes int s_mbrsToTest = s_node == 0 ? 1 : S.degree; boolean s_leaf = s_node * S.degree + 1 >= S.nodeCount; if (r_leaf != s_leaf) { // This case happens when the two trees are of different heights if (r_leaf) r_mbrsToTest = 1; else s_mbrsToTest = 1; } nodeOffset = NodeSize * s_node; S.structure.seek(nodeOffset); for (int i = 0; i < s_mbrsToTest; i++) { s_data_offset[i] = S.structure.readInt(); s_nodes[i].readFields(S.structure); } s_data_offset[s_mbrsToTest] = (s_node + s_mbrsToTest) == S.nodeCount ? S.treeSize : S.structure.readInt(); // Find overlapping nodes by Cartesian product for (int i = 0; i < r_mbrsToTest; i++) { for (int j = 0; j < s_mbrsToTest; j++) { if (r_nodes[i].isIntersected(s_nodes[j])) { if (r_leaf && s_leaf) { // Reached leaf nodes in both trees. 
Start comparing records int r_start_offset = r_data_offset[i]; int r_end_offset = r_data_offset[i + 1]; int s_start_offset = s_data_offset[j]; int s_end_offset = s_data_offset[j + 1]; /////////////////////////////////////////////////////////////////// // Read or retrieve r_records Shape[] r_records = r_records_cache.get(r_start_offset); if (r_records == null) { int cache_key = r_start_offset; r_records = r_records_cache.popUnusedEntry(); if (r_records == null) { r_records = new Shape[R.degree * 2]; } // Need to read it from stream if (r_last_offset != r_start_offset) { long seekTo = r_start_offset + R.treeStartOffset; R.data.seek(seekTo); r_lr = new LineReader(R.data); } int record_i = 0; while (r_start_offset < r_end_offset) { r_start_offset += r_lr.readLine(line); if (r_records[record_i] == null) r_records[record_i] = R.stockObject.clone(); r_records[record_i].fromText(line); record_i++; } r_last_offset = r_start_offset; // Nullify other records while (record_i < r_records.length) r_records[record_i++] = null; r_records_cache.put(cache_key, r_records); } // Read or retrieve s_records Shape[] s_records = s_records_cache.get(s_start_offset); if (s_records == null) { int cache_key = s_start_offset; // Need to read it from stream if (s_lr == null || s_last_offset != s_start_offset) { // Need to reposition s_lr (LineReader of S) long seekTo = s_start_offset + S.treeStartOffset; S.data.seek(seekTo); s_lr = new LineReader(S.data); } s_records = s_records_cache.popUnusedEntry(); if (s_records == null) { s_records = new Shape[S.degree * 2]; } int record_i = 0; while (s_start_offset < s_end_offset) { s_start_offset += s_lr.readLine(line); if (s_records[record_i] == null) s_records[record_i] = S.stockObject.clone(); s_records[record_i].fromText(line); record_i++; } // Nullify other records while (record_i < s_records.length) s_records[record_i++] = null; // Put in cache s_records_cache.put(cache_key, s_records); s_last_offset = s_start_offset; } // Do Cartesian product 
between records to find overlapping pairs for (int i_r = 0; i_r < r_records.length && r_records[i_r] != null; i_r++) { for (int i_s = 0; i_s < s_records.length && s_records[i_s] != null; i_s++) { if (r_records[i_r].isIntersected(s_records[i_s]) && !r_records[i_r].equals(s_records[i_s])) { result_count++; if (output != null) { output.collect((S1) r_records[i_r], (S2) s_records[i_s]); } } } } /////////////////////////////////////////////////////////////////// } else { // Add a new pair to node pairs to be tested // Go down one level if possible int new_r_node, new_s_node; if (!r_leaf) { new_r_node = (r_node + i) * R.degree + 1; } else { new_r_node = r_node + i; } if (!s_leaf) { new_s_node = (s_node + j) * S.degree + 1; } else { new_s_node = s_node + j; } long new_pair = (((long) new_r_node) << 32) | new_s_node; nodesToJoin.put(new_pair); } } } } reporter.progress(); } return result_count; }
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNN.ShapeWithDistance<Partition>>() { {/* w ww . j a v a 2 s . c o m*/ initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { 
((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for 
(ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() { {/*from w w w. ja va2 s .c om*/ initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { 
((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for 
(ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License:Open Source License
private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params) throws IOException, InterruptedException { int iterations = 0; FileSystem fs = inFile.getFileSystem(params); Point queryPoint = (Point) OperationsParams.getShape(params, "point"); int k = params.getInt("k", 1); // Top-k objects are retained in this object PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k); SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile); double kthDistance = Double.MAX_VALUE; if (gIndex != null) { // There is a global index, use it PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() { {//from ww w . ja va 2 s . c om initialize(gIndex.size()); } @Override protected boolean lessThan(Object a, Object b) { return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance; } }; for (Partition p : gIndex) { double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y); partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance)); } while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) { ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop(); // Process this partition Path partitionPath = new Path(inFile, partitionToProcess.shape.filename); long length = fs.getFileStatus(partitionPath).getLen(); FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]); RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { 
((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); if (distance <= kthDistance) knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); if (knn.size() >= k) kthDistance = knn.top().distance; } } else { // No global index, have to scan the whole file Job job = new Job(params); SpatialInputFormat3.addInputPath(job, inFile); List<InputSplit> splits = inputFormat.getSplits(job); for (InputSplit split : splits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(split, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(split, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(split, params); } else { throw new RuntimeException("Unknown record reader"); } iterations++; while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); for (Shape shape : shapes) { double distance = shape.distanceTo(queryPoint.x, queryPoint.y); knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance)); } } reader.close(); } if (knn.size() >= k) kthDistance = knn.top().distance; } long resultCount = knn.size(); if (outPath != null && params.getBoolean("output", true)) { FileSystem outFS = outPath.getFileSystem(params); PrintStream ps = new PrintStream(outFS.create(outPath)); Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount); resultsOrdered.setSize((int) resultCount); while (knn.size() > 0) { ShapeWithDistance<S> nextAnswer = knn.pop(); resultsOrdered.set(knn.size(), nextAnswer); } Text text = new Text(); for 
(ShapeWithDistance<S> answer : resultsOrdered) { text.clear(); TextSerializerHelper.serializeDouble(answer.distance, text, ','); answer.shape.toText(text); ps.println(text); } ps.close(); } TotalIterations.addAndGet(iterations); return resultCount; }
From source file:org.apache.mahout.classifier.bayes.BayesClassifier.java
License:Apache License
/**
 * Classify the document and return the top <code>numResults</code>
 *
 * @param model The model
 * @param document The document to classify
 * @param defaultCategory The default category to assign
 * @param numResults The maximum number of results to return, ranked by score. Ties are broken by comparing the
 *          category
 * @return A Collection of {@link ClassifierResult}s.
 */
@Override
public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory,
    int numResults) {
  Collection<String> labels = model.getLabels();
  PriorityQueue<ClassifierResult> ranked = new ClassifierResultPriorityQueue(numResults);

  // Only labels with a strictly positive document weight are candidates
  for (String label : labels) {
    double weight = documentWeight(model, label, document);
    if (weight > 0.0) {
      ranked.insert(new ClassifierResult(label, weight));
    }
  }

  // Drain the queue in pop order; pop() yields null once the queue is empty
  Deque<ClassifierResult> results = new LinkedList<ClassifierResult>();
  for (ClassifierResult next = ranked.pop(); next != null; next = ranked.pop()) {
    results.addLast(next);
  }

  // No label qualified: fall back to the default category with a zero score
  if (results.isEmpty()) {
    results.add(new ClassifierResult(defaultCategory, 0));
  }
  return results;
}
From source file:org.apache.mahout.classifier.cbayes.CBayesClassifier.java
License:Apache License
/** * Classify the document and return the top <code>numResults</code> * * @param model The model/* w w w . ja v a2s . co m*/ * @param document The document to classify * @param defaultCategory The default category to assign * @param numResults The maximum number of results to return, ranked by score. Ties are broken by comparing the * category * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s. */ @Override public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) { Collection<String> categories = model.getLabels(); PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults); ClassifierResult tmp; for (String category : categories) { double prob = documentWeight(model, category, document); if (prob < 0.0) { tmp = new ClassifierResult(category, prob); pq.insert(tmp); } } Deque<ClassifierResult> result = new LinkedList<ClassifierResult>(); while ((tmp = pq.pop()) != null) { result.addLast(tmp); } if (result.isEmpty()) { result.add(new ClassifierResult(defaultCategory, 0)); } return result; }