List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
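For orientation, a minimal sketch of calling maxDoc() through a DirectoryReader (the index path "/path/to/index" is a placeholder, not taken from the examples below). maxDoc() returns one greater than the largest possible document number and still counts deleted documents, whereas numDocs() excludes them.

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // "/path/to/index" is a placeholder; point it at an existing Lucene index
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            // maxDoc() includes deleted documents; numDocs() does not
            System.out.println("maxDoc=" + reader.maxDoc());
            System.out.println("numDocs=" + reader.numDocs());
        }
    }
}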
From source file:org.zanata.hibernate.search.TextFlowIdFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    for (Long tfId : textFlowIds) {
        Term term = new Term("id", tfId.toString());
        TermDocs termDocs = reader.termDocs(term);
        while (termDocs.next()) {
            bitSet.set(termDocs.doc());
        }
    }
    return bitSet;
}
From source file:perf.IndexAndSearchOpenStreetMaps.java
License:Apache License
private static void queryIndex(String queryClass, int gons, int nearestTopN, String polyFile, boolean preBuildQueries, Double filterPercent, boolean doDistanceSort) throws IOException { IndexSearcher[] searchers = new IndexSearcher[NUM_PARTS]; Directory[] dirs = new Directory[NUM_PARTS]; long sizeOnDisk = 0; for (int part = 0; part < NUM_PARTS; part++) { dirs[part] = FSDirectory.open(Paths.get(getName(part, doDistanceSort))); searchers[part] = new IndexSearcher(DirectoryReader.open(dirs[part])); searchers[part].setQueryCache(null); for (String name : dirs[part].listAll()) { sizeOnDisk += dirs[part].fileLength(name); }//from w ww. j a v a2 s . co m } //plotBKD(searchers[0].getIndexReader()); System.out.println("INDEX SIZE: " + (sizeOnDisk / 1024. / 1024. / 1024.) + " GB"); long bytes = 0; long maxDoc = 0; for (IndexSearcher s : searchers) { IndexReader r = s.getIndexReader(); maxDoc += r.maxDoc(); for (LeafReaderContext ctx : r.leaves()) { CodecReader cr = (CodecReader) ctx.reader(); /* for(Accountable acc : cr.getChildResources()) { System.out.println(" " + Accountables.toString(acc)); } */ bytes += cr.ramBytesUsed(); } } System.out.println("READER MB: " + (bytes / 1024. / 1024.)); System.out.println("maxDoc=" + maxDoc); double bestQPS = Double.NEGATIVE_INFINITY; // million hits per second: double bestMHPS = Double.NEGATIVE_INFINITY; if (queryClass.equals("polyFile")) { // TODO: only load the double[][] here, so that we includ the cost of making Polygon and Query in each iteration!! List<Polygon[]> polygons = readPolygons(polyFile); // Uncomment to find the lost points!! /* BooleanQuery.Builder b = new BooleanQuery.Builder(); b.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); for(Query q : queries) { b.add(q, BooleanClause.Occur.MUST_NOT); } searchers[0].search(b.build(), new SimpleCollector() { private int markerCount; private SortedNumericDocValues docValues; @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { docValues = context.reader().getSortedNumericDocValues("point"); } @Override public boolean needsScores() { return false; } @Override public void collect(int doc) { docValues.setDocument(doc); int count = docValues.count(); for (int i = 0; i < count; i++) { long encoded = docValues.valueAt(i); double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32)); double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF)); System.out.println(" WE.marker([" + docLatitude + ", " + docLongitude + "]).addTo(earth);"); } } }); */ /* { Query q = LatLonPoint.newBoxQuery("point", minLat, maxLat, minLon, maxLon); int totHits = 0; for(IndexSearcher s : searchers) { int hitCount = s.count(q); totHits += hitCount; } System.out.println("Poly file bbox total hits: " + totHits); } */ if (preBuildQueries) { System.out.println("\nUsing pre-built polygon queries, loaded from file " + polyFile); List<Query> queries = new ArrayList<>(); for (Polygon[] multiPolygon : polygons) { Query q = null; if (useLatLonPoint) { q = LatLonPoint.newPolygonQuery("point", multiPolygon); } else if (useGeoPoint) { q = new GeoPointInPolygonQuery("point", multiPolygon); } else if (useGeo3DLarge) { q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon); } else if (useGeo3D) { q = Geo3DPoint.newPolygonQuery("point", multiPolygon); } queries.add(q); } double[] result = runQueries(searchers, queries); bestQPS = result[0]; bestMHPS = result[1]; } else { System.out.println("\nUsing on-the-fly polygon queries, loaded from file " + polyFile); for (int iter = 0; 
iter < ITERS; iter++) { long tStart = System.nanoTime(); long totHits = 0; int queryCount = 0; for (Polygon[] multiPolygon : polygons) { // We do this to keep the benchmark honest, so any construction cost of a polygon is included in our run time measure: multiPolygon = clonePolygon(multiPolygon); Query q; if (useLatLonPoint) { q = LatLonPoint.newPolygonQuery("point", multiPolygon); } else if (useGeoPoint) { q = new GeoPointInPolygonQuery("point", multiPolygon); } else { q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon); } for (IndexSearcher s : searchers) { int hitCount = s.count(q); totHits += hitCount; } queryCount++; } long tEnd = System.nanoTime(); double elapsedSec = (tEnd - tStart) / 1000000000.0; double qps = queryCount / elapsedSec; double mhps = (totHits / 1000000.0) / elapsedSec; System.out.println(String.format(Locale.ROOT, "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d", iter, mhps, qps, elapsedSec, queryCount, totHits)); if (qps > bestQPS) { System.out.println(" ***"); bestQPS = qps; bestMHPS = mhps; } } } } else if (preBuildQueries) { System.out.println("\nUsing pre-built queries"); double[] result = runQueries(searchers, makeQueries(queryClass, gons)); bestQPS = result[0]; bestMHPS = result[1]; } else { System.out.println("\nUsing on-the-fly queries"); // Create regularly spaced shapes in a grid around London, UK: int STEPS = 5; double MIN_LAT = 51.0919106; double MAX_LAT = 51.6542719; double MIN_LON = -0.3867282; double MAX_LON = 0.8492337; // makeRegularPoly has insanely slow math, so make the double[]'s here. // we still form the query inside the benchmark loop (e.g. to account for preprocessing) ArrayList<double[][]> polys = new ArrayList<double[][]>(225); if ("poly".equals(queryClass)) { for (int latStep = 0; latStep < STEPS; latStep++) { double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStep = 0; lonStep < STEPS; lonStep++) { double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS; for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) { double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) { double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS; double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0; double centerLat = (lat + latEnd) / 2.0; double centerLon = (lon + lonEnd) / 2.0; polys.add(makeRegularPoly(centerLat, centerLon, distanceMeters, gons)); } } } } } for (int iter = 0; iter < ITERS; iter++) { long tStart = System.nanoTime(); long totHits = 0; double totNearestDistance = 0.0; int queryCount = 0; for (int latStep = 0; latStep < STEPS; latStep++) { double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStep = 0; lonStep < STEPS; lonStep++) { double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS; for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) { double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) { double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS; double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0; double centerLat = (lat + latEnd) / 2.0; double centerLon = (lon + lonEnd) / 2.0; ScoreDoc[] nearestHits = null; Query q = null; switch (queryClass) { case "distance": if (useGeo3D || useGeo3DLarge) { q = Geo3DPoint.newDistanceQuery("point", centerLat, centerLon, distanceMeters); } else 
if (useLatLonPoint) { q = LatLonPoint.newDistanceQuery("point", centerLat, centerLon, distanceMeters); } else if (useGeoPoint) { q = new GeoPointDistanceQuery("point", centerLat, centerLon, distanceMeters); } else { throw new AssertionError(); } break; case "poly": double[][] poly = polys.get(queryCount); //System.out.println("poly lats: " + Arrays.toString(poly[0])); //System.out.println("poly lons: " + Arrays.toString(poly[1])); if (useGeo3DLarge) { //System.out.println("POLY:\n lats=" + Arrays.toString(poly[0]) + "\n lons=" + Arrays.toString(poly[1])); q = Geo3DPoint.newLargePolygonQuery("point", new Polygon(poly[0], poly[1])); } else if (useGeo3D) { q = Geo3DPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1])); } else if (useLatLonPoint) { q = LatLonPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1])); } else if (useGeoPoint) { q = new GeoPointInPolygonQuery("point", new Polygon(poly[0], poly[1])); } else { throw new AssertionError(); } break; case "box": if (useGeo3D || useGeo3DLarge) { q = Geo3DPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd); } else if (useLatLonPoint) { q = LatLonPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd); } else if (useGeoPoint) { q = new GeoPointInBBoxQuery("point", lat, latEnd, lon, lonEnd); } else { throw new AssertionError(); } break; case "nearest": if (useLatLonPoint) { if (searchers.length != 1) { // TODO throw new AssertionError(); } nearestHits = LatLonPoint.nearest(searchers[0], "point", (lat + latEnd) / 2.0, (lon + lonEnd) / 2.0, nearestTopN).scoreDocs; if (false && iter == 0) { System.out.println("\n" + nearestHits.length + " nearest:"); for (ScoreDoc hit : nearestHits) { System.out.println(" " + ((FieldDoc) hit).fields[0]); } } for (ScoreDoc hit : nearestHits) { totNearestDistance += (Double) ((FieldDoc) hit).fields[0]; } } else { throw new AssertionError(); } break; default: throw new AssertionError("unknown queryClass " + queryClass); } // TODO: do this somewhere else? 
if (filterPercent != null) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(q, BooleanClause.Occur.MUST); builder.add(new RandomQuery(filterPercent), BooleanClause.Occur.FILTER); q = builder.build(); } if (q != null) { if (doDistanceSort) { Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("point", centerLat, centerLon)); for (IndexSearcher s : searchers) { TopFieldDocs hits = s.search(q, 10, sort); totHits += hits.totalHits; } } else { //System.out.println("\nRUN QUERY " + q); //long t0 = System.nanoTime(); for (IndexSearcher s : searchers) { int hitCount = s.count(q); totHits += hitCount; if (false && iter == 0) { System.out.println("q=" + q + " lat=" + centerLat + " lon=" + centerLon + " distanceMeters=" + distanceMeters + " hits: " + hitCount); } } } } else { assert nearestHits != null; totHits += nearestHits.length; } queryCount++; //throw new RuntimeException("now stop"); } } } } long tEnd = System.nanoTime(); double elapsedSec = (tEnd - tStart) / 1000000000.0; double qps = queryCount / elapsedSec; double mhps = (totHits / 1000000.0) / elapsedSec; if (queryClass.equals("nearest")) { System.out.println(String.format(Locale.ROOT, "ITER %d: %.2f QPS (%.2f sec for %d queries), totNearestDistance=%.10f, totHits=%d", iter, qps, elapsedSec, queryCount, totNearestDistance, maxDoc)); } else { System.out.println(String.format(Locale.ROOT, "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d", iter, mhps, qps, elapsedSec, queryCount, totHits)); } if (qps > bestQPS) { System.out.println(" ***"); bestQPS = qps; bestMHPS = mhps; } } } System.out.println("BEST M hits/sec: " + bestMHPS); System.out.println("BEST QPS: " + bestQPS); for (IndexSearcher s : searchers) { s.getIndexReader().close(); } IOUtils.close(dirs); }
From source file:perf.IndexAndSearchOpenStreetMapsGeo3D.java
License:Apache License
private static void queryIndex() throws IOException {
    Directory dir = FSDirectory.open(Paths.get("bkdtestgeo3d"));
    System.out.println("DIR: " + dir);
    IndexReader r = DirectoryReader.open(dir);
    System.out.println("maxDoc=" + r.maxDoc());
    IndexSearcher s = new IndexSearcher(r);

    //SegmentReader sr = (SegmentReader) r.leaves().get(0).reader();
    //BKDTreeReader reader = ((BKDTreeSortedNumericDocValues) sr.getSortedNumericDocValues("point")).getBKDTreeReader();
    //System.out.println("reader MB heap=" + (reader.ramBytesUsed()/1024/1024.));

    // London, UK:
    int STEPS = 5;
    double MIN_LAT = 51.0919106;
    double MAX_LAT = 51.6542719;
    double MIN_LON = -0.3867282;
    double MAX_LON = 0.8492337;

    for (int iter = 0; iter < 100; iter++) {
        long tStart = System.nanoTime();
        long totHits = 0;
        int queryCount = 0;
        for (int latStep = 0; latStep < STEPS; latStep++) {
            double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
            for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                    double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                    for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                        double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;
                        Query q = new PointInGeo3DShapeQuery(PlanetModel.WGS84, "point",
                                GeoBBoxFactory.makeGeoBBox(PlanetModel.WGS84, toRadians(latEnd), toRadians(lat),
                                        toRadians(lon), toRadians(lonEnd)));
                        TotalHitCountCollector c = new TotalHitCountCollector();
                        //long t0 = System.nanoTime();
                        s.search(q, c);
                        //System.out.println("\nITER: now query lat=" + lat + " latEnd=" + latEnd + " lon=" + lon + " lonEnd=" + lonEnd);
                        //Bits hits = reader.intersect(lat, latEnd, lon, lonEnd);
                        //System.out.println("  total hits: " + hitCount);
                        //totHits += ((FixedBitSet) hits).cardinality();
                        //System.out.println("  add tot " + c.getTotalHits());
                        totHits += c.getTotalHits();
                        queryCount++;
                    }
                }
            }
        }
        long tEnd = System.nanoTime();
        System.out.println("ITER: " + iter + " " + ((tEnd - tStart) / 1000000000.0) + " sec; totHits=" + totHits
                + "; " + queryCount + " queries");
    }
    IOUtils.close(r, dir);
}
From source file:perf.PKLookupPerfTest3X.java
License:Apache License
public static void main(String[] args) throws IOException { final Directory dir; final String dirImpl = args[0]; final String dirPath = args[1]; final int numDocs = Integer.parseInt(args[2]); final int numLookups = Integer.parseInt(args[3]); final long seed = Long.parseLong(args[4]); if (dirImpl.equals("MMapDirectory")) { dir = new MMapDirectory(new File(dirPath)); } else if (dirImpl.equals("NIOFSDirectory")) { dir = new NIOFSDirectory(new File(dirPath)); } else if (dirImpl.equals("SimpleFSDirectory")) { dir = new SimpleFSDirectory(new File(dirPath)); } else {/*ww w . j a va 2 s .com*/ throw new RuntimeException("unknown directory impl \"" + dirImpl + "\""); } if (!new File(dirPath).exists()) { createIndex(dir, numDocs); } final IndexReader r = IndexReader.open(dir); System.out.println("Reader=" + r); final IndexReader[] subs = r.getSequentialSubReaders(); final TermDocs[] termDocsArr = new TermDocs[subs.length]; for (int subIdx = 0; subIdx < subs.length; subIdx++) { termDocsArr[subIdx] = subs[subIdx].termDocs(); } final int maxDoc = r.maxDoc(); final Random rand = new Random(seed); for (int cycle = 0; cycle < 10; cycle++) { System.out.println("Cycle: " + (cycle == 0 ? "warm" : "test")); System.out.println(" Lookup..."); final Term[] lookup = new Term[numLookups]; final int[] docIDs = new int[numLookups]; final Term protoTerm = new Term("id"); for (int iter = 0; iter < numLookups; iter++) { // Base 36, prefixed with 0s to be length 6 (= 2.2 B) lookup[iter] = protoTerm.createTerm( String.format("%6s", Integer.toString(rand.nextInt(maxDoc), Character.MAX_RADIX)) .replace(' ', '0')); } Arrays.fill(docIDs, -1); final AtomicBoolean failed = new AtomicBoolean(false); final Term t = new Term("id", ""); final long tStart = System.currentTimeMillis(); for (int iter = 0; iter < numLookups; iter++) { //System.out.println("lookup " + lookup[iter].utf8ToString()); int base = 0; int found = 0; for (int subIdx = 0; subIdx < subs.length; subIdx++) { final IndexReader sub = subs[subIdx]; if (!DO_DOC_LOOKUP) { final int df = sub.docFreq(lookup[iter]); if (df != 0) { if (df != 1) { // Only 1 doc should be found failed.set(true); } found++; if (found > 1) { // Should have been found only once across segs System.out.println("FAIL0"); failed.set(true); } } } else { final TermDocs termDocs = termDocsArr[subIdx]; termDocs.seek(lookup[iter]); if (termDocs.next()) { found++; if (found > 1) { // Should have been found only once across segs failed.set(true); } final int docID = termDocs.doc(); if (docIDs[iter] != -1) { // Same doc should only be seen once failed.set(true); } docIDs[iter] = base + docID; if (termDocs.next()) { // Only 1 doc should be found failed.set(true); } } } base += sub.maxDoc(); } } final long tLookup = (System.currentTimeMillis() - tStart); // cycle 0 is for warming //System.out.println(" " + (cycle == 0 ? "WARM: " : "") + tLookup + " msec for " + numLookups + " lookups (" + (1000*tLookup/numLookups) + " us per lookup) + totSeekMS=" + (BlockTermsReader.totSeekNanos/1000000.)); System.out.println(" " + (cycle == 0 ? 
"WARM: " : "") + tLookup + " msec for " + numLookups + " lookups (" + (1000.0 * tLookup / numLookups) + " us per lookup)"); if (failed.get()) { throw new RuntimeException("at least one lookup produced more than one result"); } if (DO_DOC_LOOKUP) { System.out.println(" Verify..."); for (int iter = 0; iter < numLookups; iter++) { if (docIDs[iter] == -1) { throw new RuntimeException("lookup of " + lookup[iter] + " failed iter=" + iter); } final String found = r.document(docIDs[iter]).get("id"); if (!found.equals(lookup[iter].text())) { throw new RuntimeException( "lookup of docid=" + lookup[iter].text() + " hit wrong docid=" + found); } } } } // System.out.println("blocks=" + BlockTermsReader.totBlockReadCount + " scans=" + BlockTermsReader.totScanCount + " " + (((float) BlockTermsReader.totScanCount))/(BlockTermsReader.totBlockReadCount) + " scans/block"); r.close(); dir.close(); }
From source file:perf.RandomFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) {
    final Random rand = new Random(42);
    final int maxDoc = reader.maxDoc();
    OpenBitSet bits = new OpenBitSet(maxDoc);
    for (int docID = 0; docID < maxDoc; docID++) {
        if (rand.nextDouble() <= pctKeep) {
            bits.fastSet(docID);
        }
    }
    System.out.println("rfilt " + bits.cardinality());
    return bits;
}
From source file:pretraga.IsolationSimilarity.java
public void test(String vec) {
    List<String> vector = processInput(vec);
    HashMap<String, Long> map = new HashMap<>();
    try {
        Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        List<Integer> docId = getDocumentsFromVector(vector, reader, searcher);
        for (int i = 0; i < docId.size(); i++) {
            Fields ff = reader.getTermVectors(docId.get(i));
            Terms terms = ff.terms(CONTENT);
            TermsEnum te = terms.iterator();
            Object tmp = te.next();
            while (tmp != null) {
                BytesRef by = (BytesRef) tmp;
                String term = by.utf8ToString();
                // Note: sim stays null (and the calls below throw) unless the searcher uses ClassicSimilarity
                ClassicSimilarity sim = null;
                if (searcher.getSimilarity(true) instanceof ClassicSimilarity) {
                    sim = (ClassicSimilarity) searcher.getSimilarity(true);
                }
                float idf = sim.idf(te.docFreq(), reader.maxDoc());
                float tf = sim.tf(te.totalTermFreq());
                //System.out.println("idf = " + idf + ", tf = " + tf + ", docF: " + te.totalTermFreq());
                TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq());
                CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(), terms.getDocCount(),
                        terms.getSumTotalTermFreq(), terms.getSumDocFreq());
                Document d = reader.document(docId.get(i));
                if (vector.contains(term)) {
                    float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0))
                            .score(docId.get(i), te.totalTermFreq());
                    System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term);
                }
                tmp = te.next();
            }
            /*Iterator<String> ss = ff.iterator();
            while (ss.hasNext()) {
                String fieldString = ss.next();
                System.out.println(fieldString);
            }*/
        }
    } catch (Exception e) {
        // exceptions are silently swallowed in this example
    }
}
From source file:proj.zoie.api.ZoieSegmentReader.java
License:Apache License
private void init(IndexReader reader) throws IOException {
    int maxDoc = reader.maxDoc();
    _uidArray = new long[maxDoc];
    TermPositions tp = null;
    byte[] payloadBuffer = new byte[8]; // eight bytes for a long
    try {
        tp = reader.termPositions(UID_TERM);
        int idx = 0;
        while (tp.next()) {
            int doc = tp.doc();
            assert doc < maxDoc;
            while (idx < doc)
                _uidArray[idx++] = DELETED_UID; // fill the gap
            tp.nextPosition();
            tp.getPayload(payloadBuffer, 0);
            long uid = bytesToLong(payloadBuffer);
            if (uid < _minUID)
                _minUID = uid;
            if (uid > _maxUID)
                _maxUID = uid;
            _uidArray[idx++] = uid;
        }
        while (idx < maxDoc)
            _uidArray[idx++] = DELETED_UID; // fill the gap
    } finally {
        if (tp != null) {
            tp.close();
        }
    }
}
From source file:psidev.psi.mi.search.engine.impl.AbstractSearchEngine.java
License:Apache License
public SearchResult<T> searchAll(Integer firstResult, Integer maxResults) throws SearchEngineException {
    if (firstResult == null)
        firstResult = 0;
    if (maxResults == null)
        maxResults = MAX_TOP_RESULTS;

    IndexReader reader = indexSearcher.getIndexReader();
    int totalCount = reader.maxDoc();

    if (maxResults == 0) {
        return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                new WildcardQuery(new Term("", "*")));
    }

    // this is a hack to ignore any header introduced in the index by mistake (first development versions)
    if (reader.isDeleted(0)) {
        firstResult++;
        totalCount--;
    }

    if (firstResult > totalCount) {
        // closeIndexReader(reader);
        return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                new WildcardQuery(new Term("", "*")));
    }

    int maxIndex = Math.min(totalCount, firstResult + maxResults);

    List<T> dataObjects = new ArrayList<T>();
    for (int i = firstResult; i < maxIndex; i++) {
        try {
            Document doc = reader.document(i);
            T data = (T) createDocumentBuilder().createData(doc);
            dataObjects.add(data);
        } catch (Exception e) {
            // closeIndexReader(reader);
            throw new SearchEngineException(e);
        }
    }

    // closeIndexReader(reader);
    return new SearchResult(dataObjects, totalCount, firstResult, maxResults,
            new WildcardQuery(new Term("", "*")));
}
From source file:solr2155.lucene.spatial.geohash.GeoHashPrefixFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    final TermsEnumCompatibility termsEnum = new TermsEnumCompatibility(reader, fieldName); //Lucene 4 compatibility wrapper
    final TermDocs termDocs = reader.termDocs();
    Term term = termsEnum.term(); //the most recent term examined via termsEnum.term()
    if (term == null)
        return bits;

    //TODO Add a precision short-circuit so that we are not accurate on the edge but we're faster.

    //TODO An array based nodes impl would be more efficient; or a stack of iterators. LinkedList conveniently has bulk add to beginning.
    LinkedList<GridNode> nodes = new LinkedList<GridNode>(
            gridReferenceSystem.getSubNodes(geoShape.boundingRectangle()));

    while (!nodes.isEmpty() && term != null) {
        final GridNode node = nodes.removeFirst();
        assert node.length() > 0;
        if (!node.contains(term.text()) && node.before(term.text()))
            continue; //short circuit, moving >= the next indexed term

        IntersectCase intersection = geoShape.intersect(node.getRectangle());
        if (intersection == IntersectCase.OUTSIDE)
            continue;

        TermsEnumCompatibility.SeekStatus seekStat = termsEnum.seek(node.getTermVal());
        term = termsEnum.term();
        if (seekStat != TermsEnumCompatibility.SeekStatus.FOUND)
            continue;

        if (intersection == IntersectCase.CONTAINS) {
            termDocs.seek(term);
            addDocs(termDocs, bits);
            term = termsEnum.next(); //move to next term
        } else { //any other intersection
            //TODO is it worth it to optimize the shape (e.g. potentially simpler polygon)?
            //GeoShape geoShape = this.geoShape.optimize(intersection);

            //We either scan through the leaf node(s), or if there are many points then we divide & conquer.
            boolean manyPoints = node.length() < gridReferenceSystem.maxLen - GRIDLEN_SCAN_THRESHOLD;

            //TODO Try variable depth strategy:
            //IF configured to do so, we could use term.freq() as an estimate on the number of places at this depth. OR, perhaps
            // make estimates based on the total known term count at this level? Or don't worry about it--use fixed depth.
            //if (manyPoints) {
            //    //Make some estimations on how many points there are at this level and how few there would need to be to set
            //    // manyPoints to false.
            //    long termsThreshold = (long) estimateNumberIndexedTerms(node.length(),geoShape.getDocFreqExpenseThreshold(node));
            //    long thisOrd = termsEnum.ord();
            //    manyPoints = (termsEnum.seek(thisOrd+termsThreshold+1) != TermsEnum.SeekStatus.END
            //            && node.contains(termsEnum.term()));
            //    termsEnum.seek(thisOrd);//return to last position
            //}

            if (!manyPoints) {
                //traverse all leaf terms within this node to see if they are within the geoShape, one by one.
                for (; term != null && node.contains(term.text()); term = termsEnum.next()) {
                    if (term.text().length() < gridReferenceSystem.maxLen) //not a leaf
                        continue;
                    final Point2D point = gridReferenceSystem.decodeXY(term.text());
                    //Filter those out of the shape.
                    if (!geoShape.contains(point))
                        continue;
                    //record
                    termDocs.seek(term);
                    addDocs(termDocs, bits);
                }
            } else {
                //divide & conquer
                nodes.addAll(0, node.getSubNodes()); //add to beginning
            }
        }
    } //node loop

    return bits;
}
From source file:solr2155.solr.search.function.GeoHashValueSource.java
License:Apache License
@SuppressWarnings({ "unchecked" }) GeoHashValueSource(String fieldName, SolrIndexSearcher searcher) throws IOException { log.info("Loading geohash field " + fieldName + " into memory."); this.fieldName = fieldName; //Get gridReferenceSystem final GridNode.GridReferenceSystem gridReferenceSystem; FieldType fieldType = searcher.getSchema().getField(fieldName).getType(); if (fieldType instanceof GeoHashField) { gridReferenceSystem = ((GeoHashField) fieldType).getGridReferenceSystem(); } else//from www . j a v a2 s. co m throw new RuntimeException( "field " + fieldName + " should be a GeoHashField, not " + fieldType.getTypeName()); //Traverse the index to load up doc2PointsCache IndexReader reader = searcher.getIndexReader(); TermsEnumCompatibility termsEnum = new TermsEnumCompatibility(reader, fieldName); TermDocs termDocs = reader.termDocs(); //cached for termsEnum.docs() calls try { while (true) { final Term term = termsEnum.next(); if (term == null) break; if (term.text().length() != gridReferenceSystem.getPrecision()) continue; Point2D point = gridReferenceSystem.decodeXY(term.text()); termDocs.seek(termsEnum.getTermEnum()); while (termDocs.next()) { final int docId = termDocs.doc(); if (docId == DocIdSetIterator.NO_MORE_DOCS) break; if (doc2PointsCache == null) doc2PointsCache = (List<Point2D>[]) new List[reader.maxDoc()];//java generics hack List<Point2D> points = doc2PointsCache[docId]; if (points == null) { points = new ArrayList<Point2D>(DEFAULT_ARRAY_CAPACITY); doc2PointsCache[docId] = points; } points.add(point); } } } finally { // in Lucene 3 these should be closed (not in Lucene 4) termDocs.close(); termsEnum.close(); } //Log statistics if (log.isInfoEnabled()) { int min = Integer.MAX_VALUE, sum = 0, max = 0; int dlen = 0; if (doc2PointsCache != null) { dlen = doc2PointsCache.length; for (List<Point2D> point2Ds : doc2PointsCache) { int plen = (point2Ds == null ? 0 : point2Ds.size()); min = Math.min(min, plen); max = Math.max(max, plen); sum += plen; } } if (min == Integer.MAX_VALUE) min = 0; float avg = (float) sum / dlen; log.info("field '" + fieldName + "' in RAM: loaded min/avg/max per doc #: (" + min + "," + avg + "," + max + ") #" + dlen); } }