List of usage examples for org.apache.lucene.search.IndexSearcher#count
public int count(Query query) throws IOException
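The method returns the number of documents matching the query, without scoring or collecting the hits themselves. Before the collected examples, here is a minimal, self-contained sketch of the call; the index path /path/to/index and the "title" field are hypothetical placeholders, not taken from the sources below:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CountExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; substitute your own.
        Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // count(Query) returns only the number of matching documents,
            // so it is cheaper than a full search(...) that gathers top hits.
            int hits = searcher.count(new TermQuery(new Term("title", "lucene")));
            System.out.println("matching docs: " + hits);
        }
    }
}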
From source file:org.elasticsearch.search.query.QueryPhaseTests.java
License:Apache License
private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
    TestSearchContext context = new TestSearchContext();
    context.parsedQuery(new ParsedQuery(query));
    context.setSize(0);
    IndexSearcher searcher = new IndexSearcher(reader);
    final AtomicBoolean collected = new AtomicBoolean();
    IndexSearcher contextSearcher = new IndexSearcher(reader) {
        protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
            collected.set(true);
            super.search(leaves, weight, collector);
        }
    };

    final boolean rescore = QueryPhase.execute(context, contextSearcher);
    assertFalse(rescore);
    assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
    assertEquals(shouldCollect, collected.get());
}
From source file:org.elasticsearch.search.slice.DocValuesSliceQueryTests.java
License:Apache License
public void testSearch() throws Exception {
    final int numDocs = randomIntBetween(100, 200);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int max = randomIntBetween(2, 10);
    int[] sliceCounters1 = new int[max];
    int[] sliceCounters2 = new int[max];
    Set<String> keys = new HashSet<>();
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        String uuid = UUIDs.base64UUID();
        int intValue = randomInt();
        long doubleValue = NumericUtils.doubleToSortableLong(randomDouble());
        doc.add(new StringField("uuid", uuid, Field.Store.YES));
        doc.add(new SortedNumericDocValuesField("intField", intValue));
        doc.add(new SortedNumericDocValuesField("doubleField", doubleValue));
        w.addDocument(doc);
        sliceCounters1[Math.floorMod(BitMixer.mix((long) intValue), max)]++;
        sliceCounters2[Math.floorMod(BitMixer.mix(doubleValue), max)]++;
        keys.add(uuid);
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);

    for (int id = 0; id < max; id++) {
        DocValuesSliceQuery query1 = new DocValuesSliceQuery("intField", id, max);
        assertThat(searcher.count(query1), equalTo(sliceCounters1[id]));

        DocValuesSliceQuery query2 = new DocValuesSliceQuery("doubleField", id, max);
        assertThat(searcher.count(query2), equalTo(sliceCounters2[id]));

        searcher.search(query1, new Collector() {
            @Override
            public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
                return new LeafCollector() {
                    @Override
                    public void setScorer(Scorer scorer) throws IOException {
                    }

                    @Override
                    public void collect(int doc) throws IOException {
                        Document d = context.reader().document(doc, Collections.singleton("uuid"));
                        String uuid = d.get("uuid");
                        assertThat(keys.contains(uuid), equalTo(true));
                        keys.remove(uuid);
                    }
                };
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
    }
    assertThat(keys.size(), equalTo(0));
    w.close();
    reader.close();
    dir.close();
}
From source file:org.elasticsearch.search.slice.TermsSliceQueryTests.java
License:Apache License
public void testSearch() throws Exception {
    final int numDocs = randomIntBetween(100, 200);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, new KeywordAnalyzer());
    int max = randomIntBetween(2, 10);
    int[] sliceCounters = new int[max];
    Set<String> keys = new HashSet<>();
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        String uuid = UUIDs.base64UUID();
        BytesRef br = new BytesRef(uuid);
        int id = Math.floorMod(br.hashCode(), max);
        sliceCounters[id]++;
        doc.add(new StringField("uuid", uuid, Field.Store.YES));
        w.addDocument(doc);
        keys.add(uuid);
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);

    for (int id = 0; id < max; id++) {
        TermsSliceQuery query1 = new TermsSliceQuery("uuid", id, max);
        assertThat(searcher.count(query1), equalTo(sliceCounters[id]));

        searcher.search(query1, new Collector() {
            @Override
            public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
                return new LeafCollector() {
                    @Override
                    public void setScorer(Scorer scorer) throws IOException {
                    }

                    @Override
                    public void collect(int doc) throws IOException {
                        Document d = context.reader().document(doc, Collections.singleton("uuid"));
                        String uuid = d.get("uuid");
                        assertThat(keys.contains(uuid), equalTo(true));
                        keys.remove(uuid);
                    }
                };
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
    }
    assertThat(keys.size(), equalTo(0));
    w.close();
    reader.close();
    dir.close();
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapperUnitTests.java
License:Open Source License
public void doTestIndexSearcherWrapper(boolean sparse, boolean deletions) throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    StringField allowedField = new StringField("allowed", "yes", Store.NO);
    doc.add(allowedField);
    StringField fooField = new StringField("foo", "bar", Store.NO);
    doc.add(fooField);
    StringField deleteField = new StringField("delete", "no", Store.NO);
    doc.add(deleteField);
    w.addDocument(doc);
    if (deletions) {
        // add a document that matches foo:bar but will be deleted
        deleteField.setStringValue("yes");
        w.addDocument(doc);
        deleteField.setStringValue("no");
    }
    allowedField.setStringValue("no");
    w.addDocument(doc);
    if (sparse) {
        for (int i = 0; i < 1000; ++i) {
            w.addDocument(doc);
        }
        w.forceMerge(1);
    }
    w.deleteDocuments(new Term("delete", "yes"));

    IndexSettings settings = IndexSettingsModule.newIndexSettings("_index", Settings.EMPTY);
    BitsetFilterCache.Listener listener = new BitsetFilterCache.Listener() {
        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
        }
    };
    DirectoryReader reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(w),
            new ShardId(indexSettings.getIndex(), 0));
    BitsetFilterCache cache = new BitsetFilterCache(settings, listener);
    Query roleQuery = new TermQuery(new Term("allowed", "yes"));
    BitSet bitSet = cache.getBitSetProducer(roleQuery).getBitSet(reader.leaves().get(0));
    if (sparse) {
        assertThat(bitSet, instanceOf(SparseFixedBitSet.class));
    } else {
        assertThat(bitSet, instanceOf(FixedBitSet.class));
    }

    DocumentSubsetDirectoryReader filteredReader = DocumentSubsetReader.wrap(reader, cache, roleQuery);
    IndexSearcher searcher = new SecurityIndexSearcherWrapper.IndexSearcherWrapper(filteredReader);

    // Searching a non-existing term will trigger a null scorer
    assertEquals(0, searcher.count(new TermQuery(new Term("non_existing_field", "non_existing_value"))));

    assertEquals(1, searcher.count(new TermQuery(new Term("foo", "bar"))));

    // make sure scorers are created only once, see #1725
    assertEquals(1, searcher.count(new CreateScorerOnceQuery(new MatchAllDocsQuery())));
    IOUtils.close(reader, w, dir);
}
From source file:perf.IndexAndSearchOpenStreetMaps.java
License:Apache License
private static double[] runQueries(IndexSearcher[] searchers, List<Query> queries) throws IOException {
    double bestQPS = Double.NEGATIVE_INFINITY;
    // million hits per second:
    double bestMHPS = Double.NEGATIVE_INFINITY;
    for (int iter = 0; iter < ITERS; iter++) {
        long tStart = System.nanoTime();
        long totHits = 0;
        int count = 0;
        for (Query q : queries) {
            int hitCount = 0;
            for (IndexSearcher s : searchers) {
                hitCount += s.count(q);
            }
            if (iter == 0) {
                //System.out.println("QUERY " + count + ": " + q + " hits=" + hitCount);
                count++;
            }
            totHits += hitCount;
        }
        long tEnd = System.nanoTime();
        double elapsedSec = (tEnd - tStart) / 1000000000.0;
        double qps = queries.size() / elapsedSec;
        double mhps = (totHits / 1000000.0) / elapsedSec;
        System.out.println(String.format(Locale.ROOT,
                "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d",
                iter, mhps, qps, elapsedSec, queries.size(), totHits));
        if (qps > bestQPS) {
            System.out.println("  ***");
            bestQPS = qps;
            bestMHPS = mhps;
        }
    }
    return new double[] { bestQPS, bestMHPS };
}
From source file:perf.IndexAndSearchOpenStreetMaps.java
License:Apache License
private static void queryIndex(String queryClass, int gons, int nearestTopN, String polyFile,
        boolean preBuildQueries, Double filterPercent, boolean doDistanceSort) throws IOException {
    IndexSearcher[] searchers = new IndexSearcher[NUM_PARTS];
    Directory[] dirs = new Directory[NUM_PARTS];
    long sizeOnDisk = 0;
    for (int part = 0; part < NUM_PARTS; part++) {
        dirs[part] = FSDirectory.open(Paths.get(getName(part, doDistanceSort)));
        searchers[part] = new IndexSearcher(DirectoryReader.open(dirs[part]));
        searchers[part].setQueryCache(null);
        for (String name : dirs[part].listAll()) {
            sizeOnDisk += dirs[part].fileLength(name);
        }
    }
    //plotBKD(searchers[0].getIndexReader());
    System.out.println("INDEX SIZE: " + (sizeOnDisk / 1024. / 1024. / 1024.) + " GB");
    long bytes = 0;
    long maxDoc = 0;
    for (IndexSearcher s : searchers) {
        IndexReader r = s.getIndexReader();
        maxDoc += r.maxDoc();
        for (LeafReaderContext ctx : r.leaves()) {
            CodecReader cr = (CodecReader) ctx.reader();
            /*
            for (Accountable acc : cr.getChildResources()) {
                System.out.println("  " + Accountables.toString(acc));
            }
            */
            bytes += cr.ramBytesUsed();
        }
    }
    System.out.println("READER MB: " + (bytes / 1024. / 1024.));
    System.out.println("maxDoc=" + maxDoc);

    double bestQPS = Double.NEGATIVE_INFINITY;
    // million hits per second:
    double bestMHPS = Double.NEGATIVE_INFINITY;

    if (queryClass.equals("polyFile")) {

        // TODO: only load the double[][] here, so that we include the cost of making Polygon and Query in each iteration!!
        List<Polygon[]> polygons = readPolygons(polyFile);

        // Uncomment to find the lost points!!
        /*
        BooleanQuery.Builder b = new BooleanQuery.Builder();
        b.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
        for (Query q : queries) {
            b.add(q, BooleanClause.Occur.MUST_NOT);
        }
        searchers[0].search(b.build(), new SimpleCollector() {
            private int markerCount;
            private SortedNumericDocValues docValues;

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docValues = context.reader().getSortedNumericDocValues("point");
            }

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            public void collect(int doc) {
                docValues.setDocument(doc);
                int count = docValues.count();
                for (int i = 0; i < count; i++) {
                    long encoded = docValues.valueAt(i);
                    double docLatitude = LatLonPoint.decodeLatitude((int) (encoded >> 32));
                    double docLongitude = LatLonPoint.decodeLongitude((int) (encoded & 0xFFFFFFFF));
                    System.out.println("  WE.marker([" + docLatitude + ", " + docLongitude + "]).addTo(earth);");
                }
            }
        });
        */

        /*
        {
            Query q = LatLonPoint.newBoxQuery("point", minLat, maxLat, minLon, maxLon);
            int totHits = 0;
            for (IndexSearcher s : searchers) {
                int hitCount = s.count(q);
                totHits += hitCount;
            }
            System.out.println("Poly file bbox total hits: " + totHits);
        }
        */

        if (preBuildQueries) {
            System.out.println("\nUsing pre-built polygon queries, loaded from file " + polyFile);
            List<Query> queries = new ArrayList<>();
            for (Polygon[] multiPolygon : polygons) {
                Query q = null;
                if (useLatLonPoint) {
                    q = LatLonPoint.newPolygonQuery("point", multiPolygon);
                } else if (useGeoPoint) {
                    q = new GeoPointInPolygonQuery("point", multiPolygon);
                } else if (useGeo3DLarge) {
                    q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon);
                } else if (useGeo3D) {
                    q = Geo3DPoint.newPolygonQuery("point", multiPolygon);
                }
                queries.add(q);
            }
            double[] result = runQueries(searchers, queries);
            bestQPS = result[0];
            bestMHPS = result[1];
        } else {
            System.out.println("\nUsing on-the-fly polygon queries, loaded from file " + polyFile);
            for (int iter = 0; iter < ITERS; iter++) {
                long tStart = System.nanoTime();
                long totHits = 0;
                int queryCount = 0;
                for (Polygon[] multiPolygon : polygons) {
                    // We do this to keep the benchmark honest, so any construction cost of a polygon
                    // is included in our run time measure:
                    multiPolygon = clonePolygon(multiPolygon);
                    Query q;
                    if (useLatLonPoint) {
                        q = LatLonPoint.newPolygonQuery("point", multiPolygon);
                    } else if (useGeoPoint) {
                        q = new GeoPointInPolygonQuery("point", multiPolygon);
                    } else {
                        q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon);
                    }
                    for (IndexSearcher s : searchers) {
                        int hitCount = s.count(q);
                        totHits += hitCount;
                    }
                    queryCount++;
                }
                long tEnd = System.nanoTime();
                double elapsedSec = (tEnd - tStart) / 1000000000.0;
                double qps = queryCount / elapsedSec;
                double mhps = (totHits / 1000000.0) / elapsedSec;
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d",
                        iter, mhps, qps, elapsedSec, queryCount, totHits));
                if (qps > bestQPS) {
                    System.out.println("  ***");
                    bestQPS = qps;
                    bestMHPS = mhps;
                }
            }
        }
    } else if (preBuildQueries) {
        System.out.println("\nUsing pre-built queries");
        double[] result = runQueries(searchers, makeQueries(queryClass, gons));
        bestQPS = result[0];
        bestMHPS = result[1];
    } else {
        System.out.println("\nUsing on-the-fly queries");

        // Create regularly spaced shapes in a grid around London, UK:
        int STEPS = 5;
        double MIN_LAT = 51.0919106;
        double MAX_LAT = 51.6542719;
        double MIN_LON = -0.3867282;
        double MAX_LON = 0.8492337;

        // makeRegularPoly has insanely slow math, so make the double[]'s here.
        // we still form the query inside the benchmark loop (e.g. to account for preprocessing)
        ArrayList<double[][]> polys = new ArrayList<double[][]>(225);
        if ("poly".equals(queryClass)) {
            for (int latStep = 0; latStep < STEPS; latStep++) {
                double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
                for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                    double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                    for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                        double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                        for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                            double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;
                            double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0;
                            double centerLat = (lat + latEnd) / 2.0;
                            double centerLon = (lon + lonEnd) / 2.0;
                            polys.add(makeRegularPoly(centerLat, centerLon, distanceMeters, gons));
                        }
                    }
                }
            }
        }

        for (int iter = 0; iter < ITERS; iter++) {
            long tStart = System.nanoTime();
            long totHits = 0;
            double totNearestDistance = 0.0;
            int queryCount = 0;
            for (int latStep = 0; latStep < STEPS; latStep++) {
                double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
                for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                    double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                    for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                        double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                        for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                            double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;
                            double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0;
                            double centerLat = (lat + latEnd) / 2.0;
                            double centerLon = (lon + lonEnd) / 2.0;
                            ScoreDoc[] nearestHits = null;
                            Query q = null;

                            switch (queryClass) {
                            case "distance":
                                if (useGeo3D || useGeo3DLarge) {
                                    q = Geo3DPoint.newDistanceQuery("point", centerLat, centerLon, distanceMeters);
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newDistanceQuery("point", centerLat, centerLon, distanceMeters);
                                } else if (useGeoPoint) {
                                    q = new GeoPointDistanceQuery("point", centerLat, centerLon, distanceMeters);
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "poly":
                                double[][] poly = polys.get(queryCount);
                                //System.out.println("poly lats: " + Arrays.toString(poly[0]));
                                //System.out.println("poly lons: " + Arrays.toString(poly[1]));
                                if (useGeo3DLarge) {
                                    //System.out.println("POLY:\n  lats=" + Arrays.toString(poly[0]) + "\n  lons=" + Arrays.toString(poly[1]));
                                    q = Geo3DPoint.newLargePolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useGeo3D) {
                                    q = Geo3DPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useGeoPoint) {
                                    q = new GeoPointInPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "box":
                                if (useGeo3D || useGeo3DLarge) {
                                    q = Geo3DPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else if (useGeoPoint) {
                                    q = new GeoPointInBBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "nearest":
                                if (useLatLonPoint) {
                                    if (searchers.length != 1) {
                                        // TODO
                                        throw new AssertionError();
                                    }
                                    nearestHits = LatLonPoint.nearest(searchers[0], "point",
                                            (lat + latEnd) / 2.0, (lon + lonEnd) / 2.0, nearestTopN).scoreDocs;
                                    if (false && iter == 0) {
                                        System.out.println("\n" + nearestHits.length + " nearest:");
                                        for (ScoreDoc hit : nearestHits) {
                                            System.out.println("  " + ((FieldDoc) hit).fields[0]);
                                        }
                                    }
                                    for (ScoreDoc hit : nearestHits) {
                                        totNearestDistance += (Double) ((FieldDoc) hit).fields[0];
                                    }
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            default:
                                throw new AssertionError("unknown queryClass " + queryClass);
                            }

                            // TODO: do this somewhere else?
                            if (filterPercent != null) {
                                BooleanQuery.Builder builder = new BooleanQuery.Builder();
                                builder.add(q, BooleanClause.Occur.MUST);
                                builder.add(new RandomQuery(filterPercent), BooleanClause.Occur.FILTER);
                                q = builder.build();
                            }

                            if (q != null) {
                                if (doDistanceSort) {
                                    Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("point", centerLat, centerLon));
                                    for (IndexSearcher s : searchers) {
                                        TopFieldDocs hits = s.search(q, 10, sort);
                                        totHits += hits.totalHits;
                                    }
                                } else {
                                    //System.out.println("\nRUN QUERY " + q);
                                    //long t0 = System.nanoTime();
                                    for (IndexSearcher s : searchers) {
                                        int hitCount = s.count(q);
                                        totHits += hitCount;
                                        if (false && iter == 0) {
                                            System.out.println("q=" + q + " lat=" + centerLat + " lon=" + centerLon
                                                    + " distanceMeters=" + distanceMeters + " hits: " + hitCount);
                                        }
                                    }
                                }
                            } else {
                                assert nearestHits != null;
                                totHits += nearestHits.length;
                            }
                            queryCount++;
                            //throw new RuntimeException("now stop");
                        }
                    }
                }
            }

            long tEnd = System.nanoTime();
            double elapsedSec = (tEnd - tStart) / 1000000000.0;
            double qps = queryCount / elapsedSec;
            double mhps = (totHits / 1000000.0) / elapsedSec;
            if (queryClass.equals("nearest")) {
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f QPS (%.2f sec for %d queries), totNearestDistance=%.10f, totHits=%d",
                        iter, qps, elapsedSec, queryCount, totNearestDistance, maxDoc));
            } else {
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d",
                        iter, mhps, qps, elapsedSec, queryCount, totHits));
            }
            if (qps > bestQPS) {
                System.out.println("  ***");
                bestQPS = qps;
                bestMHPS = mhps;
            }
        }
    }

    System.out.println("BEST M hits/sec: " + bestMHPS);
    System.out.println("BEST QPS: " + bestQPS);
    for (IndexSearcher s : searchers) {
        s.getIndexReader().close();
    }
    IOUtils.close(dirs);
}
From source file:stemEvalLucene.evalLucene.java
License:Apache License
/**
 * Give the id list of sentences, from the Lucene index.
 *
 * @param input       input word
 * @param catalogName catalog (domain) name which we'd like to search in
 * @param limit       how many hits are needed (0 means all)
 */
public List<String> query(String input, String catalogName, int limit) {
    List<String> res = new ArrayList<String>();
    try {
        catalog c = catalogs.get(catalogName);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("contents", analyzer);
        Query query = parser.parse(QueryParser.escape(input));

        // When no limit is given, count the matches first so the search below can fetch them all.
        int n = limit > 0 ? limit : searcher.count(query);
        if (n == 0) {
            n = 1;
        }
        TopDocs results = searcher.search(query, n);

        int endPos;
        if (limit != 0) {
            endPos = Math.min(results.totalHits, limit); // first n hits
        } else {
            endPos = results.totalHits; // all hits
        }
        for (int i = 0; i < endPos; i++) {
            int id = results.scoreDocs[i].doc;
            Document doc = searcher.doc(id);
            res.add(doc.get("filename"));
        }
        reader.close();
        return res;
    } catch (ParseException e) {
        log(e.getMessage());
    } catch (IOException e) {
        log(e.getMessage());
    }
    return res;
}