List of usage examples for org.apache.lucene.search.DocIdSetIterator#nextDoc()
public abstract int nextDoc() throws IOException;
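Before the collected examples, here is a minimal, self-contained sketch of the canonical nextDoc() consumption loop. It assumes a recent Lucene version where FixedBitSet and BitSetIterator live in org.apache.lucene.util; the class and variable names are illustrative and do not come from any of the sources below.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class NextDocLoopExample {
    public static void main(String[] args) throws IOException {
        // Build a tiny bit set so the iterator has a few documents to return.
        FixedBitSet bits = new FixedBitSet(16);
        bits.set(3);
        bits.set(7);
        bits.set(11);
        DocIdSetIterator iterator = new BitSetIterator(bits, bits.cardinality());

        // Canonical consumption loop: nextDoc() moves to the next matching
        // doc id and returns NO_MORE_DOCS (Integer.MAX_VALUE) when exhausted.
        int doc;
        while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.println("matched doc " + doc);
        }
    }
}

Most of the examples below are variations of this loop; a few use the equivalent for-loop or do/while form instead.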
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java
License:Open Source License
static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits, LeafCollector collector,
        Bits acceptDocs) throws IOException {
    // ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators,
    // so if roleBits has the lowest cardinality it should be used first:
    DocIdSetIterator iterator = ConjunctionDISI.intersectIterators(
            Arrays.asList(new BitSetIterator(roleBits, roleBits.approximateCardinality()), scorer.iterator()));
    for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
        if (acceptDocs == null || acceptDocs.get(docId)) {
            collector.collect(docId);
        }
    }
}
From source file:org.hibernate.search.filter.impl.AndDocIdSet.java
License:Open Source License
private int findFirstTargetPosition(final DocIdSetIterator[] iterators, OpenBitSet result) throws IOException {
    int targetPosition = iterators[0].nextDoc();
    if (targetPosition == DocIdSetIterator.NO_MORE_DOCS) {
        // first iterator has no values, so skip all
        return DocIdSetIterator.NO_MORE_DOCS;
    }
    boolean allIteratorsShareSameFirstTarget = true;
    // iterator initialization: just one "next" for each DocIdSetIterator
    for (int i = 1; i < iterators.length; i++) {
        final DocIdSetIterator iterator = iterators[i];
        final int position = iterator.nextDoc();
        if (position == DocIdSetIterator.NO_MORE_DOCS) {
            // current iterator has no values, so skip all
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        if (targetPosition != position) {
            targetPosition = max(targetPosition, position);
            allIteratorsShareSameFirstTarget = false;
        }
    }
    // end iterator initialization
    if (allIteratorsShareSameFirstTarget) {
        result.fastSet(targetPosition);
        targetPosition++;
    }
    return targetPosition;
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
@Test
public void testIteratorMatchesTestArray() throws IOException {
    DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
    DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
    assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, docIdSetIterator.docID());
    assertEquals(9, docIdSetIterator.advance(9));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
private static void iterateOnResults(DocIdSet docIdBitSet) throws IOException {
    DocIdSetIterator iterator = docIdBitSet.iterator();
    int currentDoc;
    do {
        currentDoc = iterator.nextDoc();
    } while (currentDoc != DocIdSetIterator.NO_MORE_DOCS);
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
/**
 * @param expected the doc id set as expected
 * @param actual the doc id set as returned by the test
 *
 * @return true if the two DocIdSets are equal: they contain the same number of ids, in the same order, and all are equal
 */
public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
    try {
        DocIdSetIterator iterA = expected.iterator();
        DocIdSetIterator iterB = actual.iterator();
        int nextA;
        int nextB;
        do {
            nextA = iterA.nextDoc();
            nextB = iterB.nextDoc();
            if (nextA != nextB) {
                return false;
            }
            assertEquals(iterA.docID(), iterB.docID());
        } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
    } catch (IOException ioe) {
        fail("these DocIdSetIterator instances should not throw any exceptions");
    }
    return true;
}
From source file:org.hibernate.search.test.filter.FiltersOptimizationTest.java
License:Open Source License
/**
 * Verifies that the docIdSet represents a specific sequence of docIds.
 *
 * @param docIdSet the docIdSet to test
 * @param expectedIds an array of document ids
 * @return true if iterating on docIdSet returns the expectedIds
 * @throws IOException should not happen
 */
private boolean isIdSetSequenceSameTo(DocIdSet docIdSet, int... expectedIds) throws IOException {
    DocIdSetIterator idSetIterator = docIdSet.iterator();
    for (int setBit : expectedIds) {
        int currentId = idSetIterator.nextDoc();
        if (currentId == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        if (currentId != setBit) {
            return false;
        }
    }
    // and now test that both sequences are at the end:
    return idSetIterator.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
}
From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java
License:Apache License
@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);
    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();
    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;
        if (docId > totalMaxDoc) {
            break;
        }
        while (simpleIterator.docID() < docId
                && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // step one doc at a time until we reach or pass the target
        }
        advancedIterator.advance(docId);
        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
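The test above cross-checks advance() against repeated nextDoc() calls. The contract being exercised can be summarized in a short helper; this is a sketch assuming nothing beyond the public DocIdSetIterator API, and the name slowAdvance mirrors Lucene's own fallback but is written out here purely for illustration.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

final class AdvanceContract {
    // Position the iterator on the first doc id >= target using only
    // nextDoc(). advance(target) must land on the same doc id (or
    // NO_MORE_DOCS); a real advance() is simply allowed to get there faster
    // by skipping over intermediate documents.
    static int slowAdvance(DocIdSetIterator it, int target) throws IOException {
        int doc = it.docID();
        while (doc < target) {
            doc = it.nextDoc();
        }
        return doc;
    }
}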
From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java
License:Open Source License
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0) {
        prefix = null;
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0) {
            startTermIndex = -startTermIndex - 1;
        }
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms) {
                counts[arrIdx]++;
            }
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue =
                    new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0) {
                    continue;
                }
                if (--lim < 0) {
                    break;
                }
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0) {
                    continue;
                }
                if (--lim < 0) {
                    break;
                }
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
From source file:org.meresco.lucene.search.MerescoTaxonomyFacetCounts.java
License:Open Source License
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    OrdinalsReader.OrdinalsSegmentReader[] ordsReaders =
            new OrdinalsReader.OrdinalsSegmentReader[this.ordinalsReaders.size()];
    for (MatchingDocs hits : matchingDocs) {
        for (int i = 0; i < ordsReaders.length; i++) {
            ordsReaders[i] = this.ordinalsReaders.get(i).getReader(hits.context);
        }
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (OrdinalsReader.OrdinalsSegmentReader ords : ordsReaders) {
                ords.get(doc, scratch);
                for (int i = 0; i < scratch.length; i++) {
                    values[scratch.ints[scratch.offset + i]]++;
                }
            }
        }
    }
    rollup();
}
From source file:org.neo4j.kernel.api.impl.index.collector.DocValuesCollector.java
License:Open Source License
private void replayTo(Collector collector) throws IOException {
    for (MatchingDocs docs : getMatchingDocs()) {
        LeafCollector leafCollector = collector.getLeafCollector(docs.context);
        Scorer scorer;
        DocIdSetIterator idIterator = docs.docIdSet.iterator();
        if (isKeepScores()) {
            scorer = new ReplayingScorer(docs.scores);
        } else {
            scorer = new ConstantScoreScorer(null, Float.NaN, idIterator);
        }
        leafCollector.setScorer(scorer);
        int doc;
        while ((doc = idIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            leafCollector.collect(doc);
        }
    }
}