List of usage examples for org.apache.lucene.search DocIdSetIterator docID
public abstract int docID();
Returns -1 if #nextDoc() or #advance(int) were not called yet, NO_MORE_DOCS if the iterator has exhausted, and otherwise the doc ID the iterator is currently positioned on.
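A minimal sketch of the typical consumption pattern, for orientation before the examples below (the docIdSet and process names are hypothetical, not taken from any of the source files listed here): advance the iterator with nextDoc() or advance(int) and read the current document via docID().
// Minimal sketch (hypothetical names): walk a DocIdSetIterator and read docID() at each position.
DocIdSetIterator it = docIdSet.iterator();          // docIdSet is some org.apache.lucene.search.DocIdSet
assert it.docID() == -1;                            // not positioned yet
int doc;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // doc == it.docID(): the document the iterator is currently on
    process(doc);                                   // hypothetical consumer
}
assert it.docID() == DocIdSetIterator.NO_MORE_DOCS; // exhausted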
From source file:org.elasticsearch.search.profile.ProfileScorer.java
License:Apache License
@Override
public TwoPhaseIterator twoPhaseIterator() {
    final TwoPhaseIterator in = scorer.twoPhaseIterator();
    if (in == null) {
        return null;
    }
    final DocIdSetIterator inApproximation = in.approximation();
    final DocIdSetIterator approximation = new DocIdSetIterator() {
        @Override
        public int advance(int target) throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.ADVANCE);
            try {
                return inApproximation.advance(target);
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int nextDoc() throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.NEXT_DOC);
            try {
                return inApproximation.nextDoc();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public int docID() {
            return inApproximation.docID();
        }

        @Override
        public long cost() {
            return inApproximation.cost();
        }
    };
    return new TwoPhaseIterator(approximation) {
        @Override
        public boolean matches() throws IOException {
            profile.startTime(ProfileBreakdown.TimingType.MATCH);
            try {
                return in.matches();
            } finally {
                profile.stopAndRecordTime();
            }
        }

        @Override
        public float matchCost() {
            return in.matchCost();
        }
    };
}
From source file:org.hibernate.search.filter.impl.AndDocIdSet.java
License:Open Source License
private boolean iteratorAlreadyOnTargetPosition(int targetPosition, DocIdSetIterator iterator) {
    return iterator.docID() == targetPosition;
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
@Test
public void testIteratorMatchesTestArray() throws IOException {
    DocIdSet docIdSet0_9 = arrayToDocIdSet(testDataFrom0to9);
    DocIdSetIterator docIdSetIterator = docIdSet0_9.iterator();
    assertTrue(docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, docIdSetIterator.docID());
    assertEquals(9, docIdSetIterator.advance(9));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docIdSetIterator.advance(10));
}
From source file:org.hibernate.search.test.filter.AndDocIdSetsTest.java
License:Open Source License
/**
 * @param expected the doc id set as expected
 * @param actual the doc id set as returned by the test
 *
 * @return true if the two DocIdSet are equal: contain the same number of ids, same order and all are equal
 */
public static boolean docIdSetsEqual(DocIdSet expected, DocIdSet actual) {
    try {
        DocIdSetIterator iterA = expected.iterator();
        DocIdSetIterator iterB = actual.iterator();
        int nextA;
        int nextB;
        do {
            nextA = iterA.nextDoc();
            nextB = iterB.nextDoc();
            if (nextA != nextB) {
                return false;
            }
            assertEquals(iterA.docID(), iterB.docID());
        } while (nextA != DocIdSetIterator.NO_MORE_DOCS);
    } catch (IOException ioe) {
        fail("these DocIdSetIterator instances should not throw any exceptions");
    }
    return true;
}
From source file:org.hippoecm.repository.query.lucene.util.MultiDocIdSetTest.java
License:Apache License
@Test
public void testAdvance() throws IOException {
    Random rand = new Random(13);
    int[] maxDoc = new int[NUM_BITSETS];
    OpenBitSet[] bitsets = new OpenBitSet[NUM_BITSETS];
    for (int i = 0; i < NUM_BITSETS; i++) {
        OpenBitSet bitset = bitsets[i] = new OpenBitSet();
        for (int j = 0; j < NUM_DOCS_IN_BITSET; j++) {
            if (rand.nextInt(5) == 0) {
                bitset.set(j);
            }
        }
        maxDoc[i] = NUM_DOCS_IN_BITSET;
    }
    int totalMaxDoc = NUM_BITSETS * NUM_DOCS_IN_BITSET;

    // compare nextDoc invocations with advance
    MultiDocIdSet docIdSet = new MultiDocIdSet(bitsets, maxDoc);
    final DocIdSetIterator simpleIterator = docIdSet.iterator();
    final DocIdSetIterator advancedIterator = docIdSet.iterator();
    int docId = 0;
    while (true) {
        final int delta = rand.nextInt(CHECK_INTERVAL);
        docId = docId + delta + 1;
        if (docId > totalMaxDoc) {
            break;
        }
        while (simpleIterator.docID() < docId && simpleIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            ;
        advancedIterator.advance(docId);
        assertEquals(simpleIterator.docID(), advancedIterator.docID());
    }
}
From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java
License:Open Source License
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName, int offset,
        int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {

        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue = new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}
From source file:org.vootoo.search.function.ValueSourceCollectorFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(@SuppressWarnings("rawtypes") final Map context, final LeafReaderContext readerContext,
        Bits acceptDocs) throws IOException {
    collectorFilterable.doSetNextReader(context, readerContext);

    //TODO check getDocIdSet use
    return BitsFilteredDocIdSet.wrap(new DocIdSet() {

        @Override
        public long ramBytesUsed() {
            return 0;
        }

        @Override
        public DocIdSetIterator iterator() throws IOException {
            final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc());
            // no approximation!
            TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    return collectorFilterable.matches(approximation.docID());
                }

                @Override
                public float matchCost() {
                    return 100; // TODO: use cost of ValueSourceScorer.this.matches()
                }
            };
            return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
        }

        @Override
        public Bits bits() {
            return null; // don't use random access
        }
    }, acceptDocs);
}
From source file:org.voyanttools.trombone.tool.corpus.DocumentNgrams.java
License:Open Source License
List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);
    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];

        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper, luceneDoc, lastToken);
        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }
        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);
        ngrams = filter.getFilteredNgrams(ngrams, lastToken);
        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }
    return queue.getOrderedList(start);
}