Usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
public static final int NO_MORE_DOCS — the sentinel value (Integer.MAX_VALUE) that nextDoc() and advance() return once the iterator is exhausted.
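As a quick orientation, here is a minimal sketch of the idiom every example below follows. The consume method and its DocIdSet argument are hypothetical stand-ins for whatever doc ID set a query, filter, or index structure produces.

// A minimal sketch of the standard exhaustion idiom: nextDoc() returns
// increasing doc IDs until it returns the NO_MORE_DOCS sentinel.
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

public class NoMoreDocsExample {
    static void consume(DocIdSet set) throws IOException {
        DocIdSetIterator it = set.iterator();
        if (it == null) {
            return; // a DocIdSet may return null to mean "no documents"
        }
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println("matched doc id: " + doc);
        }
    }
}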
From source file:DocIdSetBenchmark.java
License:Apache License
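A benchmark helper that exhausts an iterator with nextDoc(), summing the doc IDs into a dummy value so the loop cannot be optimized away; iteration stops when nextDoc() returns NO_MORE_DOCS.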
private static int exhaustIterator(DocIdSet set) throws IOException {
    int dummy = 0;
    final DocIdSetIterator it = set.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        dummy += doc;
    }
    return dummy;
}
From source file:DocIdSetBenchmark.java
License:Apache License
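The companion helper exhausts the iterator with advance() instead, skipping forward by a fixed increment until NO_MORE_DOCS is returned.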
private static int exhaustIterator(DocIdSet set, int increment) throws IOException {
    int dummy = 0;
    final DocIdSetIterator it = set.iterator();
    for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.advance(doc + increment)) {
        dummy += doc;
    }
    return dummy;
}
From source file:arena.lucene.LuceneIndexSearcherImpl.java
License:Open Source License
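A search dispatcher that, when only a filter is given, reads matching doc IDs straight off the filter's DocIdSetIterator until NO_MORE_DOCS, either filling a ScoreDoc array directly or feeding a sorting collector.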
protected TopDocs executeSearch(IndexSearcher searcher, Query query, Filter filter, Sort sort,
        int collectorLimit) throws IOException {
    // Decide how to search based on which elements of the Lucene query model are available
    if (query != null) {
        // Full scoring search
        TopDocsCollector<? extends ScoreDoc> collector = null;
        if (sort == null) {
            collector = TopScoreDocCollector.create(collectorLimit, true);
        } else {
            SortField sortFields[] = sort.getSort();
            if (sortFields != null && sortFields.length > 0 && sortFields[0].getType() == SortField.SCORE
                    && !sortFields[0].getReverse()) {
                collector = TopScoreDocCollector.create(collectorLimit, true);
            } else {
                collector = TopFieldCollector.create(sort, collectorLimit, false, true, true, true);
            }
        }
        searcher.search(query, filter, collector);
        return collector.topDocs();
    } else if (filter != null) {
        // No query means no scoring is needed: just dump the filter's results
        // into a hit collector that walks them in the order we want
        DocIdSetIterator filterMatchesIterator = filter.getDocIdSet(searcher.getIndexReader()).iterator();
        if (sort == null) {
            // No sort available, so the natural iteration order is fine;
            // just pull the first n matching doc IDs into the output
            ScoreDoc[] scoreDocs = new ScoreDoc[collectorLimit];
            int found = 0;
            int docId;
            while (found < collectorLimit
                    && (docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                scoreDocs[found++] = new ScoreDoc(docId, 1f);
            }
            return new TopDocs(found, found < collectorLimit ? Arrays.copyOf(scoreDocs, found) : scoreDocs, 1f);
        } else {
            TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit,
                    false, true, true, true);
            int docId;
            while ((docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                collector.collect(docId);
            }
            return collector.topDocs();
        }
    } else if (sort != null) {
        // No query and no filter, hence no score: feed every doc in the index
        // to the collector for non-score sorting
        TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit, false,
                true, true, true);
        int numDocs = searcher.getIndexReader().numDocs();
        for (int n = 0; n < numDocs; n++) {
            collector.collect(n);
        }
        return collector.topDocs();
    } else {
        // No query, filter, or sort: return the first n docs
        ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(collectorLimit, searcher.getIndexReader().numDocs())];
        for (int n = 0; n < scoreDocs.length; n++) {
            scoreDocs[n] = new ScoreDoc(n, 1f);
        }
        return new TopDocs(scoreDocs.length, scoreDocs, 1f);
    }
}
From source file:BlockBuilding.AbstractBlockBuilding.java
License:Apache License
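For every term that occurs in both indexes, the postings of the first index are drained with nextDoc() until NO_MORE_DOCS to build a map from blocking key to entity IDs.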
protected Map<String, int[]> parseD1Index(IndexReader d1Index, IndexReader d2Index) {
    try {
        int[] documentIds = getDocumentIds(d1Index);
        final Map<String, int[]> hashedBlocks = new HashMap<>();
        Fields fields = MultiFields.getFields(d1Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                // check whether it is a common term
                int d2DocFrequency = d2Index.docFreq(new Term(field, text));
                if (d2DocFrequency == 0) {
                    continue;
                }
                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d1Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }
                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                hashedBlocks.put(text.utf8ToString(), idsArray);
            }
        }
        return hashedBlocks;
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:BlockBuilding.AbstractBlockBuilding.java
License:Apache License
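The matching pass over the second index: the same postings loop collects entity IDs for each blocking key already present in hashedBlocks and pairs the two sides into bilateral blocks.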
protected void parseD2Index(IndexReader d2Index, Map<String, int[]> hashedBlocks) {
    try {
        int[] documentIds = getDocumentIds(d2Index);
        Fields fields = MultiFields.getFields(d2Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                if (!hashedBlocks.containsKey(text.utf8ToString())) {
                    continue;
                }
                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d2Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }
                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                int[] d1Entities = hashedBlocks.get(text.utf8ToString());
                blocks.add(new BilateralBlock(d1Entities, idsArray));
            }
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}
From source file:BlockBuilding.AbstractBlockBuilding.java
License:Apache License
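The unilateral variant: terms with a document frequency of at least two are expanded into blocks by iterating their postings up to NO_MORE_DOCS.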
protected void parseIndex(IndexReader d1Index) {
    try {
        int[] documentIds = getDocumentIds(d1Index);
        Fields fields = MultiFields.getFields(d1Index);
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                if (termsEnum.docFreq() < 2) {
                    continue;
                }
                final List<Integer> entityIds = new ArrayList<>();
                PostingsEnum pe = MultiFields.getTermDocsEnum(d1Index, field, text);
                int doc;
                while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    entityIds.add(documentIds[doc]);
                }
                int[] idsArray = Converter.convertCollectionToArray(entityIds);
                UnilateralBlock block = new UnilateralBlock(idsArray);
                blocks.add(block);
            }
        }
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
    }
}
From source file:BlockBuilding.SortedNeighborhoodBlocking.java
License:Apache License
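Looks up a single blocking key's postings and walks them until NO_MORE_DOCS to translate Lucene doc IDs back into entity IDs.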
protected List<Integer> getTermEntities(int[] docIds, IndexReader iReader, String blockingKey) {
    try {
        Term term = new Term(VALUE_LABEL, blockingKey);
        List<Integer> entityIds = new ArrayList<>();
        int docFrequency = iReader.docFreq(term);
        if (0 < docFrequency) {
            BytesRef text = term.bytes();
            PostingsEnum pe = MultiFields.getTermDocsEnum(iReader, VALUE_LABEL, text);
            int doc;
            while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                entityIds.add(docIds[doc]);
            }
        }
        return entityIds;
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:cn.hbu.cs.esearch.document.UIDDocIdSet.java
License:Apache License
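A custom DocIdSetIterator over a sorted ID array: nextDoc() returns NO_MORE_DOCS past the last element, and advance() binary-searches for the target and does the same when it exceeds every stored ID.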
@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < sorted.length - 1) {
                current++;
                doc = sorted[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = Arrays.binarySearch(sorted, target);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= sorted.length) {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            current = idx;
            doc = sorted[current];
            return doc;
        }

        @Override
        public long cost() {
            // Not used; implemented only to satisfy the abstract superclass
            return 0;
        }
    };
}
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
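A single-document filter used to delete one document by UID: the anonymous iterator yields the target doc ID exactly once and NO_MORE_DOCS on every call thereafter.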
@Override
protected void persistDelete(long uid) throws IOException {
    final int docid = mapDocId(uid);
    if (docid < 0) {
        return;
    }
    Query deleteQ = new ConstantScoreQuery(new Filter() {
        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext readerCtx, Bits acceptedDocs) throws IOException {
            return new DocIdSet() {
                @Override
                public DocIdSetIterator iterator() throws IOException {
                    // An iterator over exactly one document: it yields docid once,
                    // then NO_MORE_DOCS on every subsequent call
                    return new DocIdSetIterator() {
                        int currId = -1;

                        @Override
                        public int nextDoc() throws IOException {
                            if (currId == -1) {
                                currId = docid;
                            } else {
                                currId = DocIdSetIterator.NO_MORE_DOCS;
                            }
                            return currId;
                        }

                        @Override
                        public int docID() {
                            return currId;
                        }

                        @Override
                        public int advance(int target) throws IOException {
                            if (currId != DocIdSetIterator.NO_MORE_DOCS) {
                                if (target < docid) {
                                    currId = docid;
                                } else {
                                    currId = DocIdSetIterator.NO_MORE_DOCS;
                                }
                            }
                            return currId;
                        }

                        @Override
                        public long cost() {
                            // Cost estimate is not used here
                            return 0;
                        }
                    };
                }
            };
        }
    });
    indexWriter.deleteDocuments(deleteQ);
    if (currentReaderData != null) {
        currentReaderData.uidMap.remove(uid);
    }
}
From source file:cn.hbu.cs.esearch.util.ArrayDocIdSet.java
License:Apache License
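Another array-backed iterator in the same style; both nextDoc() and advance() signal exhaustion by returning NO_MORE_DOCS.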
@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;
        int largest = lengthMinusone;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < lengthMinusone) {
                current++;
                doc = docIds[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            int idx = current < 0 ? binarySearch(docIds, target)
                    : binarySearch(docIds, target, current, largest);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= docIds.length) {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            current = idx;
            doc = docIds[current];
            return doc;
        }

        @Override
        public long cost() {
            // Cost estimate is not used by callers of this iterator
            return 0;
        }
    };
}