List of usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
Field: public static final int NO_MORE_DOCS, the sentinel doc id (Integer.MAX_VALUE) that nextDoc() and advance(int) return when the iterator is exhausted.
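As a minimal sketch (not taken from any of the source files below; the class and method names are illustrative only), the canonical consumption pattern looks like this:

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

final class NoMoreDocsSketch {

    // Drains any DocIdSetIterator: nextDoc() returns each matching document id in
    // increasing order, and finally NO_MORE_DOCS (Integer.MAX_VALUE) once exhausted.
    static int countDocs(DocIdSetIterator iterator) throws IOException {
        int count = 0;
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            count++;
        }
        return count;
    }
}

The examples that follow are all variations of this loop, driven by either nextDoc() or advance(int) and compared against NO_MORE_DOCS.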
From source file: org.sindice.siren.util.ScorerCellQueue.java
License: Apache License

public final boolean topSkipToAndAdjustElsePop(final int entity, final int tuple, final int cell)
        throws IOException {
    return this.checkAdjustElsePop(
            topHSC.scorer.advance(entity, tuple, cell) != DocIdSetIterator.NO_MORE_DOCS);
}
From source file: org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License: Apache License

static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocIdSetIterator filterItr,
        DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException {
    int filterDoc = -1;
    int spansDoc = spans.nextDoc();
    while (true) {
        if (spansDoc == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        }
        filterDoc = filterItr.advance(spansDoc);
        if (filterDoc == DocIdSetIterator.NO_MORE_DOCS) {
            break;
        } else if (filterDoc > spansDoc) {
            while (spansDoc <= filterDoc) {
                spansDoc = spans.nextDoc();
                if (spansDoc == filterDoc) {
                    boolean cont = visit(leafCtx, spans, visitor);
                    if (!cont) {
                        return false;
                    }
                } else {
                    continue;
                }
            }
        } else if (filterDoc == spansDoc) {
            boolean cont = visit(leafCtx, spans, visitor);
            if (!cont) {
                return false;
            }
            // then iterate spans
            spansDoc = spans.nextDoc();
        } else if (filterDoc < spansDoc) {
            throw new IllegalArgumentException("FILTER doc is < spansdoc!!!");
        } else {
            throw new IllegalArgumentException("Something horrible happened");
        }
    }
    return true;
}
From source file: org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License: Apache License

static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor)
        throws IOException, TargetTokenNotFoundException {
    while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        boolean cont = visit(leafCtx, spans, visitor);
        if (!cont) {
            return false;
        }
    }
    return true;
}
From source file: org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java
License: Apache License

private void executeNeedleTests(Analyzer analyzer) throws Exception {
    String needle = getNeedle(analyzer);
    int numFieldValues = 23;
    Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);
    IndexReader reader = DirectoryReader.open(directory);
    LeafReaderContext ctx = reader.leaves().get(0);
    LeafReader r = ctx.reader();
    PostingsEnum dpe = r.postings(new Term(FIELD, needle), PostingsEnum.ALL);
    int numTests = 0;
    try {
        while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int frq = dpe.freq();
            int advanced = 0;
            String[] fieldValues = r.document(dpe.docID()).getValues(FIELD);
            while (++advanced < frq) {
                dpe.nextPosition();
                String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(dpe.startOffset(),
                        dpe.endOffset(), fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                numTests++;
            }
        }
    } finally {
        reader.close();
        directory.close();
    }
    assertEquals("number of tests", numFieldValues - 1, numTests);
}
From source file: org.uberfire.ext.metadata.backend.lucene.index.BaseLuceneIndex.java
License: Apache License

protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids) throws IOException {
    final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
    }
    int[] results = new int[ids.length];
    for (int i = 0; i < results.length; i++) {
        results[i] = -1;
    }
    // for each id given
    for (int idx = 0; idx < ids.length; idx++) {
        int base = 0;
        final BytesRef id = new BytesRef(ids[idx]);
        // for each leaf reader..
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            final LeafReader subReader = subReaders.get(subIDX).reader();
            final TermsEnum termsEnum = termsEnums[subIDX];
            // does the enumeration of "id" terms from this reader contain the id we're looking for?
            if (termsEnum.seekExact(id)) {
                final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
                // okay, the reader contains it; get the postings and check that they're there (null check)
                if (docs != null) {
                    final int docID = docs.nextDoc();
                    Bits liveDocs = subReader.getLiveDocs();
                    // But wait, maybe some of the docs have been deleted! Check that too..
                    if ((liveDocs == null || liveDocs.get(docID)) && docID != DocIdSetIterator.NO_MORE_DOCS) {
                        results[idx] = base + docID;
                        break;
                    }
                }
            }
            base += subReader.maxDoc();
        }
    }
    return results;
}
From source file: org.voyanttools.trombone.tool.corpus.DocumentNgrams.java
License: Open Source License

List<DocumentNgram> getNgrams(CorpusMapper corpusMapper, Keywords stopwords) throws IOException {
    Corpus corpus = corpusMapper.getCorpus();
    int[] totalTokens = corpus.getLastTokenPositions(tokenType);
    FlexibleQueue<DocumentNgram> queue = new FlexibleQueue<DocumentNgram>(comparator, start + limit);
    Set<String> validIds = new HashSet<String>();
    validIds.addAll(this.getCorpusStoredDocumentIdsFromParameters(corpus));
    OverlapFilter filter = getDocumentNgramsOverlapFilter(parameters);
    DocIdSetIterator it = corpusMapper.getDocIdSet().iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int luceneDoc = it.docID();
        String docId = corpusMapper.getDocumentIdFromLuceneId(luceneDoc);
        if (validIds.contains(docId) == false) {
            continue;
        }
        int corpusDocumentIndex = corpusMapper.getDocumentPositionFromLuceneId(luceneDoc);
        int lastToken = totalTokens[corpusDocumentIndex];
        // build single grams as seed for ngrams
        SimplifiedTermInfo[] sparseSimplifiedTermInfoArray = getSparseSimplifiedTermInfoArray(corpusMapper,
                luceneDoc, lastToken);
        Map<String, List<int[]>> stringPositionsMap = new HashMap<String, List<int[]>>();
        for (int i = 0, len = sparseSimplifiedTermInfoArray.length; i < len; i++) {
            if (sparseSimplifiedTermInfoArray[i] != null
                    && sparseSimplifiedTermInfoArray[i].term.isEmpty() == false) {
                if (stringPositionsMap.containsKey(sparseSimplifiedTermInfoArray[i].term) == false) {
                    List<int[]> l = new ArrayList<int[]>();
                    l.add(new int[] { i, i });
                    stringPositionsMap.put(sparseSimplifiedTermInfoArray[i].term, l);
                } else {
                    stringPositionsMap.get(sparseSimplifiedTermInfoArray[i].term).add(new int[] { i, i });
                }
            }
        }
        List<DocumentNgram> ngrams = getNgramsFromStringPositions(stringPositionsMap, corpusDocumentIndex, 1);
        ngrams = getNextNgrams(ngrams, sparseSimplifiedTermInfoArray, corpusDocumentIndex, 2);
        ngrams = filter.getFilteredNgrams(ngrams, lastToken);
        for (DocumentNgram ngram : ngrams) {
            if (ngram.getLength() >= minLength && ngram.getLength() <= maxLength) {
                queue.offer(ngram);
            }
        }
    }
    return queue.getOrderedList(start);
}
From source file: org.zenoss.zep.index.impl.lucene.LuceneEventIndexBackend.java
License: Open Source License

protected void searchEventTagSeverities(EventFilter filter, EventTagSeverityCounter counter) throws ZepException {
    final boolean hasTagsFilter = filter.getTagFilterCount() > 0;
    IndexSearcher searcher = null;
    try {
        searcher = getSearcher();
        final Query query = buildQueryFromFilter(searcher.getIndexReader(), filter);
        final OpenBitSet docs = new OpenBitSet(searcher.getIndexReader().maxDoc());
        searcher.search(query, new Collector() {
            private int docBase;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
            }

            @Override
            public void collect(int doc) throws IOException {
                docs.set(docBase + doc);
            }

            @Override
            public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
                this.docBase = atomicReaderContext.docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });
        int docId;
        final DocIdSetIterator it = docs.iterator();
        while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            final EventSummary summary;
            if (this.archive) {
                // TODO: This isn't very cheap - would be better to batch by UUID in separate calls.
                // This doesn't get called on the event archive right now, so leave it until we need to optimize.
                Document doc = searcher.doc(docId, UUID_FIELDS);
                summary = this.eventSummaryBaseDao.findByUuid(doc.get(FIELD_UUID));
            } else {
                Document doc = searcher.doc(docId);
                // This is an optimization for getting the non-archived tags from an organizer (see ZEN-7239).
                // For that ticket the index was updated to store what is needed to generate the tag
                // severities. Since we do not want a migration that completely deletes the index, this
                // method stays backwards compatible by uncompressing the protobuf when the field is absent.
                if (doc.get(FIELD_SEVERITY) != null) {
                    int count = Integer.parseInt(doc.get(FIELD_COUNT));
                    boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED
                            .equals(EventStatus.valueOf(Integer.parseInt(doc.get(FIELD_STATUS))));
                    EventSeverity severity = EventSeverity.valueOf(Integer.parseInt(doc.get(FIELD_SEVERITY)));
                    // get the map for each filter and update the count
                    for (String tag : doc.getValues(FIELD_TAGS)) {
                        counter.update(tag, severity, count, acknowledged);
                    }
                    continue;
                } else {
                    summary = LuceneEventIndexMapper.toEventSummary(doc);
                }
            }
            boolean acknowledged = EventStatus.STATUS_ACKNOWLEDGED == summary.getStatus();
            Event occurrence = summary.getOccurrence(0);
            EventSeverity severity = occurrence.getSeverity();
            int count = occurrence.getCount();
            EventActor actor = occurrence.getActor();
            if (!hasTagsFilter) {
                // Build tags from element_uuids - no tags specified in filter
                if (actor.hasElementUuid()) {
                    counter.update(actor.getElementUuid(), severity, count, acknowledged);
                }
            } else {
                // Build tag severities from the passed-in filter
                for (String uuid : Arrays.asList(actor.getElementUuid(), actor.getElementSubUuid())) {
                    counter.update(uuid, severity, count, acknowledged);
                }
                for (EventTag tag : occurrence.getTagsList()) {
                    for (String tagUuid : tag.getUuidList()) {
                        counter.update(tagUuid, severity, count, acknowledged);
                    }
                }
            }
        }
    } catch (IOException e) {
        throw new ZepException(e);
    } catch (OutOfMemoryError e) {
        closeSearcherManager();
        throw e;
    } finally {
        returnSearcher(searcher);
    }
}
From source file: perf.DiskUsage.java
License: Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }
        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
        default:
            docsWithField = null;
            break;
        }
        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }
    return new TreeSet<FieldStats>(stats.values());
}
From source file: perf.PKLookupTask.java
License: Apache License

@Override
public void go(IndexState state) throws IOException {
    final IndexSearcher searcher = state.mgr.acquire();
    try {
        final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
        IndexState.PKLookupState[] pkStates = new IndexState.PKLookupState[subReaders.size()];
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            LeafReaderContext ctx = subReaders.get(subIDX);
            ThreadLocal<IndexState.PKLookupState> states = state.pkLookupStates
                    .get(ctx.reader().getCoreCacheKey());
            // NPE here means you are trying to use this task on a newly refreshed NRT reader!
            IndexState.PKLookupState pkState = states.get();
            if (pkState == null) {
                pkState = new IndexState.PKLookupState(ctx.reader(), "id");
                states.set(pkState);
            }
            pkStates[subIDX] = pkState;
        }
        for (int idx = 0; idx < ids.length; idx++) {
            int base = 0;
            final BytesRef id = ids[idx];
            for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
                IndexState.PKLookupState pkState = pkStates[subIDX];
                //System.out.println("\nTASK: sub=" + sub);
                //System.out.println("TEST: lookup " + ids[idx].utf8ToString());
                if (pkState.termsEnum.seekExact(id)) {
                    //System.out.println(" found!");
                    PostingsEnum docs = pkState.termsEnum.postings(pkState.postingsEnum, 0);
                    assert docs != null;
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docs.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docs.nextDoc()) {
                        if (pkState.liveDocs == null || pkState.liveDocs.get(d)) {
                            docID = d;
                            break;
                        }
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        answers[idx] = base + docID;
                        break;
                    }
                }
                base += subReaders.get(subIDX).reader().maxDoc();
            }
        }
    } finally {
        state.mgr.release(searcher);
    }
}
From source file: proj.zoie.api.impl.util.ArrayDocIdSet.java
License: Apache License

@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
        int doc = -1;
        int current = -1;
        int largest = _lengthminusone;

        @Override
        public int docID() {
            return doc;
        }

        @Override
        public int nextDoc() throws IOException {
            if (current < _lengthminusone) {
                current++;
                doc = _docids[current];
                return doc;
            }
            return DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
            // binary-search the sorted docid array for the first entry >= target
            int idx = current < 0 ? binarySearch(_docids, target)
                    : binarySearch(_docids, target, current, largest);
            // int idx = Arrays.binarySearch(_docids, target);
            if (idx < 0) {
                idx = -(idx + 1);
                if (idx >= _docids.length) {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            current = idx;
            doc = _docids[current];
            return doc;
        }
    };
}