Usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
Field declaration: public static final int NO_MORE_DOCS
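NO_MORE_DOCS is the sentinel value (Integer.MAX_VALUE) that nextDoc() and advance() return once a DocIdSetIterator is exhausted, and it is what the examples below compare against. As a minimal sketch of the standard idiom (the countDocs method and its iterator parameter are illustrative, not taken from any of the examples):

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Hypothetical helper: exhaust any DocIdSetIterator, stopping when nextDoc()
// returns the NO_MORE_DOCS sentinel (Integer.MAX_VALUE).
static int countDocs(DocIdSetIterator iterator) throws IOException {
    int count = 0;
    int doc;
    while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        count++; // "doc" holds the current document id here
    }
    // After the loop, iterator.docID() also reports NO_MORE_DOCS.
    return count;
}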
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java
License:Apache License
@Override
public int nextBucket() throws IOException {
    if (currentDoc < 0) {
        // Not nexted yet, no bucket
        return -1;
    }
    if (currentDoc == DocIdSetIterator.NO_MORE_DOCS || source.startPosition() == Spans.NO_MORE_POSITIONS)
        return NO_MORE_BUCKETS;
    return gatherHitsInternal();
}
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsAbstract.java
License:Apache License
@Override
public int advance(int target) throws IOException {
    bucketSize = -1; // not at a valid bucket anymore
    if (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
        if (currentDoc >= target)
            nextDoc();
        else {
            currentDoc = source.advance(target);
            if (currentDoc != DocIdSetIterator.NO_MORE_DOCS) {
                source.nextStartPosition(); // start gathering at the first hit
                //gatherHitsInternal();
            }
        }
    }
    return currentDoc;
}
From source file:nl.inl.blacklab.search.lucene.SpansInBucketsPerStartPoint.java
License:Apache License
@Override
public int advance(int target) throws IOException {
    if (currentDoc >= target) {
        return nextDoc();
    }
    if (currentDoc == NO_MORE_DOCS)
        return DocIdSetIterator.NO_MORE_DOCS;
    if (currentDoc < target) {
        currentDoc = source.advance(target);
        currentSpansStart = source.nextStartPosition();
        currentBucketStart = -1; // no bucket yet
    }
    return currentDoc;
}
From source file:nl.inl.blacklab.search.lucene.TestSpansInBuckets.java
License:Apache License
@Test
public void testSkipToPastEnd() throws IOException {
    Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, hpd.advance(6));
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Get character positions from word positions.
 *
 * Places character positions in the same arrays as the word positions were specified in.
 *
 * @param doc
 *            the document from which to find character positions
 * @param fieldName
 *            the field from which to find character positions
 * @param startsOfWords
 *            word positions for which we want starting character positions (i.e. the position
 *            of the first letter of that word)
 * @param endsOfWords
 *            word positions for which we want ending character positions (i.e. the position of
 *            the last letter of that word)
 * @param fillInDefaultsIfNotFound
 *            if true, if any illegal word positions are specified (say, past the end of the
 *            document), a sane default value is chosen (in this case, the last character of the
 *            last word found). Otherwise, throws an exception.
 */
void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords,
        boolean fillInDefaultsIfNotFound) {
    if (startsOfWords.length == 0)
        return; // nothing to do
    try {
        // Determine lowest and highest word position we'd like to know something about.
        // This saves a little bit of time for large result sets.
        int minP = -1, maxP = -1;
        int numStarts = startsOfWords.length;
        int numEnds = endsOfWords.length;
        for (int i = 0; i < numStarts; i++) {
            if (startsOfWords[i] < minP || minP == -1)
                minP = startsOfWords[i];
            if (startsOfWords[i] > maxP)
                maxP = startsOfWords[i];
        }
        for (int i = 0; i < numEnds; i++) {
            if (endsOfWords[i] < minP || minP == -1)
                minP = endsOfWords[i];
            if (endsOfWords[i] > maxP)
                maxP = endsOfWords[i];
        }
        if (minP < 0 || maxP < 0)
            throw new RuntimeException("Can't determine min and max positions");

        String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName);
        org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName);
        if (terms == null)
            throw new RuntimeException("Field " + fieldPropName + " in doc " + doc + " has no term vector");
        if (!terms.hasPositions())
            throw new RuntimeException(
                    "Field " + fieldPropName + " in doc " + doc + " has no character postion information");

        //int lowestPos = -1, highestPos = -1;
        int lowestPosFirstChar = -1, highestPosLastChar = -1;
        int total = numStarts + numEnds;
        boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes!
        int found = 0;

        // Iterate over terms
        TermsEnum termsEnum = terms.iterator(null);
        while (termsEnum.next() != null) {
            DocsAndPositionsEnum dpe = termsEnum.docsAndPositions(null, null);

            // Iterate over docs containing this term (NOTE: should be only one doc!)
            while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                int position = -1;

                // Iterate over positions of this term in this doc
                int positionsRead = 0;
                int numberOfPositions = dpe.freq();
                while (positionsRead < numberOfPositions) {
                    position = dpe.nextPosition();
                    if (position == -1)
                        break;
                    positionsRead++;

                    // Keep track of the lowest and highest char pos, so
                    // we can fill in the character positions we didn't find
                    int startOffset = dpe.startOffset();
                    if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) {
                        lowestPosFirstChar = startOffset;
                    }
                    int endOffset = dpe.endOffset();
                    if (endOffset > highestPosLastChar) {
                        highestPosLastChar = endOffset;
                    }

                    // We've calculated the min and max word positions in advance, so
                    // we know we can skip this position if it's outside the range we're interested in.
                    // (Saves a little time for large result sets)
                    if (position < minP || position > maxP) {
                        continue;
                    }
                    for (int m = 0; m < numStarts; m++) {
                        if (!done[m] && position == startsOfWords[m]) {
                            done[m] = true;
                            startsOfWords[m] = startOffset;
                            found++;
                        }
                    }
                    for (int m = 0; m < numEnds; m++) {
                        if (!done[numStarts + m] && position == endsOfWords[m]) {
                            done[numStarts + m] = true;
                            endsOfWords[m] = endOffset;
                            found++;
                        }
                    }
                    // NOTE: we might be tempted to break here if found == total,
                    // but that would foul up our calculation of highestPosLastChar and
                    // lowestPosFirstChar.
                }
            }
        }
        if (found < total) {
            if (!fillInDefaultsIfNotFound)
                throw new RuntimeException("Could not find all character offsets!");
            if (lowestPosFirstChar < 0 || highestPosLastChar < 0)
                throw new RuntimeException("Could not find default char positions!");
            for (int m = 0; m < numStarts; m++) {
                if (!done[m])
                    startsOfWords[m] = lowestPosFirstChar;
            }
            for (int m = 0; m < numEnds; m++) {
                if (!done[numStarts + m])
                    endsOfWords[m] = highestPosLastChar;
            }
        }
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Deletes documents matching a query from the BlackLab index.
 *
 * This deletes the documents from the Lucene index, the forward indices and the content store(s).
 *
 * @param q the query
 */
public void delete(Query q) {
    if (!indexMode)
        throw new RuntimeException("Cannot delete documents, not in index mode");
    try {
        // Open a fresh reader to execute the query
        DirectoryReader reader = DirectoryReader.open(indexWriter, false);
        try {
            // Execute the query, iterate over the docs and delete from FI and CS.
            IndexSearcher s = new IndexSearcher(reader);
            Weight w = s.createNormalizedWeight(q);
            AtomicReader scrw = new SlowCompositeReaderWrapper(reader);
            try {
                Scorer sc = w.scorer(scrw.getContext(), true, false, MultiFields.getLiveDocs(reader));
                if (sc == null)
                    return; // no matching documents

                // Iterate over matching docs
                while (true) {
                    int docId;
                    try {
                        docId = sc.nextDoc();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    if (docId == DocIdSetIterator.NO_MORE_DOCS)
                        break;
                    Document d = reader.document(docId);

                    // Delete this document in all forward indices
                    for (Map.Entry<String, ForwardIndex> e : forwardIndices.entrySet()) {
                        String fieldName = e.getKey();
                        ForwardIndex fi = e.getValue();
                        int fiid = Integer.parseInt(d.get(ComplexFieldUtil.forwardIndexIdField(fieldName)));
                        fi.deleteDocument(fiid);
                    }

                    // Delete this document in all content stores
                    for (Map.Entry<String, ContentAccessor> e : contentAccessors.entrySet()) {
                        String fieldName = e.getKey();
                        ContentAccessor ca = e.getValue();
                        if (!(ca instanceof ContentAccessorContentStore))
                            continue; // can only delete from content store
                        ContentStore cs = ((ContentAccessorContentStore) ca).getContentStore();
                        int cid = Integer.parseInt(d.get(ComplexFieldUtil.contentIdField((fieldName))));
                        cs.delete(cid);
                    }
                }
            } finally {
                scrw.close();
            }
        } finally {
            reader.close();
        }
        // Finally, delete the documents from the Lucene index
        indexWriter.deleteDocuments(q);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.Searcher.java
License:Apache License
/**
 * Determine the term frequencies in a set of documents (defined by the filter query)
 *
 * @param documentFilterQuery what set of documents to get the term frequencies for
 * @param fieldName complex field name, i.e. contents
 * @param propName property name, i.e. word, lemma, pos, etc.
 * @param altName alternative name, i.e. s, i (case-sensitivity)
 * @return the term frequency map
 */
public Map<String, Integer> termFrequencies(Query documentFilterQuery, String fieldName, String propName,
        String altName) {
    try {
        String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
        Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery);
        Map<String, Integer> freq = new HashMap<String, Integer>();
        for (AtomicReaderContext arc : reader.leaves()) {
            if (weight == null)
                throw new RuntimeException("weight == null");
            if (arc == null)
                throw new RuntimeException("arc == null");
            if (arc.reader() == null)
                throw new RuntimeException("arc.reader() == null");
            Scorer scorer = weight.scorer(arc, true, false, arc.reader().getLiveDocs());
            if (scorer != null) {
                while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    LuceneUtil.getFrequenciesFromTermVector(reader, scorer.docID() + arc.docBase, luceneField,
                            freq);
                }
            }
        }
        return freq;
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.SearcherImpl.java
License:Apache License
@Override
public void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords,
        boolean fillInDefaultsIfNotFound) {
    if (startsOfWords.length == 0)
        return; // nothing to do
    try {
        // Determine lowest and highest word position we'd like to know something about.
        // This saves a little bit of time for large result sets.
        int minP = -1, maxP = -1;
        int numStarts = startsOfWords.length;
        int numEnds = endsOfWords.length;
        for (int i = 0; i < numStarts; i++) {
            if (startsOfWords[i] < minP || minP == -1)
                minP = startsOfWords[i];
            if (startsOfWords[i] > maxP)
                maxP = startsOfWords[i];
        }
        for (int i = 0; i < numEnds; i++) {
            if (endsOfWords[i] < minP || minP == -1)
                minP = endsOfWords[i];
            if (endsOfWords[i] > maxP)
                maxP = endsOfWords[i];
        }
        if (minP < 0 || maxP < 0)
            throw new RuntimeException("Can't determine min and max positions");

        String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName);
        org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName);
        if (terms == null)
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no term vector");
        if (!terms.hasPositions())
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no character postion information");

        //int lowestPos = -1, highestPos = -1;
        int lowestPosFirstChar = -1, highestPosLastChar = -1;
        int total = numStarts + numEnds;
        boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes!
        int found = 0;

        // Iterate over terms
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            PostingsEnum dpe = termsEnum.postings(null, null, PostingsEnum.POSITIONS);

            // Iterate over docs containing this term (NOTE: should be only one doc!)
            while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

                // Iterate over positions of this term in this doc
                int positionsRead = 0;
                int numberOfPositions = dpe.freq();
                while (positionsRead < numberOfPositions) {
                    int position = dpe.nextPosition();
                    if (position == -1)
                        break;
                    positionsRead++;

                    // Keep track of the lowest and highest char pos, so
                    // we can fill in the character positions we didn't find
                    int startOffset = dpe.startOffset();
                    if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) {
                        lowestPosFirstChar = startOffset;
                    }
                    int endOffset = dpe.endOffset();
                    if (endOffset > highestPosLastChar) {
                        highestPosLastChar = endOffset;
                    }

                    // We've calculated the min and max word positions in advance, so
                    // we know we can skip this position if it's outside the range we're interested in.
                    // (Saves a little time for large result sets)
                    if (position < minP || position > maxP) {
                        continue;
                    }
                    for (int m = 0; m < numStarts; m++) {
                        if (!done[m] && position == startsOfWords[m]) {
                            done[m] = true;
                            startsOfWords[m] = startOffset;
                            found++;
                        }
                    }
                    for (int m = 0; m < numEnds; m++) {
                        if (!done[numStarts + m] && position == endsOfWords[m]) {
                            done[numStarts + m] = true;
                            endsOfWords[m] = endOffset;
                            found++;
                        }
                    }
                    // NOTE: we might be tempted to break here if found == total,
                    // but that would foul up our calculation of highestPosLastChar and
                    // lowestPosFirstChar.
                }
            }
        }
        if (found < total) {
            if (!fillInDefaultsIfNotFound)
                throw new RuntimeException("Could not find all character offsets!");
            if (lowestPosFirstChar < 0 || highestPosLastChar < 0)
                throw new RuntimeException("Could not find default char positions!");
            for (int m = 0; m < numStarts; m++) {
                if (!done[m])
                    startsOfWords[m] = lowestPosFirstChar;
            }
            for (int m = 0; m < numEnds; m++) {
                if (!done[numStarts + m])
                    endsOfWords[m] = highestPosLastChar;
            }
        }
    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}
From source file:nl.inl.blacklab.search.SearcherImpl.java
License:Apache License
@Override
public void delete(Query q) {
    if (!indexMode)
        throw new RuntimeException("Cannot delete documents, not in index mode");
    try {
        // Open a fresh reader to execute the query
        try (IndexReader freshReader = DirectoryReader.open(indexWriter, false)) {
            // Execute the query, iterate over the docs and delete from FI and CS.
            IndexSearcher s = new IndexSearcher(freshReader);
            Weight w = s.createNormalizedWeight(q, false);
            try (LeafReader scrw = SlowCompositeReaderWrapper.wrap(freshReader)) {
                Scorer sc = w.scorer(scrw.getContext(), MultiFields.getLiveDocs(freshReader));
                if (sc == null)
                    return; // no matching documents

                // Iterate over matching docs
                while (true) {
                    int docId;
                    try {
                        docId = sc.nextDoc();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    if (docId == DocIdSetIterator.NO_MORE_DOCS)
                        break;
                    Document d = freshReader.document(docId);

                    deleteFromForwardIndices(d);

                    // Delete this document in all content stores
                    contentStores.deleteDocument(d);
                }
            }
        } finally {
            reader.close();
        }
        // Finally, delete the documents from the Lucene index
        indexWriter.deleteDocuments(q);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:nl.inl.blacklab.TestUtil.java
License:Apache License
public static void assertEquals(Spans expected, Spans actual, boolean skipFirstNextDoc) throws IOException {
    int docNumber = 0, hitNumber;
    boolean firstDoc = true;
    while (true) {
        int actualDocId;
        if (firstDoc && skipFirstNextDoc) {
            // Actual Spans already skipped to document for testing. Don't .nextDoc() this time.
            firstDoc = false;
            actualDocId = actual.docID();
        } else {
            actualDocId = actual.nextDoc();
        }
        docNumber++;
        hitNumber = 0;
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.nextDoc(), actualDocId);
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", expected.docID(), actual.docID());
        Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
        if (actualDocId == DocIdSetIterator.NO_MORE_DOCS)
            break;
        Assert.assertEquals(-1, actual.startPosition());
        Assert.assertEquals(-1, actual.endPosition());
        boolean first = true;
        while (true) {
            int actualStartPos = actual.nextStartPosition();
            if (first) {
                // .nextDoc() should always place us in a document with at least 1 hit
                first = false;
                Assert.assertFalse(actualStartPos == Spans.NO_MORE_POSITIONS);
            }
            hitNumber++;
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.nextStartPosition(),
                    actualStartPos);
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", expected.startPosition(),
                    actual.startPosition());
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", actualStartPos,
                    actual.startPosition());
            Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", expected.endPosition(),
                    actual.endPosition());
            if (actualStartPos == Spans.NO_MORE_POSITIONS) {
                Assert.assertEquals(StringUtil.ordinal(docNumber) + " doc id", actualDocId, actual.docID());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                        actual.nextStartPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": start pos", Spans.NO_MORE_POSITIONS,
                        actual.startPosition());
                Assert.assertEquals(hitDesc(docNumber, hitNumber) + ": end pos", Spans.NO_MORE_POSITIONS,
                        actual.endPosition());
                break;
            }
        }
    }
}