List of usage examples for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS
int NO_MORE_DOCS
To view the source code for org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS, click the Source Link.
From source file:io.anserini.index.IndexUtils.java
License:Apache License
public void printTermCounts(String termStr) throws IOException, ParseException { EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET); QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea); TermQuery q = (TermQuery) qp.parse(termStr); Term t = q.getTerm();//from w w w. ja v a 2 s . c om System.out.println("raw term: " + termStr); System.out.println("stemmed term: " + q.toString(LuceneDocumentGenerator.FIELD_BODY)); System.out.println("collection frequency: " + reader.totalTermFreq(t)); System.out.println("document frequency: " + reader.docFreq(t)); PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, t.bytes()); System.out.println("postings:\n"); while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq()); } }
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
private void dumpPostings(IndexReader reader) throws IOException { // This is how you iterate through terms in the postings list. LeafReader leafReader = reader.leaves().get(0).reader(); TermsEnum termsEnum = leafReader.terms("text").iterator(); BytesRef bytesRef = termsEnum.next(); while (bytesRef != null) { // This is the current term in the dictionary. String token = bytesRef.utf8ToString(); Term term = new Term("text", token); System.out.print(token + " (df = " + reader.docFreq(term) + "):"); PostingsEnum postingsEnum = leafReader.postings(term); while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { System.out.print(String.format(" (%s, %s)", postingsEnum.docID(), postingsEnum.freq())); }/*from w w w. j a v a 2 s.co m*/ System.out.println(""); bytesRef = termsEnum.next(); } }
From source file:io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java
License:Apache License
/**
 * Moves to the next document that passes both the deletion and the minimum-score check.
 *
 * <p>While {@code tryAdvanceDocIdSetIterator()} succeeds, the current iterator is drained;
 * the first document that is neither deleted nor below the minimum score is handed to
 * {@code onDoc}. When an iterator is exhausted it is reset to {@code null} before the next
 * attempt.
 *
 * @return {@code true} once a document was passed to {@code onDoc}; {@code false} after
 *     {@code clearState()} when no iterator could be advanced any more
 * @throws IOException if the underlying iterator fails
 */
private boolean innerMoveNext() throws IOException {
  while (tryAdvanceDocIdSetIterator()) {
    LeafReader leafReader = currentLeaf.reader();
    Bits liveDocs = leafReader.getLiveDocs();

    for (int docId = currentDocIdSetIt.nextDoc();
        docId != DocIdSetIterator.NO_MORE_DOCS;
        docId = currentDocIdSetIt.nextDoc()) {
      // Short-circuit order is kept: the score check only runs for live documents.
      boolean rejected = docDeleted(liveDocs, docId) || belowMinScore(currentScorer);
      if (!rejected) {
        onDoc(docId, leafReader);
        return true;
      }
    }
    currentDocIdSetIt = null;
  }
  clearState();
  return false;
}
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext2(String term) throws IOException { logger.info("searchWithContext2(" + term + ")"); Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); PostingsEnum pe = ireader.leaves().get(0).reader().postings(new Term("content", term)); DocsAndPositionsEnum dape = ireader.leaves().get(0).reader().termPositionsEnum(new Term("content", term)); if (null != dape) { while (dape.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { logger.info("dape.freq(): " + dape.freq() + " in " + dape.docID()); // for (int i = 0; i < dape.freq(); i++) { // int position = dape.nextPosition(); // BytesRef payload = dape.getPayload(); // logger.info("dape.getPayload(): " + new String(payload.bytes) + " in " + dape.docID()); // } }//from www .ja v a 2 s . c o m } }
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {/*from w w w . j a va 2s . c o m*/ logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s = 
new String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:javaewah.EWAHCompressedBitmap.java
License:Open Source License
@Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { final private IntIterator under = intIterator(); int docID; @Override/*from w w w . j a v a2 s .c o m*/ public int docID() { return docID; } @Override public int nextDoc() throws IOException { if (under.hasNext()) return docID = under.next(); else return docID = DocIdSetIterator.NO_MORE_DOCS; } @Override public int advance(int target) throws IOException { // we can do faster! -> wordinbits while ((docID = nextDoc()) < target) { } return docID; } }; }
From source file:javaewah.EWAHCompressedBitmapTest.java
License:Open Source License
/** * Test ewah compressed bitmap./*from w w w . j a v a 2s . c o m*/ */ @Test public void testEWAHCompressedBitmap() throws IOException { System.out.println("testing EWAH"); long zero = 0; long specialval = 1l | (1l << 4) | (1l << 63); long notzero = ~zero; EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap(); myarray1.add(zero); myarray1.add(zero); myarray1.add(zero); myarray1.add(specialval); myarray1.add(specialval); myarray1.add(notzero); myarray1.add(zero); equal(myarray1.getPositions().size(), 6 + 64); EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap(); myarray2.add(zero); myarray2.add(specialval); myarray2.add(specialval); myarray2.add(notzero); myarray2.add(zero); myarray2.add(zero); myarray2.add(zero); equal(myarray2.getPositions().size(), 6 + 64); List<Integer> data1 = myarray1.getPositions(); List<Integer> data2 = myarray2.getPositions(); ArrayList<Integer> logicalor = new ArrayList<Integer>(); { HashSet<Integer> tmp = new HashSet<Integer>(); tmp.addAll(data1); tmp.addAll(data2); logicalor.addAll(tmp); } Collections.sort(logicalor); ArrayList<Integer> logicaland = new ArrayList<Integer>(); logicaland.addAll(data1); logicaland.retainAll(data2); Collections.sort(logicaland); EWAHCompressedBitmap arrayand = myarray1.and(myarray2); isTrue(arrayand.getPositions().equals(logicaland)); EWAHCompressedBitmap arrayor = myarray1.or(myarray2); isTrue(arrayor.getPositions().equals(logicalor)); EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1); isTrue(arrayandbis.getPositions().equals(logicaland)); EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1); isTrue(arrayorbis.getPositions().equals(logicalor)); EWAHCompressedBitmap x = new EWAHCompressedBitmap(); for (Integer i : myarray1.getPositions()) { x.set(i.intValue()); } isTrue(x.getPositions().equals(myarray1.getPositions())); x = new EWAHCompressedBitmap(); for (Integer i : myarray2.getPositions()) { x.set(i.intValue()); } isTrue(x.getPositions().equals(myarray2.getPositions())); x = new 
EWAHCompressedBitmap(); int doc; for (DocIdSetIterator k = myarray1.iterator(); (doc = k.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;) { x.set(doc); } isTrue(x.getPositions().equals(myarray1.getPositions())); x = new EWAHCompressedBitmap(); for (DocIdSetIterator k = myarray2.iterator(); (doc = k.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;) { x.set(doc); } isTrue(x.getPositions().equals(myarray2.getPositions())); }
From source file:javaewah.EWAHCompressedBitmapTest.java
License:Open Source License
/**
 * Asserts that an iterator yields exactly the values of an array, in order.
 *
 * @param i the iterator; it is consumed up to the first mismatch or to its end
 * @param array the expected values
 * @throws RuntimeException if a value differs, if the iterator yields more or fewer values
 *     than {@code array} holds, or if the iterator fails with an {@link IOException}
 *     (wrapped as the cause)
 */
static void equal(DocIdSetIterator i, int[] array) {
  int cursor = 0;
  try {
    int doc;
    while ((doc = i.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (cursor >= array.length) {
        throw new RuntimeException(
            "iterator yields more than the expected " + array.length + " values");
      }
      int expected = array[cursor++];
      if (doc != expected) {
        throw new RuntimeException(doc + " != " + expected);
      }
    }
  } catch (IOException ex) {
    // An I/O failure must fail the check instead of letting it pass silently.
    throw new RuntimeException(ex);
  }
  // A prefix match is not equality: every expected value must have been seen.
  if (cursor != array.length) {
    throw new RuntimeException(
        "iterator ended after " + cursor + " values, expected " + array.length);
  }
}
From source file:javaewah.EWAHCompressedBitmapTest.java
License:Open Source License
/**
 * Asserts that a compressed bitmap's iterator yields exactly the bits set in an
 * uncompressed bitmap; part of a test contributed by Marc Polizzi.
 *
 * @param jdkBitmap the jdk bitmap
 * @param ewahBitmap the ewah bitmap
 * @throws IOException if the iterator fails
 * @throws RuntimeException if either bitmap has a bit the other one lacks
 */
static void assertEqualsIterator(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap)
    throws IOException {
  // Bits reported by the iterator; a BitSet gives constant-time membership checks
  // without boxing, where a List lookup per bit would be quadratic overall.
  final BitSet seen = new BitSet();
  final DocIdSetIterator iter = ewahBitmap.iterator();

  int doc;
  while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (!jdkBitmap.get(doc)) {
      throw new RuntimeException("iterator: bitset got different bits");
    }
    seen.set(doc);
  }

  // Reverse direction: every bit of the jdk bitmap must have been reported.
  for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap.nextSetBit(pos + 1)) {
    if (!seen.get(pos)) {
      throw new RuntimeException("iterator: bitset got different bits");
    }
  }
}
From source file:javaewah.EWAHCompressedBitmapTest.java
License:Open Source License
/**
 * Test massive xor: spreads the values 0..29999 over 16 bitmaps, xors them together one by
 * one (comparing against {@link BitSet} after each step) and finally checks that the
 * iterator of the result counts up from 0 without gaps.
 */
@Test
public void testMassiveXOR() throws IOException {
  System.out.println("testing massive xor (can take a couple of minutes)");
  final int N = 16;
  EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
  BitSet[] bset = new BitSet[N];
  for (int slot = 0; slot < N; ++slot) {
    ewah[slot] = new EWAHCompressedBitmap();
    bset[slot] = new BitSet();
  }

  for (int value = 0; value < 30000; ++value) {
    int slot = (value + 2 * value * value) % ewah.length;
    ewah[slot].set(value);
    bset[slot].set(value);
  }

  EWAHCompressedBitmap answer = ewah[0];
  BitSet bitsetanswer = bset[0];
  for (int idx = 1; idx < ewah.length; ++idx) {
    answer = answer.xor(ewah[idx]);
    bitsetanswer.xor(bset[idx]);
    assertEqualsPositions(bitsetanswer, answer);
  }

  int expected = 0;
  DocIdSetIterator iter = answer.iterator();
  int doc = iter.nextDoc();
  while (doc != DocIdSetIterator.NO_MORE_DOCS) {
    if (expected != doc) {
      // Dump the bitmap before the failing comparison to ease debugging.
      System.out.println(answer.toDebugString());
    }
    equal(expected, doc);
    expected += 1;
    doc = iter.nextDoc();
  }
}