Example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

List of usage examples for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS

Introduction

In this page you can find the example usage for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS.

Prototype

int NO_MORE_DOCS

To view the source code for org.apache.lucene.search DocIdSetIterator NO_MORE_DOCS, use the link below.

Click Source Link

Document

When returned by #nextDoc(), #advance(int), or #docID(), it means there are no more docs in the iterator.

Usage

From source file:io.anserini.index.IndexUtils.java

License:Apache License

/**
 * Prints the raw and analyzed form of a term, its collection and document
 * frequencies, and its postings list (document id and within-document
 * frequency) to standard output.
 *
 * <p>The term is parsed against {@code LuceneDocumentGenerator.FIELD_BODY} with an
 * {@code EnglishAnalyzer} constructed from an empty stop-word set. The parsed
 * query is cast to {@code TermQuery}, so input that does not parse to a single
 * term query fails with a {@code ClassCastException}.
 *
 * @param termStr the raw term to look up
 * @throws IOException if reading from the index fails
 * @throws ParseException if the query parser rejects {@code termStr}
 */
public void printTermCounts(String termStr) throws IOException, ParseException {
    EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
    TermQuery q = (TermQuery) qp.parse(termStr);
    Term t = q.getTerm();

    System.out.println("raw term:             " + termStr);
    System.out.println("stemmed term:         " + q.toString(LuceneDocumentGenerator.FIELD_BODY));
    System.out.println("collection frequency: " + reader.totalTermFreq(t));
    System.out.println("document frequency:   " + reader.docFreq(t));

    PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY,
            t.bytes());
    System.out.println("postings:\n");
    if (postingsEnum == null) {
        // getTermDocsEnum returns null when the field or the term is absent from
        // the index; there is nothing to list, so stop instead of hitting an NPE.
        return;
    }
    while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
    }
}

From source file:io.anserini.integration.IndexerTest.java

License:Apache License

/**
 * Prints, for every term of the "text" field in the first leaf of the given
 * reader, the term, its document frequency and its postings as
 * {@code (docID, freq)} pairs. Each term is written on its own line.
 *
 * @param reader the index reader whose first leaf is dumped
 * @throws IOException if reading from the index fails
 */
private void dumpPostings(IndexReader reader) throws IOException {
    LeafReader firstLeaf = reader.leaves().get(0).reader();
    TermsEnum dictionary = firstLeaf.terms("text").iterator();

    // Walk the term dictionary; next() yields null once it is exhausted.
    for (BytesRef current = dictionary.next(); current != null; current = dictionary.next()) {
        String token = current.utf8ToString();
        Term term = new Term("text", token);
        System.out.print(token + " (df = " + reader.docFreq(term) + "):");

        // Walk the postings of the current term within the first leaf.
        PostingsEnum postings = firstLeaf.postings(term);
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.print(String.format(" (%s, %s)", postings.docID(), postings.freq()));
        }
        System.out.println("");
    }
}

From source file:io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java

License:Apache License

/**
 * Moves to the next document that passes both filters.
 *
 * <p>While {@code tryAdvanceDocIdSetIterator()} succeeds, the current
 * doc-id iterator is drained; documents for which {@code docDeleted(..)} or
 * {@code belowMinScore(..)} returns true are skipped. The first remaining
 * document is handed to {@code onDoc(..)}. When an iterator is exhausted it is
 * reset to {@code null} before the next attempt.
 *
 * @return {@code true} if a document was passed to {@code onDoc(..)};
 *         {@code false} after {@code clearState()} was called because nothing is left
 * @throws IOException if advancing the underlying iterator fails
 */
private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        LeafReader leafReader = currentLeaf.reader();
        Bits liveDocs = leafReader.getLiveDocs();
        for (int docId = currentDocIdSetIt.nextDoc();
                docId != DocIdSetIterator.NO_MORE_DOCS;
                docId = currentDocIdSetIt.nextDoc()) {
            // Same evaluation order as a "deleted || belowMinScore" skip check.
            boolean accepted = !docDeleted(liveDocs, docId) && !belowMinScore(currentScorer);
            if (accepted) {
                onDoc(docId, leafReader);
                return true;
            }
        }
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Logs, for every document in the first leaf of the PlainText index that
 * contains {@code term} in the "content" field, the term's frequency in that
 * document and the document id. Positions and payloads are not iterated.
 *
 * <p>The index directory and the reader are closed before the method returns.
 *
 * @param term the term to look up in the "content" field
 * @throws IOException if the index cannot be opened or read
 */
private static void searchWithContext2(String term) throws IOException {

    logger.info("searchWithContext2(" + term + ")");
    // try-with-resources: both the directory and the reader hold file handles.
    try (Directory indexDirectory = FSDirectory
            .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {

        DocsAndPositionsEnum dape = ireader.leaves().get(0).reader()
                .termPositionsEnum(new Term("content", term));
        // The original code treats a null enum as "term not present" and logs nothing.
        if (null != dape) {
            while (dape.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                logger.info("dape.freq(): " + dape.freq() + " in " + dape.docID());
            }
        }
    }
}

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Runs a span-term query for {@code term} on the "content" field of the
 * PlainText index and logs, for each of the top 10 hits, every span position
 * together with the terms of that document's term vector whose positions lie
 * within three positions of the span.
 *
 * <p>Spans are taken from the first index leaf only. The index directory and
 * the reader are closed before the method returns. An {@code IOException} is
 * logged (with its stack trace) and not rethrown.
 *
 * @param term the term to search for in the "content" field
 */
private static void searchWithContext(String term) {

    logger.info("searchWithContext(" + term + ")");
    // try-with-resources: both the directory and the reader hold file handles.
    try (Directory indexDirectory = FSDirectory.open(
            Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader indexReader = DirectoryReader.open(indexDirectory)) {

        SpanQuery spanQuery = new SpanTermQuery(new Term("content", term));
        IndexSearcher searcher = new IndexSearcher(indexReader);
        IndexReader reader = searcher.getIndexReader();
        Spans spans = spanQuery.createWeight(searcher, false)
                .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
        ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs;

        logger.info("hits :" + sc.length);

        if (null != spans) {
            // NOTE(review): hits are visited in the order search() returned them while
            // spans.advance() is called on a single forward-only iterator; this presumably
            // relies on the doc ids being increasing and belonging to leaf 0 - confirm.
            for (int k = 0; k < sc.length; k++) {
                int docId = sc[k].doc;
                logger.info("docID: " + docId);
                int newDocID = spans.advance(docId);
                logger.info("newDocID: " + newDocID);

                int nextSpan = -1;
                while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) {
                    logger.info("nextSpan             : " + nextSpan);
                    logger.info("spans.startPosition(): " + spans.startPosition());
                    logger.info("spans.endPosition()  : " + spans.endPosition());
                    logger.info("spans.width()        : " + spans.width());

                    Fields fields = reader.getTermVectors(docId);
                    Terms terms = fields.terms("content");

                    TermsEnum termsEnum = terms.iterator();
                    BytesRef text;
                    PostingsEnum postingEnum = null;
                    // Context window: three positions on either side of the span.
                    int start = spans.startPosition() - 3;
                    int end = spans.endPosition() + 3;
                    while ((text = termsEnum.next()) != null) {
                        // Decode explicitly as UTF-8 rather than with the platform
                        // default charset.
                        String s = text.utf8ToString();
                        postingEnum = termsEnum.postings(postingEnum);
                        if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            int i = 0;
                            int position = -1;
                            while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) {
                                if (position >= start && position <= end) {
                                    logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset
                                            + " length: " + text.length);
                                }
                                i++;
                            }
                        }
                    }
                }
            }
        } else {
            logger.info("no " + term + " found!");
        }
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error(e.getMessage(), e);
    }
    logger.info("End.");
}

From source file:javaewah.EWAHCompressedBitmap.java

License:Open Source License

/**
 * Returns a {@code DocIdSetIterator} that yields the values produced by
 * {@code intIterator()}, followed by {@code DocIdSetIterator.NO_MORE_DOCS} once
 * that iterator is exhausted.
 *
 * @return a new iterator positioned before the first value
 */
@Override
public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {

        private final IntIterator under = intIterator();
        // The DocIdSetIterator contract requires docID() to report -1 until
        // nextDoc() or advance() has been called; a default of 0 would look
        // like a valid document.
        private int docID = -1;

        @Override
        public int docID() {
            return docID;
        }

        @Override
        public int nextDoc() throws IOException {
            if (under.hasNext()) {
                docID = under.next();
            } else {
                docID = DocIdSetIterator.NO_MORE_DOCS;
            }
            return docID;
        }

        @Override
        public int advance(int target) throws IOException {
            // Linear scan; a word-level skip could be faster.
            // nextDoc() updates docID itself, so only its result is tested here.
            int doc;
            do {
                doc = nextDoc();
            } while (doc < target);
            return doc;
        }
    };
}

From source file:javaewah.EWAHCompressedBitmapTest.java

License:Open Source License

/**
 * Test ewah compressed bitmap./*from w w w .  j a  v a 2s . c o  m*/
 */
@Test
public void testEWAHCompressedBitmap() throws IOException {
    System.out.println("testing EWAH");
    long zero = 0;
    long specialval = 1l | (1l << 4) | (1l << 63);
    long notzero = ~zero;
    EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap();
    myarray1.add(zero);
    myarray1.add(zero);
    myarray1.add(zero);
    myarray1.add(specialval);
    myarray1.add(specialval);
    myarray1.add(notzero);
    myarray1.add(zero);
    equal(myarray1.getPositions().size(), 6 + 64);
    EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap();
    myarray2.add(zero);
    myarray2.add(specialval);
    myarray2.add(specialval);
    myarray2.add(notzero);
    myarray2.add(zero);
    myarray2.add(zero);
    myarray2.add(zero);
    equal(myarray2.getPositions().size(), 6 + 64);
    List<Integer> data1 = myarray1.getPositions();
    List<Integer> data2 = myarray2.getPositions();
    ArrayList<Integer> logicalor = new ArrayList<Integer>();
    {
        HashSet<Integer> tmp = new HashSet<Integer>();
        tmp.addAll(data1);
        tmp.addAll(data2);
        logicalor.addAll(tmp);
    }
    Collections.sort(logicalor);
    ArrayList<Integer> logicaland = new ArrayList<Integer>();
    logicaland.addAll(data1);
    logicaland.retainAll(data2);
    Collections.sort(logicaland);
    EWAHCompressedBitmap arrayand = myarray1.and(myarray2);
    isTrue(arrayand.getPositions().equals(logicaland));
    EWAHCompressedBitmap arrayor = myarray1.or(myarray2);
    isTrue(arrayor.getPositions().equals(logicalor));
    EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1);
    isTrue(arrayandbis.getPositions().equals(logicaland));
    EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1);
    isTrue(arrayorbis.getPositions().equals(logicalor));
    EWAHCompressedBitmap x = new EWAHCompressedBitmap();
    for (Integer i : myarray1.getPositions()) {
        x.set(i.intValue());
    }
    isTrue(x.getPositions().equals(myarray1.getPositions()));
    x = new EWAHCompressedBitmap();
    for (Integer i : myarray2.getPositions()) {
        x.set(i.intValue());
    }
    isTrue(x.getPositions().equals(myarray2.getPositions()));
    x = new EWAHCompressedBitmap();
    int doc;
    for (DocIdSetIterator k = myarray1.iterator(); (doc = k.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;) {
        x.set(doc);
    }
    isTrue(x.getPositions().equals(myarray1.getPositions()));
    x = new EWAHCompressedBitmap();
    for (DocIdSetIterator k = myarray2.iterator(); (doc = k.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;) {
        x.set(doc);
    }
    isTrue(x.getPositions().equals(myarray2.getPositions()));
}

From source file:javaewah.EWAHCompressedBitmapTest.java

License:Open Source License

/**
 * Convenience function to assess equality between an array and an iterator over
 * Integers.
 *
 * <p>The iterator must yield exactly the values of {@code array}, in order and
 * with the same count; any difference is reported as a {@code RuntimeException}.
 *
 * @param i the iterator
 * @param array the array
 * @throws RuntimeException if a value differs, if the iterator yields more or
 *         fewer values than the array holds, or if the iterator fails with an
 *         {@code IOException} (kept as the cause)
 */
static void equal(DocIdSetIterator i, int[] array) {
    int cursor = 0;
    try {
        int doc;
        while ((doc = i.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (cursor >= array.length) {
                // Report a clear mismatch rather than an ArrayIndexOutOfBoundsException.
                throw new RuntimeException(
                        "iterator yields more than " + array.length + " values, extra value: " + doc);
            }
            int expected = array[cursor++];
            if (doc != expected) {
                throw new RuntimeException(doc + " != " + expected);
            }
        }
    } catch (IOException ex) {
        // An I/O failure must fail the comparison, not be printed and ignored.
        throw new RuntimeException(ex);
    }
    if (cursor != array.length) {
        // The iterator ended early: a prefix match is not equality.
        throw new RuntimeException(
                "iterator yields only " + cursor + " values, expected " + array.length);
    }
}

From source file:javaewah.EWAHCompressedBitmapTest.java

License:Open Source License

/**
 * Assess equality between an uncompressed bitmap and a compressed one,
 * part of a test contributed by Marc Polizzi
 *
 * <p>Every position yielded by the compressed bitmap's iterator must be set in
 * {@code jdkBitmap}, and every bit set in {@code jdkBitmap} must have been
 * yielded by the iterator.
 *
 * @param jdkBitmap the jdk bitmap
 * @param ewahBitmap the ewah bitmap
 * @throws IOException if the iterator fails
 * @throws RuntimeException if the two bitmaps do not hold the same bits
 */
static void assertEqualsIterator(BitSet jdkBitmap, EWAHCompressedBitmap ewahBitmap) throws IOException {
    // Track seen positions in a BitSet: constant-time membership and no boxing,
    // instead of List.contains() inside a loop.
    final BitSet seen = new BitSet();
    final DocIdSetIterator iter = ewahBitmap.iterator();
    int doc;
    while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (!jdkBitmap.get(doc)) {
            throw new RuntimeException("iterator: bitset got different bits");
        }
        seen.set(doc);
    }
    for (int pos = jdkBitmap.nextSetBit(0); pos >= 0; pos = jdkBitmap.nextSetBit(pos + 1)) {
        if (!seen.get(pos)) {
            throw new RuntimeException("iterator: bitset got different bits");
        }
    }
}

From source file:javaewah.EWAHCompressedBitmapTest.java

License:Open Source License

/**
 * Test massive xor.
 *
 * <p>Distributes the values 0..29999 over 16 compressed bitmaps and 16
 * {@code BitSet}s, XORs them together step by step while comparing both
 * representations, and finally checks that the iterator of the combined bitmap
 * yields 0, 1, 2, ... in order.
 */
@Test
public void testMassiveXOR() throws IOException {
    System.out.println("testing massive xor (can take a couple of minutes)");
    final int N = 16;
    EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
    BitSet[] bset = new BitSet[N];
    for (int slot = 0; slot < N; ++slot) {
        ewah[slot] = new EWAHCompressedBitmap();
        bset[slot] = new BitSet();
    }

    // Put each value into the same slot of both representations.
    for (int value = 0; value < 30000; ++value) {
        final int slot = (value + 2 * value * value) % ewah.length;
        ewah[slot].set(value);
        bset[slot].set(value);
    }

    // Fold all slots together with XOR, comparing after every step.
    EWAHCompressedBitmap answer = ewah[0];
    BitSet bitsetanswer = bset[0];
    for (int slot = 1; slot < ewah.length; ++slot) {
        answer = answer.xor(ewah[slot]);
        bitsetanswer.xor(bset[slot]);
        assertEqualsPositions(bitsetanswer, answer);
    }

    // The combined bitmap is expected to iterate as 0, 1, 2, ...
    int expected = 0;
    DocIdSetIterator iter = answer.iterator();
    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
        if (expected != doc) {
            System.out.println(answer.toDebugString());
        }
        equal(expected, doc);
        expected += 1;
    }
}