Example usage for org.apache.lucene.index PostingsEnum advance

List of usage examples for org.apache.lucene.index PostingsEnum advance

Introduction

In this page you can find the example usage for org.apache.lucene.index PostingsEnum advance.

Prototype

public abstract int advance(int target) throws IOException;

Source Link

Document

Advances to the first document beyond the current one whose document number is greater than or equal to target, and returns that document number.

Usage

From source file:com.github.flaxsearch.resources.PositionsResource.java

License:Apache License

@GET
public DocTermData getDocTermData(@QueryParam("segment") Integer segment, @PathParam("field") String field,
        @PathParam("term") String term, @PathParam("docId") int docId) throws Exception {

    // Locate the postings for the requested term and seek the enum to docId.
    TermsEnum termsEnum = readerManager.findTermPostings(segment, field, term);
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);

    // advance() returns the first doc >= docId; anything other than docId itself
    // means the term has no posting for that document.
    if (postings.advance(docId) != docId) {
        String segmentSuffix = segment == null ? "" : " in segment " + segment;
        throw new WebApplicationException(
                String.format(Locale.ROOT, "No document %d%s in index", docId, segmentSuffix),
                Response.Status.NOT_FOUND);
    }

    // Gather one PositionData entry per occurrence of the term in this document.
    List<PositionData> positions = new ArrayList<>();
    for (int occurrence = postings.freq(); occurrence > 0; occurrence--) {
        positions.add(new PositionData(postings));
    }

    return new DocTermData(docId, positions);
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * checks advancing docs/*from  w ww . j  a v  a2s .c  o  m*/
 */
/**
 * Checks that two postings enums stay in lockstep while iterating docs,
 * randomly mixing {@code nextDoc()} with {@code advance()} to a skipped-ahead target.
 */
public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
    if (leftDocs == null) {
        assertNull(rightDocs);
        return;
    }
    final int skipInterval = 16;
    final int averageGap = MAXDOC / (1 + docFreq);
    int doc = -1;

    for (;;) {
        if (random().nextBoolean()) {
            // step forward one doc on both sides
            doc = leftDocs.nextDoc();
            assertEquals(doc, rightDocs.nextDoc());
        } else {
            // jump ahead by a random (always non-negative) gap on both sides
            int target = doc + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
            doc = leftDocs.advance(target);
            assertEquals(doc, rightDocs.advance(target));
        }

        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            return;
        }
        // freqs are allowed to differ between the two enums, so they are not asserted
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * checks advancing docs + positions/*from   w w  w  .  ja  v a 2  s  .  c o m*/
 */
/**
 * Checks that two postings enums stay in lockstep while iterating docs AND
 * positions, randomly mixing {@code nextDoc()} with {@code advance()}.
 */
public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs)
        throws Exception {
    if (leftDocs == null || rightDocs == null) {
        assertNull(leftDocs);
        assertNull(rightDocs);
        return;
    }

    final int skipInterval = 16;
    final int averageGap = MAXDOC / (1 + docFreq);
    int doc = -1;

    for (;;) {
        if (random().nextBoolean()) {
            // step forward one doc on both sides
            doc = leftDocs.nextDoc();
            assertEquals(doc, rightDocs.nextDoc());
        } else {
            // jump ahead by a random (always non-negative) gap on both sides
            int target = doc + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
            doc = leftDocs.advance(target);
            assertEquals(doc, rightDocs.advance(target));
        }

        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            return;
        }

        // positions must agree occurrence-for-occurrence
        int freq = leftDocs.freq();
        assertEquals(freq, rightDocs.freq());
        for (int i = 0; i < freq; i++) {
            assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
            // payloads are allowed to differ (e.g. one side empty), so not compared
        }
    }
}

From source file:org.nlp4l.lucene.BuddyWordsFinder.java

License:Apache License

/**
 * Finds terms that co-occur with {@code term} in the given field, i.e. terms that
 * appear within a {@code slop}-sized position window around an occurrence of
 * {@code term}, scanning at most {@code maxDocsToAnalyze} documents.
 *
 * @param field the field to analyze
 * @param term  the base term whose neighbours are being collected
 * @return the top co-occurring terms (ascending by heap pop order, so the best
 *         score ends up last in the array), or {@code null} if the base term is
 *         filtered out, has no postings, or a scanned doc lacks the required
 *         term vectors / position data
 * @throws IOException on index access failure
 */
public Scorer[] findCoincidentalTerms(String field, BytesRef term) throws IOException {

    // Bail out early if the base term is excluded by the filter (stopword /
    // popularity thresholds — exact criteria live in the filter implementation).
    baseTermFilter.start(reader, field, term);
    if (baseTermFilter.skip(term) || baseTermFilter.skipByPopularity(term))
        return null;

    // NOTE(review): liveDocs is fetched but never consulted below — deleted docs
    // are therefore included in the analysis; confirm whether that is intended.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // Docs-only enum for the base term; null means the term has no postings.
    PostingsEnum de = MultiFields.getTermDocsEnum(reader, field, term);
    if (de == null)
        return null;
    int numDocsAnalyzed = 0;
    phraseTerms.clear();

    // Walk the docs containing the base term, up to the analysis cap.
    while (de.nextDoc() != PostingsEnum.NO_MORE_DOCS && numDocsAnalyzed < maxDocsToAnalyze) {
        int docId = de.docID();

        //first record all of the positions of the term in a bitset which
        // represents terms in the current doc.
        int freq = de.freq();
        // A second, positions-capable enum is opened and advanced to the same doc.
        PostingsEnum pe = MultiFields.getTermPositionsEnum(reader, field, term);
        int ret = pe.advance(docId);
        if (ret == PostingsEnum.NO_MORE_DOCS)
            continue;
        termPos.clear();
        for (int i = 0; i < freq; i++) {
            int pos = pe.nextPosition();
            // NOTE(review): positions >= termPos.size() are silently dropped here
            // rather than growing the bitset — verify this guard is intentional.
            if (pos < termPos.size())
                termPos.set(pos);
        }

        // now look at all OTHER terms in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        Fields vectors = reader.getTermVectors(docId);
        // check it has term vectors
        // NOTE(review): returning null aborts the whole analysis (discarding
        // results gathered so far) as soon as one doc lacks vectors.
        if (vectors == null)
            return null;
        Terms vector = vectors.terms(field);
        // check it has position info
        if (vector == null || !vector.hasPositions())
            return null;

        // Enumerate every distinct term stored in this doc's term vector.
        TermsEnum te = vector.iterator();
        BytesRef otherTerm = null;
        while ((otherTerm = te.next()) != null) {
            // skip the base term itself and anything the co-occurrence filter rejects
            if (term.bytesEquals(otherTerm))
                continue;
            coiTermFilter.start(reader, field, otherTerm);
            if (coiTermFilter.skip(otherTerm) || coiTermFilter.skipByPopularity(otherTerm))
                continue;

            // Seek the candidate term's positions enum to the current doc.
            PostingsEnum pe2 = MultiFields.getTermPositionsEnum(reader, field, otherTerm);
            ret = pe2.advance(docId);
            if (ret == PostingsEnum.NO_MORE_DOCS)
                continue;
            freq = pe2.freq();
            boolean matchFound = false;
            // For each occurrence of the candidate, test a [pos-slop, pos+slop]
            // window against the recorded base-term positions; count each
            // (doc, candidate) pair at most once (matchFound short-circuits).
            for (int i = 0; i < freq && (!matchFound); i++) {
                int pos = pe2.nextPosition();
                int startpos = Math.max(0, pos - slop);
                int endpos = pos + slop;
                for (int prevpos = startpos; (prevpos <= endpos) && (!matchFound); prevpos++) {
                    if (termPos.get(prevpos)) {
                        // Add term to hashmap containing co-occurence
                        // counts for this term
                        Scorer pt = phraseTerms.get(otherTerm.utf8ToString());
                        if (pt == null) {
                            pt = new Scorer(baseTermFilter.getCurrentTermDocFreq(), otherTerm.utf8ToString(),
                                    coiTermFilter.getCurrentTermDocFreq());
                            phraseTerms.put(pt.coiTerm, pt);
                        }
                        pt.incCoiDocCount();
                        matchFound = true;
                    }
                }
            }
        }
        numDocsAnalyzed++;
    } // end of while loop

    // now sort and dump the top terms associated with this term.
    // Keep only the best maxCoiTermsPerTerm scorers via a bounded priority queue.
    TopTerms topTerms = new TopTerms(maxCoiTermsPerTerm);
    for (String key : phraseTerms.keySet()) {
        Scorer pt = phraseTerms.get(key);
        topTerms.insertWithOverflow(pt);
    }
    // Pop the queue (smallest first) into the array back-to-front so the
    // resulting array is ordered best-first.
    Scorer[] tops = new Scorer[topTerms.size()];
    int tp = tops.length - 1;
    while (topTerms.size() > 0) {
        Scorer top = topTerms.pop();
        tops[tp--] = top;
    }
    return tops;
}