Example usage for org.apache.lucene.search PhrasePositions firstPosition

List of usage examples for org.apache.lucene.search PhrasePositions firstPosition

Introduction

On this page you can find example usages of org.apache.lucene.search PhrasePositions firstPosition.

Prototype

final void firstPosition() throws IOException 

Source Link

Usage

From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java

License:Apache License

/**
 * Counts the slop-valid position combinations (matches) found in the current
 * candidate document while hopping through the PhrasePositions, and records
 * the positions that belong to those matches.
 * <br>A match contributes according to its distance:
 * <br> - distance=0 (exact match) contributes the most,
 * <br> - the contribution shrinks as the distance grows.
 * <br>Example: for query "a b"~2, document "x a b a y" can be counted twice:
 * once for "a b" (distance=0) and once for "b a" (distance=2).
 * <br>Not every valid combination is necessarily visited: for efficiency the
 * least PhrasePosition is always the one propagated, which lets the traversal
 * rely on a PriorityQueue and move forward faster.
 * As a consequence, document "a b c b a" would score differently for the
 * queries "a b c"~4 and "c b a"~4 even though they are really equivalent.
 * Similarly, for document "a b c b a f g", query "c b"~2 would get the same
 * score as "g f"~2 although "c b"~2 could be matched twice.
 * This may be fixed in the future (currently not, for performance reasons).
 * <br>Customization: whenever at least one match position was collected, the
 * collected positions are stored in {@code documentsToPositionsMap} under the
 * current {@code docID()}.
 *
 * @return the number of matches counted for the current document
 * @throws IOException if advancing any of the PhrasePositions fails
 */
private float phraseFreq() throws IOException {
    // custom: every position visited so far, and the positions attributed to matches
    BitIntsHolder seen = new BitIntsHolder();
    BitIntsHolder matched = new BitIntsHolder();

    // custom: a phrase consisting of a single term is handled without the queue
    if (phrasePositions.length == 1) {
        end = Integer.MIN_VALUE;
        PhrasePositions only = phrasePositions[0];
        only.firstPosition();
        end = Math.max(end, only.position);

        // NOTE(review): the position reached by firstPosition() is not recorded;
        // only positions reached through advancePP() are counted - confirm this is intended.
        int hits = 0;
        while (advancePP(only)) {
            int at = only.position + only.offset;
            seen.set(at);
            addPositions(matched, seen, at, 0);
            hits++;
        }
        if (!matched.isEmpty()) {
            documentsToPositionsMap.put(docID(), matched);
        }
        return hits;
    }

    if (!initPhrasePositions()) {
        return 0.0f;
    }

    // custom: mark the starting position of every queued PhrasePositions as visited
    for (PhrasePositions queued : pq) {
        seen.set(queued.position + queued.offset);
    }

    int matches = 0;
    PhrasePositions current = pq.pop();
    int bestLength = end - current.position;
    int nextTop = pq.top().position;
    int matchEnd = end; // custom: value of 'end' as of the previous step

    while (advancePP(current)) {
        if (hasRpts && !advanceRpts(current)) {
            break; // pps exhausted
        }

        seen.set(current.position + current.offset);

        if (current.position <= nextTop) {
            // still minimizing the length of the current match
            bestLength = Math.min(bestLength, end - current.position);
        } else {
            // done minimizing the current match length
            if (bestLength <= slop) {
                matches++;
                addPositions(matched, seen, matchEnd, bestLength); // custom: remember positions
            }
            pq.add(current);
            current = pq.pop();
            nextTop = pq.top().position;
            bestLength = end - current.position;
        }

        matchEnd = end; // custom: refreshed only after any match above has been recorded
    }

    // the last candidate is still pending once the loop ends
    if (bestLength <= slop) {
        matches++;
        addPositions(matched, seen, matchEnd, bestLength); // custom: remember positions
    }

    // custom: store the collected positions for this document, if any
    if (!matched.isEmpty()) {
        documentsToPositionsMap.put(docID(), matched);
    }
    return matches;
}

From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java

License:Apache License

/**
 * No repeats: the simplest and most common case, so this code is kept
 * deliberately small and efficient.
 * Clears the queue, moves every PhrasePositions to its first position,
 * raises {@code end} to the largest position seen and queues each entry.
 *
 * @throws IOException if positioning any of the PhrasePositions fails
 */
private void initSimple() throws IOException {
    pq.clear();
    for (PhrasePositions current : phrasePositions) {
        current.firstPosition();
        end = Math.max(end, current.position);
        pq.add(current);
    }
}

From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java

License:Apache License

/**
 * Moves every PhrasePositions to its first position.
 *
 * @throws IOException if positioning any of the PhrasePositions fails
 */
private void placeFirstPositions() throws IOException {
    final PhrasePositions[] all = phrasePositions;
    for (int i = 0; i < all.length; i++) {
        all[i].firstPosition();
    }
}