List of usage examples for org.apache.lucene.search.PhrasePositions#firstPosition()
final void firstPosition() throws IOException
From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java
License:Apache License
/**
 * Score a candidate doc for all slop-valid position-combinations (matches)
 * encountered while traversing/hopping the PhrasePositions.
 * <br> The score contribution of a match depends on the distance:
 * <br> - highest score for distance=0 (exact match).
 * <br> - score gets lower as distance gets higher.
 * <br>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
 * once for "a b" (distance=0), and once for "b a" (distance=2).
 * <br>Possibly not all valid combinations are encountered, because for efficiency
 * we always propagate the least PhrasePosition. This allows to base on
 * PriorityQueue and move forward faster.
 * As result, for example, document "a b c b a"
 * would score differently for queries "a b c"~4 and "c b a"~4, although
 * they really are equivalent.
 * Similarly, for doc "a b c b a f g", query "c b"~2
 * would get same score as "g f"~2, although "c b"~2 could be matched twice.
 * We may want to fix this in the future (currently not, for performance reasons).
 * <br>
 * Customization: besides counting matches, this method collects positions via
 * {@code addPositions} and, when any were collected, stores them in
 * {@code documentsToPositionsMap} under the current {@code docID()}.
 *
 * @return the number of matches counted for the current document (an integer count
 *         widened to {@code float}), or {@code 0.0f} when
 *         {@code initPhrasePositions()} returns {@code false}
 * @throws IOException declared for the position-advancing calls made by this method
 */
private float phraseFreq() throws IOException {
    // custom begins
    BitIntsHolder allPositions = new BitIntsHolder();
    BitIntsHolder positions = new BitIntsHolder();

    if (phrasePositions.length == 1) { // special handling for one term
        end = Integer.MIN_VALUE;
        PhrasePositions pp = phrasePositions[0];
        pp.firstPosition();
        if (pp.position > end) {
            end = pp.position;
        }
        int matchCount = 0;
        // NOTE(review): positions are recorded only after advancePP(pp) succeeds, so the
        // position set by firstPosition() above does not appear to be recorded here —
        // confirm against advancePP's contract that this is intended.
        while (advancePP(pp)) {
            allPositions.set(pp.position + pp.offset);
            addPositions(positions, allPositions, pp.position + pp.offset, 0);
            matchCount++;
        }
        if (!positions.isEmpty()) {
            documentsToPositionsMap.put(docID(), positions);
        }
        return matchCount;
    }
    // custom ends

    if (!initPhrasePositions()) {
        return 0.0f;
    }

    // custom begins
    // Local renamed from "phrasePositions" so it no longer shadows the field of that name.
    for (PhrasePositions queued : this.pq) {
        allPositions.set(queued.position + queued.offset);
    }
    // custom ends

    int numMatches = 0;
    PhrasePositions pp = pq.pop();
    int matchLength = end - pp.position;
    int next = pq.top().position;

    int lastEnd = this.end; // custom remember last matched position

    while (advancePP(pp)) {
        if (hasRpts && !advanceRpts(pp)) {
            break; // pps exhausted
        }

        allPositions.set(pp.position + pp.offset); // custom record every visited position

        if (pp.position > next) { // done minimizing current match-length
            if (matchLength <= slop) {
                numMatches++;
                // custom match found, remember positions
                addPositions(positions, allPositions, lastEnd, matchLength);
            }
            pq.add(pp);
            pp = pq.pop();
            next = pq.top().position;
            matchLength = end - pp.position;

            lastEnd = this.end; // custom remember position of last match
        } else {
            int matchLength2 = end - pp.position;
            if (matchLength2 < matchLength) {
                matchLength = matchLength2;
            }

            lastEnd = this.end; // custom remember position of last match
        }
    }

    // Account for the candidate match still pending when the loop ends.
    if (matchLength <= slop) {
        numMatches++;
        addPositions(positions, allPositions, lastEnd, matchLength); // custom match found, remember positions
    }

    // custom begins if some positions were found then store them
    if (!positions.isEmpty()) {
        documentsToPositionsMap.put(docID(), positions);
    }
    // custom ends

    return numMatches;
}
From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java
License:Apache License
/** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */ private void initSimple() throws IOException { pq.clear();//from w ww . j av a 2 s.c om // position pps and build queue from list for (PhrasePositions pp : phrasePositions) { pp.firstPosition(); if (pp.position > end) { end = pp.position; } pq.add(pp); } }
From source file:org.opengrok.suggest.query.customized.CustomSloppyPhraseScorer.java
License:Apache License
/** move all PPs to their first position */ private void placeFirstPositions() throws IOException { for (PhrasePositions pp : phrasePositions) { pp.firstPosition(); }// w w w .j ava2 s . com }