Example usage for org.apache.lucene.search.spell WordBreakSpellChecker SEPARATOR_TERM

List of usage examples for org.apache.lucene.search.spell WordBreakSpellChecker SEPARATOR_TERM

Introduction

On this page you can find example usage for org.apache.lucene.search.spell WordBreakSpellChecker SEPARATOR_TERM.

Prototype

Term SEPARATOR_TERM

To view the source code for org.apache.lucene.search.spell WordBreakSpellChecker SEPARATOR_TERM, click the Source Link.

Document

Term that can be used to prohibit adjacent terms from being combined

Usage

From source file: org/apache/solr/spelling/WordBreakSolrSpellChecker.java

License: Apache License

/**
 * Produces word-break and word-combination suggestions for the query tokens in
 * {@code options} and interleaves both kinds into one {@link SpellingResult}.
 *
 * <p>Each token becomes a {@link Term} on this checker's field. A
 * {@link WordBreakSpellChecker#SEPARATOR_TERM} is inserted between two adjacent
 * tokens whenever their prohibited/required flags differ, or the earlier token is
 * flagged as preceding a new boolean operator, so that such neighbours are never
 * offered as a combination.
 *
 * @param options the request: index reader, tokens, suggestion count and suggest mode
 * @return the interleaved break and combine suggestions, highest frequency first
 * @throws IOException propagated from the index and spell-checker calls made here
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    IndexReader ir = options.reader;
    int numSuggestions = options.count;

    StringBuilder sb = new StringBuilder();
    Token[] tokenArr = options.tokens.toArray(new Token[options.tokens.size()]);
    List<Term> termArr = new ArrayList<Term>(options.tokens.size() + 2);
    // Kept index-for-index in step with termArr (null where termArr holds a separator).
    // CombineSuggestion.originalTermIndexes are positions in termArr, so they must be
    // resolved against this list; indexing tokenArr directly is wrong as soon as a
    // separator has been inserted.
    List<Token> tokensAlignedWithTerms = new ArrayList<Token>(options.tokens.size() + 2);

    List<ResultEntry> breakSuggestionList = new ArrayList<ResultEntry>();
    boolean lastOneProhibited = false;
    boolean lastOneRequired = false;
    boolean lastOnePrecedesNewBooleanOp = false;
    for (int i = 0; i < tokenArr.length; i++) {
        int flags = tokenArr[i].getFlags();
        boolean prohibited = (flags
                & QueryConverter.PROHIBITED_TERM_FLAG) == QueryConverter.PROHIBITED_TERM_FLAG;
        boolean required = (flags
                & QueryConverter.REQUIRED_TERM_FLAG) == QueryConverter.REQUIRED_TERM_FLAG;
        boolean precedesNewBooleanOp = (flags
                & QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG) == QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
        if (i > 0 && (prohibited != lastOneProhibited || required != lastOneRequired
                || lastOnePrecedesNewBooleanOp)) {
            termArr.add(WordBreakSpellChecker.SEPARATOR_TERM);
            tokensAlignedWithTerms.add(null);
        }
        lastOneProhibited = prohibited;
        lastOneRequired = required;
        lastOnePrecedesNewBooleanOp = precedesNewBooleanOp;

        Term thisTerm = new Term(field, tokenArr[i].toString());
        termArr.add(thisTerm);
        tokensAlignedWithTerms.add(tokenArr[i]);
        if (breakWords) {
            SuggestWord[][] breakSuggestions = wbsp.suggestWordBreaks(thisTerm, numSuggestions, ir,
                    options.suggestMode, sortMethod);
            for (SuggestWord[] breakSuggestion : breakSuggestions) {
                // Join the pieces with single spaces and derive one frequency for the whole
                // suggestion: the maximum piece frequency for the max-frequency sort method,
                // otherwise the sum of the piece frequencies.
                sb.setLength(0);
                boolean firstOne = true;
                int freq = 0;
                for (SuggestWord word : breakSuggestion) {
                    if (!firstOne) {
                        sb.append(" ");
                    }
                    firstOne = false;
                    sb.append(word.string);
                    if (sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY) {
                        freq = Math.max(freq, word.freq);
                    } else {
                        freq += word.freq;
                    }
                }
                breakSuggestionList.add(new ResultEntry(tokenArr[i], sb.toString(), freq));
            }
        }
    }

    List<ResultEntry> combineSuggestionList = Collections.emptyList();
    if (combineWords) {
        // Only query for combinations when they are enabled; the result was previously
        // computed and then discarded when combineWords was false.
        CombineSuggestion[] combineSuggestions = wbsp.suggestWordCombinations(
                termArr.toArray(new Term[termArr.size()]), numSuggestions, ir, options.suggestMode);
        combineSuggestionList = new ArrayList<ResultEntry>(combineSuggestions.length);
        for (CombineSuggestion cs : combineSuggestions) {
            int firstTermIndex = cs.originalTermIndexes[0];
            int lastTermIndex = cs.originalTermIndexes[cs.originalTermIndexes.length - 1];
            // Rebuild the original text covered by the combination. Separators prohibit
            // adjacent terms from being combined, so no slot in this range is expected
            // to be a (null) separator entry.
            sb.setLength(0);
            for (int i = firstTermIndex; i <= lastTermIndex; i++) {
                if (i > firstTermIndex) {
                    sb.append(" ");
                }
                sb.append(tokensAlignedWithTerms.get(i).toString());
            }
            Token token = new Token(sb.toString(), tokensAlignedWithTerms.get(firstTermIndex).startOffset(),
                    tokensAlignedWithTerms.get(lastTermIndex).endOffset());
            combineSuggestionList.add(new ResultEntry(token, cs.suggestion.string, cs.suggestion.freq));
        }
    }

    // Interleave the two lists of suggestions into one SpellingResult
    SpellingResult result = new SpellingResult();
    Iterator<ResultEntry> breakIter = breakSuggestionList.iterator();
    Iterator<ResultEntry> combineIter = combineSuggestionList.iterator();
    ResultEntry lastBreak = breakIter.hasNext() ? breakIter.next() : null;
    ResultEntry lastCombine = combineIter.hasNext() ? combineIter.next() : null;
    int breakCount = 0;
    int combineCount = 0;
    while (lastBreak != null || lastCombine != null) {
        // Decide which pending entry to emit: the only one left, else the one with the
        // higher frequency, else (on a tie) a combine entry unless more combine entries
        // than break entries have been advanced past so far.
        boolean takeCombine;
        if (lastBreak == null) {
            takeCombine = true;
        } else if (lastCombine == null) {
            takeCombine = false;
        } else if (lastBreak.freq < lastCombine.freq) {
            takeCombine = true;
        } else if (lastCombine.freq < lastBreak.freq) {
            takeCombine = false;
        } else {
            takeCombine = breakCount >= combineCount;
        }

        if (takeCombine) {
            result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
            result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
            lastCombine = null;
        } else {
            result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
            result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
            lastBreak = null;
        }

        // Stop once result.getSuggestions() has grown past the requested count.
        // NOTE(review): the comparison is strict (>), so one entry beyond numSuggestions
        // can be added before the loop exits — confirm this is intended.
        if (result.getSuggestions().size() > numSuggestions) {
            break;
        }
        // Refill whichever slot was just consumed.
        if (lastBreak == null && breakIter.hasNext()) {
            lastBreak = breakIter.next();
            breakCount++;
        }
        if (lastCombine == null && combineIter.hasNext()) {
            lastCombine = combineIter.next();
            combineCount++;
        }
    }
    return result;
}