Example usage for org.apache.solr.spelling SpellingResult add

List of usage examples for org.apache.solr.spelling SpellingResult add

Introduction

On this page you can find example usages of org.apache.solr.spelling.SpellingResult.add.

Prototype

public void add(Token token, String suggestion, int docFreq) 

Source Link

Document

Suggestions must be added with the best suggestion first.

Usage

From source file:org.dice.solrenhancements.spellchecker.DiceDirectSolrSpellChecker.java

License:Apache License

/**
 * Builds spelling suggestions for every token in {@code options.tokens}.
 *
 * <p>For each token the method records the token's document frequency, optionally adds a
 * correction from the configured typo map, and then adds the words returned by
 * {@code checker.suggestSimilar}. When alternatives for terms that exist in the index are
 * requested, the original term is placed in front of the checker's suggestions unless it is
 * already among them.
 *
 * @param options the tokens to check together with reader, count, mode and accuracy settings
 * @return the collected suggestions and token frequencies
 * @throws IOException if reading from the index fails
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {

    LOG.debug("getSuggestions: " + options.tokens);

    final SpellingResult result = new SpellingResult();

    // Float.MIN_VALUE means "no accuracy given in the request": use the checker's own setting.
    final float accuracy;
    if (options.accuracy == Float.MIN_VALUE) {
        accuracy = checker.getAccuracy();
    } else {
        accuracy = options.accuracy;
    }

    for (Token token : options.tokens) {
        final String text = token.toString();
        final Term term = new Term(field, text);
        final int docFreq = options.reader.docFreq(term);

        // True when the term exists in the index and alternatives for such terms were requested.
        final boolean wantAlternatives = options.alternativeTermCount != null && docFreq > 0;
        final int maxSuggestions = wantAlternatives ? options.alternativeTermCount : options.count;

        SuggestWord[] candidates = checker.suggestSimilar(term, maxSuggestions, options.reader,
                options.suggestMode, accuracy);
        result.addFrequency(token, docFreq);

        // Dice functionality: a configured list of known typos is applied in addition to the
        // standard behaviour, so common misspellings can be corrected even when they exceed
        // the max edit distance used by solr.
        if (this.typosLoaded) {
            String correction = this.mapTypos.get(normalize(text));
            if (correction != null) {
                int correctionFreq = options.reader.docFreq(new Term(field, correction));
                // Only suggest corrections that are in the index and more frequent than
                // the original word.
                if (correctionFreq > 0 && correctionFreq > docFreq) {
                    result.add(token, correction, correctionFreq);
                }
            }
        }

        // When alternatives to "correctly-spelled" terms are considered, the original term
        // is itself a viable suggestion and goes first, unless the checker already returned it.
        if (wantAlternatives) {
            boolean originalPresent = false;
            for (SuggestWord candidate : candidates) {
                if (candidate.string.equals(text)) {
                    originalPresent = true;
                    break;
                }
            }
            if (!originalPresent) {
                SuggestWord original = new SuggestWord();
                original.freq = docFreq;
                original.string = text;

                SuggestWord[] extended = new SuggestWord[candidates.length + 1];
                extended[0] = original;
                System.arraycopy(candidates, 0, extended, 1, candidates.length);
                candidates = extended;
            }
        }

        if (candidates.length > 0 || docFreq != 0) {
            for (SuggestWord candidate : candidates) {
                result.add(token, candidate.string, candidate.freq);
            }
        } else {
            // Unknown term without any suggestion: register the token with an empty list.
            List<String> none = Collections.emptyList();
            result.add(token, none);
        }
    }
    return result;
}

From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java

License:Apache License

/**
 * Looks up suggestions for each token in its original form and in title, lower and upper
 * case, and merges the results case-insensitively.
 *
 * <p>Results whose keys differ only by case are collapsed into one suggestion: the variant
 * with the highest individual value is the one reported, while the values of all variants are
 * summed and used for ordering when {@code SUGGEST_MORE_POPULAR} is requested. In every other
 * mode suggestions are ordered alphabetically by their lower-cased key.
 *
 * @param options the tokens to look up together with the suggest mode
 * @return the merged suggestions, or {@code EMPTY_RESULT} when no lookup has been built yet
 * @throws IOException declared by the overridden method
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }

    SpellingResult res = new SpellingResult();
    for (Token currentToken : options.tokens) {
        String tokenText = currentToken.toString();

        // we need to ensure that we combine matches for different cases, and take the most common
        // where multiple case versions exist
        final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>();
        final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>();

        // Collect the distinct texts to look up. A variant is skipped when it equals the
        // original text or a variant already queued (e.g. title case and upper case of a
        // one-letter token); looking the same text up twice would double-count its results.
        // Locale.ROOT keeps the case conversion independent of the JVM's default locale.
        List<Token> tokensToTry = new ArrayList<Token>();
        List<String> textsToTry = new ArrayList<String>();
        tokensToTry.add(currentToken);
        textsToTry.add(tokenText);
        String[] caseVariants = { toTitleCase(tokenText), tokenText.toLowerCase(java.util.Locale.ROOT),
                tokenText.toUpperCase(java.util.Locale.ROOT) };
        for (String variant : caseVariants) {
            if (!textsToTry.contains(variant)) {
                textsToTry.add(variant);
                tokensToTry.add(newToken(currentToken, variant));
            }
        }

        for (Token tokenToTry : tokensToTry) {
            List<LookupResult> tmpSuggestions = getLookupResults(options, tokenToTry);
            if (tmpSuggestions == null) {
                continue;
            }
            for (LookupResult lu : tmpSuggestions) {
                final String key = lu.key.toString().toLowerCase(java.util.Locale.ROOT);
                LookupResult existing = htSuggestions.get(key);
                if (existing != null) {
                    // replace if more frequent
                    if (lu.value > existing.value) {
                        htSuggestions.put(key, lu);
                    }
                    htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int) lu.value);
                } else {
                    htSuggestions.put(key, lu);
                    htSuggestionCounts.put(key, (int) lu.value);
                }
            }
        }

        List<String> suggestions = new ArrayList<String>(htSuggestions.keySet());
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        } else {
            // Highest combined count first. compareTo is used instead of subtracting the
            // counts, because a subtraction can overflow and report the wrong order.
            Collections.sort(suggestions, new Comparator<String>() {
                public int compare(String sug1, String sug2) {
                    return htSuggestionCounts.get(sug2).compareTo(htSuggestionCounts.get(sug1));
                }
            });
        }

        for (String match : suggestions) {
            LookupResult lr = htSuggestions.get(match);
            res.add(currentToken, lr.key.toString(), (int) lr.value);
        }

    }
    return res;
}

From source file:org.dice.solrenhancements.spellchecker.DiceSuggester.java

License:Apache License

/**
 * Looks up suggestions for each token and, when a suggestion analyzer is configured, maps
 * every suggestion to its analyzed form before reporting it.
 *
 * <p>Suggestions that map to the same analyzed form are merged and their values summed. With
 * {@code SUGGEST_MORE_POPULAR} the merged suggestions are reported by descending summed value;
 * otherwise they keep the order produced by sorting the raw lookup results. Tokens without any
 * lookup result are not added to the returned object.
 *
 * @param options the tokens to look up together with count and suggest mode
 * @return the collected suggestions, or {@code EMPTY_RESULT} when no lookup has been built yet
 * @throws IOException declared by the overridden method
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();

    for (Token currentToken : options.tokens) {
        // Point the reusable CharsRef at the token's character buffer instead of copying it.
        scratch.chars = currentToken.buffer();
        scratch.offset = 0;
        scratch.length = currentToken.length();
        boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
                && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester);

        // get more than the requested suggestions as a lot get collapsed by the corrections
        List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10);
        if (suggestions == null || suggestions.isEmpty()) {
            continue;
        }

        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        }

        // Insertion-ordered so that the order of the lookup results is retained.
        final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>();
        for (LookupResult lr : suggestions) {
            String suggestion = lr.key.toString();
            if (this.suggestionAnalyzer != null) {
                String correction = getAnalyzerResult(suggestion);
                // multiple could map to the same, so don't repeat suggestions
                if (!isStringNullOrEmpty(correction)) {
                    if (lhm.containsKey(correction)) {
                        lhm.put(correction, lhm.get(correction) + (int) lr.value);
                    } else {
                        lhm.put(correction, (int) lr.value);
                    }
                }
            } else {
                lhm.put(suggestion, (int) lr.value);
            }

            // Stop as soon as the requested number of distinct suggestions is reached.
            if (lhm.size() >= options.count) {
                break;
            }
        }

        // sort by new doc frequency
        Map<String, Integer> orderedMap = null;
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            // retain the sort order from above
            orderedMap = lhm;
        } else {
            orderedMap = new TreeMap<String, Integer>(new Comparator<String>() {
                @Override
                public int compare(String s1, String s2) {
                    int byFrequency = lhm.get(s2).compareTo(lhm.get(s1));
                    // A TreeMap treats keys that compare as 0 as the same key, so suggestions
                    // with equal frequency must be told apart or all but one would be dropped.
                    return byFrequency != 0 ? byFrequency : s1.compareTo(s2);
                }
            });
            orderedMap.putAll(lhm);
        }

        for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) {
            res.add(currentToken, entry.getKey(), entry.getValue());
        }

    }
    return res;
}