List of usage examples for org.apache.solr.spelling SpellingResult add
public void add(Token token, String suggestion, int docFreq)
From source file:org.dice.solrenhancements.spellchecker.DiceDirectSolrSpellChecker.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); // load the typos file if not loaded SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount != null && freq > 0) ? options.alternativeTermCount : options.count;// w ww . jav a 2s.com SuggestWord[] suggestions = checker.suggestSimilar(term, count, options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // Dice functionality: Allow also loading of a list of spelling corrections to apply in addition // to the standard functionality. This allows us to configure common typos to correct that may exceed the // max edit distance used by solr if (this.typosLoaded) { String normTokenText = normalize(tokenText); String match = this.mapTypos.get(normTokenText); if (match != null) { int matchFreq = options.reader.docFreq(new Term(field, match)); // only ever suggest values that are in the index and more frequent // than the original word if (matchFreq > 0 && matchFreq > freq) { result.add(token, match, matchFreq); } } } // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. if (options.alternativeTermCount != null && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if (suggestions.length == 0 && freq == 0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; }
From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }/*from w w w . j a v a 2 s.c o m*/ SpellingResult res = new SpellingResult(); for (Token currentToken : options.tokens) { String tokenText = currentToken.toString(); // we need to ensure that we combine matches for different cases, and take the most common // where multiple case versions exist final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>(); final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>(); List<Token> tokensToTry = new ArrayList<Token>(); tokensToTry.add(currentToken); tokensToTry.add(newToken(currentToken, toTitleCase(tokenText))); tokensToTry.add(newToken(currentToken, tokenText.toLowerCase())); tokensToTry.add(newToken(currentToken, tokenText.toUpperCase())); for (Token newToken : tokensToTry) { if (newToken.toString().equals(tokenText) && newToken != currentToken) { continue; } // if matches current token, skip List<LookupResult> tmpSuggestions = getLookupResults(options, newToken); if (tmpSuggestions != null) { for (LookupResult lu : tmpSuggestions) { final String key = lu.key.toString().toLowerCase(); LookupResult existing = htSuggestions.get(key); if (existing != null) { // replace if more frequent if (lu.value > existing.value) { htSuggestions.put(key, lu); } htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int) lu.value); } else { htSuggestions.put(key, lu); htSuggestionCounts.put(key, (int) lu.value); } } } } List<String> suggestions = new ArrayList<String>(htSuggestions.keySet()); if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } else { Collections.sort(suggestions, new Comparator<String>() { public int compare(String sug1, String sug2) { int sug1Count = htSuggestionCounts.get(sug1); int sug2Count = htSuggestionCounts.get(sug2); return sug2Count - sug1Count; } }); } for (String match : suggestions) { LookupResult lr = htSuggestions.get(match); res.add(currentToken, lr.key.toString(), (int) lr.value); } } return res; }
From source file:org.dice.solrenhancements.spellchecker.DiceSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }//from w ww.ja v a 2 s . c o m SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token currentToken : options.tokens) { scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); // get more than the requested suggestions as a lot get collapsed by the corrections List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10); if (suggestions == null || suggestions.size() == 0) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>(); for (LookupResult lr : suggestions) { String suggestion = lr.key.toString(); if (this.suggestionAnalyzer != null) { String correction = getAnalyzerResult(suggestion); // multiple could map to the same, so don't repeat suggestions if (!isStringNullOrEmpty(correction)) { if (lhm.containsKey(correction)) { lhm.put(correction, lhm.get(correction) + (int) lr.value); } else { lhm.put(correction, (int) lr.value); } } } else { lhm.put(suggestion, (int) lr.value); } if (lhm.size() >= options.count) { break; } } // sort by new doc frequency Map<String, Integer> orderedMap = null; if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { // retain the sort order from above orderedMap = lhm; } else { orderedMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String s1, String s2) { return lhm.get(s2).compareTo(lhm.get(s1)); } }); orderedMap.putAll(lhm); } for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) { res.add(currentToken, entry.getKey(), entry.getValue()); } } return res; }