List of usage examples for the org.apache.solr.spelling.SpellingResult constructor SpellingResult()
public SpellingResult()
From source file:fi.nationallibrary.ndl.solrvoikko.VoikkoSpellChecker.java
License:Open Source License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { SpellingResult result = new SpellingResult(); // TODO/* ww w . jav a2s. c o m*/ return result; }
From source file:org.dice.solrenhancements.spellchecker.DiceDirectSolrSpellChecker.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); // load the typos file if not loaded SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount != null && freq > 0) ? options.alternativeTermCount : options.count;/*from w w w . j a v a 2s. co m*/ SuggestWord[] suggestions = checker.suggestSimilar(term, count, options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // Dice functionality: Allow also loading of a list of spelling corrections to apply in addition // to the standard functionality. This allows us to configure common typos to correct that may exceed the // max edit distance used by solr if (this.typosLoaded) { String normTokenText = normalize(tokenText); String match = this.mapTypos.get(normTokenText); if (match != null) { int matchFreq = options.reader.docFreq(new Term(field, match)); // only ever suggest values that are in the index and more frequent // than the original word if (matchFreq > 0 && matchFreq > freq) { result.add(token, match, matchFreq); } } } // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. 
if (options.alternativeTermCount != null && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if (suggestions.length == 0 && freq == 0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; }
From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }//from ww w . j av a 2 s . co m SpellingResult res = new SpellingResult(); for (Token currentToken : options.tokens) { String tokenText = currentToken.toString(); // we need to ensure that we combine matches for different cases, and take the most common // where multiple case versions exist final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>(); final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>(); List<Token> tokensToTry = new ArrayList<Token>(); tokensToTry.add(currentToken); tokensToTry.add(newToken(currentToken, toTitleCase(tokenText))); tokensToTry.add(newToken(currentToken, tokenText.toLowerCase())); tokensToTry.add(newToken(currentToken, tokenText.toUpperCase())); for (Token newToken : tokensToTry) { if (newToken.toString().equals(tokenText) && newToken != currentToken) { continue; } // if matches current token, skip List<LookupResult> tmpSuggestions = getLookupResults(options, newToken); if (tmpSuggestions != null) { for (LookupResult lu : tmpSuggestions) { final String key = lu.key.toString().toLowerCase(); LookupResult existing = htSuggestions.get(key); if (existing != null) { // replace if more frequent if (lu.value > existing.value) { htSuggestions.put(key, lu); } htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int) lu.value); } else { htSuggestions.put(key, lu); htSuggestionCounts.put(key, (int) lu.value); } } } } List<String> suggestions = new ArrayList<String>(htSuggestions.keySet()); if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } else { Collections.sort(suggestions, new Comparator<String>() { public int compare(String sug1, String sug2) { int sug1Count = 
htSuggestionCounts.get(sug1); int sug2Count = htSuggestionCounts.get(sug2); return sug2Count - sug1Count; } }); } for (String match : suggestions) { LookupResult lr = htSuggestions.get(match); res.add(currentToken, lr.key.toString(), (int) lr.value); } } return res; }
From source file:org.dice.solrenhancements.spellchecker.DiceSpellCheckComponent.java
License:Apache License
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return;//w w w . j a v a 2 s . co m } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q == null) { // enforce useage of the spellcheck.q parameter - i.e. a query we can tokenize with a regular tokenizer and not // a solr query for the spell checking. Useage of the SolrQueryConverter is buggy and breaks frequently throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The spellcheck.q parameter is required."); } else { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT); Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsInteger == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsInteger.longValue(); } SpellingResult spellingResult = null; if 
(maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount != null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest == null ? 0 : maxResultsForSuggest); NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate, isCorrectlySpelled); if (collate) { ModifiableSolrParams modParams = new ModifiableSolrParams(params); // SH: having both spellcheck.q and q set screws up collations for some queries, such as "java develope" modParams.remove(CommonParams.Q); //SH: Note that the collator runs a query against the DF specified field. Ideally it should //run the query against the spellchecker field but that's inaccessible here addCollationsToResponse(modParams, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap()); } NamedList response = new SimpleOrderedMap(); response.add("suggestions", suggestions); rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
From source file:org.dice.solrenhancements.spellchecker.DiceSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }/*from ww w. j av a2 s . c om*/ SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token currentToken : options.tokens) { scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); // get more than the requested suggestions as a lot get collapsed by the corrections List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10); if (suggestions == null || suggestions.size() == 0) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>(); for (LookupResult lr : suggestions) { String suggestion = lr.key.toString(); if (this.suggestionAnalyzer != null) { String correction = getAnalyzerResult(suggestion); // multiple could map to the same, so don't repeat suggestions if (!isStringNullOrEmpty(correction)) { if (lhm.containsKey(correction)) { lhm.put(correction, lhm.get(correction) + (int) lr.value); } else { lhm.put(correction, (int) lr.value); } } } else { lhm.put(suggestion, (int) lr.value); } if (lhm.size() >= options.count) { break; } } // sort by new doc frequency Map<String, Integer> orderedMap = null; if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { // retain the sort order from above orderedMap = lhm; } else { orderedMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String s1, String s2) { return lhm.get(s2).compareTo(lhm.get(s1)); } }); orderedMap.putAll(lhm); } for 
(Map.Entry<String, Integer> entry : orderedMap.entrySet()) { res.add(currentToken, entry.getKey(), entry.getValue()); } } return res; }