List of usage examples for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR
SuggestMode SUGGEST_MORE_POPULAR
To view the source code for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR.
Click Source Link
From source file:de.blizzy.documentr.search.PageFinder.java
License:Open Source License
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication, IndexSearcher searcher) throws IOException, ParseException, TimeoutException { List<WordPosition> words = Lists.newArrayList(); TokenStream tokenStream = null;// w ww .j ava 2 s . c o m try { tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText)); tokenStream.addAttribute(CharTermAttribute.class); tokenStream.addAttribute(OffsetAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); String text = charTerm.toString(); if (StringUtils.isNotBlank(text)) { OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class); WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset()); words.add(word); } } tokenStream.end(); } finally { Util.closeQuietly(tokenStream); } Collections.reverse(words); StringBuilder suggestedSearchText = new StringBuilder(searchText); StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText); boolean foundSuggestions = false; String now = String.valueOf(System.currentTimeMillis()); String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ DirectSpellChecker spellChecker = new DirectSpellChecker(); IndexReader reader = searcher.getIndexReader(); for (WordPosition word : words) { Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord()); SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader, SuggestMode.SUGGEST_MORE_POPULAR); if (suggestions.length > 0) { String suggestedWord = suggestions[0].string; int start = word.getStart(); int end = word.getEnd(); suggestedSearchText.replace(start, end, suggestedWord); suggestedSearchTextHtml.replace(start, end, startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker); foundSuggestions = true; } } if (foundSuggestions) { String suggestion = suggestedSearchText.toString(); SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher); int suggestionTotalHits = suggestionResult.getTotalHits(); if (suggestionTotalHits > 0) { String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString()) .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$ return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits); } } return null; }
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication, IndexSearcher searcher) throws IOException, ParseException, TimeoutException { List<WordPosition> words = Lists.newArrayList(); TokenStream tokenStream = null;//from www. j av a 2 s . c om try { tokenStream = analyzer.tokenStream(ALL_TEXT_SUGGESTIONS, new StringReader(searchText)); tokenStream.addAttribute(CharTermAttribute.class); tokenStream.addAttribute(OffsetAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); String text = charTerm.toString(); if (StringUtils.isNotBlank(text)) { OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class); WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset()); words.add(word); } } tokenStream.end(); } finally { Closeables.closeQuietly(tokenStream); } Collections.reverse(words); StringBuilder suggestedSearchText = new StringBuilder(searchText); StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText); boolean foundSuggestions = false; String now = String.valueOf(System.currentTimeMillis()); String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ DirectSpellChecker spellChecker = new DirectSpellChecker(); IndexReader reader = searcher.getIndexReader(); for (WordPosition word : words) { Term term = new Term(ALL_TEXT_SUGGESTIONS, word.getWord()); SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader, SuggestMode.SUGGEST_MORE_POPULAR); if (suggestions.length > 0) { String suggestedWord = suggestions[0].string; int start = word.getStart(); int end = word.getEnd(); suggestedSearchText.replace(start, end, suggestedWord); suggestedSearchTextHtml.replace(start, end, startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker); foundSuggestions = true; } } if (foundSuggestions) { String suggestion = suggestedSearchText.toString(); SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher); int suggestionTotalHits = suggestionResult.getTotalHits(); if (suggestionTotalHits > 0) { String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString()) .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$ return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits); } } return null; }
From source file:org.apache.solr.handler.component.SpellCheckComponent.java
License:Apache License
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return;//from w w w .j a va 2 s . c o m } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q != null) { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } else { q = rb.getQueryString(); if (q == null) { q = params.get(CommonParams.Q); } tokens = queryConverter.convert(q); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT); Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsInteger == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsInteger.longValue(); } SpellingResult spellingResult = null; if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount != null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest == null ? 0 : maxResultsForSuggest); NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate, isCorrectlySpelled); if (collate) { addCollationsToResponse(params, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap()); } NamedList response = new SimpleOrderedMap(); response.add("suggestions", suggestions); rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
From source file:org.apache.solr.spelling.suggest.Suggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }/*from w ww .j a v a 2 s.c o m*/ SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token t : options.tokens) { scratch.chars = t.buffer(); scratch.offset = 0; scratch.length = t.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count); if (suggestions == null) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } for (LookupResult lr : suggestions) { res.add(t, lr.key.toString(), (int) lr.value); } } return res; }
From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }//from w w w. j av a2 s .c o m SpellingResult res = new SpellingResult(); for (Token currentToken : options.tokens) { String tokenText = currentToken.toString(); // we need to ensure that we combine matches for different cases, and take the most common // where multiple case versions exist final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>(); final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>(); List<Token> tokensToTry = new ArrayList<Token>(); tokensToTry.add(currentToken); tokensToTry.add(newToken(currentToken, toTitleCase(tokenText))); tokensToTry.add(newToken(currentToken, tokenText.toLowerCase())); tokensToTry.add(newToken(currentToken, tokenText.toUpperCase())); for (Token newToken : tokensToTry) { if (newToken.toString().equals(tokenText) && newToken != currentToken) { continue; } // if matches current token, skip List<LookupResult> tmpSuggestions = getLookupResults(options, newToken); if (tmpSuggestions != null) { for (LookupResult lu : tmpSuggestions) { final String key = lu.key.toString().toLowerCase(); LookupResult existing = htSuggestions.get(key); if (existing != null) { // replace if more frequent if (lu.value > existing.value) { htSuggestions.put(key, lu); } htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int) lu.value); } else { htSuggestions.put(key, lu); htSuggestionCounts.put(key, (int) lu.value); } } } } List<String> suggestions = new ArrayList<String>(htSuggestions.keySet()); if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } else { Collections.sort(suggestions, new Comparator<String>() { public int compare(String sug1, String sug2) { int sug1Count = htSuggestionCounts.get(sug1); int sug2Count = htSuggestionCounts.get(sug2); return sug2Count - sug1Count; } }); } for (String match : suggestions) { LookupResult lr = htSuggestions.get(match); res.add(currentToken, lr.key.toString(), (int) lr.value); } } return res; }
From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java
License:Apache License
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) { CharsRef scratch = new CharsRef(); scratch.chars = currentToken.buffer(); scratch.offset = 0;//from w ww . j a v a2 s .com scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count); if (suggestions == null || suggestions.size() == 0) { return null; } return suggestions; }
From source file:org.dice.solrenhancements.spellchecker.DiceSpellCheckComponent.java
License:Apache License
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return;// w ww . j a va 2s . c o m } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q == null) { // enforce useage of the spellcheck.q parameter - i.e. a query we can tokenize with a regular tokenizer and not // a solr query for the spell checking. Useage of the SolrQueryConverter is buggy and breaks frequently throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The spellcheck.q parameter is required."); } else { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT); Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsInteger == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsInteger.longValue(); } SpellingResult spellingResult = null; if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount != null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest == null ? 0 : maxResultsForSuggest); NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate, isCorrectlySpelled); if (collate) { ModifiableSolrParams modParams = new ModifiableSolrParams(params); // SH: having both spellcheck.q and q set screws up collations for some queries, such as "java develope" modParams.remove(CommonParams.Q); //SH: Note that the collator runs a query against the DF specified field. Ideally it should //run the query against the spellchecker field but that's inaccessible here addCollationsToResponse(modParams, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap()); } NamedList response = new SimpleOrderedMap(); response.add("suggestions", suggestions); rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
From source file:org.dice.solrenhancements.spellchecker.DiceSuggester.java
License:Apache License
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; }//from w ww . j a v a 2 s . co m SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token currentToken : options.tokens) { scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); // get more than the requested suggestions as a lot get collapsed by the corrections List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10); if (suggestions == null || suggestions.size() == 0) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>(); for (LookupResult lr : suggestions) { String suggestion = lr.key.toString(); if (this.suggestionAnalyzer != null) { String correction = getAnalyzerResult(suggestion); // multiple could map to the same, so don't repeat suggestions if (!isStringNullOrEmpty(correction)) { if (lhm.containsKey(correction)) { lhm.put(correction, lhm.get(correction) + (int) lr.value); } else { lhm.put(correction, (int) lr.value); } } } else { lhm.put(suggestion, (int) lr.value); } if (lhm.size() >= options.count) { break; } } // sort by new doc frequency Map<String, Integer> orderedMap = null; if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { // retain the sort order from above orderedMap = lhm; } else { orderedMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String s1, String s2) { return lhm.get(s2).compareTo(lhm.get(s1)); } }); orderedMap.putAll(lhm); } for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) { res.add(currentToken, entry.getKey(), entry.getValue()); } } return res; }
From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java
License:Apache License
private static SuggestMode resolveSuggestMode(String suggestMode) { suggestMode = suggestMode.toLowerCase(Locale.US); if ("missing".equals(suggestMode)) { return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; } else if ("popular".equals(suggestMode)) { return SuggestMode.SUGGEST_MORE_POPULAR; } else if ("always".equals(suggestMode)) { return SuggestMode.SUGGEST_ALWAYS; } else {//from ww w . ja v a 2 s . com throw new IllegalArgumentException("Illegal suggest mode " + suggestMode); } }
From source file:org.elasticsearch.search.suggest.phrase.NoisyChannelSpellCheckerTests.java
License:Apache License
@Test public void testMarvelHeros() throws IOException { RAMDirectory dir = new RAMDirectory(); Map<String, Analyzer> mapping = new HashMap<String, Analyzer>(); mapping.put("body_ngram", new Analyzer() { @Override//from www .ja va2s .co m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader); ShingleFilter tf = new ShingleFilter(t, 2, 3); tf.setOutputUnigrams(false); return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, tf)); } }); mapping.put("body", new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader); return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, t)); } }); PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_41), mapping); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_41, wrapper); IndexWriter writer = new IndexWriter(dir, conf); BufferedReader reader = new BufferedReader(new InputStreamReader( NoisyChannelSpellCheckerTests.class.getResourceAsStream("/config/names.txt"), Charsets.UTF_8)); String line = null; while ((line = reader.readLine()) != null) { Document doc = new Document(); doc.add(new Field("body", line, TextField.TYPE_NOT_STORED)); doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED)); writer.addDocument(doc); } DirectoryReader ir = DirectoryReader.open(writer, false); WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker(); spellchecker.setMinQueryLength(1); DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5); Result result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2); Correction[] corrections = result.corrections; assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american <em>ace</em>")); assertThat(result.cutoffScore, greaterThan(0d)); result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1); corrections = result.corrections; assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ame")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame")); assertThat(result.cutoffScore, equalTo(Double.MIN_VALUE)); suggester = new NoisyChannelSpellChecker(0.85); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections; assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel")); assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr</em> the <em>god</em> jewel")); assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(), equalTo("xor the <em>god</em> jewel")); assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorn</em> the <em>god</em> jewel")); assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr</em> the got jewel")); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2).corrections; assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel")); assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel")); // Test some of the highlighting corner cases suggester = new NoisyChannelSpellChecker(0.85); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2).corrections; assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel")); assertThat(corrections[3].join(space).utf8ToString(), equalTo("xor teh god jewel")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr the god</em> jewel")); assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(), equalTo("xor <em>the god</em> jewel")); assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorn the god</em> jewel")); assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("xor teh <em>god</em> jewel")); // test synonyms Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader); TokenFilter filter = new LowerCaseFilter(Version.LUCENE_41, t); try { SolrSynonymParser parser = new SolrSynonymParser(true, false, new WhitespaceAnalyzer(Version.LUCENE_41)); ((SolrSynonymParser) parser).parse( new StringReader("usa => usa, america, american\nursa => usa, america, american")); filter = new SynonymFilter(filter, parser.build(), true); } catch (Exception e) { throw new RuntimeException(e); } return new TokenStreamComponents(t, filter); } }; spellchecker.setAccuracy(0.0f); spellchecker.setMinPrefix(1); spellchecker.setMinQueryLength(1); suggester = new NoisyChannelSpellChecker(0.85); wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections; assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>")); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body")); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections; assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>")); // Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body")); corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections; assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>")); }