Example usage for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR

List of usage examples for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR

Introduction

In this page you can find the example usage for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR.

Prototype

SuggestMode SUGGEST_MORE_POPULAR

To view the source code for org.apache.lucene.search.spell SuggestMode SUGGEST_MORE_POPULAR.

Click Source Link

Document

Return only suggested words that are as frequent or more frequent than the searched word

Usage

From source file:de.blizzy.documentr.search.PageFinder.java

License:Open Source License

private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
        IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

    List<WordPosition> words = Lists.newArrayList();

    TokenStream tokenStream = null;// w  ww .j ava  2  s .  c o  m
    try {
        tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
        tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
            String text = charTerm.toString();
            if (StringUtils.isNotBlank(text)) {
                OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
                WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset());
                words.add(word);
            }
        }
        tokenStream.end();
    } finally {
        Util.closeQuietly(tokenStream);
    }

    Collections.reverse(words);

    StringBuilder suggestedSearchText = new StringBuilder(searchText);
    StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
    boolean foundSuggestions = false;
    String now = String.valueOf(System.currentTimeMillis());
    String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    IndexReader reader = searcher.getIndexReader();
    for (WordPosition word : words) {
        Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord());
        SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader,
                SuggestMode.SUGGEST_MORE_POPULAR);
        if (suggestions.length > 0) {
            String suggestedWord = suggestions[0].string;
            int start = word.getStart();
            int end = word.getEnd();
            suggestedSearchText.replace(start, end, suggestedWord);
            suggestedSearchTextHtml.replace(start, end,
                    startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker);

            foundSuggestions = true;
        }
    }

    if (foundSuggestions) {
        String suggestion = suggestedSearchText.toString();
        SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
        int suggestionTotalHits = suggestionResult.getTotalHits();
        if (suggestionTotalHits > 0) {
            String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                    .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
            return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits);
        }
    }

    return null;
}

From source file:de.blizzy.documentr.search.PageIndex.java

License:Open Source License

private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
        IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

    List<WordPosition> words = Lists.newArrayList();

    TokenStream tokenStream = null;//from www.  j  av  a 2 s  .  c om
    try {
        tokenStream = analyzer.tokenStream(ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
        tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
            String text = charTerm.toString();
            if (StringUtils.isNotBlank(text)) {
                OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
                WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset());
                words.add(word);
            }
        }
        tokenStream.end();
    } finally {
        Closeables.closeQuietly(tokenStream);
    }

    Collections.reverse(words);

    StringBuilder suggestedSearchText = new StringBuilder(searchText);
    StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
    boolean foundSuggestions = false;
    String now = String.valueOf(System.currentTimeMillis());
    String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    IndexReader reader = searcher.getIndexReader();
    for (WordPosition word : words) {
        Term term = new Term(ALL_TEXT_SUGGESTIONS, word.getWord());
        SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader,
                SuggestMode.SUGGEST_MORE_POPULAR);
        if (suggestions.length > 0) {
            String suggestedWord = suggestions[0].string;
            int start = word.getStart();
            int end = word.getEnd();
            suggestedSearchText.replace(start, end, suggestedWord);
            suggestedSearchTextHtml.replace(start, end,
                    startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker);

            foundSuggestions = true;
        }
    }

    if (foundSuggestions) {
        String suggestion = suggestedSearchText.toString();
        SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
        int suggestionTotalHits = suggestionResult.getTotalHits();
        if (suggestionTotalHits > 0) {
            String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                    .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
            return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits);
        }
    }

    return null;
}

From source file:org.apache.solr.handler.component.SpellCheckComponent.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
        return;//from   w  w w .j  a va  2  s  .  c  o  m
    }
    boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
    String q = params.get(SPELLCHECK_Q);
    SolrSpellChecker spellChecker = getSpellChecker(params);
    Collection<Token> tokens = null;

    if (q != null) {
        //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
        tokens = getTokens(q, spellChecker.getQueryAnalyzer());
    } else {
        q = rb.getQueryString();
        if (q == null) {
            q = params.get(CommonParams.Q);
        }
        tokens = queryConverter.convert(q);
    }
    if (tokens != null && tokens.isEmpty() == false) {
        if (spellChecker != null) {
            int count = params.getInt(SPELLCHECK_COUNT, 1);
            boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
            boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
            boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
            float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
            Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
            Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
            ModifiableSolrParams customParams = new ModifiableSolrParams();
            for (String checkerName : getDictionaryNames(params)) {
                customParams.add(getCustomParams(checkerName, params));
            }

            Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
            long hits = 0;
            if (hitsInteger == null) {
                hits = rb.getNumberDocumentsFound();
            } else {
                hits = hitsInteger.longValue();
            }
            SpellingResult spellingResult = null;
            if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
                SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
                if (onlyMorePopular) {
                    suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
                } else if (alternativeTermCount != null) {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }

                IndexReader reader = rb.req.getSearcher().getIndexReader();
                SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount,
                        suggestMode, extendedResults, accuracy, customParams);
                spellingResult = spellChecker.getSuggestions(options);
            } else {
                spellingResult = new SpellingResult();
            }
            boolean isCorrectlySpelled = hits > (maxResultsForSuggest == null ? 0 : maxResultsForSuggest);
            NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate,
                    isCorrectlySpelled);
            if (collate) {
                addCollationsToResponse(params, spellingResult, rb, q, suggestions,
                        spellChecker.isSuggestionsMayOverlap());
            }
            NamedList response = new SimpleOrderedMap();
            response.add("suggestions", suggestions);
            rb.rsp.add("spellcheck", response);

        } else {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: "
                    + getDictionaryNameAsSingleString(getDictionaryNames(params)));
        }
    }
}

From source file:org.apache.solr.spelling.suggest.Suggester.java

License:Apache License

@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }/*from  w  ww  .j a v  a 2  s.c  o m*/
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();
    for (Token t : options.tokens) {
        scratch.chars = t.buffer();
        scratch.offset = 0;
        scratch.length = t.length();
        boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
                && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester);
        List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
        if (suggestions == null) {
            continue;
        }
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        }
        for (LookupResult lr : suggestions) {
            res.add(t, lr.key.toString(), (int) lr.value);
        }
    }
    return res;
}

From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java

License:Apache License

@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }//from   w  w w. j av a2 s  .c o m

    SpellingResult res = new SpellingResult();
    for (Token currentToken : options.tokens) {
        String tokenText = currentToken.toString();

        // we need to ensure that we combine matches for different cases, and take the most common
        // where multiple case versions exist
        final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>();
        final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>();

        List<Token> tokensToTry = new ArrayList<Token>();
        tokensToTry.add(currentToken);
        tokensToTry.add(newToken(currentToken, toTitleCase(tokenText)));
        tokensToTry.add(newToken(currentToken, tokenText.toLowerCase()));
        tokensToTry.add(newToken(currentToken, tokenText.toUpperCase()));

        for (Token newToken : tokensToTry) {

            if (newToken.toString().equals(tokenText) && newToken != currentToken) {
                continue;
            }
            // if matches current token, skip
            List<LookupResult> tmpSuggestions = getLookupResults(options, newToken);
            if (tmpSuggestions != null) {
                for (LookupResult lu : tmpSuggestions) {
                    final String key = lu.key.toString().toLowerCase();
                    LookupResult existing = htSuggestions.get(key);
                    if (existing != null) {
                        // replace if more frequent
                        if (lu.value > existing.value) {
                            htSuggestions.put(key, lu);
                        }
                        htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int) lu.value);
                    } else {
                        htSuggestions.put(key, lu);
                        htSuggestionCounts.put(key, (int) lu.value);
                    }
                }
            }
        }

        List<String> suggestions = new ArrayList<String>(htSuggestions.keySet());
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        } else {
            Collections.sort(suggestions, new Comparator<String>() {
                public int compare(String sug1, String sug2) {
                    int sug1Count = htSuggestionCounts.get(sug1);
                    int sug2Count = htSuggestionCounts.get(sug2);
                    return sug2Count - sug1Count;
                }
            });
        }

        for (String match : suggestions) {
            LookupResult lr = htSuggestions.get(match);
            res.add(currentToken, lr.key.toString(), (int) lr.value);
        }

    }
    return res;
}

From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java

License:Apache License

private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) {
    CharsRef scratch = new CharsRef();
    scratch.chars = currentToken.buffer();
    scratch.offset = 0;//from w ww .  j a  v a2  s  .com
    scratch.length = currentToken.length();
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
            && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester);

    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null || suggestions.size() == 0) {
        return null;
    }

    return suggestions;
}

From source file:org.dice.solrenhancements.spellchecker.DiceSpellCheckComponent.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
        return;//  w ww .  j  a va 2s .  c  o  m
    }
    boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
    String q = params.get(SPELLCHECK_Q);
    SolrSpellChecker spellChecker = getSpellChecker(params);
    Collection<Token> tokens = null;

    if (q == null) {
        // enforce useage of the spellcheck.q parameter - i.e. a query we can tokenize with a regular tokenizer and not
        // a solr query for the spell checking. Useage of the SolrQueryConverter is buggy and breaks frequently
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The spellcheck.q parameter is required.");
    } else {
        //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
        tokens = getTokens(q, spellChecker.getQueryAnalyzer());
    }
    if (tokens != null && tokens.isEmpty() == false) {
        if (spellChecker != null) {
            int count = params.getInt(SPELLCHECK_COUNT, 1);
            boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
            boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
            boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
            float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
            Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
            Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
            ModifiableSolrParams customParams = new ModifiableSolrParams();
            for (String checkerName : getDictionaryNames(params)) {
                customParams.add(getCustomParams(checkerName, params));
            }

            Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
            long hits = 0;
            if (hitsInteger == null) {
                hits = rb.getNumberDocumentsFound();
            } else {
                hits = hitsInteger.longValue();
            }
            SpellingResult spellingResult = null;
            if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
                SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
                if (onlyMorePopular) {
                    suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
                } else if (alternativeTermCount != null) {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }

                IndexReader reader = rb.req.getSearcher().getIndexReader();
                SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount,
                        suggestMode, extendedResults, accuracy, customParams);
                spellingResult = spellChecker.getSuggestions(options);
            } else {
                spellingResult = new SpellingResult();
            }
            boolean isCorrectlySpelled = hits > (maxResultsForSuggest == null ? 0 : maxResultsForSuggest);
            NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate,
                    isCorrectlySpelled);
            if (collate) {
                ModifiableSolrParams modParams = new ModifiableSolrParams(params);
                // SH: having both spellcheck.q and q set screws up collations for some queries, such as "java develope"
                modParams.remove(CommonParams.Q);

                //SH: Note that the collator runs a query against the DF specified field. Ideally it should
                //run the query against the spellchecker field but that's inaccessible here
                addCollationsToResponse(modParams, spellingResult, rb, q, suggestions,
                        spellChecker.isSuggestionsMayOverlap());
            }
            NamedList response = new SimpleOrderedMap();
            response.add("suggestions", suggestions);
            rb.rsp.add("spellcheck", response);

        } else {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: "
                    + getDictionaryNameAsSingleString(getDictionaryNames(params)));
        }
    }
}

From source file:org.dice.solrenhancements.spellchecker.DiceSuggester.java

License:Apache License

@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }//from w ww  . j a  v  a 2  s . co  m
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();

    for (Token currentToken : options.tokens) {
        scratch.chars = currentToken.buffer();
        scratch.offset = 0;
        scratch.length = currentToken.length();
        boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
                && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester);

        // get more than the requested suggestions as a lot get collapsed by the corrections
        List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10);
        if (suggestions == null || suggestions.size() == 0) {
            continue;
        }

        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        }

        final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>();
        for (LookupResult lr : suggestions) {
            String suggestion = lr.key.toString();
            if (this.suggestionAnalyzer != null) {
                String correction = getAnalyzerResult(suggestion);
                // multiple could map to the same, so don't repeat suggestions
                if (!isStringNullOrEmpty(correction)) {
                    if (lhm.containsKey(correction)) {
                        lhm.put(correction, lhm.get(correction) + (int) lr.value);
                    } else {
                        lhm.put(correction, (int) lr.value);
                    }
                }
            } else {
                lhm.put(suggestion, (int) lr.value);
            }

            if (lhm.size() >= options.count) {
                break;
            }
        }

        // sort by new doc frequency
        Map<String, Integer> orderedMap = null;
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            // retain the sort order from above
            orderedMap = lhm;
        } else {
            orderedMap = new TreeMap<String, Integer>(new Comparator<String>() {
                @Override
                public int compare(String s1, String s2) {
                    return lhm.get(s2).compareTo(lhm.get(s1));
                }
            });
            orderedMap.putAll(lhm);
        }

        for (Map.Entry<String, Integer> entry : orderedMap.entrySet()) {
            res.add(currentToken, entry.getKey(), entry.getValue());
        }

    }
    return res;
}

From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java

License:Apache License

private static SuggestMode resolveSuggestMode(String suggestMode) {
    suggestMode = suggestMode.toLowerCase(Locale.US);
    if ("missing".equals(suggestMode)) {
        return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
    } else if ("popular".equals(suggestMode)) {
        return SuggestMode.SUGGEST_MORE_POPULAR;
    } else if ("always".equals(suggestMode)) {
        return SuggestMode.SUGGEST_ALWAYS;
    } else {//from ww w  .  ja v  a 2 s . com
        throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
    }
}

From source file:org.elasticsearch.search.suggest.phrase.NoisyChannelSpellCheckerTests.java

License:Apache License

@Test
public void testMarvelHeros() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    mapping.put("body_ngram", new Analyzer() {

        @Override//from  www  .ja  va2s  .co  m
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            ShingleFilter tf = new ShingleFilter(t, 2, 3);
            tf.setOutputUnigrams(false);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, tf));
        }

    });

    mapping.put("body", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, t));
        }

    });
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_41),
            mapping);

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_41, wrapper);
    IndexWriter writer = new IndexWriter(dir, conf);
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            NoisyChannelSpellCheckerTests.class.getResourceAsStream("/config/names.txt"), Charsets.UTF_8));
    String line = null;
    while ((line = reader.readLine()) != null) {
        Document doc = new Document();
        doc.add(new Field("body", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
    }

    DirectoryReader ir = DirectoryReader.open(writer, false);
    WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
            new BytesRef(" "), 0.5f);

    NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
    DirectSpellChecker spellchecker = new DirectSpellChecker();
    spellchecker.setMinQueryLength(1);
    DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body",
            SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
    Result result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body",
            wordScorer, 1, 2);
    Correction[] corrections = result.corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ace"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american <em>ace</em>"));
    assertThat(result.cutoffScore, greaterThan(0d));

    result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body",
            wordScorer, 0, 1);
    corrections = result.corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ame"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
    assertThat(result.cutoffScore, equalTo(Double.MIN_VALUE));

    suggester = new NoisyChannelSpellChecker(0.85);
    wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
            new BytesRef(" "), 0.5f);
    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir,
            "body", wordScorer, 0, 2).corrections;
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
    assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
    assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(),
            equalTo("<em>xorr</em> the <em>god</em> jewel"));
    assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(),
            equalTo("xor the <em>god</em> jewel"));
    assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(),
            equalTo("<em>xorn</em> the <em>god</em> jewel"));
    assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(),
            equalTo("<em>xorr</em> the got jewel"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir,
            "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
    assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
    assertThat(corrections[3].join(space).utf8ToString(), equalTo("xorr the got jewel"));

    // Test some of the highlighting corner cases
    suggester = new NoisyChannelSpellChecker(0.85);
    wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
            new BytesRef(" "), 0.5f);
    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir,
            "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
    assertThat(corrections[2].join(space).utf8ToString(), equalTo("xorn the god jewel"));
    assertThat(corrections[3].join(space).utf8ToString(), equalTo("xor teh god jewel"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(),
            equalTo("<em>xorr the god</em> jewel"));
    assertThat(corrections[1].join(space, preTag, postTag).utf8ToString(),
            equalTo("xor <em>the god</em> jewel"));
    assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(),
            equalTo("<em>xorn the god</em> jewel"));
    assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(),
            equalTo("xor teh <em>god</em> jewel"));

    // test synonyms

    Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            TokenFilter filter = new LowerCaseFilter(Version.LUCENE_41, t);
            try {
                SolrSynonymParser parser = new SolrSynonymParser(true, false,
                        new WhitespaceAnalyzer(Version.LUCENE_41));
                ((SolrSynonymParser) parser).parse(
                        new StringReader("usa => usa, america, american\nursa => usa, america, american"));
                filter = new SynonymFilter(filter, parser.build(), true);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return new TokenStreamComponents(t, filter);
        }
    };

    spellchecker.setAccuracy(0.0f);
    spellchecker.setMinPrefix(1);
    spellchecker.setMinQueryLength(1);
    suggester = new NoisyChannelSpellChecker(0.85);
    wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
            new BytesRef(" "), 0.5f);
    corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));

    generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
            10, null, analyzer, MultiFields.getTerms(ir, "body"));
    corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));

    // Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
    generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
            10, null, analyzer, MultiFields.getTerms(ir, "body"));
    corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
    assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
}