Example usage for org.apache.lucene.search.spell StringDistance getDistance

List of usage examples for org.apache.lucene.search.spell StringDistance getDistance

Introduction

On this page you can find example usages of the getDistance method of org.apache.lucene.search.spell.StringDistance.

Prototype

public float getDistance(String s1, String s2);

Source Link

Document

Returns a float between 0 and 1 based on how similar the specified strings are to one another.

Usage

From source file:com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java

/**
 * Brute-force fallback for terms the lucene-based lookup could not resolve.
 * Every needle's label is compared against the label of every candidate, so
 * this is considerably slower than the indexed approach.
 *
 * @param needles the URIs that produced errors in lucene
 * @param possibles every candidate URI, mapped to its label
 * @param hits receives one {@code Hit} per (needle, candidate) pair that
 * qualifies
 * @param levy the string distance implementation used to score a pair of
 * labels
 * @param minDistance the lowest score a candidate may have and still be
 * recorded as a hit
 */
private void fallbackResolve(Collection<IRI> needles, Map<IRI, String> possibles, MultiMap<IRI, Hit> hits,
        StringDistance levy, float minDistance) {
    log.debug("falling back to resolve " + needles.size() + " items");

    for (IRI unresolved : needles) {
        final String unresolvedLabel = labels.get(unresolved);

        for (Map.Entry<IRI, String> candidate : possibles.entrySet()) {
            final IRI candidateIri = candidate.getKey();
            final String candidateLabel = candidate.getValue();
            final float similarity = levy.getDistance(unresolvedLabel, candidateLabel);

            // Skip candidates that score too low, and never match a needle to itself.
            final boolean closeEnough = similarity >= minDistance;
            if (!closeEnough || candidateIri.equals(unresolved)) {
                continue;
            }

            Hit hit = new Hit(candidateIri, candidateLabel, uriToTypeLkp.get(candidateIri), similarity);
            hits.add(unresolved, hit);
        }
    }
}

From source file:org.apache.solr.spelling.SolrSpellChecker.java

License:Apache License

/**
 * Integrate spelling suggestions from the various shards in a distributed environment.
 *
 * <p>For every original term that all responding shards reported, each suggested
 * word is re-scored against the original with the configured string distance
 * (Levenshtein when none is available), candidates below the accuracy threshold
 * are dropped, and at most {@code count} of the remaining suggestions are added
 * to the result.
 *
 * @param mergeData the per-shard data collected for merging
 * @param numSug capacity of the per-term suggestion queue
 * @param count maximum number of suggestions to keep per original term
 * @param extendedResults when {@code true}, term and suggestion frequencies are
 * added to the result; otherwise only the suggested strings are added
 * @return the merged spelling result
 */
public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count,
        boolean extendedResults) {
    float accuracy = 0.5f;
    try {
        accuracy = getAccuracy();
    } catch (UnsupportedOperationException uoe) {
        //just use .5 as a default
    }

    StringDistance sd;
    try {
        StringDistance configured = getStringDistance();
        sd = configured == null ? new LevensteinDistance() : configured;
    } catch (UnsupportedOperationException uoe) {
        sd = new LevensteinDistance();
    }

    SpellingResult result = new SpellingResult();
    for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
        String original = entry.getKey();

        //Only use this suggestion if all shards reported it as misspelled.
        // A missing shard count is treated as "not reported by all shards"
        // rather than failing on auto-unboxing a null Integer.
        Integer numShards = mergeData.origVsShards.get(original);
        if (numShards == null || numShards < mergeData.totalNumberShardResponses) {
            continue;
        }

        // The pruning threshold starts from the configured accuracy for every
        // original term; a threshold raised while filling one term's queue must
        // not filter the candidates of the next term.
        float min = accuracy;

        HashSet<String> suggested = entry.getValue();
        SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
        for (String suggestion : suggested) {
            SuggestWord sug = mergeData.suggestedVsWord.get(suggestion);
            sug.score = sd.getDistance(original, sug.string);
            if (sug.score < min) {
                continue;
            }
            sugQueue.insertWithOverflow(sug);
            if (sugQueue.size() == numSug) {
                // if queue full, maintain the minScore score
                min = sugQueue.top().score;
            }
        }

        // create token
        SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
        Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());

        // get top 'count' suggestions out of 'sugQueue.size()' candidates
        int kept = Math.min(count, sugQueue.size());
        SuggestWord[] suggestions = new SuggestWord[kept];

        // Skip the surplus entries. The surplus is computed once, up front:
        // re-reading sugQueue.size() in the loop condition while popping would
        // stop roughly halfway and leave unwanted entries in the queue.
        int surplus = sugQueue.size() - kept;
        for (int k = 0; k < surplus; k++) {
            sugQueue.pop();
        }

        // now collect the top 'count' responses, filling from the back so the
        // last entry popped lands at index 0
        // NOTE(review): presumably pop() yields the lowest score first, making
        // index 0 the best suggestion - confirm against SuggestWordQueue.
        for (int k = kept - 1; k >= 0; k--) {
            suggestions[k] = sugQueue.pop();
        }

        if (extendedResults) {
            Integer o = mergeData.origVsFreq.get(original);
            if (o != null) {
                result.addFrequency(token, o);
            }
            for (SuggestWord word : suggestions) {
                result.add(token, word.string, word.freq);
            }
        } else {
            // Size from the array: the queue has already been drained here.
            List<String> words = new ArrayList<String>(suggestions.length);
            for (SuggestWord word : suggestions) {
                words.add(word.string);
            }
            result.add(token, words);
        }
    }
    return result;
}