Example usage for org.apache.lucene.search.spell SpellChecker SpellChecker

List of usage examples for org.apache.lucene.search.spell SpellChecker SpellChecker

Introduction

On this page you can find example usage of the org.apache.lucene.search.spell.SpellChecker constructor.

Prototype

public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException 

Source Link

Document

Use the given directory as a spell checker index.

Usage

From source file: com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java

/**
 * Resolves "near" matches from the elements of the given type. If
 * {@link #across} is <code>true</code>, each element will be compared to all
 * elements of all types./*  w ww  .  ja  v a 2 s .  c  om*/
 *
 * @param uri the concept/relation class (not instance) to resolve
 * @param minDistance the minimum allowable similarity
 * @return map of uri-to-hits
 */
public MultiMap<IRI, Hit> check(IRI uri, final float minDistance) {
    MultiMap<IRI, Hit> hits = new MultiMap<>();

    // get our universe of possible hits
    Map<IRI, String> possibles = getHitUniverse(uri);
    MultiMap<String, IRI> revpos = MultiMap.flip(possibles);

    Directory ramdir = new RAMDirectory();
    StandardAnalyzer analyzer = null;
    SpellChecker speller = null;

    List<IRI> errors = new ArrayList<>();
    try {
        analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        speller = new SpellChecker(ramdir, strdist);

        StringBuilder names = new StringBuilder();
        for (String s : possibles.values()) {
            names.append(s).append("\n");
        }
        PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(names.toString()));
        speller.indexDictionary(ptd, config, true);

        List<IRI> needles = typeToURILkp.get(uri);
        for (IRI needle : needles) {
            String needlelabel = labels.get(needle);
            try {
                String[] suggestions = speller.suggestSimilar(needlelabel, 20, minDistance);
                for (String s : suggestions) {
                    // found a match, so figure out what we actually matched
                    float distance = strdist.getDistance(needlelabel, s);

                    for (IRI match : revpos.get(s)) {
                        hits.add(needle, new Hit(match, s, uriToTypeLkp.get(match), distance));
                    }
                }
            } catch (Exception e) {
                // our fallback resolution always works; it's just a ton slower
                errors.add(needle);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
    } finally {
        for (Closeable c : new Closeable[] { analyzer, ramdir, speller }) {
            if (null != c) {
                try {
                    c.close();
                } catch (Exception e) {
                    log.warn(e, e);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        fallbackResolve(errors, possibles, hits, strdist, minDistance);
    }

    return hits;
}