Example usage for org.apache.lucene.search.spell DirectSpellChecker INTERNAL_LEVENSHTEIN

List of usage examples for org.apache.lucene.search.spell DirectSpellChecker INTERNAL_LEVENSHTEIN

Introduction

In this page you can find the example usage for org.apache.lucene.search.spell DirectSpellChecker INTERNAL_LEVENSHTEIN.

Prototype

StringDistance INTERNAL_LEVENSHTEIN

To view the source code for org.apache.lucene.search.spell DirectSpellChecker INTERNAL_LEVENSHTEIN, click Source Link.

Document

The default StringDistance, a Damerau-Levenshtein distance implemented internally via LevenshteinAutomata.

Usage

From source file:org.apache.solr.spelling.DirectSolrSpellChecker.java

License:Apache License

@Override
public String init(NamedList config, SolrCore core) {
    LOG.info("init: " + config);
    String name = super.init(config, core);

    Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR;
    String compClass = (String) config.get(COMPARATOR_CLASS);
    if (compClass != null) {
        if (compClass.equalsIgnoreCase(SCORE_COMP))
            comp = SuggestWordQueue.DEFAULT_COMPARATOR;
        else if (compClass.equalsIgnoreCase(FREQ_COMP))
            comp = new SuggestWordFrequencyComparator();
        else //must be a FQCN
            comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
    }//from  ww  w. j a v a 2 s.co  m

    StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    String distClass = (String) config.get(STRING_DISTANCE);
    if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE))
        sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);

    float minAccuracy = DEFAULT_ACCURACY;
    Float accuracy = (Float) config.get(ACCURACY);
    if (accuracy != null)
        minAccuracy = accuracy;

    int maxEdits = DEFAULT_MAXEDITS;
    Integer edits = (Integer) config.get(MAXEDITS);
    if (edits != null)
        maxEdits = edits;

    int minPrefix = DEFAULT_MINPREFIX;
    Integer prefix = (Integer) config.get(MINPREFIX);
    if (prefix != null)
        minPrefix = prefix;

    int maxInspections = DEFAULT_MAXINSPECTIONS;
    Integer inspections = (Integer) config.get(MAXINSPECTIONS);
    if (inspections != null)
        maxInspections = inspections;

    float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY;
    Float threshold = (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
    if (threshold != null)
        minThreshold = threshold;

    int minQueryLength = DEFAULT_MINQUERYLENGTH;
    Integer queryLength = (Integer) config.get(MINQUERYLENGTH);
    if (queryLength != null)
        minQueryLength = queryLength;

    float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY;
    Float queryFreq = (Float) config.get(MAXQUERYFREQUENCY);
    if (queryFreq != null)
        maxQueryFrequency = queryFreq;

    checker.setComparator(comp);
    checker.setDistance(sd);
    checker.setMaxEdits(maxEdits);
    checker.setMinPrefix(minPrefix);
    checker.setAccuracy(minAccuracy);
    checker.setThresholdFrequency(minThreshold);
    checker.setMaxInspections(maxInspections);
    checker.setMinQueryLength(minQueryLength);
    checker.setMaxQueryFrequency(maxQueryFrequency);
    checker.setLowerCaseTerms(false);

    return name;
}

From source file:org.dice.solrenhancements.spellchecker.DiceDirectSolrSpellChecker.java

License:Apache License

@Override
public String init(NamedList config, SolrCore core) {
    LOG.info("init: " + config);
    String name = super.init(config, core);

    Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR;
    String compClass = (String) config.get(COMPARATOR_CLASS);
    if (compClass != null) {
        if (compClass.equalsIgnoreCase(SCORE_COMP))
            comp = SuggestWordQueue.DEFAULT_COMPARATOR;
        else if (compClass.equalsIgnoreCase(FREQ_COMP))
            comp = new SuggestWordFrequencyComparator();
        else //must be a FQCN
            comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
    }//from ww w .  j  av a  2  s  .  c o m

    characterEncoding = DEFAULT_SOURCE_FILE_CHAR_ENCODING;
    String charEncoding = (String) config.get(SOURCE_FILE_CHAR_ENCODING);
    if (charEncoding != null && charEncoding.length() != 0) {
        characterEncoding = charEncoding;
    }

    StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    String distClass = (String) config.get(STRING_DISTANCE);
    if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE))
        sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);

    float minAccuracy = DEFAULT_ACCURACY;
    Float accuracy = (Float) config.get(ACCURACY);
    if (accuracy != null)
        minAccuracy = accuracy;

    int maxEdits = DEFAULT_MAXEDITS;
    Integer edits = (Integer) config.get(MAXEDITS);
    if (edits != null)
        maxEdits = edits;

    int minPrefix = DEFAULT_MINPREFIX;
    Integer prefix = (Integer) config.get(MINPREFIX);
    if (prefix != null)
        minPrefix = prefix;

    int maxInspections = DEFAULT_MAXINSPECTIONS;
    Integer inspections = (Integer) config.get(MAXINSPECTIONS);
    if (inspections != null)
        maxInspections = inspections;

    float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY;
    Float threshold = (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
    if (threshold != null)
        minThreshold = threshold;

    int minQueryLength = DEFAULT_MINQUERYLENGTH;
    Integer queryLength = (Integer) config.get(MINQUERYLENGTH);
    if (queryLength != null)
        minQueryLength = queryLength;

    float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY;
    Float queryFreq = (Float) config.get(MAXQUERYFREQUENCY);
    if (queryFreq != null)
        maxQueryFrequency = queryFreq;

    Object oTyposFileName = config.get(TYPOS_FILENAME_CFG);
    if (oTyposFileName != null) {
        this.typosFile = oTyposFileName.toString();
    }

    checker.setComparator(comp);
    checker.setDistance(sd);
    checker.setMaxEdits(maxEdits);
    checker.setMinPrefix(minPrefix);
    checker.setAccuracy(minAccuracy);
    checker.setThresholdFrequency(minThreshold);
    checker.setMaxInspections(maxInspections);
    checker.setMinQueryLength(minQueryLength);
    checker.setMaxQueryFrequency(maxQueryFrequency);
    checker.setLowerCaseTerms(false);

    return name;
}

From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java

License:Apache License

private static StringDistance resolveDistance(String distanceVal) {
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        // TODO Jaro and Winkler are 2 people - so apply same naming logic
        // as damerau_levenshtein
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {//  w  ww  .j a v  a 2s.  c o m
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}

From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorTests.java

License:Apache License

public void testFromString() {
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"),
            equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"),
            instanceOf(LuceneLevenshteinDistance.class));
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"),
            instanceOf(LevensteinDistance.class));
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaro_winkler"),
            instanceOf(JaroWinklerDistance.class));
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));

    expectThrows(IllegalArgumentException.class,
            () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
    expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null));
}

From source file:org.elasticsearch.search.suggest.SuggestUtils.java

License:Apache License

public static StringDistance resolveDistance(String distanceVal) {
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein  
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {/*from w  w  w  .  j  av  a 2  s . c om*/
        throw new ElasticsearchIllegalArgumentException("Illegal distance option " + distanceVal);
    }
}