Example usage for org.apache.lucene.util.automaton LevenshteinAutomata MAXIMUM_SUPPORTED_DISTANCE

List of usage examples for org.apache.lucene.util.automaton LevenshteinAutomata MAXIMUM_SUPPORTED_DISTANCE

Introduction

On this page you can find example usages of org.apache.lucene.util.automaton.LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE.

Prototype

int MAXIMUM_SUPPORTED_DISTANCE

To view the source code for org.apache.lucene.util.automaton.LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE, click the Source Link.

Document

Maximum edit distance this class can generate an automaton for.

Usage

From source file:com.sindicetech.siren.search.node.NodeFuzzyQuery.java

License:Open Source License

/**
 * Create a new SirenFuzzyQuery that will match terms with a similarity
 * of at least <code>minimumSimilarity</code> to <code>term</code>.
 * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
 * of that length is also required.//from  w w  w.  j av  a2 s  . c  o m
 *
 * @param term the term to search for
 * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is
 *  greater than {@link NodeBooleanQuery#getMaxClauseCount} when the query is rewritten,
 *  then the maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 */
public NodeFuzzyQuery(final Term term, final int maxEdits, final int prefixLength, final int maxExpansions,
        final boolean transpositions) {
    super(term.field());

    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException(
                "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (prefixLength < 0) {
        throw new IllegalArgumentException("prefixLength cannot be negative.");
    }
    if (maxExpansions < 0) {
        throw new IllegalArgumentException("maxExpansions cannot be negative.");
    }

    this.term = term;
    this.maxEdits = maxEdits;
    this.prefixLength = prefixLength;
    this.transpositions = transpositions;
    this.maxExpansions = maxExpansions;
    this.setRewriteMethod(new MultiNodeTermQuery.TopTermsScoringNodeBooleanQueryRewrite(maxExpansions));
}

From source file:org.codelibs.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java

License:Apache License

/**
 * Sets the maximum edit distance candidate suggestions can have in
 * order to be considered as a suggestion. Can only be a value between 1
 * and 2. Any other value result in an bad request error being thrown.
 * Defaults to <tt>2</tt>./*from  www.j  ava2 s .  c o  m*/
 */
public DirectCandidateGeneratorBuilder maxEdits(Integer maxEdits) {
    if (maxEdits < 1 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException("Illegal max_edits value " + maxEdits);
    }
    this.maxEdits = maxEdits;
    return this;
}

From source file:org.elasticsearch.common.unit.Fuzziness.java

License:Apache License

/**
 * Converts this fuzziness into a similarity value for the given text.
 * <ul>
 * <li>For {@code AUTO} the result depends only on the term length:
 *     {@code 0.0} up to length 2, {@code 0.66} for lengths 3 to 5 and
 *     {@code 0.5} above that.</li>
 * <li>A {@link Float} fuzziness is returned unchanged.</li>
 * <li>An {@link Integer} fuzziness is treated as an edit distance, capped at
 *     {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}, and mapped to
 *     {@code 1 - distance / termLength}, clamped to {@code [0.0, 0.999]}.</li>
 * <li>Any other value is parsed from its string form and accepted only when
 *     it lies in {@code [0.0, 1.0)}.</li>
 * </ul>
 *
 * @param text the term text whose length drives the conversion
 * @return the similarity derived from this fuzziness
 * @throws ElasticsearchIllegalArgumentException if the parsed value is outside {@code [0.0, 1.0)}
 */
public float asSimilarity(String text) {
    if (this == AUTO) {
        final int length = termLen(text);
        if (length <= 2) {
            return 0.0f;
        }
        return length > 5 ? 0.5f : 0.66f;
    }

    if (fuzziness instanceof Float) {
        // Already expressed as a similarity.
        return ((Float) fuzziness).floatValue();
    }

    if (fuzziness instanceof Integer) {
        // Expressed as an edit distance: cap it, then scale by the term length.
        final int requestedEdits = ((Integer) fuzziness).intValue();
        final int edits = Math.min(requestedEdits, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
        final float ratio = (float) edits / (float) termLen(text);
        return Math.min(0.999f, Math.max(0.0f, 1.0f - ratio));
    }

    // Anything else is interpreted through its string representation.
    final float parsed = Float.parseFloat(fuzziness.toString());
    if (parsed >= 0.0f && parsed < 1.0f) {
        return parsed;
    }
    throw new ElasticsearchIllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]");
}

From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java

License:Apache License

/**
 * Creates a {@code PhraseSuggestionContext.DirectCandidateGenerator} and
 * populates it from the settings held by this builder.
 *
 * @param mapperService used to look up the pre- and post-filter analyzers by name
 * @return the populated generator
 * @throws IllegalArgumentException if a configured pre- or post-filter analyzer
 *         cannot be found, or if the resulting max edits value is outside
 *         1..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the signature
 */
@Override
public PhraseSuggestionContext.DirectCandidateGenerator build(MapperService mapperService) throws IOException {
    PhraseSuggestionContext.DirectCandidateGenerator generator = new PhraseSuggestionContext.DirectCandidateGenerator();
    generator.setField(this.field);
    // NOTE(review): transferIfNotNull is not visible here; presumably it calls the
    // given setter only when the value is non-null -- confirm against the helper.
    transferIfNotNull(this.size, generator::size);
    if (this.preFilter != null) {
        // Resolve the analyzer by name; a null result after the lookup means the name is unknown.
        generator.preFilter(mapperService.analysisService().analyzer(this.preFilter));
        if (generator.preFilter() == null) {
            throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists");
        }
    }
    if (this.postFilter != null) {
        // Same lookup-then-verify sequence as for the pre-filter.
        generator.postFilter(mapperService.analysisService().analyzer(this.postFilter));
        if (generator.postFilter() == null) {
            throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists");
        }
    }
    transferIfNotNull(this.accuracy, generator::accuracy);
    if (this.suggestMode != null) {
        generator.suggestMode(resolveSuggestMode(this.suggestMode));
    }
    if (this.sort != null) {
        generator.sort(SortBy.resolve(this.sort));
    }
    if (this.stringDistance != null) {
        generator.stringDistance(resolveDistance(this.stringDistance));
    }
    transferIfNotNull(this.maxEdits, generator::maxEdits);
    // Validate the value the generator reports after the transfer step above.
    if (generator.maxEdits() < 1 || generator.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException("Illegal max_edits value " + generator.maxEdits());
    }
    transferIfNotNull(this.maxInspections, generator::maxInspections);
    transferIfNotNull(this.maxTermFreq, generator::maxTermFreq);
    transferIfNotNull(this.prefixLength, generator::prefixLength);
    transferIfNotNull(this.minWordLength, generator::minWordLength);
    transferIfNotNull(this.minDocFreq, generator::minDocFreq);
    return generator;
}

From source file:org.elasticsearch.search.suggest.SuggestUtils.java

License:Apache License

/**
 * Applies a single direct-spellchecker option, identified by
 * {@code fieldName}, to {@code suggestion}, reading the option's value from
 * the parser's current position.
 *
 * @param parser source of the option value
 * @param fieldName name of the option being parsed
 * @param suggestion settings object that receives the value
 * @return {@code true} if {@code fieldName} was recognised and applied,
 *         {@code false} if it is not one of the options handled here
 * @throws ElasticsearchIllegalArgumentException if the max edits value is outside
 *         1..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the signature
 */
public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName,
        DirectSpellcheckerSettings suggestion) throws IOException {
    if ("accuracy".equals(fieldName)) {
        suggestion.accuracy(parser.floatValue());
        return true;
    }
    if (Fields.SUGGEST_MODE.match(fieldName)) {
        suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text()));
        return true;
    }
    if ("sort".equals(fieldName)) {
        suggestion.sort(SuggestUtils.resolveSort(parser.text()));
        return true;
    }
    if (Fields.STRING_DISTANCE.match(fieldName)) {
        suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text()));
        return true;
    }
    if (Fields.MAX_EDITS.match(fieldName)) {
        // Store first, then validate what the settings object reports back.
        suggestion.maxEdits(parser.intValue());
        final boolean legal = suggestion.maxEdits() >= 1
                && suggestion.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
        if (!legal) {
            throw new ElasticsearchIllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
        }
        return true;
    }
    if (Fields.MAX_INSPECTIONS.match(fieldName)) {
        suggestion.maxInspections(parser.intValue());
        return true;
    }
    if (Fields.MAX_TERM_FREQ.match(fieldName)) {
        suggestion.maxTermFreq(parser.floatValue());
        return true;
    }
    if (Fields.PREFIX_LENGTH.match(fieldName)) {
        suggestion.prefixLength(parser.intValue());
        return true;
    }
    if (Fields.MIN_WORD_LENGTH.match(fieldName)) {
        // NOTE(review): the min-word-length option is stored via minQueryLength(...);
        // confirm against DirectSpellcheckerSettings that this is the intended setter.
        suggestion.minQueryLength(parser.intValue());
        return true;
    }
    if (Fields.MIN_DOC_FREQ.match(fieldName)) {
        suggestion.minDocFreq(parser.floatValue());
        return true;
    }
    // Not an option handled by this method.
    return false;
}

From source file:org.exist.indexing.lucene.XMLToQuery.java

License:Open Source License

/**
 * Builds a {@link FuzzyQuery} on {@code field} for the text of {@code node}.
 * The maximum edit distance is taken from the element's optional
 * {@code max-edits} attribute and falls back to
 * {@link FuzzyQuery#defaultMaxEdits} when the attribute is absent or empty.
 *
 * @param field the field to query
 * @param node the element describing the fuzzy query
 * @return the fuzzy query for the element's text
 * @throws XPathException if {@code max-edits} is not an integer or lies outside
 *         0..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 */
private Query fuzzyQuery(String field, Element node) throws XPathException {
    int maxEdits = FuzzyQuery.defaultMaxEdits;
    final String attr = node.getAttribute("max-edits");
    if (attr != null && attr.length() > 0) {
        // Only the parse itself can raise NumberFormatException, so keep the try narrow.
        try {
            maxEdits = Integer.parseInt(attr);
        } catch (NumberFormatException e) {
            throw new XPathException("Query parameter 'max-edits' should be an integer value. Got: " + attr);
        }
        // Report both bounds: negative values are rejected as well as values above the maximum.
        if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
            throw new XPathException("Query parameter 'max-edits' must be between 0 and "
                    + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ". Got: " + attr);
        }
    }
    return new FuzzyQuery(new Term(field, getText(node)), maxEdits);
}

From source file:org.hibernate.search.query.dsl.impl.TermQueryContext.java

License:LGPL

/**
 * Sets the maximum edit distance.
 *
 * @param maxEditDistance must be between 1 and
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}, inclusive;
 *        otherwise the exception produced by {@code log.incorrectEditDistance()} is thrown
 */
public void setMaxEditDistance(int maxEditDistance) {
    final boolean supported = maxEditDistance >= 1
            && maxEditDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    if (!supported) {
        throw log.incorrectEditDistance();
    }
    this.maxEditDistance = maxEditDistance;
}

From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java

License:Apache License

/**
 * Creates the query for a fuzzy term.
 * <p>
 * The requested edit distance is first capped at {@code getFuzzyMaxEdits()}.
 * A resulting distance of 0 yields a plain {@link TermQuery}; a distance above
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} yields a
 * {@link SlowFuzzyQuery} (which is not given the {@code transpositions} flag);
 * anything else yields a {@link FuzzyQuery}. Multi-term results are passed
 * through {@code wrapMultiTermRewrite} before being returned.
 *
 * @param fieldName field to query
 * @param termText raw term text; normalized via {@code normalizeMultiTerm}
 * @param maxEdits requested maximum number of edits
 * @param prefixLen length of the required common prefix
 * @param maxExpansions maximum number of terms to expand to
 * @param transpositions whether transpositions count as a single edit (only used by {@link FuzzyQuery})
 * @return the resulting query
 */
protected Query newFuzzyQuery(String fieldName, String termText, int maxEdits, int prefixLen, int maxExpansions,
        boolean transpositions) {
    final int cappedEdits = Math.min(maxEdits, getFuzzyMaxEdits());
    final Term fuzzyTerm = new Term(fieldName, normalizeMultiTerm(fieldName, termText));

    // note that this is subtly different from createFieldQuery
    if (cappedEdits == 0) {
        return new TermQuery(fuzzyTerm);
    }

    final MultiTermQuery fuzzy;
    if (cappedEdits <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        fuzzy = new FuzzyQuery(fuzzyTerm, cappedEdits, prefixLen, maxExpansions, transpositions);
    } else {
        // Beyond what the Levenshtein automaton supports: use the slow implementation.
        fuzzy = new SlowFuzzyQuery(fuzzyTerm, cappedEdits, prefixLen, maxExpansions);
    }
    return wrapMultiTermRewrite(fuzzy);
}