List of usage examples for org.apache.lucene.util.automaton LevenshteinAutomata MAXIMUM_SUPPORTED_DISTANCE
int MAXIMUM_SUPPORTED_DISTANCE
To view the source code for org.apache.lucene.util.automaton LevenshteinAutomata MAXIMUM_SUPPORTED_DISTANCE, click the Source Link.
From source file:com.sindicetech.siren.search.node.NodeFuzzyQuery.java
License:Open Source License
/** * Create a new SirenFuzzyQuery that will match terms with a similarity * of at least <code>minimumSimilarity</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix * of that length is also required.//from w w w. j av a2 s . c o m * * @param term the term to search for * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength length of common (non-fuzzy) prefix * @param maxExpansions the maximum number of terms to match. If this number is * greater than {@link NodeBooleanQuery#getMaxClauseCount} when the query is rewritten, * then the maxClauseCount will be used instead. * @param transpositions true if transpositions should be treated as a primitive * edit operation. If this is false, comparisons will implement the classic * Levenshtein algorithm. */ public NodeFuzzyQuery(final Term term, final int maxEdits, final int prefixLength, final int maxExpansions, final boolean transpositions) { super(term.field()); if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException( "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } if (prefixLength < 0) { throw new IllegalArgumentException("prefixLength cannot be negative."); } if (maxExpansions < 0) { throw new IllegalArgumentException("maxExpansions cannot be negative."); } this.term = term; this.maxEdits = maxEdits; this.prefixLength = prefixLength; this.transpositions = transpositions; this.maxExpansions = maxExpansions; this.setRewriteMethod(new MultiNodeTermQuery.TopTermsScoringNodeBooleanQueryRewrite(maxExpansions)); }
From source file:org.codelibs.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java
License:Apache License
/** * Sets the maximum edit distance candidate suggestions can have in * order to be considered as a suggestion. Can only be a value between 1 * and 2. Any other value result in an bad request error being thrown. * Defaults to <tt>2</tt>./*from www.j ava2 s . c o m*/ */ public DirectCandidateGeneratorBuilder maxEdits(Integer maxEdits) { if (maxEdits < 1 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException("Illegal max_edits value " + maxEdits); } this.maxEdits = maxEdits; return this; }
From source file:org.elasticsearch.common.unit.Fuzziness.java
License:Apache License
public float asSimilarity(String text) { if (this == AUTO) { final int len = termLen(text); if (len <= 2) { return 0.0f; } else if (len > 5) { return 0.5f; } else {//w w w . j av a 2s . co m return 0.66f; } // return dist == 0 ? dist : Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist/ (float) termLen(text)))); } if (fuzziness instanceof Float) { // it's a similarity return ((Float) fuzziness).floatValue(); } else if (fuzziness instanceof Integer) { // it's an edit! int dist = Math.min(((Integer) fuzziness).intValue(), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); return Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist / (float) termLen(text)))); } else { final float similarity = Float.parseFloat(fuzziness.toString()); if (similarity >= 0.0f && similarity < 1.0f) { return similarity; } } throw new ElasticsearchIllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]"); }
From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java
License:Apache License
/**
 * Builds a {@link PhraseSuggestionContext.DirectCandidateGenerator} from the
 * settings held by this builder. Optional settings are copied only when they
 * are non-null.
 *
 * @param mapperService used to look up the pre- and post-filter analyzers by name
 * @return the configured candidate generator
 * @throws IllegalArgumentException if a named analyzer cannot be resolved, or
 *         if the resulting max edits value lies outside
 *         1..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the overridden method
 */
@Override
public PhraseSuggestionContext.DirectCandidateGenerator build(MapperService mapperService) throws IOException {
    final PhraseSuggestionContext.DirectCandidateGenerator candidateGenerator =
            new PhraseSuggestionContext.DirectCandidateGenerator();
    candidateGenerator.setField(field);
    transferIfNotNull(size, candidateGenerator::size);

    // Analyzers are referenced by name; an unknown name resolves to null.
    if (preFilter != null) {
        candidateGenerator.preFilter(mapperService.analysisService().analyzer(preFilter));
        if (candidateGenerator.preFilter() == null) {
            throw new IllegalArgumentException("Analyzer [" + preFilter + "] doesn't exists");
        }
    }
    if (postFilter != null) {
        candidateGenerator.postFilter(mapperService.analysisService().analyzer(postFilter));
        if (candidateGenerator.postFilter() == null) {
            throw new IllegalArgumentException("Analyzer [" + postFilter + "] doesn't exists");
        }
    }

    transferIfNotNull(accuracy, candidateGenerator::accuracy);
    if (suggestMode != null) {
        candidateGenerator.suggestMode(resolveSuggestMode(suggestMode));
    }
    if (sort != null) {
        candidateGenerator.sort(SortBy.resolve(sort));
    }
    if (stringDistance != null) {
        candidateGenerator.stringDistance(resolveDistance(stringDistance));
    }

    // Validate the effective value on the generator, whether or not the builder set it.
    transferIfNotNull(maxEdits, candidateGenerator::maxEdits);
    final boolean editsInRange = candidateGenerator.maxEdits() >= 1
            && candidateGenerator.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    if (!editsInRange) {
        throw new IllegalArgumentException("Illegal max_edits value " + candidateGenerator.maxEdits());
    }

    transferIfNotNull(maxInspections, candidateGenerator::maxInspections);
    transferIfNotNull(maxTermFreq, candidateGenerator::maxTermFreq);
    transferIfNotNull(prefixLength, candidateGenerator::prefixLength);
    transferIfNotNull(minWordLength, candidateGenerator::minWordLength);
    transferIfNotNull(minDocFreq, candidateGenerator::minDocFreq);
    return candidateGenerator;
}
From source file:org.elasticsearch.search.suggest.SuggestUtils.java
License:Apache License
/**
 * Applies one direct-spellchecker setting, identified by {@code fieldName},
 * from the parser's current value onto {@code suggestion}.
 *
 * @param parser the parser positioned on the value of the setting
 * @param fieldName the name of the setting being parsed
 * @param suggestion the settings object that receives the value
 * @return {@code true} if {@code fieldName} was recognised and applied,
 *         {@code false} if it is not one of the settings handled here
 * @throws ElasticsearchIllegalArgumentException if the max edits value lies
 *         outside 1..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException if reading the value from the parser fails
 */
public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName,
        DirectSpellcheckerSettings suggestion) throws IOException {
    boolean recognised = true;

    if ("accuracy".equals(fieldName)) {
        suggestion.accuracy(parser.floatValue());
    } else if (Fields.SUGGEST_MODE.match(fieldName)) {
        suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text()));
    } else if ("sort".equals(fieldName)) {
        suggestion.sort(SuggestUtils.resolveSort(parser.text()));
    } else if (Fields.STRING_DISTANCE.match(fieldName)) {
        suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text()));
    } else if (Fields.MAX_EDITS.match(fieldName)) {
        // The value is stored first and validated afterwards, via the getter.
        suggestion.maxEdits(parser.intValue());
        final boolean editsInRange = suggestion.maxEdits() >= 1
                && suggestion.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
        if (!editsInRange) {
            throw new ElasticsearchIllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
        }
    } else if (Fields.MAX_INSPECTIONS.match(fieldName)) {
        suggestion.maxInspections(parser.intValue());
    } else if (Fields.MAX_TERM_FREQ.match(fieldName)) {
        suggestion.maxTermFreq(parser.floatValue());
    } else if (Fields.PREFIX_LENGTH.match(fieldName)) {
        suggestion.prefixLength(parser.intValue());
    } else if (Fields.MIN_WORD_LENGTH.match(fieldName)) {
        suggestion.minQueryLength(parser.intValue());
    } else if (Fields.MIN_DOC_FREQ.match(fieldName)) {
        suggestion.minDocFreq(parser.floatValue());
    } else {
        recognised = false;
    }

    return recognised;
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
/**
 * Builds a {@link FuzzyQuery} on {@code field} for the text obtained from
 * {@code node} via {@code getText(node)}.
 *
 * <p>The optional {@code max-edits} attribute of {@code node} overrides
 * {@link FuzzyQuery#defaultMaxEdits}. It must be an integer between 0 and
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}, inclusive.
 *
 * @param field the field to query
 * @param node the query element carrying the term text and optional attribute
 * @return the fuzzy query
 * @throws XPathException if {@code max-edits} is not an integer or is out of range
 */
private Query fuzzyQuery(String field, Element node) throws XPathException {
    int maxEdits = FuzzyQuery.defaultMaxEdits;
    final String attr = node.getAttribute("max-edits");
    if (attr != null && !attr.isEmpty()) {
        // Keep the try block limited to the parse so only parse failures are translated.
        try {
            maxEdits = Integer.parseInt(attr);
        } catch (NumberFormatException e) {
            // Preserve the cause so the original parse failure stays visible.
            throw new XPathException(
                    "Query parameter 'max-edits' should be an integer value. Got: " + attr, e);
        }
        if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
            // The message states both bounds, since negative values are rejected too.
            throw new XPathException("Query parameter 'max-edits' must be between 0 and "
                    + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ". Got: " + attr);
        }
    }
    return new FuzzyQuery(new Term(field, getText(node)), maxEdits);
}
From source file:org.hibernate.search.query.dsl.impl.TermQueryContext.java
License:LGPL
public void setMaxEditDistance(int maxEditDistance) { if (maxEditDistance < 1 || maxEditDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw log.incorrectEditDistance(); }//from w w w .j ava2s . com this.maxEditDistance = maxEditDistance; }
From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java
License:Apache License
protected Query newFuzzyQuery(String fieldName, String termText, int maxEdits, int prefixLen, int maxExpansions, boolean transpositions) { maxEdits = Math.min(maxEdits, getFuzzyMaxEdits()); BytesRef analyzed = normalizeMultiTerm(fieldName, termText); //note that this is subtly different from createFieldQuery if (maxEdits == 0) { return new TermQuery(new Term(fieldName, analyzed)); }/* w w w .ja v a2 s . c om*/ MultiTermQuery mtq = null; if (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { mtq = new SlowFuzzyQuery(new Term(fieldName, analyzed), maxEdits, prefixLen, maxExpansions); } else { mtq = new FuzzyQuery(new Term(fieldName, analyzed), maxEdits, prefixLen, maxExpansions, transpositions); } return wrapMultiTermRewrite(mtq); }