List of usage examples for org.apache.lucene.search.spell.DirectSpellChecker#setThresholdFrequency
public void setThresholdFrequency(float thresholdFrequency)
From source file:org.codelibs.elasticsearch.search.suggest.DirectSpellcheckerSettings.java
License:Apache License
/**
 * Builds a fresh {@link DirectSpellChecker} and copies this object's settings onto it.
 *
 * <p>The setters are invoked in a fixed order (accuracy first, then the comparator chosen
 * from {@code sort()}, then the remaining options). Lower-casing of terms is always
 * switched off on the returned checker.
 *
 * @return a newly created, fully configured spell checker
 * @throws IllegalArgumentException if {@code sort()} is neither {@code SCORE} nor
 *         {@code FREQUENCY}
 */
public DirectSpellChecker createDirectSpellChecker() {
    final DirectSpellChecker checker = new DirectSpellChecker();

    checker.setAccuracy(accuracy());
    checker.setComparator(resolveSuggestComparator());
    checker.setDistance(stringDistance());
    checker.setMaxEdits(maxEdits());
    checker.setMaxInspections(maxInspections());
    checker.setMaxQueryFrequency(maxTermFreq());
    checker.setMinPrefix(prefixLength());
    checker.setMinQueryLength(minWordLength());
    checker.setThresholdFrequency(minDocFreq());
    checker.setLowerCaseTerms(false);

    return checker;
}

/** Maps the configured sort mode to the comparator used to order suggestions. */
private Comparator<SuggestWord> resolveSuggestComparator() {
    switch (sort()) {
    case SCORE:
        return SCORE_COMPARATOR;
    case FREQUENCY:
        return LUCENE_FREQUENCY;
    default:
        throw new IllegalArgumentException("Illegal suggest sort: " + sort());
    }
}
From source file:org.elasticsearch.search.suggest.SuggestUtils.java
License:Apache License
/**
 * Creates a new {@link DirectSpellChecker} configured from the given settings object.
 *
 * <p>Accuracy is applied first, then the comparator selected from
 * {@code suggestion.sort()}, then the remaining options in a fixed order. Lower-casing
 * of terms is always switched off on the returned checker.
 *
 * @param suggestion the settings to copy onto the new spell checker
 * @return a newly created, fully configured spell checker
 * @throws ElasticsearchIllegalArgumentException if {@code suggestion.sort()} is neither
 *         {@code SCORE} nor {@code FREQUENCY}
 */
public static DirectSpellChecker getDirectSpellChecker(DirectSpellcheckerSettings suggestion) {
    final DirectSpellChecker checker = new DirectSpellChecker();

    checker.setAccuracy(suggestion.accuracy());
    checker.setComparator(resolveSuggestComparator(suggestion));
    checker.setDistance(suggestion.stringDistance());
    checker.setMaxEdits(suggestion.maxEdits());
    checker.setMaxInspections(suggestion.maxInspections());
    checker.setMaxQueryFrequency(suggestion.maxTermFreq());
    checker.setMinPrefix(suggestion.prefixLength());
    checker.setMinQueryLength(suggestion.minWordLength());
    checker.setThresholdFrequency(suggestion.minDocFreq());
    checker.setLowerCaseTerms(false);

    return checker;
}

/** Maps the sort mode of the given settings to the comparator used to order suggestions. */
private static Comparator<SuggestWord> resolveSuggestComparator(DirectSpellcheckerSettings suggestion) {
    switch (suggestion.sort()) {
    case SCORE:
        return SCORE_COMPARATOR;
    case FREQUENCY:
        return LUCENE_FREQUENCY;
    default:
        throw new ElasticsearchIllegalArgumentException("Illegal suggest sort: " + suggestion.sort());
    }
}
From source file:perf.CreateQueries.java
License:Apache License
private static void makeFuzzyAndRespellQueries(IndexReader r, String field, TermFreq[] topTerms, Writer queriesOut) throws IOException { System.out.println("\nFind top fuzzy/respell terms..."); final DirectSpellChecker spellChecker = new DirectSpellChecker(); spellChecker.setThresholdFrequency(1.0f); final MostFrequentTerms pq = new MostFrequentTerms(NUM_QUERIES); // TODO: use threads...? int count = 0; for (TermFreq tdf : topTerms) { if ((++count) % 1000 == 0) { System.out.println(" " + count + " of " + topTerms.length + "..."); }//from w ww . j a v a 2 s . co m if (tdf.term.length < 5) { continue; } // TODO: make my own fuzzy enum? long sumDF = 0; SuggestWord[] suggested = spellChecker.suggestSimilar(new Term(field, tdf.term), 50, r, SuggestMode.SUGGEST_MORE_POPULAR); if (suggested.length < 5) { continue; } for (SuggestWord suggest : suggested) { sumDF += suggest.freq; } // Strongly favor higher number of suggestions and gently favor higher sumDF: final long score = (long) (Math.log(sumDF) * suggested.length); final TermFreq newTF = new TermFreq(tdf.term, score); final TermFreq bumpedTF = pq.insertWithOverflow(newTF); if (bumpedTF != newTF) { System.out.println( " " + newTF.term.utf8ToString() + " score=" + score + " suggestCount=" + suggested.length); } } if (pq.size() < NUM_QUERIES) { throw new RuntimeException("index is too small: only " + pq.size() + " top fuzzy terms"); } int downTo = NUM_QUERIES; while (pq.size() > 0) { TermFreq tdf = pq.pop(); System.out.println(" " + tdf.term.utf8ToString() + " freq=" + tdf.df); queriesOut.write("Fuzzy1: " + tdf.term.utf8ToString() + "~1\n"); queriesOut.write("Fuzzy2: " + tdf.term.utf8ToString() + "~2\n"); queriesOut.write("Respell: " + tdf.term.utf8ToString() + "\n"); } queriesOut.flush(); }