List of usage examples for the org.apache.lucene.search.spell.JaroWinklerDistance constructor JaroWinklerDistance()
public JaroWinklerDistance()
From source file:com.mythesis.userbehaviouranalysis.ProfileAnalysis.java
License:Apache License
/** * a method that stores the query that has been suggested by the user * @param crawlerOutputPath SWebRank output directory used to check if a relevant query already exists * @param profile the query's relevant profile * @param query the given query//from w ww . j a v a2s. c om */ public void storeQuery(String crawlerOutputPath, String profile, String query) { System.out.println(crawlerOutputPath); System.out.println(profile); System.out.println(query); //Find output paths File root = new File(crawlerOutputPath); File[] contents = root.listFiles(); List<String> sWebRanklevels = new ArrayList<>(); for (File f : contents) { if (f.getAbsolutePath().contains("level")) sWebRanklevels.add(f.getAbsolutePath()); } //Find all query paths List<String> queries = new ArrayList<>(); for (String s : sWebRanklevels) { File level = new File(s); File[] queriesFiles = level.listFiles(); for (File f : queriesFiles) { if (!f.getAbsolutePath().contains("txt")) { String str = f.getAbsolutePath(); queries.add(str.substring(str.lastIndexOf("\\") + 1).replace("-query", "").replace("+", " ")); } } } //check if a relevant query already exists - I use Jaro-Winkler distance query = query.trim().replaceAll(" +", " "); for (String q : queries) { JaroWinklerDistance jwd = new JaroWinklerDistance(); double distance = jwd.getDistance(q, query); if (distance > 0.9) { // threshold = 0.9 return; } } Mongo mongo = new Mongo("localhost", 27017); DB db = mongo.getDB("profileAnalysis"); DBCollection DBqueries = db.getCollection("newQueries"); BasicDBObject searchQuery = new BasicDBObject(); searchQuery.put("profile", profile); DBObject document = DBqueries.findOne(searchQuery); boolean flag = false; //check if a relevant query exists in the database - I use Jaro-Winkler distance if (document != null) { flag = true; BasicDBList storedQueries = (BasicDBList) document.get("queries"); for (Object quer : storedQueries) { JaroWinklerDistance jwd = new JaroWinklerDistance(); double distance = 
jwd.getDistance((String) quer, query); if (distance > 0.9) { // threshold = 0.9 return; } } } //if document already exists add the new query if (flag) { DBqueries.update(searchQuery, new BasicDBObject("$push", new BasicDBObject("queries", query))); } else { //otherwise create a new document BasicDBList dbl = new BasicDBList(); dbl.add(query); BasicDBObject entry = new BasicDBObject("profile", profile).append("queries", dbl); DBqueries.insert(entry); } }
From source file:fastcampus.lucene.example.search.SpellCheckerExample.java
License:Apache License
public static void main(String[] args) throws Exception { Directory directory = FSDirectory.open(Paths.get("./index/spell/")); SpellChecker spellChecker = new SpellChecker(directory); //Analyzer analyzer = new StandardAnalyzer(); // ? Analyzer analyzer = new Analyzer() { @Override//from w w w.ja v a 2 s . c o m protected TokenStreamComponents createComponents(String s) { Reader reader = new StringReader(s); Tokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(reader); String name = "nfc_cf"; Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE); TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer); return new TokenStreamComponents(tokenizer, filter); } }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //?? Writer? ? ? Path path = Paths.get("./data/spell/dic.txt"); spellChecker.setSpellIndex(directory); spellChecker.clearIndex(); spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true); String wordForSuggestions = "?"; //spellChecker.setStringDistance(new LevensteinDistance()); //#Levenstein spellChecker.setStringDistance(new JaroWinklerDistance()); //Jaro-Winkler int suggestionsNumber = 1; String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber); if (suggestions != null && suggestions.length > 0) { for (String word : suggestions) { System.out.println("Did you mean:" + word); } } else { System.out.println("No suggestions found for word:" + wordForSuggestions); } }
From source file:lu.list.itis.dkd.aig.SimilarityProvider.java
License:Apache License
/** * Method used to compute a string similarity using Jaro Winkler as implemented by Apache's * Lucene./*w ww . java2 s. c o m*/ * * @param _this * The first variable to compute similarity with. * @param that * The second variable to compute similarity with. * @return The string similarity of both variables. * @see JaroWinklerDistance#getDistance(String, String) */ private float computeStringSimilarity(final Variable _this, final Variable that) { if (!useStringSimilarity) { return 0f; } return new JaroWinklerDistance().getDistance(_this.getTextContent(), that.getTextContent()); }
From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java
License:Apache License
private static StringDistance resolveDistance(String distanceVal) { distanceVal = distanceVal.toLowerCase(Locale.US); if ("internal".equals(distanceVal)) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); // TODO Jaro and Winkler are 2 people - so apply same naming logic // as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { return new NGramDistance(); } else {/* ww w. j a va2s . co m*/ throw new IllegalArgumentException("Illegal distance option " + distanceVal); } }
From source file:org.elasticsearch.search.suggest.SuggestUtils.java
License:Apache License
public static StringDistance resolveDistance(String distanceVal) { if ("internal".equals(distanceVal)) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { return new NGramDistance(); } else {/*from w w w.j av a 2s. c o m*/ throw new ElasticsearchIllegalArgumentException("Illegal distance option " + distanceVal); } }
From source file:uk.ac.open.kmi.squire.ontologymatching.JaroWinklerSimilarity.java
/**
 * Scores how well two strings match, delegating to
 * {@link JaroWinklerDistance#getDistance(String, String)}.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return the value returned by {@code getDistance(s1, s2)}
 */
@Override
public float computeMatchingScore(String s1, String s2) {
    return new JaroWinklerDistance().getDistance(s1, s2);
}