Example usage for org.apache.lucene.search.spell JaroWinklerDistance JaroWinklerDistance

List of usage examples for org.apache.lucene.search.spell JaroWinklerDistance JaroWinklerDistance

Introduction

In this page you can find the example usage for org.apache.lucene.search.spell JaroWinklerDistance JaroWinklerDistance.

Prototype

public JaroWinklerDistance() 

Source Link

Document

Creates a new distance metric with the default threshold for the Jaro Winkler bonus (0.7)

Usage

From source file:com.mythesis.userbehaviouranalysis.ProfileAnalysis.java

License:Apache License

/**
 * a method that stores the query that has been suggested by the user
 * @param crawlerOutputPath SWebRank output directory used to check if a relevant query already exists
 * @param profile the query's relevant profile
 * @param query the given query//from  w  ww . j a  v a2s. c  om
 */
public void storeQuery(String crawlerOutputPath, String profile, String query) {

    System.out.println(crawlerOutputPath);
    System.out.println(profile);
    System.out.println(query);
    //Find output paths
    File root = new File(crawlerOutputPath);
    File[] contents = root.listFiles();
    List<String> sWebRanklevels = new ArrayList<>();
    for (File f : contents) {
        if (f.getAbsolutePath().contains("level"))
            sWebRanklevels.add(f.getAbsolutePath());
    }

    //Find all query paths
    List<String> queries = new ArrayList<>();
    for (String s : sWebRanklevels) {
        File level = new File(s);
        File[] queriesFiles = level.listFiles();
        for (File f : queriesFiles) {
            if (!f.getAbsolutePath().contains("txt")) {
                String str = f.getAbsolutePath();
                queries.add(str.substring(str.lastIndexOf("\\") + 1).replace("-query", "").replace("+", " "));
            }
        }
    }

    //check if a relevant query already exists - I use Jaro-Winkler distance
    query = query.trim().replaceAll(" +", " ");
    for (String q : queries) {
        JaroWinklerDistance jwd = new JaroWinklerDistance();
        double distance = jwd.getDistance(q, query);
        if (distance > 0.9) { // threshold = 0.9
            return;
        }
    }

    Mongo mongo = new Mongo("localhost", 27017);
    DB db = mongo.getDB("profileAnalysis");

    DBCollection DBqueries = db.getCollection("newQueries");
    BasicDBObject searchQuery = new BasicDBObject();
    searchQuery.put("profile", profile);
    DBObject document = DBqueries.findOne(searchQuery);
    boolean flag = false;

    //check if a relevant query exists in the database - I use Jaro-Winkler distance
    if (document != null) {
        flag = true;
        BasicDBList storedQueries = (BasicDBList) document.get("queries");
        for (Object quer : storedQueries) {
            JaroWinklerDistance jwd = new JaroWinklerDistance();
            double distance = jwd.getDistance((String) quer, query);
            if (distance > 0.9) { // threshold = 0.9
                return;
            }
        }
    }

    //if document already exists add the new query
    if (flag) {
        DBqueries.update(searchQuery, new BasicDBObject("$push", new BasicDBObject("queries", query)));
    } else { //otherwise create a new document
        BasicDBList dbl = new BasicDBList();
        dbl.add(query);
        BasicDBObject entry = new BasicDBObject("profile", profile).append("queries", dbl);
        DBqueries.insert(entry);
    }
}

From source file:fastcampus.lucene.example.search.SpellCheckerExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    Directory directory = FSDirectory.open(Paths.get("./index/spell/"));
    SpellChecker spellChecker = new SpellChecker(directory);

    //Analyzer analyzer = new StandardAnalyzer();                             // ? 
    Analyzer analyzer = new Analyzer() {
        @Override//from w w  w.ja  v  a 2 s . c o  m
        protected TokenStreamComponents createComponents(String s) {
            Reader reader = new StringReader(s);
            Tokenizer tokenizer = new StandardTokenizer();
            tokenizer.setReader(reader);
            String name = "nfc_cf";
            Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE);
            TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //?? Writer? ?  ?

    Path path = Paths.get("./data/spell/dic.txt");

    spellChecker.setSpellIndex(directory);
    spellChecker.clearIndex();
    spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true);
    String wordForSuggestions = "?";
    //spellChecker.setStringDistance(new LevensteinDistance());  //#Levenstein  
    spellChecker.setStringDistance(new JaroWinklerDistance()); //Jaro-Winkler 

    int suggestionsNumber = 1;
    String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber);
    if (suggestions != null && suggestions.length > 0) {

        for (String word : suggestions) {

            System.out.println("Did you mean:" + word);

        }

    } else {

        System.out.println("No suggestions found for word:" + wordForSuggestions);

    }

}

From source file:lu.list.itis.dkd.aig.SimilarityProvider.java

License:Apache License

/**
 * Method used to compute a string similarity using Jaro Winkler as implemented by Apache's
 * Lucene./*w ww  .  java2 s.  c o m*/
 *
 * @param _this
 *        The first variable to compute similarity with.
 * @param that
 *        The second variable to compute similarity with.
 * @return The string similarity of both variables.
 * @see JaroWinklerDistance#getDistance(String, String)
 */
private float computeStringSimilarity(final Variable _this, final Variable that) {
    if (!useStringSimilarity) {
        return 0f;
    }

    return new JaroWinklerDistance().getDistance(_this.getTextContent(), that.getTextContent());
}

From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder.java

License:Apache License

private static StringDistance resolveDistance(String distanceVal) {
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        // TODO Jaro and Winkler are 2 people - so apply same naming logic
        // as damerau_levenshtein
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {/*  ww  w.  j  a va2s  . co  m*/
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}

From source file:org.elasticsearch.search.suggest.SuggestUtils.java

License:Apache License

public static StringDistance resolveDistance(String distanceVal) {
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein  
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {/*from  w  w  w.j av  a  2s. c  o  m*/
        throw new ElasticsearchIllegalArgumentException("Illegal distance option " + distanceVal);
    }
}

From source file:uk.ac.open.kmi.squire.ontologymatching.JaroWinklerSimilarity.java

@Override
public float computeMatchingScore(String s1, String s2) {
    JaroWinklerDistance d = new JaroWinklerDistance();
    return d.getDistance(s1, s2);
}