Example usage for org.apache.lucene.search.spell JaroWinklerDistance getDistance

List of usage examples for org.apache.lucene.search.spell JaroWinklerDistance getDistance

Introduction

On this page you can find example usages of org.apache.lucene.search.spell.JaroWinklerDistance.getDistance.

Prototype

@Override
    public float getDistance(String s1, String s2) 

Source Link

Usage

From source file:com.mythesis.userbehaviouranalysis.ProfileAnalysis.java

License:Apache License

/**
 * a method that stores the query that has been suggested by the user
 * @param crawlerOutputPath SWebRank output directory used to check if a relevant query already exists
 * @param profile the query's relevant profile
 * @param query the given query/*from  ww w  .j a va 2 s. c  o m*/
 */
public void storeQuery(String crawlerOutputPath, String profile, String query) {

    System.out.println(crawlerOutputPath);
    System.out.println(profile);
    System.out.println(query);
    //Find output paths
    File root = new File(crawlerOutputPath);
    File[] contents = root.listFiles();
    List<String> sWebRanklevels = new ArrayList<>();
    for (File f : contents) {
        if (f.getAbsolutePath().contains("level"))
            sWebRanklevels.add(f.getAbsolutePath());
    }

    //Find all query paths
    List<String> queries = new ArrayList<>();
    for (String s : sWebRanklevels) {
        File level = new File(s);
        File[] queriesFiles = level.listFiles();
        for (File f : queriesFiles) {
            if (!f.getAbsolutePath().contains("txt")) {
                String str = f.getAbsolutePath();
                queries.add(str.substring(str.lastIndexOf("\\") + 1).replace("-query", "").replace("+", " "));
            }
        }
    }

    //check if a relevant query already exists - I use Jaro-Winkler distance
    query = query.trim().replaceAll(" +", " ");
    for (String q : queries) {
        JaroWinklerDistance jwd = new JaroWinklerDistance();
        double distance = jwd.getDistance(q, query);
        if (distance > 0.9) { // threshold = 0.9
            return;
        }
    }

    Mongo mongo = new Mongo("localhost", 27017);
    DB db = mongo.getDB("profileAnalysis");

    DBCollection DBqueries = db.getCollection("newQueries");
    BasicDBObject searchQuery = new BasicDBObject();
    searchQuery.put("profile", profile);
    DBObject document = DBqueries.findOne(searchQuery);
    boolean flag = false;

    //check if a relevant query exists in the database - I use Jaro-Winkler distance
    if (document != null) {
        flag = true;
        BasicDBList storedQueries = (BasicDBList) document.get("queries");
        for (Object quer : storedQueries) {
            JaroWinklerDistance jwd = new JaroWinklerDistance();
            double distance = jwd.getDistance((String) quer, query);
            if (distance > 0.9) { // threshold = 0.9
                return;
            }
        }
    }

    //if document already exists add the new query
    if (flag) {
        DBqueries.update(searchQuery, new BasicDBObject("$push", new BasicDBObject("queries", query)));
    } else { //otherwise create a new document
        BasicDBList dbl = new BasicDBList();
        dbl.add(query);
        BasicDBObject entry = new BasicDBObject("profile", profile).append("queries", dbl);
        DBqueries.insert(entry);
    }
}

From source file:uk.ac.open.kmi.squire.ontologymatching.JaroWinklerSimilarity.java

/**
 * Scores how closely two strings match by delegating to a fresh
 * {@code JaroWinklerDistance} instance.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @return the value returned by {@code JaroWinklerDistance.getDistance(s1, s2)}
 */
@Override
public float computeMatchingScore(String s1, String s2) {
    final float score = new JaroWinklerDistance().getDistance(s1, s2);
    return score;
}