Example usage for org.apache.commons.lang StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils getLevenshteinDistance.

Prototype

public static int getLevenshteinDistance(String s, String t) 

Source Link

Document

Find the Levenshtein distance between two Strings.

Usage

From source file:com.iflytek.spider.metadata.SpellCheckedMetadata.java

/**
 * Resolves a metadata attribute name to its well-known spelling.
 * <p>
 * The name is first normalized and looked up directly in the index of
 * well-known names. If that fails, the normalized name is compared with each
 * known normalized name, and the first one whose Levenshtein distance is
 * strictly below {@code searched.length() / TRESHOLD_DIVIDER} (and which maps
 * to a non-null entry in the index) wins. The matching is therefore error
 * tolerant. For instance,
 * <ul>
 * <li>content-type gives Content-Type</li>
 * <li>CoNtEntType gives Content-Type</li>
 * <li>ConTnTtYpe gives Content-Type</li>
 * </ul>
 * If no well-known metadata name matches, the original name is returned
 * unchanged.
 *
 * @param name
 *          Name to normalize
 * @return the matching well-known name, or {@code name} itself when nothing
 *         matches
 */
public static String getNormalizedName(final String name) {
    final String key = normalize(name);

    // Exact hit on the normalized form: nothing more to do.
    final String exact = NAMES_IDX.get(key);
    if (exact != null) {
        return exact;
    }

    // Without a table of candidates there is nothing to fuzzy-match against.
    if (normalized == null) {
        return name;
    }

    // Longer names tolerate more edits; the bound is exclusive.
    final int maxDistance = key.length() / TRESHOLD_DIVIDER;
    for (final String candidate : normalized) {
        if (StringUtils.getLevenshteinDistance(key, candidate) < maxDistance) {
            final String canonical = NAMES_IDX.get(candidate);
            if (canonical != null) {
                return canonical;
            }
        }
    }
    return name;
}

From source file:com.panet.imeta.core.row.ValueDataUtil.java

/**
 * Computes the Levenshtein distance (LD) between the string forms of two values.
 * <p>
 * LD is a measure of the similarity between two strings, referred to as the
 * source string (s) and the target string (t). The distance is the number of
 * deletions, insertions, or substitutions required to transform s into t.
 * Each value is converted with {@code toString()} before comparison.
 *
 * @param metaA metadata of the first value (not used by this method)
 * @param dataA first value; may be {@code null}
 * @param metaB metadata of the second value (not used by this method)
 * @param dataB second value; may be {@code null}
 * @return the distance as a {@code Long}, or {@code null} when either value
 *         is {@code null}
 */
public static Long getLevenshtein_Distance(ValueMetaInterface metaA, Object dataA, ValueMetaInterface metaB,
        Object dataB) {
    if (dataA == null || dataB == null) {
        return null;
    }
    // Long.valueOf replaces the deprecated boxing constructor new Long(...).
    return Long.valueOf((long) StringUtils.getLevenshteinDistance(dataA.toString(), dataB.toString()));
}

From source file:com.google.gerrit.server.util.MostSpecificComparator.java

/**
 * Measures how far a reference pattern is from {@code refName}.
 * <p>
 * Regular-expression patterns and patterns ending in {@code "/*"} are first
 * turned into a concrete example string, which is then compared with
 * {@code refName} by Levenshtein distance. Any other pattern scores 0 when it
 * equals {@code refName} and otherwise the length of the longer of the two.
 *
 * @param pattern the reference pattern to score
 * @return the distance between the pattern and {@code refName}
 */
private int distance(String pattern) {
    final boolean regex = RefControl.isRE(pattern);

    // Plain patterns: either an exact hit or the worst possible distance.
    if (!regex && !pattern.endsWith("/*")) {
        return pattern.equals(refName) ? 0 : Math.max(pattern.length(), refName.length());
    }

    // For "prefix/*" the trailing '*' is replaced by '1' to get a sample name.
    final String example = regex
            ? RefControl.shortestExample(pattern)
            : pattern.substring(0, pattern.length() - 1) + '1';
    return StringUtils.getLevenshteinDistance(example, refName);
}

From source file:fr.ign.cogit.geoxygene.matching.dst.sources.text.LevenshteinDist.java

/**
 * Levenshtein measure, normalized by the length of the longer input.
 * <p>
 * Both strings are lower-cased before the edit distance is computed, so the
 * comparison ignores case. The result is the edit distance divided by
 * {@code Math.max(s.length(), t.length())}: 0 means the lower-cased strings
 * are identical, larger values mean less alike. Note that this is a
 * normalized distance, not {@code 1 - distance} similarity.
 *
 * @param s first string, must not be {@code null}
 * @param t second string, must not be {@code null}
 * @return the normalized distance; {@code 0.0} when both strings are empty
 * @throws IllegalArgumentException if either string is {@code null}
 */
public double compute(String s, String t) {
    if (s == null || t == null) {
        throw new IllegalArgumentException("Strings must not be null");
    }

    double l = StringUtils.getLevenshteinDistance(s.toLowerCase(), t.toLowerCase());
    DST_LOGGER.info("        StringUtils.getLevenshteinDistance : " + l);

    int maxLength = Math.max(s.length(), t.length());
    if (maxLength == 0) {
        // Two empty strings are identical; avoid 0.0 / 0, which would be NaN.
        return 0.0;
    }
    return l / maxLength;
}

From source file:com.evanmclean.evlib.text.FuzzyLexicon.java

/**
 * Finds the smallest Levenshtein distance between a word and any word of a
 * lexicon word set.
 *
 * @param word
 *          the word to measure
 * @param word_set
 *          the candidate words; may be {@code null} or empty
 * @return the minimum distance to any candidate, or the distance between
 *         {@code word} and the empty string when there are no candidates
 */
private int difference(final String word, final Set<String> word_set) {
    if ((word_set == null) || word_set.isEmpty()) {
        return StringUtils.getLevenshteinDistance(word, "");
    }

    int smallest = Integer.MAX_VALUE;
    for (final String candidate : word_set) {
        final int distance = StringUtils.getLevenshteinDistance(word, candidate);
        if (distance < smallest) {
            smallest = distance;
        }
    }
    return smallest;
}

From source file:edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.EditDistanceCollectiveAnswerScorer.java

/**
 * Computes the Levenshtein distance between two texts, divided by the length
 * of the longer text.
 *
 * @param text1 first text
 * @param text2 second text
 * @return the normalized distance; {@code 0.0} when both texts are empty
 */
private double getDistance(String text1, String text2) {
    // Computed first so that argument validation happens exactly as before.
    int distance = StringUtils.getLevenshteinDistance(text1, text2);
    int longest = Math.max(text1.length(), text2.length());
    if (longest == 0) {
        // Two empty texts are identical; avoid 0.0 / 0, which would be NaN.
        return 0.0;
    }
    return (double) distance / longest;
}

From source file:com.mmounirou.spotirss.spotify.tracks.SpotifyHrefQuery.java

/**
 * Combines the track-name distance and the artist distance of a candidate
 * track into a single score.
 * <p>
 * The score is {@code (trackDistance + 1) * (artistDistance + 1) + artistDistance},
 * where the track distance is the Levenshtein distance between the cleaned
 * track name and the candidate's song title.
 *
 * @param xtrack the track being looked up
 * @param track the candidate track
 * @return the combined score
 */
private int getLevenshteinDistance(XTracks xtrack, Track track) {
    final int titleGap = StringUtils.getLevenshteinDistance(xtrack.getCleanedTrackName(), track.getSong());
    final int artistGap = getArtistDistance(track, xtrack);

    final int product = (titleGap + 1) * (artistGap + 1);
    return product + artistGap;
}

From source file:com.mmounirou.spotirss.spotify.tracks.SpotifyHrefQuery.java

/**
 * Finds the smallest Levenshtein distance between an artist name and any
 * name in a set of artists.
 *
 * @param strArtist the artist name to compare
 * @param artists the names to compare against; must not be empty
 * @return the minimum distance
 * @throws java.util.NoSuchElementException if {@code artists} is empty
 */
private Integer getMinDistance(String strArtist, Set<String> artists) {
    if (artists.isEmpty()) {
        // Same exception type the previous sorted-set lookup raised, now explicit.
        throw new java.util.NoSuchElementException("No artists to compare with: " + strArtist);
    }

    // A running minimum avoids sorting and boxing every distance.
    int min = Integer.MAX_VALUE;
    for (String artist : artists) {
        min = Math.min(min, StringUtils.getLevenshteinDistance(strArtist, artist));
    }
    return min;
}

From source file:com.jroossien.boxx.util.Str.java

/**
 * Picks the value closest to the input, measured by the Levenshtein distance
 * from {@link StringUtils}.
 * <p>
 * An exact match (distance 0) is returned immediately. Otherwise a value is
 * only considered when its distance is strictly below
 * {@code input.length() - 1}; among those, the first value with the smallest
 * distance wins.
 *
 * @param input The input string to find a match for.
 * @param values Array of values to match with input string.
 * @return The best match from the specified values. (May be empty when there are no values or no match)
 */
public static String bestMatch(String input, String... values) {
    String closest = "";
    int closestDistance = input.length() - 1;

    for (int i = 0; i < values.length; i++) {
        final String candidate = values[i];
        final int distance = StringUtils.getLevenshteinDistance(input, candidate);

        if (distance == 0) {
            return candidate;
        }
        if (distance >= closestDistance) {
            continue;
        }
        closest = candidate;
        closestDistance = distance;
    }
    return closest;
}

From source file:com.ntua.cosmos.hackathonplanneraas.Planner.java

/**
 * Searches the stored case base for the event most similar to the given one
 * and returns the solution attached to it.
 * <p>
 * Every key of {@code obj} that starts with {@code "has"} is treated as an
 * event parameter. The parameter values are concatenated into one string and
 * compared, by Levenshtein distance, with the same concatenation built from
 * each event returned by a SPARQL query over the case-base ontology. The
 * solution of the row with the highest similarity
 * ({@code (longerLength - distance) / longerLength}) is returned.
 *
 * @param obj the event description; keys starting with {@code "has"} are used
 * @return an object with the keys {@code "message"}, {@code "URI"} and
 *         {@code "handle"} of the best matching solution, or an empty object
 *         when no row has a similarity above 0 or the query fails
 * @throws IllegalArgumentException if the case-base file cannot be opened
 */
@Deprecated
public JSONObject searchEventSolution(JSONObject obj) {
    StoredPaths pan = new StoredPaths();
    JSONObject returned = new JSONObject();
    ArrayList<String> names = new ArrayList<>();
    double similarity = 0.0;

    // Collect the parameter names: only keys starting with "has" take part.
    for (Object key : obj.keySet()) {
        String candidate = String.valueOf(key);
        if (candidate.startsWith("has")) {
            names.add(candidate);
        }
    }

    // Concatenate the parameter values in the same order as the names.
    StringBuilder originalBuilder = new StringBuilder();
    for (String name : names) {
        originalBuilder.append(String.valueOf(obj.get(name)));
    }
    String originalString = originalBuilder.toString();

    //Begin the initialisation process.
    OntModelSpec s = new OntModelSpec(PelletReasonerFactory.THE_SPEC);
    OntDocumentManager dm = OntDocumentManager.getInstance();
    dm.setFileManager(FileManager.get());
    s.setDocumentManager(dm);
    OntModel m = ModelFactory.createOntologyModel(s, null);
    try {
        InputStream in = FileManager.get().open(StoredPaths.casebasepath);
        if (in == null) {
            throw new IllegalArgumentException("File: " + StoredPaths.casebasepath + " not found");
        }
        // read the file, then release the stream whether or not reading worked
        try {
            m.read(in, null);
        } finally {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the model has already been read (or reading failed).
            }
        }

        //begin building query string.
        StringBuilder queryString = new StringBuilder(
                pan.prefixrdf + pan.prefixowl + pan.prefixxsd + pan.prefixrdfs + pan.prefixCasePath);
        queryString.append("\nSELECT DISTINCT ");
        for (int i = 0; i < names.size(); i++) {
            queryString.append("?param").append(i).append(" ");
        }
        queryString.append("?message ?handle ?URI WHERE {");
        for (int i = 0; i < names.size(); i++) {
            queryString.append("?event base:").append(names.get(i)).append(" ?param").append(i).append(" . ");
        }
        queryString.append(
                "?event base:isSolvedBy ?solution . ?solution base:exposesMessage ?message . ?solution base:eventHandledBy ?handle . ?solution base:URI ?URI}");

        QueryExecution qe = null;
        try {
            Query query = QueryFactory.create(queryString.toString());
            qe = QueryExecutionFactory.create(query, m);
            ResultSet results = qe.execSelect();
            while (results.hasNext()) {
                QuerySolution soln = results.nextSolution();

                // Build the comparison string for THIS row only; it must not
                // carry over the values of previously visited rows.
                StringBuilder testBuilder = new StringBuilder();
                for (int i = 0; i < names.size(); i++) {
                    // Get a result variable by name.
                    String rendered = String.valueOf(soln.getLiteral("param" + i));
                    // Drop everything from the "^^" marker on; keep the whole
                    // text when the marker is absent.
                    int marker = rendered.indexOf("^^");
                    testBuilder.append(marker >= 0 ? rendered.substring(0, marker) : rendered);
                }
                String testString = testBuilder.toString();

                int longerLength = Math.max(testString.length(), originalString.length());
                System.out.println("Similarity between:" + originalString + " and " + testString + " is:");
                // When both strings are empty this is NaN, which never beats
                // the current best below.
                double current = (longerLength - StringUtils.getLevenshteinDistance(originalString, testString))
                        / (double) longerLength;
                System.out.println(current + " out of 1.0.");
                if (similarity < current) {
                    similarity = current;
                    returned.clear();
                    returned.put("message", soln.getLiteral("message").getString());
                    returned.put("URI", soln.getLiteral("URI").getString());
                    returned.put("handle", soln.getLiteral("handle").getString());
                }
            }
        } catch (Exception e) {
            // Keep the best-effort contract, but do not hide what went wrong.
            System.out.println("Search is interrupted by an error.");
            System.out.println(e);
        } finally {
            if (qe != null) {
                qe.close();
            }
        }
    } finally {
        m.close();
    }
    return returned;
}