Example usage for org.apache.commons.lang3 StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance

Introduction

This page collects example usages of org.apache.commons.lang3 StringUtils getLevenshteinDistance.

Prototype

public static int getLevenshteinDistance(CharSequence s, CharSequence t) 

Source Link

Document

<p>Find the Levenshtein distance between two Strings.</p> <p>This is the number of changes needed to change one String into another, where each change is a single character modification (deletion, insertion or substitution).</p> <p>The previous implementation of the Levenshtein distance algorithm was from <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p> <p>Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which can occur when my Java implementation is used with very large strings.<br> This implementation of the Levenshtein distance algorithm is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p> <pre> StringUtils.getLevenshteinDistance(null, *) = IllegalArgumentException StringUtils.getLevenshteinDistance(*, null) = IllegalArgumentException StringUtils.getLevenshteinDistance("","") = 0 StringUtils.getLevenshteinDistance("","a") = 1 StringUtils.getLevenshteinDistance("aaapppp", "") = 7 StringUtils.getLevenshteinDistance("frog", "fog") = 1 StringUtils.getLevenshteinDistance("fly", "ant") = 3 StringUtils.getLevenshteinDistance("elephant", "hippo") = 7 StringUtils.getLevenshteinDistance("hippo", "elephant") = 7 StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 StringUtils.getLevenshteinDistance("hello", "hallo") = 1 </pre>

Usage

From source file:gt.org.ms.api.requesting.ValidationsHelper.java

/**
 * Picks the option whose {@code toString()} text is closest to {@code param},
 * comparing lower-cased strings by Levenshtein distance.
 *
 * <p>Candidates are wrapped in {@code MatchItem}, sorted with its natural ordering
 * (presumably ascending by distance -- confirm against {@code MatchItem}) and the
 * first one is returned. Options that do not override {@code toString()} are
 * compared by their default representation, so the result is not meaningful for them.
 *
 * @param <T> the option type
 * @param param the text to match; a null value fails with a NullPointerException
 *        unless {@code options} is empty
 * @param options the candidates; must not be null and must not contain null elements
 * @return the best matching option, or {@code null} when {@code options} is empty
 */
public static <T> T findBestMatchItem(final String param, List<T> options) {
    // Nothing to rank: return null instead of failing on weights.get(0) below.
    if (options.isEmpty()) {
        return null;
    }

    // Lower-case the search text once rather than once per candidate.
    final String needle = param.toLowerCase();

    List<MatchItem> weights = new ArrayList<MatchItem>(options.size());
    for (T option : options) {
        int distance = StringUtils.getLevenshteinDistance(needle, option.toString().toLowerCase());
        weights.add(new MatchItem(distance, option));
    }

    Collections.sort(weights);
    return (T) weights.get(0).value;
}

From source file:com.hpe.caf.worker.testing.ContentComparer.java

/**
 * Computes a similarity ratio for two strings from their Levenshtein distance.
 *
 * <p>The ratio is {@code (maxLength - distance) / maxLength}, where {@code maxLength}
 * is the length of the longer string. Two empty strings yield {@code 1.0}.
 *
 * @param s1 the first string; must not be null
 * @param s2 the second string; must not be null
 * @return the similarity ratio
 */
public static double calculateSimilarity(String s1, String s2) {
    // On equal lengths the first argument is treated as the longer one.
    final boolean firstIsLonger = s1.length() >= s2.length();
    final String longer = firstIsLonger ? s1 : s2;
    final String shorter = firstIsLonger ? s2 : s1;

    final int maxLength = longer.length();
    if (maxLength == 0) {
        // Both strings are empty.
        return 1.0;
    }

    final int distance = StringUtils.getLevenshteinDistance(longer, shorter);
    return (maxLength - distance) / (double) maxLength;
}

From source file:com.ijuru.ijambo.Utils.java

/**
 * Scrambles the letters of a word. Two independent shuffles of the letters
 * are produced and the one with the larger Levenshtein distance from the
 * original word is returned; on a tie the second shuffle is returned.
 * @param word the word
 * @return the scrambled word
 */
public static String scrambleWord(String word) {
    // The copy is taken before any shuffling, so both lists start in the original order.
    List<String> firstLetters = Arrays.asList(word.split(""));
    List<String> secondLetters = new ArrayList<String>(firstLetters);

    Collections.shuffle(firstLetters);
    Collections.shuffle(secondLetters);

    String firstCandidate = StringUtils.join(firstLetters, "");
    String secondCandidate = StringUtils.join(secondLetters, "");

    int firstDistance = StringUtils.getLevenshteinDistance(word, firstCandidate);
    int secondDistance = StringUtils.getLevenshteinDistance(word, secondCandidate);

    if (firstDistance > secondDistance) {
        return firstCandidate;
    }
    return secondCandidate;
}

From source file:cognition.common.utils.StringTools.java

/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the distance reported by {@code StringUtils}
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int distance = StringUtils.getLevenshteinDistance(str1, str2);
    return distance;
}

From source file:ch.uzh.ifi.seal.changedistiller.treedifferencing.matching.measure.LevenshteinSimilarityCalculator.java

/**
 * Computes the similarity of two strings as
 * {@code (worstCase - distance) / worstCase}, where {@code distance} is their
 * Levenshtein distance and {@code worstCase} comes from
 * {@code calculateWorstCaseDistance}.
 *
 * @param left the first string
 * @param right the second string
 * @return the similarity, or {@code 0} when the worst-case distance is zero
 */
@Override
public double calculateSimilarity(String left, String right) {
    final double distance = StringUtils.getLevenshteinDistance(left, right);
    final double worstCase = calculateWorstCaseDistance(left, right);

    // Guard against dividing by zero.
    if (worstCase == 0d) {
        return 0d;
    }
    return (worstCase - distance) / worstCase;
}

From source file:kenh.expl.functions.GetLevenshteinDistance.java

/**
 * Returns the Levenshtein distance between two strings.
 *
 * <p>Best-effort: if the distance computation throws any {@code Exception},
 * {@code 0} is returned. A result of {@code 0} is therefore ambiguous, because it
 * is also the distance between two equal strings.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the distance, or {@code 0} when it could not be computed
 */
public int process(String str1, String str2) {
    int distance = 0;
    try {
        distance = StringUtils.getLevenshteinDistance(str1, str2);
    } catch (Exception ignored) {
        // Deliberately swallowed: the fallback value 0 is kept.
    }
    return distance;
}

From source file:de.undercouch.citeproc.helper.Levenshtein.java

/**
 * Searches the given collection of strings and returns the string that
 * has the lowest Levenshtein distance to a given second string <code>t</code>.
 * If the collection contains multiple strings with the same distance to
 * <code>t</code> only the first one will be returned.
 * @param <T> the type of the strings in the given collection
 * @param ss the collection to search/*from  www  .  jav a2  s. co  m*/
 * @param t the second string
 * @return the string with the lowest Levenshtein distance
 */
public static <T extends CharSequence> T findMinimum(Collection<T> ss, CharSequence t) {
    int min = Integer.MAX_VALUE;
    T result = null;
    for (T s : ss) {
        int d = StringUtils.getLevenshteinDistance(s, t);
        if (d < min) {
            min = d;
            result = s;
        }
    }
    return result;
}

From source file:changenodes.matching.calculators.LevenshteinSimilarityCalculator.java

/**
 * Computes a similarity score for two strings: the worst-case distance
 * (from {@code calculateWorstCaseDistance}) minus their Levenshtein distance,
 * divided by the worst-case distance.
 *
 * @param left the first string
 * @param right the second string
 * @return the similarity score, or {@code 0} when the worst-case distance is zero
 */
public double calculateSimilarity(String left, String right) {
    final double editDistance = StringUtils.getLevenshteinDistance(left, right);
    final double maxDistance = calculateWorstCaseDistance(left, right);

    // A zero worst case cannot be used as a divisor.
    final boolean hasWorstCase = maxDistance != 0d;
    return hasWorstCase ? (maxDistance - editDistance) / maxDistance : 0d;
}

From source file:com.screenslicer.core.util.StringUtil.java

/**
 * Returns the Levenshtein distance between two strings, caching results in
 * {@code distCache}.
 *
 * <p>The cache is emptied whenever it holds more than {@code MAX_DIST_CACHE}
 * entries. The key is {@code str1 + "<<>>" + str2}, so {@code (a, b)} and
 * {@code (b, a)} are cached as separate entries.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the cached or freshly computed distance
 */
public static int dist(String str1, String str2) {
    if (distCache.size() > MAX_DIST_CACHE) {
        distCache.clear();
    }
    String cacheKey = str1 + "<<>>" + str2;

    // One lookup instead of containsKey() followed by get(): avoids hashing the key
    // twice and cannot unbox a null if the entry vanishes between the two calls.
    Integer cached = distCache.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    int dist = StringUtils.getLevenshteinDistance(str1, str2);
    distCache.put(cacheKey, dist);
    return dist;
}

From source file:de.undercouch.citeproc.helper.Levenshtein.java

/**
 * Searches the given collection of strings and returns a collection of at
 * most <code>n</code> strings that have the lowest Levenshtein distance
 * to a given string <code>t</code>. The returned collection is ordered by
 * the natural ordering of the internal <code>Item</code> wrapper, which is
 * expected to put the string with the lowest distance at the first position.
 * @param <T> the type of the strings in the given collection
 * @param ss the collection to search
 * @param t the string to compare to
 * @param n the maximum number of strings to return
 * @param threshold a threshold for individual item distances. Only items
 * with a distance below this threshold will be included in the result.
 * @return the strings with the lowest Levenshtein distance
 */
public static <T extends CharSequence> Collection<T> findMinimum(Collection<T> ss, CharSequence t, int n,
        int threshold) {
    LinkedList<Item<T>> result = new LinkedList<Item<T>>();
    for (T s : ss) {
        int d = StringUtils.getLevenshteinDistance(s, t);
        if (d < threshold) {
            result.offer(new Item<T>(s, d));

            // Re-sort and trim only once the buffer exceeds n by more than 10 entries,
            // so sorting does not happen on every insertion. Written as a subtraction
            // because "n + 10" overflows when n is close to Integer.MAX_VALUE.
            if (result.size() - 10 > n) {
                Collections.sort(result);
                while (result.size() > n) {
                    result.removeLast();
                }
            }
        }
    }

    // Final sort and trim to at most n entries.
    Collections.sort(result);
    while (result.size() > n) {
        result.removeLast();
    }

    // Size the output by what was actually found, not by the caller's cap n:
    // a very large n would otherwise allocate a huge backing array.
    List<T> arr = new ArrayList<T>(result.size());
    for (Item<T> i : result) {
        arr.add(i.str);
    }
    return arr;
}