List of usage examples for org.apache.commons.lang3 StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(CharSequence s, CharSequence t)
From source file:gt.org.ms.api.requesting.ValidationsHelper.java
public static <T> T findBestMatchItem(final String param, List<T> options) { List<MatchItem> weights = new ArrayList<MatchItem>( Collections2.transform(options, new Function<T, MatchItem>() { @Override/*from w w w . j av a 2s . c om*/ public MatchItem apply(T f) { return new MatchItem( StringUtils.getLevenshteinDistance(param.toLowerCase(), f.toString().toLowerCase()), f); } })); Collections.sort(weights); //fot those objects without tostring implementation it will be wrong so better return null return (T) weights.get(0).value; }
From source file:com.hpe.caf.worker.testing.ContentComparer.java
public static double calculateSimilarity(String s1, String s2) { String longer = s1, shorter = s2; if (s1.length() < s2.length()) { // longer should always have greater length longer = s2;/*from w w w. j a v a 2 s . com*/ shorter = s1; } int longerLength = longer.length(); if (longerLength == 0) { return 1.0; /* both strings are zero length */ } return (longerLength - StringUtils.getLevenshteinDistance(longer, shorter)) / (double) longerLength; }
From source file:com.ijuru.ijambo.Utils.java
/**
 * Scrambles the letters of a word.
 *
 * <p>Two independent shuffles of the word's letters are produced, and the one
 * with the larger Levenshtein distance from the original word is returned.
 * When both distances are equal, the second shuffle is returned.
 *
 * @param word the word to scramble
 * @return the scrambled word
 */
public static String scrambleWord(String word) {
    String[] letters = word.split("");

    // The first list is a view over the array; the second is an independent copy
    // taken before any shuffling so both start from the original order.
    List<String> firstShuffle = Arrays.asList(letters);
    List<String> secondShuffle = new ArrayList<String>(firstShuffle);

    Collections.shuffle(firstShuffle);
    Collections.shuffle(secondShuffle);

    String firstCandidate = StringUtils.join(firstShuffle, "");
    String secondCandidate = StringUtils.join(secondShuffle, "");

    int firstDistance = StringUtils.getLevenshteinDistance(word, firstCandidate);
    int secondDistance = StringUtils.getLevenshteinDistance(word, secondCandidate);

    if (firstDistance > secondDistance) {
        return firstCandidate;
    }
    return secondCandidate;
}
From source file:cognition.common.utils.StringTools.java
/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the distance reported by {@code StringUtils}
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int distance = StringUtils.getLevenshteinDistance(str1, str2);
    return distance;
}
From source file:ch.uzh.ifi.seal.changedistiller.treedifferencing.matching.measure.LevenshteinSimilarityCalculator.java
@Override public double calculateSimilarity(String left, String right) { double levenshteinDistance = StringUtils.getLevenshteinDistance(left, right); double worstCaseDistance = calculateWorstCaseDistance(left, right); if (worstCaseDistance != 0d) { return (worstCaseDistance - levenshteinDistance) / worstCaseDistance; }//w w w . j a va 2 s. c o m return 0d; }
From source file:kenh.expl.functions.GetLevenshteinDistance.java
public int process(String str1, String str2) { try {//from w w w .j ava 2s .com return StringUtils.getLevenshteinDistance(str1, str2); } catch (Exception e) { return 0; } }
From source file:de.undercouch.citeproc.helper.Levenshtein.java
/** * Searches the given collection of strings and returns the string that * has the lowest Levenshtein distance to a given second string <code>t</code>. * If the collection contains multiple strings with the same distance to * <code>t</code> only the first one will be returned. * @param <T> the type of the strings in the given collection * @param ss the collection to search/*from www . jav a2 s. co m*/ * @param t the second string * @return the string with the lowest Levenshtein distance */ public static <T extends CharSequence> T findMinimum(Collection<T> ss, CharSequence t) { int min = Integer.MAX_VALUE; T result = null; for (T s : ss) { int d = StringUtils.getLevenshteinDistance(s, t); if (d < min) { min = d; result = s; } } return result; }
From source file:changenodes.matching.calculators.LevenshteinSimilarityCalculator.java
public double calculateSimilarity(String left, String right) { double levenshteinDistance = StringUtils.getLevenshteinDistance(left, right); double worstCaseDistance = calculateWorstCaseDistance(left, right); if (worstCaseDistance != 0d) { return (worstCaseDistance - levenshteinDistance) / worstCaseDistance; }//from w w w.j a v a2s . c o m return 0d; }
From source file:com.screenslicer.core.util.StringUtil.java
public static int dist(String str1, String str2) { if (distCache.size() > MAX_DIST_CACHE) { distCache.clear();// w w w . ja va2 s. c o m } String cacheKey = str1 + "<<>>" + str2; if (distCache.containsKey(cacheKey)) { return distCache.get(cacheKey); } int dist = StringUtils.getLevenshteinDistance(str1, str2); distCache.put(cacheKey, dist); return dist; }
From source file:de.undercouch.citeproc.helper.Levenshtein.java
/** * Searches the given collection of strings and returns a collection of at * most <code>n</code> strings that have the lowest Levenshtein distance * to a given string <code>t</code>. The returned collection will be * sorted according to the distance with the string with the lowest * distance at the first position.//from www.ja v a2 s .c o m * @param <T> the type of the strings in the given collection * @param ss the collection to search * @param t the string to compare to * @param n the maximum number of strings to return * @param threshold a threshold for individual item distances. Only items * with a distance below this threshold will be included in the result. * @return the strings with the lowest Levenshtein distance */ public static <T extends CharSequence> Collection<T> findMinimum(Collection<T> ss, CharSequence t, int n, int threshold) { LinkedList<Item<T>> result = new LinkedList<Item<T>>(); for (T s : ss) { int d = StringUtils.getLevenshteinDistance(s, t); if (d < threshold) { result.offer(new Item<T>(s, d)); if (result.size() > n + 10) { //resort, but not too often Collections.sort(result); while (result.size() > n) result.removeLast(); } } } Collections.sort(result); while (result.size() > n) result.removeLast(); List<T> arr = new ArrayList<T>(n); for (Item<T> i : result) { arr.add(i.str); } return arr; }