List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(String s, String t)
Find the Levenshtein distance between two Strings.
From source file:com.rslakra.java.string.TestApacheStringUtils.java
/**
 * Demonstrates a handful of {@code StringUtils} helpers, ending with
 * {@code getLevenshteinDistance}. Each result is written to {@code System.err}
 * on its own line, in the order the calls appear below.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Character set shared by the containsNone / containsOnly probes.
    final char[] probeChars = { 'r', 'o' };

    // Truncation, casing and padding.
    final String abbreviated = StringUtils.abbreviate("Take time off working", 0, 10);
    System.err.println(abbreviated);
    final String capitalized = StringUtils.capitalize("vanderLust");
    System.err.println(capitalized);
    final String centered = StringUtils.center("MTV", 7, '=');
    System.err.println(centered);

    // Trimming from the end.
    final String chomped = StringUtils.chomp("temperature", "ure");
    System.err.println(chomped);
    final String chopped = StringUtils.chop("Dane");
    System.err.println(chopped);

    // Containment checks.
    final boolean hasSubstring = StringUtils.contains("Dorothy", "oro");
    System.err.println(hasSubstring);
    final boolean hasNone = StringUtils.containsNone("r u m t", probeChars);
    System.err.println(hasNone);
    final boolean hasOnly = StringUtils.containsOnly("r u m t", probeChars);
    System.err.println(hasOnly);

    // Counting and whitespace removal.
    final int matches = StringUtils.countMatches("arthur", "r");
    System.err.println(matches);
    final String compacted = StringUtils.deleteWhitespace("f f f f");
    System.err.println(compacted);

    // Comparing two strings.
    final String difference = StringUtils.difference("govern", "government");
    System.err.println(difference);
    final int editDistance = StringUtils.getLevenshteinDistance("govern", "government");
    System.err.println(editDistance);
}
From source file:com.apexxs.neonblack.scoring.StringScorers.java
/**
 * Scores how similar two strings are, ignoring case, from their Levenshtein distance.
 *
 * <p>The score is {@code 1 - distance / maxLength}, where {@code distance} is computed
 * on the lower-cased inputs and {@code maxLength} is the length of the longer input.
 * Two empty strings score {@code 1.0}; without that guard the division would be
 * {@code 0 / 0} and the method would return {@code NaN}.
 *
 * @param s1 first string; must not be {@code null}
 * @param s2 second string; must not be {@code null}
 * @return the similarity score, {@code 1.0} meaning the lower-cased strings are equal
 * @throws NullPointerException if either argument is {@code null}
 */
public Float getLevenshtien(String s1, String s2) {
    int maxLength = Math.max(s1.length(), s2.length());
    if (maxLength == 0) {
        // Both inputs are empty: they are identical, and dividing by zero would give NaN.
        return 1.0f;
    }
    int distance = StringUtils.getLevenshteinDistance(s1.toLowerCase(), s2.toLowerCase());
    return 1.0f - ((float) distance / (float) maxLength);
}
From source file:com.vangent.hieos.empi.distance.LevenshteinDistanceFunction.java
/** * // ww w. j a va2 s .co m * @param s1 * @param s2 * @return */ public double getDistance(String s1, String s2) { if (s1 == null && s2 == null) { // If both are null, return 1.0 (exact match). return 1.0; } if (s1 == null || s2 == null) { // If one of the strings is null, return 0.0 (no match). return 0.0; } int lens1 = s1.length(); int lens2 = s2.length(); if (lens1 == 0 && lens2 == 0) { // Both are the empty string, return 1.0 (exact match). return 1.0; } // Compute Levenshtein Distance. int distance = StringUtils.getLevenshteinDistance(s1, s2); // Now normalize between 0.0 and 1.0 int maxlen = lens1 >= lens2 ? lens1 : lens2; // maxlen = 0 should never happen. return 1.0 - ((double) distance / (double) maxlen); }
From source file:edu.umd.cs.psl.ui.functions.textsimilarity.LevenshteinStringSimilarity.java
@Override public double similarity(String s1, String s2) { int maxLen = Math.max(s1.length(), s2.length()); if (maxLen == 0) return 1.0; double ldist = StringUtils.getLevenshteinDistance(s1, s2); double sim = 1.0 - (ldist / maxLen); if (sim > similarityThreshold) return sim; return 0.0;//w w w . j a v a2s .c om }
From source file:edu.mayo.cts2.framework.filter.match.LevenshteinDistanceMatcher.java
/**
 * Scores a candidate against the match text using the Levenshtein distance.
 *
 * <p>The raw distance and the length of the longer string are handed to
 * {@code normalize}, which produces the final score.
 *
 * @param matchText the text being searched for; must not be {@code null}
 * @param cadidate the candidate text to score; must not be {@code null}
 * @return the value returned by {@code normalize(distance, longestLength)}
 */
@Override
protected float doMatchScore(String matchText, String cadidate) {
    // Lengths are read before the distance call, as in the original ordering.
    final int matchLength = matchText.length();
    final int candidateLength = cadidate.length();
    final int longest = (matchLength >= candidateLength) ? matchLength : candidateLength;

    final int editDistance = StringUtils.getLevenshteinDistance(matchText, cadidate);
    return this.normalize(editDistance, longest);
}
From source file:edu.umd.cs.psl.ui.functions.textsimilarity.LevenshteinSimilarity.java
@Override public double getValue(ReadOnlyDatabase db, GroundTerm... args) { String a = ((StringAttribute) args[0]).getValue(); String b = ((StringAttribute) args[1]).getValue(); int maxLen = Math.max(a.length(), b.length()); if (maxLen == 0) return 1.0; double ldist = StringUtils.getLevenshteinDistance(a, b); double sim = 1.0 - (ldist / maxLen); if (sim > similarityThreshold) return sim; return 0.0;//from w ww . jav a2s .co m }
From source file:com.quui.chat.Preprocessor.java
/** * Cleans all occurences and almost-occurences (Levenshtein Distance) of * nick in message.// w ww. j av a2 s. c om * @param message The message to clean * @param nick The nick to clean from the message * @return The cleaned message */ static public String clean(String message, String nick) { String[] toks = message.toLowerCase().split("[^?!'\\p{L}]"); String[] nickToks = nick.toLowerCase().split("[^\\p{L}]"); for (int j = 0; j < toks.length; j++) { for (int i = 0; i < nickToks.length; i++) { int dist = StringUtils.getLevenshteinDistance(toks[j], nickToks[i]); if (dist < 2 && toks[j].length() > 3 && nickToks[i].length() > 3) {// || Log.logger .debug("Cutting out, L-Dist zw. " + toks[j] + " und " + nickToks[i] + " ist: " + dist); toks[j] = ""; } } } String result = ""; for (int j = 0; j < toks.length; j++) { result = (result + toks[j].trim()).trim() + " "; } return result.trim(); }
From source file:it.acubelab.smaph.SmaphUtils.java
/** * @param tokenB/* w w w . ja v a2 s . c om*/ * a word. * @param tokenQ * another word. * @return the normalized edit distance between tokenB and tokenQ. */ public static float getNormEditDistance(String tokenB, String tokenQ) { if (tokenQ.isEmpty() || tokenB.isEmpty()) return 1; int lev = StringUtils.getLevenshteinDistance(tokenB, tokenQ); return (float) lev / (float) Math.max(tokenB.length(), tokenQ.length()); }
From source file:com.intellectualcrafters.plot.util.StringComparison.java
/** * Compare two strings/*from w w w .j av a 2 s. com*/ * * @param s1 String Base * @param s2 Object * * @return match */ public static int compare(final String s1, final String s2) { int distance = StringUtils.getLevenshteinDistance(s1, s2); if (s2.contains(s1)) { distance -= (Math.min(s1.length(), s2.length())); } if (s2.startsWith(s1)) { distance -= 4; } return distance; }
From source file:com.github.stagirs.lingvo.morph.MorphPredictor.java
/**
 * Predicts the best-matching {@code Morph} for a word.
 *
 * <p>The state machine is advanced over at most the last two characters of the
 * word, last character first, stopping early if a character has no transition.
 * Every morph the iterator yields from the reached state is then compared with
 * the word. The morph with the smallest Levenshtein distance to the word wins;
 * ties are broken by the smaller soundex distance, and on a full tie the later
 * candidate replaces the earlier one.
 *
 * <p>The suffix walk never reads below index 0. Previously a one-character word
 * (or an empty one) made the loop call {@code word.charAt(-1)} and throw
 * {@code StringIndexOutOfBoundsException}.
 *
 * @param word the word to find a morph for; must not be {@code null}
 * @return the chosen morph with its word set to {@code word}, or {@code null}
 *         if the iterator yields no candidate
 */
public static Morph get(String word) {
    MorphStateMachine.State state = MorphStateMachine.begin();

    // Walk the last two characters (fewer for short words), last character first.
    final int lastIndex = word.length() - 1;
    final int stopIndex = Math.max(0, word.length() - 2);
    for (int index = lastIndex; index >= stopIndex; index--) {
        MorphStateMachine.State curState = MorphStateMachine.getState(state, word.charAt(index));
        if (curState == null) {
            // No transition for this character: keep the deepest state reached.
            break;
        }
        state = curState;
    }

    MorphIterator iterator = new MorphIterator(state);
    Morph minMorph = null;
    int minDistanceLev = Integer.MAX_VALUE;
    int minDistanceSoundex = Integer.MAX_VALUE;

    // Encode the transliterated word once; each candidate is compared against it.
    DaitchMokotoffSoundex soundex = new DaitchMokotoffSoundex();
    String translitWord = toTranslit(word);
    byte[] encoded = getBytes(soundex, translitWord);

    while (iterator.hasNext()) {
        Morph morph = iterator.next();
        int distanceLev = StringUtils.getLevenshteinDistance(word, morph.getRaw());
        if (minDistanceLev < distanceLev) {
            // Strictly worse edit distance: skip the soundex comparison entirely.
            continue;
        }
        int distanceSoundex = getDistance(soundex, morph.getRaw(), encoded);
        if (minDistanceLev == distanceLev && minDistanceSoundex < distanceSoundex) {
            // Same edit distance but a worse soundex distance: keep the current best.
            continue;
        }
        minMorph = morph;
        minDistanceLev = distanceLev;
        minDistanceSoundex = distanceSoundex;
    }

    if (minMorph == null) {
        return null;
    }
    // NOTE(review): this mutates the instance handed out by the iterator; confirm
    // that MorphIterator returns objects that are safe to modify.
    minMorph.setWord(word);
    return minMorph;
}