Example usage for org.apache.commons.lang StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance

Introduction

On this page you can find usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance.

Prototype

public static int getLevenshteinDistance(String s, String t) 

Source Link

Document

Find the Levenshtein distance between two Strings.

Usage

From source file:com.rslakra.java.string.TestApacheStringUtils.java

/**
 * Demonstrates a selection of {@code StringUtils} helpers by printing the
 * result of each call to standard error, one result per line.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String args[]) {
    final java.io.PrintStream err = System.err;
    // Character set shared by the containsNone / containsOnly demonstrations.
    final char[] probe = { 'r', 'o' };

    // Formatting and trimming helpers.
    err.println(StringUtils.abbreviate("Take time off working", 0, 10));
    err.println(StringUtils.capitalize("vanderLust"));
    err.println(StringUtils.center("MTV", 7, '='));
    err.println(StringUtils.chomp("temperature", "ure"));
    err.println(StringUtils.chop("Dane"));

    // Containment and counting helpers.
    err.println(StringUtils.contains("Dorothy", "oro"));
    err.println(StringUtils.containsNone("r u m t", probe));
    err.println(StringUtils.containsOnly("r u m t", probe));
    err.println(StringUtils.countMatches("arthur", "r"));

    // Whitespace removal and string comparison helpers.
    err.println(StringUtils.deleteWhitespace("f f f f"));
    err.println(StringUtils.difference("govern", "government"));
    err.println(StringUtils.getLevenshteinDistance("govern", "government"));
}

From source file:com.apexxs.neonblack.scoring.StringScorers.java

/**
 * Computes a case-insensitive similarity score from the Levenshtein distance
 * between two strings.
 *
 * <p>Both inputs are lower-cased, their edit distance is computed, and the
 * distance is normalised by the length of the longer lower-cased string:
 * {@code 1 - distance / maxLength}. A result of {@code 1.0} means the strings
 * are equal ignoring case; {@code 0.0} means every position differs.
 *
 * @param s1 the first string; must not be {@code null}
 * @param s2 the second string; must not be {@code null}
 * @return the similarity score; {@code 1.0} when both strings are empty
 */
public Float getLevenshtien(String s1, String s2) {
    final String lower1 = s1.toLowerCase();
    final String lower2 = s2.toLowerCase();

    // Normalise by the lengths of the strings that are actually compared:
    // lower-casing can change a string's length for some characters, and using
    // the original lengths could then push the score outside the expected range.
    final int maxLength = Math.max(lower1.length(), lower2.length());
    if (maxLength == 0) {
        // Two empty strings are identical; 0 / 0 would otherwise yield NaN.
        return 1.0f;
    }

    final int distance = StringUtils.getLevenshteinDistance(lower1, lower2);
    return 1.0f - ((float) distance / (float) maxLength);
}

From source file:com.vangent.hieos.empi.distance.LevenshteinDistanceFunction.java

/**
 * Scores how alike two strings are, based on their Levenshtein distance
 * normalised to the range 0.0 to 1.0.
 *
 * @param s1 the first string, may be {@code null}
 * @param s2 the second string, may be {@code null}
 * @return {@code 1.0} when both arguments are {@code null} or both are empty,
 *         {@code 0.0} when exactly one argument is {@code null}, otherwise
 *         {@code 1.0 - distance / length of the longer string}
 */
public double getDistance(String s1, String s2) {
    if (s1 == null || s2 == null) {
        // Two nulls count as an exact match; a single null counts as no match.
        return (s1 == null && s2 == null) ? 1.0 : 0.0;
    }

    final int longest = Math.max(s1.length(), s2.length());
    if (longest == 0) {
        // Both strings are empty: exact match (and avoids dividing by zero).
        return 1.0;
    }

    final int edits = StringUtils.getLevenshteinDistance(s1, s2);

    // Scale the raw edit count by the longer length to land in [0.0, 1.0].
    return 1.0 - ((double) edits / (double) longest);
}

From source file:edu.umd.cs.psl.ui.functions.textsimilarity.LevenshteinStringSimilarity.java

/**
 * Computes a thresholded Levenshtein similarity between two strings.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return {@code 1.0} when both strings are empty; otherwise
 *         {@code 1 - distance / length of the longer string} if that value
 *         exceeds {@code similarityThreshold}, else {@code 0.0}
 */
@Override
public double similarity(String s1, String s2) {
    final int longest = Math.max(s1.length(), s2.length());
    if (longest == 0) {
        return 1.0;
    }

    final double editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    final double score = 1.0 - (editDistance / longest);

    // Scores at or below the threshold are reported as no similarity at all.
    if (score <= similarityThreshold) {
        return 0.0;
    }
    return score;
}

From source file:edu.mayo.cts2.framework.filter.match.LevenshteinDistanceMatcher.java

/**
 * Scores a candidate against the match text using their Levenshtein distance.
 *
 * @param matchText the text being searched for
 * @param cadidate the candidate text to score
 * @return the edit distance passed through {@code normalize} together with
 *         the length of the longer of the two strings
 */
@Override
protected float doMatchScore(String matchText, String cadidate) {
    final int maxLength = Math.max(matchText.length(), cadidate.length());
    final int editDistance = StringUtils.getLevenshteinDistance(matchText, cadidate);

    return this.normalize(editDistance, maxLength);
}

From source file:edu.umd.cs.psl.ui.functions.textsimilarity.LevenshteinSimilarity.java

/**
 * Computes a thresholded Levenshtein similarity between the string values of
 * the first two ground terms.
 *
 * @param db the database handle (not used by this computation)
 * @param args the ground terms; the first two are cast to
 *            {@code StringAttribute} and compared
 * @return {@code 1.0} when both values are empty; otherwise
 *         {@code 1 - distance / length of the longer value} if that exceeds
 *         {@code similarityThreshold}, else {@code 0.0}
 */
@Override
public double getValue(ReadOnlyDatabase db, GroundTerm... args) {
    final String left = ((StringAttribute) args[0]).getValue();
    final String right = ((StringAttribute) args[1]).getValue();

    final int longest = Math.max(left.length(), right.length());
    if (longest == 0) {
        return 1.0;
    }

    final double editDistance = StringUtils.getLevenshteinDistance(left, right);
    final double score = 1.0 - (editDistance / longest);

    // Scores at or below the threshold are reported as no similarity at all.
    if (score <= similarityThreshold) {
        return 0.0;
    }
    return score;
}

From source file:com.quui.chat.Preprocessor.java

/**
 * Removes all occurrences and near-occurrences of a nick from a message.
 *
 * <p>Both message and nick are lower-cased and split into tokens. A message
 * token is dropped when it is longer than three characters and lies within a
 * Levenshtein distance of less than 2 of a nick token that is also longer than
 * three characters. The surviving tokens are joined with single spaces.
 *
 * @param message The message to clean
 * @param nick The nick to clean from the message
 * @return The cleaned, lower-cased message
 */
static public String clean(String message, String nick) {

    String[] toks = message.toLowerCase().split("[^?!'\\p{L}]");
    String[] nickToks = nick.toLowerCase().split("[^\\p{L}]");
    for (int j = 0; j < toks.length; j++) {
        // Short tokens are never removed, so skip the distance computation entirely.
        if (toks[j].length() <= 3) {
            continue;
        }
        for (int i = 0; i < nickToks.length; i++) {
            if (nickToks[i].length() <= 3) {
                continue;
            }
            int dist = StringUtils.getLevenshteinDistance(toks[j], nickToks[i]);
            if (dist < 2) {
                Log.logger
                        .debug("Cutting out, L-Dist zw. " + toks[j] + " und " + nickToks[i] + " ist: " + dist);
                toks[j] = "";
                // The token is gone; no further nick token can match it.
                break;
            }
        }
    }

    // Join the surviving non-empty tokens with single spaces in one pass
    // instead of repeatedly concatenating and trimming the growing result.
    StringBuilder result = new StringBuilder();
    for (int j = 0; j < toks.length; j++) {
        String tok = toks[j].trim();
        if (tok.length() == 0) {
            continue;
        }
        if (result.length() > 0) {
            result.append(' ');
        }
        result.append(tok);
    }
    return result.toString();
}

From source file:it.acubelab.smaph.SmaphUtils.java

/**
 * @param tokenB/* w w w  . ja v  a2 s .  c  om*/
 *            a word.
 * @param tokenQ
 *            another word.
 * @return the normalized edit distance between tokenB and tokenQ.
 */
public static float getNormEditDistance(String tokenB, String tokenQ) {
    if (tokenQ.isEmpty() || tokenB.isEmpty())
        return 1;
    int lev = StringUtils.getLevenshteinDistance(tokenB, tokenQ);
    return (float) lev / (float) Math.max(tokenB.length(), tokenQ.length());
}

From source file:com.intellectualcrafters.plot.util.StringComparison.java

/**
 * Compare two strings/*from w  w w  .j av a 2  s. com*/
 *
 * @param s1 String Base
 * @param s2 Object
 *
 * @return match
 */
public static int compare(final String s1, final String s2) {
    int distance = StringUtils.getLevenshteinDistance(s1, s2);
    if (s2.contains(s1)) {
        distance -= (Math.min(s1.length(), s2.length()));
    }
    if (s2.startsWith(s1)) {
        distance -= 4;
    }
    return distance;
}

From source file:com.github.stagirs.lingvo.morph.MorphPredictor.java

/**
 * Predicts the morph for a word by picking the closest candidate.
 *
 * <p>The state machine is advanced over up to the last two characters of the
 * word, read from the end, stopping early when no transition exists. The
 * candidates reachable from the resulting state are then ranked by Levenshtein
 * distance to the word, with the soundex-based distance as tie-breaker. When
 * two candidates tie on both measures, the one iterated later wins.
 *
 * @param word the word to predict a morph for
 * @return the best candidate with its word set to {@code word}, or
 *         {@code null} when there are no candidates
 */
public static Morph get(String word) {
    MorphStateMachine.State state = MorphStateMachine.begin();
    // Clamp the lower bound at 0 so words shorter than two characters do not
    // index below the start of the string.
    final int lowest = Math.max(0, word.length() - 2);
    for (int finish = word.length() - 1; finish >= lowest; finish--) {
        MorphStateMachine.State curState = MorphStateMachine.getState(state, word.charAt(finish));
        if (curState == null) {
            break;
        }
        state = curState;
    }
    MorphIterator iterator = new MorphIterator(state);
    Morph minMorph = null;
    int minDistanceLev = Integer.MAX_VALUE;
    int minDistanceSoundex = Integer.MAX_VALUE;
    DaitchMokotoffSoundex soundex = new DaitchMokotoffSoundex();
    String translitWord = toTranslit(word);
    byte[] encoded = getBytes(soundex, translitWord);
    while (iterator.hasNext()) {
        Morph morph = iterator.next();
        int distanceLev = StringUtils.getLevenshteinDistance(word, morph.getRaw());
        // Strictly worse edit distance: skip before paying for the soundex distance.
        if (minDistanceLev < distanceLev) {
            continue;
        }
        int distanceSoundex = getDistance(soundex, morph.getRaw(), encoded);
        // Same edit distance but strictly worse soundex distance: keep the current best.
        if (minDistanceLev == distanceLev && minDistanceSoundex < distanceSoundex) {
            continue;
        }
        minMorph = morph;
        minDistanceLev = distanceLev;
        minDistanceSoundex = distanceSoundex;
    }
    if (minMorph == null) {
        return null;
    }
    minMorph.setWord(word);
    return minMorph;
}