Java String Levenshtein Distance levenshteinDistance(String wordForm, String lemma)

Here you can find the source of levenshteinDistance(String wordForm, String lemma)

Description

Computes the Levenshtein distance of two strings in a matrix.

License

Apache License

Parameter

Parameter Description
wordForm the form
lemma the lemma

Return

the distance matrix, an int[][] with (wordForm.length() + 1) rows and (lemma.length() + 1) columns

Declaration

public static int[][] levenshteinDistance(String wordForm, String lemma) 

Method Source Code

//package com.java2s;
/*
 *Copyright 2016 Rodrigo Agerri
    
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    
   http://www.apache.org/licenses/LICENSE-2.0
    
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
 */

public class Main {
    /**
     * Computes the Levenshtein (edit) distance matrix of two strings.
     * <p>
     * Based on pseudo-code provided here:
     * https://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance
     * which in turn is based on the paper Wagner, Robert A.; Fischer, Michael J. (1974),
     * "The String-to-String Correction Problem", Journal of the ACM 21 (1): 168-173
     * <p>
     * Cell {@code [i][j]} of the result holds the edit distance between the first
     * {@code i} characters of {@code wordForm} and the first {@code j} characters of
     * {@code lemma}. The overall distance is therefore the bottom-right cell,
     * {@code result[wordForm.length()][lemma.length()]}.
     * <p>
     * Empty inputs are handled like any other: the border row and column are always
     * initialised, so the distance to an empty string equals the length of the other
     * string.
     *
     * @param wordForm the form
     * @param lemma the lemma
     * @return the distance matrix, of size
     *         {@code (wordForm.length() + 1) x (lemma.length() + 1)}
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int[][] levenshteinDistance(String wordForm, String lemma) {
        final int wordLength = wordForm.length();
        final int lemmaLength = lemma.length();
        final int[][] distance = new int[wordLength + 1][lemmaLength + 1];

        // Column 0: turning a prefix of wordForm into "" takes i deletions.
        for (int i = 0; i <= wordLength; i++) {
            distance[i][0] = i;
        }
        // Row 0: turning "" into a prefix of lemma takes j insertions.
        for (int j = 0; j <= lemmaLength; j++) {
            distance[0][j] = j;
        }
        // Remaining cells: cheapest of deletion, insertion and substitution.
        // When either string is empty these loops simply do not execute.
        for (int i = 1; i <= wordLength; i++) {
            final char wordChar = wordForm.charAt(i - 1);
            for (int j = 1; j <= lemmaLength; j++) {
                final int cost = (wordChar == lemma.charAt(j - 1)) ? 0 : 1;
                distance[i][j] = minimum(
                        distance[i - 1][j] + 1,         // deletion
                        distance[i][j - 1] + 1,         // insertion
                        distance[i - 1][j - 1] + cost); // substitution (free on match)
            }
        }
        return distance;
    }

    /**
     * Get minimum of three values.
     * @param a number a
     * @param b number b
     * @param c number c
     * @return the minimum
     */
    private static int minimum(int a, int b, int c) {
        return Math.min(a, Math.min(b, c));
    }
}

Related

  1. levenshteinDistance(String s, String t, int limit)
  2. levenshteinDistance(String s1, String s2)
  3. levenshteinDistance(String s1, String s2)
  4. levenshteinDistance(String st1, String st2)
  5. levenshteinDistance(String string1, String string2)
  6. levenshteinDistance(String x, String y)
  7. levenshteinDistance(String[] a, String[] b)
  8. levenshteinDistanceRatio(CharSequence lhs, CharSequence rhs)
  9. levenshteinEquals(double threshold, String dom1, String dom2)