Identifies whether two strings are close enough that they are likely to be intended to be the same string - Android java.lang

Android examples for java.lang:String Algorithm

Description

Identifies whether two strings are close enough that they are likely to be intended to be the same string

Demo Code

import android.support.v4.util.LruCache;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.regex.Pattern;

/**
 * String-similarity helpers: a Levenshtein edit-distance routine and a
 * fuzzy-match predicate built on top of it.
 */
public class Main {

    /** {@code b} must be strictly longer than this before fuzzy matching is attempted. */
    private static final int MIN_FUZZY_LENGTH = 3;

    /** Largest edit distance that still counts as a fuzzy match. */
    private static final int MAX_FUZZY_DISTANCE = 2;

    /**
     * Identifies whether two strings are close enough that they are likely to be
     * intended to be the same string. Fuzzy matching is only performed when
     * {@code b} is longer than {@value #MIN_FUZZY_LENGTH} characters; note that
     * only the length of {@code b} is checked, not the length of {@code a}.
     *
     * @param a the first string to compare; must not be null
     * @param b the second string to compare; must not be null
     * @return true if the two strings meet CommCare's fuzzy match definition
     *         (edit distance of at most {@value #MAX_FUZZY_DISTANCE}), false otherwise.
     * @throws NullPointerException if either argument is null
     */
    public static boolean fuzzyMatch(String a, String b) {
        // Tweakable parameter: minimum length before edit distance starts being
        // used. This basically only makes sure that "at" doesn't match "or" or similar.
        if (b.length() <= MIN_FUZZY_LENGTH) {
            return false;
        }

        // The edit distance can never be smaller than the difference in lengths,
        // so a large disparity rules out a match without running the full computation.
        int sizeDiff = Math.abs(a.length() - b.length());
        if (sizeDiff > MAX_FUZZY_DISTANCE) {
            return false;
        }

        // Tweakable parameter: maximum edit distance accepted as a match.
        return LevenshteinDistance(a, b) <= MAX_FUZZY_DISTANCE;
    }

    /**
     * Computes the Levenshtein Distance between two strings: the minimum number
     * of single-character insertions, deletions and substitutions needed to turn
     * one string into the other. Characters are compared with
     * {@link String#charAt(int)}, i.e. per UTF-16 code unit.
     *
     * This code is adapted from wikibooks under
     * the Creative Commons attribution share-alike 3.0 license and
     * may be re-used under the terms of that license.
     *
     * http://creativecommons.org/licenses/by-sa/3.0/
     *
     * TODO: re-implement for efficiency/licensing possibly.
     *
     * @param s0 the first string; must not be null
     * @param s1 the second string; must not be null
     *
     * @return the edit distance between {@code s0} and {@code s1}
     * @throws NullPointerException if either argument is null
     */
    public static int LevenshteinDistance(String s0, String s1) {
        int len0 = s0.length() + 1;
        int len1 = s1.length() + 1;

        // Two rolling rows of the distance matrix, indexed by prefix length of s0.
        int[] cost = new int[len0];
        int[] newcost = new int[len0];

        // Row 0: turning a prefix of s0 into the empty string costs one deletion per character.
        for (int i = 0; i < len0; i++) {
            cost[i] = i;
        }

        // Dynamically compute one row per character of s1.
        for (int j = 1; j < len1; j++) {

            // Column 0: turning the empty string into the first j characters
            // of s1 costs j insertions.
            newcost[0] = j;

            // Transformation cost for each letter in s0.
            for (int i = 1; i < len0; i++) {

                // 0 if the current letters match, 1 if a substitution is needed.
                int match = (s0.charAt(i - 1) == s1.charAt(j - 1)) ? 0 : 1;

                // Cost of each possible last step.
                int costReplace = cost[i - 1] + match;
                int costInsert = cost[i] + 1;
                int costDelete = newcost[i - 1] + 1;

                // Keep the cheapest.
                newcost[i] = Math.min(Math.min(costInsert, costDelete), costReplace);
            }

            // Swap rows: the row just computed becomes the "previous" row.
            int[] swap = cost;
            cost = newcost;
            newcost = swap;
        }

        // The distance is the cost of transforming all of s0 into all of s1.
        return cost[len0 - 1];
    }

}

Related Tutorials