Example usage for org.apache.commons.codec.language Soundex difference

List of usage examples for org.apache.commons.codec.language Soundex difference

Introduction

On this page you can find usage examples for the difference method of org.apache.commons.codec.language.Soundex.

Prototype

public int difference(String s1, String s2) throws EncoderException 

Source Link

Document

Encodes the Strings and returns the number of characters in the two encoded Strings that are the same.

Usage

From source file:com.plugin.UI.Windows.BiblePeopleSearchResultsDialog.java

/**
 * Checks whether {@code str}, or any single space-separated word inside it,
 * is a full Soundex match for {@code _term}.
 *
 * <p>A "full match" means {@code Soundex.difference} returns 4, i.e. all four
 * characters of the two Soundex codes agree. The whole string is tried first;
 * if that does not match and the string contains spaces, each word is tried
 * in turn.
 *
 * @param str the text to test; {@code null} yields {@code false}
 * @param _term the term to compare against
 *
 * @return true if the whole string or at least one of its words is a full
 *         Soundex match for {@code _term}
 */
private boolean isSoundexCalc(String str, String _term) {
    if (str == null) {
        return false;
    }

    Soundex s = new Soundex();
    if (isFullSoundexMatch(s, str, _term)) {
        return true;
    }

    if (str.contains(" ")) {
        // Split once and reuse the tokens instead of re-splitting per iteration.
        for (String word : str.split(" ")) {
            if (isFullSoundexMatch(s, word, _term)) {
                return true;
            }
        }
    }

    return false;
}

/**
 * Returns true when the Soundex difference of {@code candidate} and
 * {@code _term} is 4. A candidate that cannot be encoded counts as a
 * non-match rather than aborting the caller's search.
 */
private boolean isFullSoundexMatch(Soundex s, String candidate, String _term) {
    try {
        return s.difference(candidate, _term) == 4;
    } catch (Exception ignored) {
        // Deliberately ignored: presumably raised for input the encoder cannot
        // map (TODO confirm against the Soundex version in use). Treating it
        // as "no match" lets the remaining words still be checked.
        return false;
    }
}

From source file:org.eobjects.datacleaner.phonetic.PhoneticSimilarityFinder.java

/**
 * Decides whether {@code value} matches any member of the given similarity group.
 *
 * <p>An exact string match against any member wins immediately. Otherwise each
 * member is compared phonetically, in this order: Metaphone equality, Soundex
 * difference, Refined Soundex difference. The Soundex cut-off is the configured
 * threshold scaled by 4; the Refined Soundex cut-off is the threshold scaled by
 * the length of the shorter of the two raw strings. Encoder failures are logged
 * and treated as "no match" for that comparison.
 *
 * @param value the value to test
 * @param similarityGroup the group whose values are compared against
 * @return true if any comparison succeeds, false otherwise
 */
public boolean matches(String value, SimilarityGroup similarityGroup) {
    // Pass 1: literal equality with any group member.
    for (String candidate : similarityGroup.getValues()) {
        if (value.equals(candidate)) {
            return true;
        }
    }

    final Soundex soundexEncoder = new Soundex();
    final RefinedSoundex refinedEncoder = new RefinedSoundex();
    final Metaphone metaphoneEncoder = new Metaphone();

    final double threshold = (matchMode == MatchMode.STRICT)
            ? STRICT_SIMILARITY_THRESHOLD
            : LOOSE_SIMILARITY_THRESHOLD;
    final int soundexMinimum = (int) Math.round(threshold * 4);

    // Pass 2: phonetic comparison with each group member.
    for (String candidate : similarityGroup.getValues()) {
        if (metaphoneEncoder.isMetaphoneEqual(value, candidate)) {
            return true;
        }

        try {
            if (soundexEncoder.difference(value, candidate) >= soundexMinimum) {
                return true;
            }
        } catch (Exception e) {
            logger.error("Could not determine soundex difference", e);
        }

        final int shorterLength = Math.min(value.length(), candidate.length());
        final int refinedMinimum = (int) Math.round(threshold * shorterLength);

        try {
            if (refinedEncoder.difference(value, candidate) >= refinedMinimum) {
                return true;
            }
        } catch (Exception e) {
            logger.error("Could not determine refined soundex difference", e);
        }
    }

    return false;
}

From source file:sandra.examples.oneshot.voicelaunch.VoiceLaunch.java

/**
 * Compares the names using their phonetic similarity, using the soundex algorithm.
 * We have used an implementation of this algorithm provided by Apache.
 * Attention: it only works for English/*  w ww. j  a v  a  2s.co m*/
 */
private double comparePhonetic(String recognizedApp, String nameApp) {
    Soundex soundex = new Soundex();

    //Returns the number of characters in the two encoded Strings that are the same. 
    //This return value ranges from 0 to the length of the shortest encoded String: 0 indicates little or no similarity, 
    //and 4 out of 4 (for example) indicates strong similarity or identical values. 
    double sim = 0;
    try {
        sim = soundex.difference(recognizedApp, nameApp);
    } catch (Exception e) {
        Log.e(LOGTAG, "Error during soundex encoding. Similarity forced to 0");
        sim = 0;
    }
    return sim / 4;
}