Example usage for org.apache.commons.codec.language RefinedSoundex difference

List of usage examples for org.apache.commons.codec.language RefinedSoundex difference

Introduction

On this page you can find example usage of org.apache.commons.codec.language.RefinedSoundex.difference.

Prototype

public int difference(String s1, String s2) throws EncoderException 

Source Link

Document

Returns the number of characters in the two encoded Strings that are the same.

Usage

From source file: org.eobjects.datacleaner.phonetic.PhoneticSimilarityFinder.java

/**
 * Decides whether {@code value} belongs to the given similarity group.
 *
 * <p>The checks run in this order and the method returns {@code true} on the
 * first one that succeeds:
 * <ol>
 * <li>exact string equality with any value of the group;</li>
 * <li>equal Metaphone encodings;</li>
 * <li>Soundex difference at or above the configured threshold;</li>
 * <li>Refined Soundex difference at or above a threshold scaled by the
 * length of the shorter of the two strings.</li>
 * </ol>
 *
 * <p>A failure inside one of the Soundex comparisons is logged and treated as
 * "no match" for that comparison only; the remaining checks still run.
 *
 * @param value           the candidate value to test
 * @param similarityGroup the group whose values are compared against
 *                        {@code value}
 * @return {@code true} if {@code value} equals, or is phonetically similar
 *         to, at least one value of the group; {@code false} otherwise
 */
public boolean matches(String value, SimilarityGroup similarityGroup) {
    // Cheap pass first: an exact hit makes the phonetic encoders unnecessary.
    for (String similarityGroupValue : similarityGroup.getValues()) {
        if (value.equals(similarityGroupValue)) {
            return true;
        }
    }

    Soundex soundex = new Soundex();
    RefinedSoundex refinedSoundex = new RefinedSoundex();
    Metaphone metaphone = new Metaphone();

    final double threshold = (matchMode == MatchMode.STRICT) ? STRICT_SIMILARITY_THRESHOLD
            : LOOSE_SIMILARITY_THRESHOLD;

    // Scale the relative threshold by 4; Commons Codec documents
    // Soundex.difference as returning a value from 0 to 4.
    int soundexThreshold = (int) Math.round(threshold * 4);

    for (String similarityGroupValue : similarityGroup.getValues()) {
        if (metaphone.isMetaphoneEqual(value, similarityGroupValue)) {
            return true;
        }

        try {
            int soundexDiff = soundex.difference(value, similarityGroupValue);

            if (soundexDiff >= soundexThreshold) {
                return true;
            }
        } catch (Exception e) {
            // Best effort: log and fall through to the refined comparison.
            logger.error("Could not determine soundex difference", e);
        }

        int shorterLength = Math.min(value.length(), similarityGroupValue.length());

        // Require at least one matching encoded character. Without the lower
        // bound an empty (or very short) string yields a threshold of 0, and
        // since the difference is never negative every pair would "match".
        int refinedSoundexThreshold = Math.max(1, (int) Math.round(threshold * shorterLength));

        try {
            int refinedSoundexDiff = refinedSoundex.difference(value, similarityGroupValue);

            if (refinedSoundexDiff >= refinedSoundexThreshold) {
                return true;
            }
        } catch (Exception e) {
            // Best effort: log and continue with the next group value.
            logger.error("Could not determine refined soundex difference", e);
        }
    }

    return false;
}