Example usage for org.apache.commons.lang StringUtils getLevenshteinDistance

List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance

Introduction

On this page you can find usage examples for the getLevenshteinDistance method of org.apache.commons.lang.StringUtils.

Prototype

public static int getLevenshteinDistance(String s, String t) 

Source Link

Document

Find the Levenshtein distance between two Strings.

Usage

From source file:org.codesecure.dependencycheck.utils.SSDeep.java

/**
 * Scores how closely two signature strings match, on a 0-100 scale where 0 is a
 * poor (or impossible) match and 100 is an excellent match.
 *
 * @param s1 first signature
 * @param s2 second signature
 * @param block_size block size the signatures were computed with; small block
 *                   sizes cap the achievable score
 * @return the match score, or 0 when the inputs are not comparable
 */
static int score_strings(char[] s1, char[] s2, int block_size) {
    final int len1 = s1.length;
    final int len2 = s2.length;

    // anything longer than SPAMSUM_LENGTH is not treated as a real spamsum signature
    if (len1 > SPAMSUM_LENGTH || len2 > SPAMSUM_LENGTH) {
        return 0;
    }

    // candidates must share a common substring (of length ROLLING_WINDOW,
    // per the original implementation notes)
    if (has_common_substring(s1, s2) == 0) {
        return 0;
    }

    // the edit distance tells us how closely related the two strings are
    final int editDistance = StringUtils.getLevenshteinDistance(new String(s1), new String(s2));

    // scale by the combined length so the value expresses the proportion of the
    // message that changed instead of an absolute quantity; this also copes
    // with signatures of different lengths (roughly 0-64, 0 = good match)
    final int proportional = (editDistance * SPAMSUM_LENGTH) / (len1 + len2);

    // rescale to 0-100, still with 0 meaning "good match"
    final int mismatch = (100 * proportional) / 64;

    // values of 100 or more can occur; they denote a really terrible match
    if (mismatch >= 100) {
        return 0;
    }

    // flip the scale: 0 = poor match, 100 = excellent match
    final int similarity = 100 - mismatch;

    // with a small block size the match must not be exaggerated
    // (integer division before the multiplication is intentional)
    final int cap = block_size / MIN_BLOCKSIZE * Math.min(len1, len2);
    return Math.min(similarity, cap);
}

From source file:org.eclipse.jubula.rc.common.components.FindComponentBP.java

/**
 * Computes the equivalence of two names as a ratio between 0.0 (nothing in
 * common) and 1.0 (identical). The ratio is the share of the longer name's
 * characters that do not need to be edited, based on the Levenshtein distance.
 * <p>
 * Examples:
 * <ul>
 * <li>{@code jButton1} vs. {@code jButton1} gives 1.0</li>
 * <li>{@code jButton1} vs. {@code jButton2} gives 0.875</li>
 * <li>two empty names give 1.0</li>
 * </ul>
 *
 * @param name1 first name, must not be null
 * @param name2 second name, must not be null
 * @return the equivalence ratio in the range 0.0 to 1.0
 */
private double getNameEquivalence(String name1, String name2) {
    int diff = StringUtils.getLevenshteinDistance(name1, name2);
    int maxLength = Math.max(name1.length(), name2.length());
    if (maxLength == 0) {
        // both names are empty, i.e. identical; without this guard the
        // formula below evaluates to Infinity * 0 = NaN
        return 1.0;
    }
    return 1.0 / maxLength * (maxLength - diff);
}

From source file:org.eclipse.smila.connectivity.framework.crawler.web.metadata.Metadata.java

/**
 * Get the normalized name of meta data attribute name. This method tries to find a well-known meta data name (one of
 * the meta data names defined in this class) that matches the specified name. The matching is error tolerent. For
 * instance,/*from  w w w  .j a  va2 s. c om*/
 * <ul>
 * <li>content-type gives Content-Type</li>
 * <li>CoNtEntType gives Content-Type</li>
 * <li>ConTnTtYpe gives Content-Type</li>
 * </ul>
 * If no matching with a well-known meta data name is found, then the original name is returned.
 * 
 * @param name
 *          Meta data attribute name.
 * @return String
 */
public static String getNormalizedName(final String name) {
    final String searched = normalize(name);
    String value = s_namesIdx.get(searched);

    if ((value == null) && (s_normalized != null)) {
        final int threshold = searched.length() / THRESHOLD_FACTOR;
        for (int i = 0; i < s_normalized.length && value == null; i++) {
            if (StringUtils.getLevenshteinDistance(searched, s_normalized[i]) < threshold) {
                value = s_namesIdx.get(s_normalized[i]);
            }
        }
    }
    if (value != null) {
        return value;
    } else {
        return name;
    }
}

From source file:org.eclipse.xtext.xbase.ui.quickfix.JavaTypeQuickfixes.java

/**
 * Tells whether two type names are similar, i.e. at most three single-character
 * edits (Levenshtein distance) apart.
 *
 * @param s0 first type name
 * @param s1 second type name
 * @return {@code true} if the edit distance is 3 or less
 */
protected boolean isSimilarTypeName(String s0, String s1) {
    final int maxEditDistance = 3;
    return StringUtils.getLevenshteinDistance(s0, s1) <= maxEditDistance;
}

From source file:org.gradle.execution.TaskNameResolvingBuildExecuter.java

/**
 * Resolves each requested task path to the task(s) it selects.
 *
 * For every path an exact lookup is tried first. If that finds nothing, the
 * base name is turned into a pattern (see getPatternForName) and matched
 * against the known task names; exactly one pattern match is accepted as the
 * selection. In all other cases a TaskSelectionException is thrown, listing
 * the ambiguous pattern matches or, failing that, task names within a small
 * Levenshtein distance of the requested name.
 *
 * @param gradle the build whose default project is the starting point of the lookup
 * @param paths the task paths to resolve
 * @return the selected tasks keyed by (possibly expanded) path, in the order the paths were processed
 * @throws TaskSelectionException if a project or task cannot be found, or a task name is ambiguous
 */
private static Map<String, Collection<Task>> doSelect(GradleInternal gradle, Iterable<String> paths) {
    Project defaultProject = gradle.getDefaultProject();

    // built lazily, at most once, and shared by all unqualified paths
    Map<String, Collection<Task>> allProjectsTasksByName = null;

    Map<String, Collection<Task>> matches = new LinkedHashMap<String, Collection<Task>>();
    for (String path : paths) {
        Map<String, Collection<Task>> tasksByName;
        String baseName;
        String prefix;

        if (path.contains(Project.PATH_SEPARATOR)) {
            // qualified path: everything before the last separator names the project
            prefix = StringUtils.substringBeforeLast(path, Project.PATH_SEPARATOR);
            // an empty prefix means the path started with the separator
            prefix = prefix.length() == 0 ? Project.PATH_SEPARATOR : prefix;
            Project project = defaultProject.findProject(prefix);
            if (project == null) {
                throw new TaskSelectionException(
                        String.format("Project '%s' not found in %s.", prefix, defaultProject));
            }
            baseName = StringUtils.substringAfterLast(path, Project.PATH_SEPARATOR);
            // exact task name in that project wins immediately
            Task match = project.getTasks().findByName(baseName);
            if (match != null) {
                matches.put(path, Collections.singleton(match));
                continue;
            }

            // no exact hit: collect this project's tasks for pattern/typo matching
            tasksByName = new HashMap<String, Collection<Task>>();
            for (Task task : project.getTasks().getAll()) {
                tasksByName.put(task.getName(), Collections.singleton(task));
            }
            // prefix is later prepended to the resolved task name to form the result key
            prefix = prefix + Project.PATH_SEPARATOR;

        } else {
            // unqualified name: ask the default project for tasks with that name
            // NOTE(review): the 'true' argument presumably includes subprojects - confirm
            Set<Task> tasks = defaultProject.getTasksByName(path, true);
            if (!tasks.isEmpty()) {
                matches.put(path, tasks);
                continue;
            }
            if (allProjectsTasksByName == null) {
                allProjectsTasksByName = buildTaskMap(defaultProject);
            }
            tasksByName = allProjectsTasksByName;
            baseName = path;
            prefix = "";
        }

        Pattern pattern = getPatternForName(baseName);
        // TreeSets keep the candidate names sorted for the error messages
        Set<String> patternCandidates = new TreeSet<String>();
        Set<String> typoCandidates = new TreeSet<String>();
        for (String candidate : tasksByName.keySet()) {
            if (pattern.matcher(candidate).matches()) {
                patternCandidates.add(candidate);
            }
            // case-insensitive typo check: at most 3 edits, and never more than
            // half the length of the requested name
            if (StringUtils.getLevenshteinDistance(baseName.toUpperCase(), candidate.toUpperCase()) <= Math
                    .min(3, baseName.length() / 2)) {
                typoCandidates.add(candidate);
            }
        }
        // a single pattern match is unambiguous and therefore selected
        if (patternCandidates.size() == 1) {
            String actualName = patternCandidates.iterator().next();
            matches.put(prefix + actualName, tasksByName.get(actualName));
            continue;
        }

        // several pattern matches: report the ambiguity
        if (!patternCandidates.isEmpty()) {
            throw new TaskSelectionException(String.format("Task '%s' is ambiguous in %s. Candidates are: %s.",
                    baseName, defaultProject, GUtil.toString(patternCandidates)));
        }
        // no pattern match, but some names are close: offer them as suggestions
        if (!typoCandidates.isEmpty()) {
            throw new TaskSelectionException(
                    String.format("Task '%s' not found in %s. Some candidates are: %s.", baseName,
                            defaultProject, GUtil.toString(typoCandidates)));
        }
        throw new TaskSelectionException(String.format("Task '%s' not found in %s.", baseName, defaultProject));
    }

    return matches;
}

From source file:org.gradle.model.internal.registry.ModelPathSuggestionProvider.java

/**
 * Suggests available model paths that look like the given unavailable one.
 * A path qualifies when its Levenshtein distance to the unavailable path is
 * at most 3 and at most half the length of the unavailable path.
 *
 * @param unavailable the path that could not be found
 * @return the qualifying paths, in the order produced by sorting the suggestions
 */
public List<ModelPath> transform(final ModelPath unavailable) {
    Function<ModelPath, Suggestion> toSuggestion = new Function<ModelPath, Suggestion>() {
        public Suggestion apply(ModelPath available) {
            String wanted = unavailable.toString();
            int distance = StringUtils.getLevenshteinDistance(wanted, available.toString());
            int maxDistance = Math.min(3, wanted.length() / 2);
            // null stands for "not similar enough" and is filtered out below;
            // this avoids creating Suggestion objects that would be discarded
            return distance <= maxDistance ? new Suggestion(distance, available) : null;
        }
    };

    Iterable<Suggestion> scored = Iterables.transform(availablePaths, toSuggestion);
    Iterable<Suggestion> similarOnly = Iterables.filter(scored, REMOVE_NULLS);
    List<Suggestion> ranked = CollectionUtils.sort(similarOnly);
    return CollectionUtils.collect(ranked, Suggestion.EXTRACT_PATH);
}

From source file:org.gradle.util.NameMatcher.java

/**
 * Finds the best match for a pattern among the candidate items.
 *
 * An item equal to the pattern is returned right away. Otherwise the items
 * are classified as case-insensitive equals, camel-case matches,
 * case-insensitive camel-case prefix matches, or near misses (small
 * Levenshtein distance). Near misses are only recorded in {@code candidates};
 * the first non-empty group of the other three, in that order, becomes
 * {@code matches}.
 *
 * @param pattern the name or abbreviation to look for
 * @param items the names to search
 * @return The match if exactly 1 match found, null if no matches or multiple matches.
 */
public String find(String pattern, Collection<String> items) {
    this.pattern = pattern;
    matches.clear();
    candidates.clear();

    // exact hit: no further analysis needed
    if (items.contains(pattern)) {
        matches.add(pattern);
        return pattern;
    }

    if (pattern.length() == 0) {
        return null;
    }

    final Pattern camelCase = getPatternForName(pattern);
    final Pattern camelCaseIgnoringCase = Pattern.compile(camelCase.pattern(), Pattern.CASE_INSENSITIVE);
    final String upperCasePattern = pattern.toUpperCase();
    // a near miss may differ by at most 3 edits, and by no more than half the pattern length
    final int maxTypoDistance = Math.min(3, pattern.length() / 2);

    final Set<String> equalIgnoringCase = new TreeSet<String>();
    final Set<String> camelCaseHits = new TreeSet<String>();
    final Set<String> looseCamelCaseHits = new TreeSet<String>();

    for (String item : items) {
        if (item.equalsIgnoreCase(pattern)) {
            equalIgnoringCase.add(item);
        }
        // the following categories are mutually exclusive; the first that applies wins
        if (camelCase.matcher(item).matches()) {
            camelCaseHits.add(item);
        } else if (camelCaseIgnoringCase.matcher(item).lookingAt()) {
            looseCamelCaseHits.add(item);
        } else if (StringUtils.getLevenshteinDistance(upperCasePattern, item.toUpperCase()) <= maxTypoDistance) {
            candidates.add(item);
        }
    }

    // pick the strongest non-empty group
    final Set<String> selected;
    if (!equalIgnoringCase.isEmpty()) {
        selected = equalIgnoringCase;
    } else if (!camelCaseHits.isEmpty()) {
        selected = camelCaseHits;
    } else {
        selected = looseCamelCaseHits;
    }
    matches.addAll(selected);

    return matches.size() == 1 ? matches.first() : null;
}

From source file:org.kew.rmf.matchers.LevenshteinMatcher.java

/**
 * Calculates the Levenshtein distance between two strings and counts the
 * call in {@code numExecutions}.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the edit distance between {@code s1} and {@code s2}
 */
public Integer calculateLevenshtein(String s1, String s2) {
    numExecutions++;
    // Integer.valueOf replaces the deprecated Integer(int) constructor and can reuse cached instances
    return Integer.valueOf(StringUtils.getLevenshteinDistance(s1, s2));
}

From source file:org.openbravo.advpaymentmngt.utility.FIN_BankStatementImport.java

/**
 * Picks the business partner whose name is closest (smallest Levenshtein
 * distance) to the given partner name.
 *
 * Each row of the result set is read as an Object[] whose element 0 is the
 * business partner id and element 1 its name. The first row, compared against
 * the unmodified partner name, is the baseline. All rows are then compared
 * case-insensitively against a cleaned partner name (configured exception
 * strings and digits removed); a row replaces the current choice only when
 * it is strictly closer.
 *
 * @param businessPartners scrollable rows of (id, name) pairs
 * @param partnername the name to look for
 * @return the id of the closest business partner; the empty string if there
 *         are no rows; the best id found so far if an error interrupts the search
 */
private String closest(ScrollableResults businessPartners, String partnername) {
    String targetBusinessPartnerId = "";
    try {
        businessPartners.beforeFirst();
        if (!businessPartners.next()) {
            // empty result set: there is nothing to choose from
            return targetBusinessPartnerId;
        }
        Object[] resultObject = (Object[]) businessPartners.get(0);

        String targetBusinessPartnerName = "";
        if (resultObject.getClass().isArray()) {
            final Object[] values = resultObject;
            targetBusinessPartnerId = (String) values[0];
            targetBusinessPartnerName = (String) values[1];
        }

        // baseline: distance between the raw name and the first row
        int distance = StringUtils.getLevenshteinDistance(partnername, targetBusinessPartnerName);
        String parsedPartnername = partnername.toLowerCase();
        // Remove exceptions
        // NOTE: replaceAll interprets each exception string as a regular expression
        for (String eliminate : stringExceptions) {
            parsedPartnername = parsedPartnername.replaceAll(eliminate.toLowerCase(), "");
        }

        // Remove Numeric characters
        char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
        for (char character : digits) {
            parsedPartnername = parsedPartnername.replace(character, ' ');
            parsedPartnername = parsedPartnername.trim();
        }

        businessPartners.beforeFirst();
        int i = 0;
        while (businessPartners.next()) {
            i++;
            String bpId = "";
            String bpName = "";
            resultObject = (Object[]) businessPartners.get(0);
            if (resultObject.getClass().isArray()) {
                final Object[] values = resultObject;
                bpId = (String) values[0];
                bpName = (String) values[1];
            }
            // Levenshtein distance: the number of single-character changes
            // needed to turn one string into the other
            int bpDistance = StringUtils.getLevenshteinDistance(parsedPartnername, bpName.toLowerCase());
            if (bpDistance < distance) {
                distance = bpDistance;
                targetBusinessPartnerId = bpId;
            }
            // flush and clear the session every 100 rows
            // NOTE(review): presumably to bound session memory - confirm
            if (i % 100 == 0) {
                OBDal.getInstance().flush();
                OBDal.getInstance().getSession().clear();
            }
        }
    } catch (Exception e) {
        // best effort: keep whatever was found so far, but log the real
        // exception (with stack trace) instead of the array's identity string
        log4j.error("Error while looking for the closest business partner name", e);
    }
    // returning here instead of from a finally block lets Errors propagate
    return targetBusinessPartnerId;
}

From source file:org.openmrs.module.muzima.handler.JsonEncounterQueueDataHandler.java

/**
 * Looks for an already saved patient that matches the unsaved one.
 *
 * A saved patient matches when both full names are non-blank, the given names
 * and the family names are each fewer than 3 edits apart (case-insensitive
 * Levenshtein distance), the genders are equal ignoring case, and both birth
 * dates are present and fall on the same day.
 *
 * @param patients saved patients to search
 * @param unsavedPatient the patient to find a counterpart for
 * @return the first matching saved patient, or {@code null} if none matches
 */
private Patient findPatient(final List<Patient> patients, final Patient unsavedPatient) {
    final String unsavedGivenName = unsavedPatient.getGivenName();
    final String unsavedFamilyName = unsavedPatient.getFamilyName();
    final PersonName unsavedPersonName = unsavedPatient.getPersonName();

    for (final Patient candidate : patients) {
        final PersonName candidateName = candidate.getPersonName();
        // names can only be compared when both sides actually have one
        if (StringUtils.isBlank(candidateName.getFullName())
                || StringUtils.isBlank(unsavedPersonName.getFullName())) {
            continue;
        }

        final String candidateGivenName = candidateName.getGivenName();
        final int givenNameDistance = StringUtils.getLevenshteinDistance(
                StringUtils.lowerCase(candidateGivenName), StringUtils.lowerCase(unsavedGivenName));
        final String candidateFamilyName = candidateName.getFamilyName();
        final int familyNameDistance = StringUtils.getLevenshteinDistance(
                StringUtils.lowerCase(candidateFamilyName), StringUtils.lowerCase(unsavedFamilyName));

        // small typos in the names are tolerated; gender and birth date must agree
        final boolean samePerson = givenNameDistance < 3 && familyNameDistance < 3
                && StringUtils.equalsIgnoreCase(candidate.getGender(), unsavedPatient.getGender())
                && candidate.getBirthdate() != null && unsavedPatient.getBirthdate() != null
                && DateUtils.isSameDay(candidate.getBirthdate(), unsavedPatient.getBirthdate());
        if (samePerson) {
            return candidate;
        }
    }
    return null;
}