List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(String s, String t)
Find the Levenshtein distance between two Strings.
From source file:org.codesecure.dependencycheck.utils.SSDeep.java
static int score_strings(char[] s1, char[] s2, int block_size) { int score = 0; int len1, len2; len1 = s1.length;/*from w w w . j av a2 s. c om*/ len2 = s2.length; if (len1 > SPAMSUM_LENGTH || len2 > SPAMSUM_LENGTH) { /* not a real spamsum signature? */ return 0; } /* the two strings must have a common substring of length ROLLING_WINDOW to be candidates */ if (has_common_substring(s1, s2) == 0) { return 0; } /* compute the edit distance between the two strings. The edit distance gives us a pretty good idea of how closely related the two strings are */ score = StringUtils.getLevenshteinDistance(new String(s1), new String(s2)); /* scale the edit distance by the lengths of the two strings. This changes the score to be a measure of the proportion of the message that has changed rather than an absolute quantity. It also copes with the variability of the string lengths. */ score = (score * SPAMSUM_LENGTH) / (len1 + len2); /* at this stage the score occurs roughly on a 0-64 scale, * with 0 being a good match and 64 being a complete * mismatch */ /* rescale to a 0-100 scale (friendlier to humans) */ score = (100 * score) / 64; /* it is possible to get a score above 100 here, but it is a really terrible match */ if (score >= 100) return 0; /* now re-scale on a 0-100 scale with 0 being a poor match and 100 being a excellent match. */ score = 100 - score; // printf ("len1: %"PRIu32" len2: %"PRIu32"\n", len1, len2); /* when the blocksize is small we don't want to exaggerate the match size */ if (score > block_size / MIN_BLOCKSIZE * Math.min(len1, len2)) { score = block_size / MIN_BLOCKSIZE * Math.min(len1, len2); } return score; }
From source file:org.eclipse.jubula.rc.common.components.FindComponentBP.java
/**
 * Computes the equivalence of two names as a fraction between 0.0 and 1.0.
 * <p>
 * The value is the share of the longer name's length that is left after
 * subtracting the Levenshtein distance between the two names:
 * {@code (maxLength - distance) / maxLength}.
 * <p>
 * Examples:
 * <ul>
 * <li>jButton1 &lt;=&gt; jButton1 = 1.0</li>
 * <li>jButton1 &lt;=&gt; jButton2 = 0.875</li>
 * </ul>
 * Two empty names are treated as identical and yield 1.0 (the plain formula
 * would divide by zero and produce NaN).
 *
 * @param name1 first name, must not be null
 * @param name2 second name, must not be null
 * @return equivalence as a fraction, 1.0 meaning identical names
 */
private double getNameEquivalence(String name1, String name2) {
    // Computed first so that null arguments keep failing the same way as before.
    int diff = StringUtils.getLevenshteinDistance(name1, name2);
    int maxLength = Math.max(name1.length(), name2.length());
    if (maxLength == 0) {
        // Both names are empty: identical by definition, and avoids 1.0 / 0.
        return 1.0;
    }
    return 1.0 / maxLength * (maxLength - diff);
}
From source file:org.eclipse.smila.connectivity.framework.crawler.web.metadata.Metadata.java
/** * Get the normalized name of meta data attribute name. This method tries to find a well-known meta data name (one of * the meta data names defined in this class) that matches the specified name. The matching is error tolerent. For * instance,/*from w w w .j a va2 s. c om*/ * <ul> * <li>content-type gives Content-Type</li> * <li>CoNtEntType gives Content-Type</li> * <li>ConTnTtYpe gives Content-Type</li> * </ul> * If no matching with a well-known meta data name is found, then the original name is returned. * * @param name * Meta data attribute name. * @return String */ public static String getNormalizedName(final String name) { final String searched = normalize(name); String value = s_namesIdx.get(searched); if ((value == null) && (s_normalized != null)) { final int threshold = searched.length() / THRESHOLD_FACTOR; for (int i = 0; i < s_normalized.length && value == null; i++) { if (StringUtils.getLevenshteinDistance(searched, s_normalized[i]) < threshold) { value = s_namesIdx.get(s_normalized[i]); } } } if (value != null) { return value; } else { return name; } }
From source file:org.eclipse.xtext.xbase.ui.quickfix.JavaTypeQuickfixes.java
/**
 * Tells whether two type names are close enough to be considered similar.
 *
 * @param s0 first type name
 * @param s1 second type name
 * @return {@code true} if the Levenshtein distance between the names is at most 3
 */
protected boolean isSimilarTypeName(String s0, String s1) {
    final int editDistance = StringUtils.getLevenshteinDistance(s0, s1);
    return editDistance <= 3;
}
From source file:org.gradle.execution.TaskNameResolvingBuildExecuter.java
private static Map<String, Collection<Task>> doSelect(GradleInternal gradle, Iterable<String> paths) { Project defaultProject = gradle.getDefaultProject(); Map<String, Collection<Task>> allProjectsTasksByName = null; Map<String, Collection<Task>> matches = new LinkedHashMap<String, Collection<Task>>(); for (String path : paths) { Map<String, Collection<Task>> tasksByName; String baseName;//from w ww .j av a 2 s.c o m String prefix; if (path.contains(Project.PATH_SEPARATOR)) { prefix = StringUtils.substringBeforeLast(path, Project.PATH_SEPARATOR); prefix = prefix.length() == 0 ? Project.PATH_SEPARATOR : prefix; Project project = defaultProject.findProject(prefix); if (project == null) { throw new TaskSelectionException( String.format("Project '%s' not found in %s.", prefix, defaultProject)); } baseName = StringUtils.substringAfterLast(path, Project.PATH_SEPARATOR); Task match = project.getTasks().findByName(baseName); if (match != null) { matches.put(path, Collections.singleton(match)); continue; } tasksByName = new HashMap<String, Collection<Task>>(); for (Task task : project.getTasks().getAll()) { tasksByName.put(task.getName(), Collections.singleton(task)); } prefix = prefix + Project.PATH_SEPARATOR; } else { Set<Task> tasks = defaultProject.getTasksByName(path, true); if (!tasks.isEmpty()) { matches.put(path, tasks); continue; } if (allProjectsTasksByName == null) { allProjectsTasksByName = buildTaskMap(defaultProject); } tasksByName = allProjectsTasksByName; baseName = path; prefix = ""; } Pattern pattern = getPatternForName(baseName); Set<String> patternCandidates = new TreeSet<String>(); Set<String> typoCandidates = new TreeSet<String>(); for (String candidate : tasksByName.keySet()) { if (pattern.matcher(candidate).matches()) { patternCandidates.add(candidate); } if (StringUtils.getLevenshteinDistance(baseName.toUpperCase(), candidate.toUpperCase()) <= Math .min(3, baseName.length() / 2)) { typoCandidates.add(candidate); } } if (patternCandidates.size() == 
1) { String actualName = patternCandidates.iterator().next(); matches.put(prefix + actualName, tasksByName.get(actualName)); continue; } if (!patternCandidates.isEmpty()) { throw new TaskSelectionException(String.format("Task '%s' is ambiguous in %s. Candidates are: %s.", baseName, defaultProject, GUtil.toString(patternCandidates))); } if (!typoCandidates.isEmpty()) { throw new TaskSelectionException( String.format("Task '%s' not found in %s. Some candidates are: %s.", baseName, defaultProject, GUtil.toString(typoCandidates))); } throw new TaskSelectionException(String.format("Task '%s' not found in %s.", baseName, defaultProject)); } return matches; }
From source file:org.gradle.model.internal.registry.ModelPathSuggestionProvider.java
public List<ModelPath> transform(final ModelPath unavailable) { Iterable<Suggestion> suggestions = Iterables.transform(availablePaths, new Function<ModelPath, Suggestion>() { public Suggestion apply(ModelPath available) { int distance = StringUtils.getLevenshteinDistance(unavailable.toString(), available.toString()); boolean suggest = distance <= Math.min(3, unavailable.toString().length() / 2); if (suggest) { return new Suggestion(distance, available); } else { // avoid excess creation of Suggestion objects return null; }//from ww w. jav a2s .c o m } }); suggestions = Iterables.filter(suggestions, REMOVE_NULLS); List<Suggestion> sortedSuggestions = CollectionUtils.sort(suggestions); return CollectionUtils.collect(sortedSuggestions, Suggestion.EXTRACT_PATH); }
From source file:org.gradle.util.NameMatcher.java
/**
 * Locates the best match for the given pattern in the given set of candidate items.
 * <p>
 * An exact hit wins outright. Otherwise matches are taken, in order of
 * preference, from: case-insensitive equality, case-sensitive camel-case
 * pattern match, case-insensitive camel-case prefix match. Items that match
 * neither camel-case form but are within a Levenshtein distance of
 * {@code min(3, pattern.length() / 2)} (compared in upper case) are recorded
 * as candidates.
 * <p>
 * Resets and repopulates this matcher's {@code matches} and {@code candidates}.
 *
 * @param pattern the name or abbreviation to look for
 * @param items the names to search
 * @return The match if exactly 1 match found, null if no matches or multiple matches.
 */
public String find(String pattern, Collection<String> items) {
    this.pattern = pattern;
    matches.clear();
    candidates.clear();

    // An exact hit short-circuits everything else.
    if (items.contains(pattern)) {
        matches.add(pattern);
        return pattern;
    }

    if (pattern.length() == 0) {
        return null;
    }

    Pattern camelCase = getPatternForName(pattern);
    Pattern camelCaseIgnoringCase = Pattern.compile(camelCase.pattern(), Pattern.CASE_INSENSITIVE);
    String upperPattern = pattern.toUpperCase();
    int maxTypoDistance = Math.min(3, pattern.length() / 2);

    Set<String> equalIgnoringCase = new TreeSet<String>();
    Set<String> camelCaseHits = new TreeSet<String>();
    Set<String> camelCaseHitsIgnoringCase = new TreeSet<String>();

    for (String item : items) {
        if (item.equalsIgnoreCase(pattern)) {
            equalIgnoringCase.add(item);
        }
        if (camelCase.matcher(item).matches()) {
            camelCaseHits.add(item);
        } else if (camelCaseIgnoringCase.matcher(item).lookingAt()) {
            camelCaseHitsIgnoringCase.add(item);
        } else if (StringUtils.getLevenshteinDistance(upperPattern, item.toUpperCase()) <= maxTypoDistance) {
            candidates.add(item);
        }
    }

    // Use the most specific non-empty group, falling back to the loosest one.
    Set<String> best;
    if (!equalIgnoringCase.isEmpty()) {
        best = equalIgnoringCase;
    } else if (!camelCaseHits.isEmpty()) {
        best = camelCaseHits;
    } else {
        best = camelCaseHitsIgnoringCase;
    }
    matches.addAll(best);

    return matches.size() == 1 ? matches.first() : null;
}
From source file:org.kew.rmf.matchers.LevenshteinMatcher.java
/**
 * Calculates the Levenshtein distance between two strings and counts the call
 * in {@code numExecutions}.
 *
 * @param s1 first string
 * @param s2 second string
 * @return the Levenshtein distance between {@code s1} and {@code s2}
 */
public Integer calculateLevenshtein(String s1, String s2) {
    numExecutions++;
    // Integer.valueOf replaces the deprecated Integer(int) constructor and
    // may reuse cached instances for small values.
    return Integer.valueOf(StringUtils.getLevenshteinDistance(s1, s2));
}
From source file:org.openbravo.advpaymentmngt.utility.FIN_BankStatementImport.java
private String closest(ScrollableResults businessPartners, String partnername) { String targetBusinessPartnerId = ""; try {/*from www .ja v a2 s. co m*/ businessPartners.beforeFirst(); businessPartners.next(); Object[] resultObject = (Object[]) businessPartners.get(0); String targetBusinessPartnerName = ""; if (resultObject.getClass().isArray()) { final Object[] values = resultObject; targetBusinessPartnerId = (String) values[0]; targetBusinessPartnerName = (String) values[1]; } int distance = StringUtils.getLevenshteinDistance(partnername, targetBusinessPartnerName); String parsedPartnername = partnername.toLowerCase(); // Remove exceptions for (String eliminate : stringExceptions) { parsedPartnername = parsedPartnername.replaceAll(eliminate.toLowerCase(), ""); } // Remove Numeric characters char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; for (char character : digits) { parsedPartnername = parsedPartnername.replace(character, ' '); parsedPartnername = parsedPartnername.trim(); } businessPartners.beforeFirst(); int i = 0; while (businessPartners.next()) { i++; String bpId = ""; String bpName = ""; resultObject = (Object[]) businessPartners.get(0); if (resultObject.getClass().isArray()) { final Object[] values = resultObject; bpId = (String) values[0]; bpName = (String) values[1]; } // Calculates distance between two strings meaning number of changes required for a string // to // convert in another string int bpDistance = StringUtils.getLevenshteinDistance(parsedPartnername, bpName.toLowerCase()); if (bpDistance < distance) { distance = bpDistance; targetBusinessPartnerId = bpId; } if (i % 100 == 0) { OBDal.getInstance().flush(); OBDal.getInstance().getSession().clear(); } } return targetBusinessPartnerId; } catch (Exception e) { log4j.error(e.getStackTrace()); } finally { return targetBusinessPartnerId; } }
From source file:org.openmrs.module.muzima.handler.JsonEncounterQueueDataHandler.java
private Patient findPatient(final List<Patient> patients, final Patient unsavedPatient) { String unsavedGivenName = unsavedPatient.getGivenName(); String unsavedFamilyName = unsavedPatient.getFamilyName(); PersonName unsavedPersonName = unsavedPatient.getPersonName(); for (Patient patient : patients) { // match it using the person name and gender, what about the dob? PersonName savedPersonName = patient.getPersonName(); if (StringUtils.isNotBlank(savedPersonName.getFullName()) && StringUtils.isNotBlank(unsavedPersonName.getFullName())) { String savedGivenName = savedPersonName.getGivenName(); int givenNameEditDistance = StringUtils.getLevenshteinDistance( StringUtils.lowerCase(savedGivenName), StringUtils.lowerCase(unsavedGivenName)); String savedFamilyName = savedPersonName.getFamilyName(); int familyNameEditDistance = StringUtils.getLevenshteinDistance( StringUtils.lowerCase(savedFamilyName), StringUtils.lowerCase(unsavedFamilyName)); if (givenNameEditDistance < 3 && familyNameEditDistance < 3) { if (StringUtils.equalsIgnoreCase(patient.getGender(), unsavedPatient.getGender())) { if (patient.getBirthdate() != null && unsavedPatient.getBirthdate() != null && DateUtils.isSameDay(patient.getBirthdate(), unsavedPatient.getBirthdate())) { return patient; }// www . jav a 2s .com } } } } return null; }