List of usage examples for org.apache.commons.lang StringUtils getLevenshteinDistance
public static int getLevenshteinDistance(String s, String t)
Find the Levenshtein distance between two Strings.
From source file:com.iflytek.spider.metadata.SpellCheckedMetadata.java
/**
 * Maps a metadata attribute name onto its well-known canonical spelling.
 *
 * <p>The name is normalized and looked up directly first. When that lookup
 * fails, each known normalized name is compared with it; the first one whose
 * Levenshtein distance is strictly below
 * {@code searched.length() / TRESHOLD_DIVIDER} and that resolves in the index
 * is used. The matching is therefore error tolerant, for instance:
 * <ul>
 * <li>content-type gives Content-Type</li>
 * <li>CoNtEntType gives Content-Type</li>
 * <li>ConTnTtYpe gives Content-Type</li>
 * </ul>
 * When no well-known name matches, the original name is returned unchanged.
 *
 * @param name
 *          Name to normalize
 * @return the well-known name, or {@code name} itself when nothing matches
 */
public static String getNormalizedName(final String name) {
    final String searched = normalize(name);

    // Exact hit on the normalized form: nothing more to do.
    final String exact = NAMES_IDX.get(searched);
    if (exact != null) {
        return exact;
    }

    // No table of known names to fuzzy-match against.
    if (normalized == null) {
        return name;
    }

    final int threshold = searched.length() / TRESHOLD_DIVIDER;
    for (final String candidate : normalized) {
        if (StringUtils.getLevenshteinDistance(searched, candidate) < threshold) {
            final String resolved = NAMES_IDX.get(candidate);
            if (resolved != null) {
                return resolved;
            }
        }
    }
    return name;
}
From source file:com.panet.imeta.core.row.ValueDataUtil.java
/**
 * Levenshtein distance (LD) is a measure of the similarity between two strings,
 * which we will refer to as the source string (s) and the target string (t).
 * The distance is the number of deletions, insertions, or substitutions
 * required to transform s into t.
 *
 * <p>Both values are compared through their {@code toString()} form.
 *
 * @param metaA metadata of the first value (not used by this computation)
 * @param dataA first value, may be {@code null}
 * @param metaB metadata of the second value (not used by this computation)
 * @param dataB second value, may be {@code null}
 * @return the Levenshtein distance, or {@code null} when either value is {@code null}
 */
public static Long getLevenshtein_Distance(ValueMetaInterface metaA, Object dataA, ValueMetaInterface metaB,
        Object dataB) {
    if (dataA == null || dataB == null) {
        return null;
    }
    // Long.valueOf replaces the deprecated Long(long) constructor and can reuse cached instances.
    return Long.valueOf(StringUtils.getLevenshteinDistance(dataA.toString(), dataB.toString()));
}
From source file:com.google.gerrit.server.util.MostSpecificComparator.java
private int distance(String pattern) { String example;//ww w . j av a 2 s . co m if (RefControl.isRE(pattern)) { example = RefControl.shortestExample(pattern); } else if (pattern.endsWith("/*")) { example = pattern.substring(0, pattern.length() - 1) + '1'; } else if (pattern.equals(refName)) { return 0; } else { return Math.max(pattern.length(), refName.length()); } return StringUtils.getLevenshteinDistance(example, refName); }
From source file:fr.ign.cogit.geoxygene.matching.dst.sources.text.LevenshteinDist.java
/** * Mesure de Levenshtein.// w ww .j ava 2 s.c o m * * @param s * @param t * @return int * as coefficient de similarit */ public double compute(String s, String t) { if (s == null || t == null) { throw new IllegalArgumentException("Strings must not be null"); } double l = StringUtils.getLevenshteinDistance(s.toLowerCase(), t.toLowerCase()); DST_LOGGER.info(" StringUtils.getLevenshteinDistance : " + l); // return 1.0 - l / Math.max (s.length(), t.length()); return l / Math.max(s.length(), t.length()); }
From source file:com.evanmclean.evlib.text.FuzzyLexicon.java
/**
 * Finds the smallest Levenshtein distance between a word and any word of a
 * lexicon set.
 *
 * @param word
 *          the word to compare
 * @param word_set
 *          the lexicon words to compare against; may be {@code null} or empty
 * @return the smallest distance found, or the distance between {@code word}
 *         and the empty string when {@code word_set} is {@code null} or empty
 */
private int difference(final String word, final Set<String> word_set) {
    final boolean nothingToCompare = (word_set == null) || (word_set.size() <= 0);
    if (nothingToCompare) {
        return StringUtils.getLevenshteinDistance(word, "");
    }

    int smallest = Integer.MAX_VALUE;
    for (final String candidate : word_set) {
        final int distance = StringUtils.getLevenshteinDistance(word, candidate);
        if (distance < smallest) {
            smallest = distance;
        }
    }
    return smallest;
}
From source file:edu.cmu.lti.oaqa.baseqa.answer.collective_score.scorers.EditDistanceCollectiveAnswerScorer.java
/**
 * Computes the Levenshtein distance between two texts, divided by the length
 * of the longer one.
 *
 * @param text1 first text
 * @param text2 second text
 * @return the scaled distance; 0.0 when both texts are empty
 */
private double getDistance(String text1, String text2) {
    int distance = StringUtils.getLevenshteinDistance(text1, text2);
    int longest = Math.max(text1.length(), text2.length());
    if (longest == 0) {
        // Both texts are empty and therefore identical; 0 / 0 would otherwise yield NaN.
        return 0.0;
    }
    return (double) distance / longest;
}
From source file:com.mmounirou.spotirss.spotify.tracks.SpotifyHrefQuery.java
private int getLevenshteinDistance(XTracks xtrack, Track track) { int trackDistance = StringUtils.getLevenshteinDistance(xtrack.getCleanedTrackName(), track.getSong()); int artistDistance = getArtistDistance(track, xtrack); // System.out.println(" trackDistance = " + trackDistance + " artistDistance = " + artistDistance + " track = " + xtrack); return (trackDistance + 1) * (artistDistance + 1) + artistDistance; }
From source file:com.mmounirou.spotirss.spotify.tracks.SpotifyHrefQuery.java
private Integer getMinDistance(String strArtist, Set<String> artists) { Set<Integer> treeSet = new TreeSet<Integer>(); for (String artist : artists) { treeSet.add(StringUtils.getLevenshteinDistance(strArtist, artist)); }//w ww . j a v a 2 s.c o m return treeSet.iterator().next(); }
From source file:com.jroossien.boxx.util.Str.java
/** * Get the best matching value for the specified input out of the array of values. * This uses the levenshtein distance from {@link StringUtils} * If an exact match is found that match will be returned. * * @param input The input string to find a match for. * @param values Array of values to match with input string. * @return The best match from the specified values. (May be empty when there are no values or no match) */// w w w .ja v a2s . c om public static String bestMatch(String input, String... values) { String bestMatch = ""; int lowestDiff = input.length() - 1; for (String value : values) { int diff = StringUtils.getLevenshteinDistance(input, value); if (diff == 0) { return value; } if (diff < lowestDiff) { bestMatch = value; lowestDiff = diff; } } return bestMatch; }
From source file:com.ntua.cosmos.hackathonplanneraas.Planner.java
@Deprecated public JSONObject searchEventSolution(JSONObject obj) { StoredPaths pan = new StoredPaths(); JSONObject returned = new JSONObject(); long since = 0; long ts = 0;/*from w w w . j a v a 2 s .co m*/ ArrayList<String> names = new ArrayList<>(); ArrayList<String> values = new ArrayList<>(); String originalString = ""; double similarity = 0.0, current = 0.0; if (!obj.isEmpty()) { Set keys = obj.keySet(); Iterator iter = keys.iterator(); for (; iter.hasNext();) { String temporary = String.valueOf(iter.next()); if (temporary.startsWith("has")) names.add(temporary); } names.stream().forEach((name) -> { values.add(String.valueOf(obj.get(name))); }); } originalString = values.stream().map((value) -> value).reduce(originalString, String::concat); //Begin the initialisation process. OntModelSpec s = new OntModelSpec(PelletReasonerFactory.THE_SPEC); OntDocumentManager dm = OntDocumentManager.getInstance(); dm.setFileManager(FileManager.get()); s.setDocumentManager(dm); OntModel m = ModelFactory.createOntologyModel(s, null); InputStream in = FileManager.get().open(StoredPaths.casebasepath); if (in == null) { throw new IllegalArgumentException("File: " + StoredPaths.casebasepath + " not found"); } // read the file m.read(in, null); //begin building query string. String queryString = pan.prefixrdf + pan.prefixowl + pan.prefixxsd + pan.prefixrdfs + pan.prefixCasePath; queryString += "\nSELECT DISTINCT "; for (int i = 0; i < names.size(); i++) { queryString += "?param" + i + " "; } queryString += "?message ?handle ?URI WHERE {"; for (int i = 0; i < names.size(); i++) { queryString += "?event base:" + names.get(i) + " ?param" + i + " . "; } queryString += "?event base:isSolvedBy ?solution . ?solution base:exposesMessage ?message . ?solution base:eventHandledBy ?handle . 
?solution base:URI ?URI}"; try { String testString = ""; Query query = QueryFactory.create(queryString); QueryExecution qe = QueryExecutionFactory.create(query, m); ResultSet results = qe.execSelect(); for (; results.hasNext();) { QuerySolution soln = results.nextSolution(); // Access variables: soln.get("x"); Literal lit; for (int i = 0; i < names.size(); i++) { lit = soln.getLiteral("param" + i);// Get a result variable by name. String temporary = String.valueOf(lit).substring(0, String.valueOf(lit).indexOf("^^")); testString += temporary; } String longer = testString, shorter = originalString; if (testString.length() < originalString.length()) { // longer should always have greater length longer = originalString; shorter = testString; } int longerLength = longer.length(); System.out.println("Similarity between:" + originalString + " and " + testString + " is:"); current = (longerLength - StringUtils.getLevenshteinDistance(longer, shorter)) / (double) longerLength; System.out.println(current + " out of 1.0."); if (similarity < current) { similarity = current; returned.clear(); returned.put("message", soln.getLiteral("message").getString()); returned.put("URI", soln.getLiteral("URI").getString()); returned.put("handle", soln.getLiteral("handle").getString()); } } } catch (Exception e) { System.out.println("Search is interrupted by an error."); } m.close(); return returned; }