List of usage examples for opennlp.tools.util.featuregen StringPattern recognize
public static StringPattern recognize(String token)
From source file:com.civis.utils.opennlp.features.AddressFeature.java
/**
 * Checks whether a token qualifies as a word for this feature.
 *
 * <p>A token qualifies when it is not blank, {@link StringPattern} reports an
 * initial capital letter for it, and it fully matches {@code STREET_PATTERN}.
 *
 * @param token the token to inspect; blank or {@code null} values are rejected
 *              by the {@code StringUtils.isNotBlank} guard
 * @return {@code true} when all three conditions hold, otherwise {@code false}
 */
private Boolean checkWord(String token) {
    // Reject blank input first so the pattern checks never see it.
    if (!StringUtils.isNotBlank(token)) {
        return false;
    }
    if (!StringPattern.recognize(token).isInitialCapitalLetter()) {
        return false;
    }
    return STREET_PATTERN.matcher(token).matches();
}
From source file:com.civis.utils.opennlp.features.AddressFeature.java
/**
 * Checks whether the token at the given position looks like a country name.
 *
 * <p>The token qualifies when it is not blank, {@link StringPattern} reports
 * an initial capital letter for it, and it fully matches
 * {@code STREET_PATTERN}.
 * NOTE(review): the country check reuses {@code STREET_PATTERN}; confirm that
 * this is intentional and not a copy/paste of the street check.
 *
 * @param tokens            the tokens of the sentence being inspected
 * @param maybeCountryIndex index of the candidate token; any index outside
 *                          {@code [0, tokens.length)} yields {@code false}
 * @return {@code true} if the candidate token passes all checks, otherwise
 *         {@code false}
 */
private Boolean checkCountry(String[] tokens, int maybeCountryIndex) {
    // Treat both a negative index and an index past the end as "no candidate".
    // The original only guarded the upper bound, so a negative index threw
    // ArrayIndexOutOfBoundsException.
    if (maybeCountryIndex < 0 || maybeCountryIndex >= tokens.length) {
        return false;
    }

    String maybeCountryValue = tokens[maybeCountryIndex];
    if (!StringUtils.isNotBlank(maybeCountryValue)) {
        return false;
    }

    StringPattern stringPattern = StringPattern.recognize(maybeCountryValue);
    return stringPattern.isInitialCapitalLetter()
            && STREET_PATTERN.matcher(maybeCountryValue).matches();
}
From source file:com.civis.utils.opennlp.features.AddressFeature.java
/**
 * Checks whether a token consists solely of digits.
 *
 * @param token the token to inspect; blank or {@code null} values are rejected
 *              by the {@code StringUtils.isNotBlank} guard
 * @return {@code true} when the token is not blank and {@link StringPattern}
 *         classifies it as all-digit, otherwise {@code false}
 */
private Boolean checkDigit(String token) {
    // Blank input can never be a digit token; bail out before classifying it.
    if (!StringUtils.isNotBlank(token)) {
        return false;
    }
    StringPattern tokenPattern = StringPattern.recognize(token);
    return tokenPattern.isAllDigit();
}
From source file:com.civis.utils.opennlp.models.address.AddressFinderMe.java
private List<AddressSpan> tryToFindAddressByZip(String[] tokens) { tokens = removeAllSpecialChars(tokens); AddressSpan addressSpan = new AddressSpan(FindType.PATTERN); Set<String> zipSet = extractZips(); String zip = findSetValueInToken(tokens, zipSet); addressSpan.setZip(zip);/*from w w w . j av a 2s .com*/ addressSpan.setCountry(Constants.DEFAULT_COUNTRY); if (StringUtils.isNotBlank(zip)) { zipSet.clear(); Set<String> citySet = extractCitiesByZip(zip); String city = findSetValueInToken(tokens, citySet); addressSpan.setCity(city); if (StringUtils.isNotBlank(city)) { int zipIndex = tokenAt(tokens, zip); int streetNumberIndex = zipIndex - 1; int maxLoopSize = 3; int streetIndex = -1; // try to find street number for (int i = streetNumberIndex; i > -1; i--) { // start on zipIndex - 1 and loop back if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[i]).matches()) { addressSpan.setStreetNumber(tokens[i]); streetIndex = i - 1; break; } maxLoopSize--; //3 loops from zip index and street number is not found, than break if (maxLoopSize <= 0) { return Collections.emptyList(); } } // try to find street maxLoopSize = 3; for (int i = streetIndex; i > -1; i--) { // start on streetIndex - 1 and loop back StringPattern stringPattern = StringPattern.recognize(tokens[streetIndex]); if (stringPattern.isInitialCapitalLetter()) { addressSpan.setStreet(tokens[streetIndex]); if (addressSpan.isValid()) { return Collections.singletonList(addressSpan); } } maxLoopSize--; //3 loops from street number index and street is not found, than break if (maxLoopSize <= 0) { return Collections.emptyList(); } } } } return Collections.emptyList(); }