List of usage examples for opennlp.tools.util.featuregen.StringPattern#isInitialCapitalLetter()
public boolean isInitialCapitalLetter()
From source file: com.civis.utils.opennlp.features.AddressFeature.java
/**
 * Checks whether the token at {@code maybeCountryIndex} passes the country test.
 *
 * <p>The token passes when it is not blank, its {@link StringPattern} reports an
 * initial capital letter, and it fully matches {@code STREET_PATTERN}.
 *
 * <p>NOTE(review): the country check is driven by {@code STREET_PATTERN}; confirm
 * that this pattern is really the intended one for country tokens.
 *
 * @param tokens            the tokens to inspect
 * @param maybeCountryIndex index of the candidate token; any index outside the
 *                          bounds of {@code tokens} yields {@code false}
 * @return {@code true} if the candidate token passes all checks, otherwise {@code false}
 */
private Boolean checkCountry(String[] tokens, int maybeCountryIndex) {
    // Reject indices on both sides of the array; a negative index previously
    // slipped through the upper-bound check and raised ArrayIndexOutOfBoundsException.
    if (maybeCountryIndex < 0 || maybeCountryIndex >= tokens.length) {
        return false;
    }

    String maybeCountryValue = tokens[maybeCountryIndex];
    if (!StringUtils.isNotBlank(maybeCountryValue)) {
        return false;
    }

    StringPattern stringPattern = StringPattern.recognize(maybeCountryValue);
    return stringPattern.isInitialCapitalLetter()
            && STREET_PATTERN.matcher(maybeCountryValue).matches();
}
From source file: com.civis.utils.opennlp.models.address.AddressFinderMe.java
private List<AddressSpan> tryToFindAddressByZip(String[] tokens) { tokens = removeAllSpecialChars(tokens); AddressSpan addressSpan = new AddressSpan(FindType.PATTERN); Set<String> zipSet = extractZips(); String zip = findSetValueInToken(tokens, zipSet); addressSpan.setZip(zip);/*from w ww . j av a 2 s . c o m*/ addressSpan.setCountry(Constants.DEFAULT_COUNTRY); if (StringUtils.isNotBlank(zip)) { zipSet.clear(); Set<String> citySet = extractCitiesByZip(zip); String city = findSetValueInToken(tokens, citySet); addressSpan.setCity(city); if (StringUtils.isNotBlank(city)) { int zipIndex = tokenAt(tokens, zip); int streetNumberIndex = zipIndex - 1; int maxLoopSize = 3; int streetIndex = -1; // try to find street number for (int i = streetNumberIndex; i > -1; i--) { // start on zipIndex - 1 and loop back if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[i]).matches()) { addressSpan.setStreetNumber(tokens[i]); streetIndex = i - 1; break; } maxLoopSize--; //3 loops from zip index and street number is not found, than break if (maxLoopSize <= 0) { return Collections.emptyList(); } } // try to find street maxLoopSize = 3; for (int i = streetIndex; i > -1; i--) { // start on streetIndex - 1 and loop back StringPattern stringPattern = StringPattern.recognize(tokens[streetIndex]); if (stringPattern.isInitialCapitalLetter()) { addressSpan.setStreet(tokens[streetIndex]); if (addressSpan.isValid()) { return Collections.singletonList(addressSpan); } } maxLoopSize--; //3 loops from street number index and street is not found, than break if (maxLoopSize <= 0) { return Collections.emptyList(); } } } } return Collections.emptyList(); }