Example usage for opennlp.tools.util.featuregen StringPattern recognize

List of usage examples for opennlp.tools.util.featuregen StringPattern recognize

Introduction

On this page you can find example usages of opennlp.tools.util.featuregen.StringPattern.recognize.

Prototype

public static StringPattern recognize(String token) 

Source Link

Usage

From source file:com.civis.utils.opennlp.features.AddressFeature.java

/**
 * Decides whether a token qualifies as a street word.
 *
 * <p>A token qualifies when it is not blank, {@code StringPattern} reports an
 * initial capital letter for it, and it matches {@code STREET_PATTERN}. The
 * checks run in that order and stop at the first one that fails.
 *
 * @param token the token to inspect
 * @return {@code true} when all three checks pass, otherwise {@code false}
 */
private Boolean checkWord(String token) {
    boolean hasText = StringUtils.isNotBlank(token);
    if (!hasText) {
        return false;
    }

    boolean startsWithCapital = StringPattern.recognize(token).isInitialCapitalLetter();
    if (!startsWithCapital) {
        return false;
    }

    return STREET_PATTERN.matcher(token).matches();
}

From source file:com.civis.utils.opennlp.features.AddressFeature.java

/**
 * Checks whether the token at the given position looks like a country name.
 *
 * <p>Returns {@code false} when the index is at or past the end of the array or
 * when the token at that index is blank. Otherwise the token must start with a
 * capital letter (according to {@code StringPattern}) and match
 * {@code STREET_PATTERN}.
 *
 * <p>NOTE(review): the country candidate is validated against
 * {@code STREET_PATTERN}; confirm that reusing the street pattern is intended.
 *
 * @param tokens            the tokens of the text being analysed
 * @param maybeCountryIndex index of the token that may hold the country
 * @return {@code true} when the token passes both checks, otherwise {@code false}
 */
private Boolean checkCountry(String[] tokens, int maybeCountryIndex) {
    if (maybeCountryIndex >= tokens.length) {
        return false;
    }

    String candidate = tokens[maybeCountryIndex];
    boolean hasText = StringUtils.isNotBlank(candidate);
    if (!hasText) {
        return false;
    }

    StringPattern shape = StringPattern.recognize(candidate);
    if (!shape.isInitialCapitalLetter()) {
        return false;
    }
    return STREET_PATTERN.matcher(candidate).matches();
}

From source file:com.civis.utils.opennlp.features.AddressFeature.java

/**
 * Tells whether a token is non-blank and reported as all digits by
 * {@code StringPattern}.
 *
 * @param token the token to inspect
 * @return {@code true} for a non-blank token for which
 *         {@code StringPattern.recognize(token).isAllDigit()} holds, otherwise {@code false}
 */
private Boolean checkDigit(String token) {
    if (StringUtils.isNotBlank(token)) {
        // Only consult the pattern recogniser for tokens that carry text.
        return StringPattern.recognize(token).isAllDigit();
    }
    return false;
}

From source file:com.civis.utils.opennlp.models.address.AddressFinderMe.java

/**
 * Tries to assemble a single address from the tokens, anchored on a zip code.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>strip special characters from the tokens via {@code removeAllSpecialChars};</li>
 *   <li>look for one of the values from {@code extractZips()} among the tokens;</li>
 *   <li>look for one of the values from {@code extractCitiesByZip(zip)} among the tokens;</li>
 *   <li>walk backwards from the token before the zip, inspecting at most three tokens,
 *       for one matching {@code StreetNumberFeature.STREET_NUMBER_PATTERN};</li>
 *   <li>walk backwards from the token before the street number, inspecting at most
 *       three tokens, for one that starts with a capital letter and makes the
 *       span valid when used as the street.</li>
 * </ol>
 *
 * @param tokens the tokens of the text to search
 * @return a singleton list holding the address span when {@code addressSpan.isValid()}
 *         is true after a street was set; an empty list in every other case
 */
private List<AddressSpan> tryToFindAddressByZip(String[] tokens) {
    tokens = removeAllSpecialChars(tokens);
    AddressSpan addressSpan = new AddressSpan(FindType.PATTERN);
    Set<String> zipSet = extractZips();
    String zip = findSetValueInToken(tokens, zipSet);
    addressSpan.setZip(zip);
    addressSpan.setCountry(Constants.DEFAULT_COUNTRY);
    if (StringUtils.isNotBlank(zip)) {
        // NOTE(review): this empties the set returned by extractZips(); presumably done to
        // release it early - confirm extractZips() does not hand out a shared instance.
        zipSet.clear();
        Set<String> citySet = extractCitiesByZip(zip);
        String city = findSetValueInToken(tokens, citySet);
        addressSpan.setCity(city);
        if (StringUtils.isNotBlank(city)) {
            // NOTE(review): assumes tokenAt yields the position of the zip inside tokens.
            int zipIndex = tokenAt(tokens, zip);
            int streetNumberIndex = zipIndex - 1;
            int maxLoopSize = 3;
            int streetIndex = -1;
            // Look for the street number, starting at zipIndex - 1 and moving backwards.
            for (int i = streetNumberIndex; i > -1; i--) {
                if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[i]).matches()) {
                    addressSpan.setStreetNumber(tokens[i]);
                    streetIndex = i - 1;
                    break;
                }

                maxLoopSize--;
                // Three tokens inspected without finding a street number: give up.
                if (maxLoopSize <= 0) {
                    return Collections.emptyList();
                }
            }

            // Look for the street, starting right before the street number and moving
            // backwards. When no street number was found streetIndex is still -1 and
            // this loop does not run.
            maxLoopSize = 3;
            for (int i = streetIndex; i > -1; i--) {
                // Inspect the token at the current position. The previous code always read
                // tokens[streetIndex], so the same token was tested on every pass and
                // earlier tokens were never considered.
                String candidate = tokens[i];
                StringPattern stringPattern = StringPattern.recognize(candidate);
                if (stringPattern.isInitialCapitalLetter()) {
                    addressSpan.setStreet(candidate);
                    if (addressSpan.isValid()) {
                        return Collections.singletonList(addressSpan);
                    }
                }

                maxLoopSize--;

                // Three tokens inspected without producing a valid address: give up.
                if (maxLoopSize <= 0) {
                    return Collections.emptyList();
                }
            }
        }
    }

    return Collections.emptyList();
}