/*
* Copyright (C) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.i18n.addressinput;
import com.android.i18n.addressinput.LookupKey.ScriptType;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
* Accesses address verification data used to verify components of an address.
* <p> Not all fields require all types of validation, although this could be done. In particular,
* the current implementation only provides known value verification for the hierarchical fields,
* and only provides format and match verification for the postal code field.
*/
public class FieldVerifier {
// Node data values are delimited by this symbol.
private static final String DATA_DELIMITER = "~";
// Keys are built up using this delimiter: eg data/US, data/US/CA.
private static final String KEY_DELIMITER = "/";
private String mId;
private DataSource mDataSource;
private Set<AddressField> mPossibleFields;
private Set<AddressField> mRequired;
// Known values. Can be either a key, a name in Latin, or a name in native script.
private Map<String, String> mCandidateValues;
// Keys for the subnodes of this verifier. For example, a key for the US would be CA, since
// there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the
// locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations
// exist.
private String[] mKeys;
// Names in Latin. These are only populated if the native/local names are in a script other than
// latin.
private String[] mLatinNames;
// Names in native script.
private String[] mLocalNames;
// Pattern representing the format of a postal code number.
private Pattern mFormat;
// Defines the valid range of a postal code number.
private Pattern mMatch;
/**
* Creates the root field verifier for a particular data source.
*/
public FieldVerifier(DataSource dataSource) {
mDataSource = dataSource;
populateRootVerifier();
}
/**
* Creates a field verifier based on its parent and on the new data for this node supplied by
* nodeData (which may be null).
*/
private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) {
// Most information is inherited from the parent.
mPossibleFields = parent.mPossibleFields;
mRequired = parent.mRequired;
mDataSource = parent.mDataSource;
mFormat = parent.mFormat;
mMatch = parent.mMatch;
// Here we add in any overrides from this particular node as well as information such as
// localNames, latinNames and keys.
populate(nodeData);
// candidateValues should never be inherited from the parent, but built up from the
// localNames in this node.
mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames);
}
/**
* Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This
* is a little messy at the moment since not all the appropriate information is actually under
* the root "data" node in the metadata. For example, "possibleFields" and "required" are not
* present there.
*/
private void populateRootVerifier() {
mId = "data";
// Keys come from the countries under "data".
AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data");
if (rootNode.containsKey(AddressDataKey.COUNTRIES)) {
mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER);
}
// candidateValues is just the set of keys.
mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null);
// Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping.
// TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this
// information instead.
AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ");
mPossibleFields = new HashSet<AddressField>();
if (defaultZZ.containsKey(AddressDataKey.FMT)) {
mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT));
}
mRequired = new HashSet<AddressField>();
if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) {
mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE));
}
}
/**
* Populates this verifier with data from the node data passed in. This may be null.
*/
private void populate(AddressVerificationNodeData nodeData) {
if (nodeData == null) {
return;
}
if (nodeData.containsKey(AddressDataKey.ID)) {
mId = nodeData.get(AddressDataKey.ID);
}
if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) {
mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER);
}
if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) {
mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER);
}
if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) {
mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER);
}
if (nodeData.containsKey(AddressDataKey.FMT)) {
mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT));
}
if (nodeData.containsKey(AddressDataKey.REQUIRE)) {
mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE));
}
if (nodeData.containsKey(AddressDataKey.XZIP)) {
mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE);
}
if (nodeData.containsKey(AddressDataKey.ZIP)) {
// This key has two different meanings, depending on whether this is a country-level key
// or not.
if (isCountryKey()) {
mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
Pattern.CASE_INSENSITIVE);
} else {
mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
Pattern.CASE_INSENSITIVE);
}
}
// If there are latin names but no local names, and there are the same number of latin names
// as there are keys, then we assume the local names are the same as the keys.
if (mKeys != null && mLocalNames == null && mLatinNames != null &&
mKeys.length == mLatinNames.length) {
mLocalNames = mKeys;
}
}
FieldVerifier refineVerifier(String sublevel) {
if (Util.trimToNull(sublevel) == null) {
return new FieldVerifier(this, null);
}
// If the parent node didn't exist, then the subLevelName will start with "null".
String subLevelName = mId + KEY_DELIMITER + sublevel;
// For names with no Latin equivalent, we can look up the sublevel name directly.
AddressVerificationNodeData nodeData = mDataSource.get(subLevelName);
if (nodeData != null) {
return new FieldVerifier(this, nodeData);
}
// If that failed, then we try to look up the local name equivalent of this latin name.
// First check these exist.
if (mLatinNames == null) {
return new FieldVerifier(this, null);
}
for (int n = 0; n < mLatinNames.length; n++) {
if (mLatinNames[n].equalsIgnoreCase(sublevel)) {
// We found a match - we should try looking up a key with the local name at the same
// index.
subLevelName = mId + KEY_DELIMITER + mLocalNames[n];
nodeData = mDataSource.get(subLevelName);
if (nodeData != null) {
return new FieldVerifier(this, nodeData);
}
}
}
// No sub-verifiers were found.
return new FieldVerifier(this, null);
}
/**
* Returns the ID of this verifier.
*/
public String toString() {
return mId;
}
/**
* Checks a value in a particular script for a particular field to see if it causes the problem
* specified. If so, this problem is added to the AddressProblems collection passed in. Returns
* true if no problem was found.
*/
protected boolean check(ScriptType script, AddressProblemType problem, AddressField field,
String value, AddressProblems problems) {
boolean problemFound = false;
String trimmedValue = Util.trimToNull(value);
switch (problem) {
case USING_UNUSED_FIELD:
if (trimmedValue != null && !mPossibleFields.contains(field)) {
problemFound = true;
}
break;
case MISSING_REQUIRED_FIELD:
if (mRequired.contains(field) && trimmedValue == null) {
problemFound = true;
}
break;
case UNKNOWN_VALUE:
// An empty string will never be an UNKNOWN_VALUE. It is invalid
// only when it appears in a required field (In that case it will
// be reported as MISSING_REQUIRED_FIELD).
if (trimmedValue == null) {
break;
}
problemFound = !isKnownInScript(script, trimmedValue);
break;
case UNRECOGNIZED_FORMAT:
if (trimmedValue != null && mFormat != null &&
!mFormat.matcher(trimmedValue).matches()) {
problemFound = true;
}
break;
case MISMATCHING_VALUE:
if (trimmedValue != null && mMatch != null &&
!mMatch.matcher(trimmedValue).lookingAt()) {
problemFound = true;
}
break;
default:
throw new RuntimeException("Unknown problem: " + problem);
}
if (problemFound) {
problems.add(field, problem);
}
return !problemFound;
}
/**
* Checks the value of a particular field in a particular script against the known values for
* this field. If script is null, it checks both the local and the latin values. Otherwise it
* checks only the values in the script specified.
*/
private boolean isKnownInScript(ScriptType script, String value) {
String trimmedValue = Util.trimToNull(value);
Util.checkNotNull(trimmedValue);
if (script == null) {
return (mCandidateValues == null ||
mCandidateValues.containsKey(trimmedValue.toLowerCase()));
}
// Otherwise, if we know the script, we want to restrict the candidates to only names in
// that script.
String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames;
Set<String> candidates = new HashSet<String>();
if (namesToConsider != null) {
for (String name : namesToConsider) {
candidates.add(name.toLowerCase());
}
}
if (mKeys != null) {
for (String name : mKeys) {
candidates.add(name.toLowerCase());
}
}
if (candidates.size() == 0 || trimmedValue == null) {
return true;
}
return candidates.contains(value.toLowerCase());
}
/**
* Parses the value of the "fmt" key in the data to see which fields are used for a particular
* country. Returns a list of all fields found. Country is always assumed to be present. Skips
* characters that indicate new-lines in the format information, as well as any characters not
* escaped with "%".
*/
private static Set<AddressField> parseAddressFields(String value) {
EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
boolean escaped = false;
for (char c : value.toCharArray()) {
if (escaped) {
escaped = false;
if (c == 'n') {
continue;
}
AddressField f = AddressField.of(c);
if (f == null) {
throw new RuntimeException(
"Unrecognized character '" + c + "' in format pattern: " + value);
}
result.add(f);
} else if (c == '%') {
escaped = true;
}
}
// These fields are not mentioned in the metadata at the moment since there is an effort to
// move away from STREET_ADDRESS and use these fields instead. This means they have to be
// removed here.
result.remove(AddressField.ADDRESS_LINE_1);
result.remove(AddressField.ADDRESS_LINE_2);
return result;
}
/**
* Parses the value of the "required" key in the data. Adds country as well as any other field
* mentioned in the string.
*/
private static Set<AddressField> parseRequireString(String value) {
// Country is always required
EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
for (char c : value.toCharArray()) {
AddressField f = AddressField.of(c);
if (f == null) {
throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: "
+ value);
}
result.add(f);
}
// These fields are not mentioned in the metadata at the moment since there is an effort to
// move away from STREET_ADDRESS and use these fields instead. This means they have to be
// removed here.
result.remove(AddressField.ADDRESS_LINE_1);
result.remove(AddressField.ADDRESS_LINE_2);
return result;
}
/**
* Returns true if this key represents a country. We assume all keys with only one delimiter are
* at the country level (such as "data/US").
*/
private boolean isCountryKey() {
Util.checkNotNull(mId, "Cannot use null as key");
return mId.split(KEY_DELIMITER).length == 2;
}
}
|