Java tutorial
// ============================================================================ // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // https://github.com/Talend/data-prep/blob/master/LICENSE // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataprep.api.filter; import static org.apache.commons.lang.StringUtils.isEmpty; import static org.talend.dataprep.util.NumericHelper.isBigDecimal; import java.text.ParseException; import java.time.DateTimeException; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.Iterator; import java.util.List; import java.util.function.Predicate; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.math.NumberUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.talend.daikon.exception.TalendRuntimeException; import org.talend.daikon.number.BigDecimalParser; import org.talend.dataprep.BaseErrorCodes; import org.talend.dataprep.api.dataset.ColumnMetadata; import org.talend.dataprep.api.dataset.RowMetadata; import org.talend.dataprep.api.dataset.row.DataSetRow; import org.talend.dataprep.api.type.Type; import org.talend.dataprep.date.DateManipulator; import org.talend.dataprep.quality.AnalyzerService; import org.talend.dataprep.transformation.actions.Providers; import org.talend.dataprep.transformation.actions.date.DateParser; import org.talend.dataprep.util.NumericHelper; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; public class SimpleFilterService implements FilterService { private static final String EQ = "eq"; private static final String GT = "gt"; private static final String LT = "lt"; private static final String GTE = "gte"; private static final String LTE = "lte"; private static final String CONTAINS = "contains"; private static final String MATCHES = "matches"; private static final String INVALID = "invalid"; private static final String VALID = "valid"; private static final String EMPTY = "empty"; private static final String RANGE = "range"; private static final String AND = "and"; private static final String OR = "or"; private static final String NOT = "not"; private static final Logger LOGGER = LoggerFactory.getLogger(SimpleFilterService.class); private final DateManipulator dateManipulator = new DateManipulator(); private DateParser dateParser; private static Predicate<DataSetRow> safeDate(Predicate<DataSetRow> inner) { return r -> { try { return inner.test(r); } catch (DateTimeException e) { // thrown by DateParser LOGGER.debug("Unable to parse date.", e); return false; } }; } @Override public Predicate<DataSetRow> build(String filterAsString, RowMetadata rowMetadata) { if (isEmpty(filterAsString)) { return r -> true; } try { ObjectMapper mapper = new ObjectMapper(); final JsonNode root = mapper.reader().readTree(filterAsString); final Iterator<JsonNode> elements = root.elements(); if (!elements.hasNext()) { throw new IllegalArgumentException("Malformed filter: " + filterAsString); } else { return buildFilter(root, rowMetadata); } } catch (Exception e) { throw new TalendRuntimeException(BaseErrorCodes.UNABLE_TO_PARSE_FILTER, e); } } private Predicate<DataSetRow> buildFilter(JsonNode currentNode, RowMetadata rowMetadata) { final Iterator<JsonNode> children = currentNode.elements(); final JsonNode operationContent = children.next(); final String columnId = operationContent.has("field") ? operationContent.get("field").asText() : null; final String value = operationContent.has("value") ? operationContent.get("value").asText() : null; final Iterator<String> propertiesIterator = currentNode.fieldNames(); if (!propertiesIterator.hasNext()) { throw new UnsupportedOperationException( "Unsupported query, empty filter definition: " + currentNode.toString()); } final String operation = propertiesIterator.next(); if (columnId == null && allowFullFilter(operation)) { // Full data set filter (no column) final List<ColumnMetadata> columns = rowMetadata.getColumns(); Predicate<DataSetRow> predicate = null; if (!columns.isEmpty()) { predicate = buildOperationFilter(currentNode, rowMetadata, columns.get(0).getId(), operation, value); for (int i = 1; i < columns.size(); i++) { predicate = predicate.or(buildOperationFilter(currentNode, rowMetadata, columns.get(i).getId(), operation, value)); } } return predicate; } else { return buildOperationFilter(currentNode, rowMetadata, columnId, operation, value); } } private static boolean allowFullFilter(String operation) { switch (operation) { case EQ: case GT: case LT: case GTE: case LTE: case CONTAINS: case MATCHES: case INVALID: case VALID: case EMPTY: case RANGE: return true; case AND: case OR: case NOT: default: return false; } } private Predicate<DataSetRow> buildOperationFilter(JsonNode currentNode, // RowMetadata rowMetadata, // String columnId, // String operation, // String value) { switch (operation) { case EQ: return createEqualsPredicate(currentNode, columnId, value); case GT: return createGreaterThanPredicate(currentNode, columnId, value); case LT: return createLowerThanPredicate(currentNode, columnId, value); case GTE: return createGreaterOrEqualsPredicate(currentNode, columnId, value); case LTE: return createLowerOrEqualsPredicate(currentNode, columnId, value); case CONTAINS: return createContainsPredicate(currentNode, columnId, value); case MATCHES: return createMatchesPredicate(currentNode, columnId, value); case INVALID: return createInvalidPredicate(columnId); case VALID: return createValidPredicate(columnId); case EMPTY: return createEmptyPredicate(columnId); case RANGE: return createRangePredicate(columnId, currentNode.elements().next(), rowMetadata); case AND: return createAndPredicate(currentNode.elements().next(), rowMetadata); case OR: return createOrPredicate(currentNode.elements().next(), rowMetadata); case NOT: return createNotPredicate(currentNode.elements().next(), rowMetadata); default: throw new UnsupportedOperationException( "Unsupported query, unknown filter '" + operation + "': " + currentNode.toString()); } } /** * Create a predicate that do a logical AND between 2 filters * * @param nodeContent The node content * @param rowMetadata Row metadata to used to obtain information (valid/invalid, types...) * @return the AND predicate */ private Predicate<DataSetRow> createAndPredicate(final JsonNode nodeContent, RowMetadata rowMetadata) { checkValidMultiPredicate(nodeContent); final Predicate<DataSetRow> leftFilter = buildFilter(nodeContent.get(0), rowMetadata); final Predicate<DataSetRow> rightFilter = buildFilter(nodeContent.get(1), rowMetadata); return leftFilter.and(rightFilter); } /** * Create a predicate that do a logical OR between 2 filters * * @param nodeContent The node content * @param rowMetadata Row metadata to used to obtain information (valid/invalid, types...) * @return the OR predicate */ private Predicate<DataSetRow> createOrPredicate(final JsonNode nodeContent, RowMetadata rowMetadata) { checkValidMultiPredicate(nodeContent); final Predicate<DataSetRow> leftFilter = buildFilter(nodeContent.get(0), rowMetadata); final Predicate<DataSetRow> rightFilter = buildFilter(nodeContent.get(1), rowMetadata); return leftFilter.or(rightFilter); } /** * Create a predicate that negates a filter * * @param nodeContent The node content * @param rowMetadata Row metadata to used to obtain information (valid/invalid, types...) * @return The NOT predicate */ private Predicate<DataSetRow> createNotPredicate(final JsonNode nodeContent, RowMetadata rowMetadata) { if (!nodeContent.isObject()) { throw new IllegalArgumentException("Unsupported query, malformed 'not' (expected 1 object child)."); } if (nodeContent.size() == 0) { throw new IllegalArgumentException("Unsupported query, malformed 'not' (object child is empty)."); } return buildFilter(nodeContent, rowMetadata).negate(); } /** * Create a predicate that checks if the var is equals to a value. * * It first tries String comparison, and if not 'true' uses number comparison. * * @param node The filter node * @param columnId The column id * @param value The compare value * @return The eq predicate */ private Predicate<DataSetRow> createEqualsPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> { if (StringUtils.equals(r.get(columnId), value)) { return true; } else { return isBigDecimal(r.get(columnId)) // && isBigDecimal(value) // && NumberUtils.compare(toBigDecimal(r.get(columnId)), toBigDecimal(value)) == 0; } }; } /** * Create a predicate that checks if the var is greater than a value * * @param node The filter node * @param columnId The column id * @param value The compare value * @return The gt predicate */ private Predicate<DataSetRow> createGreaterThanPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> isBigDecimal(r.get(columnId)) // && isBigDecimal(value) // && toBigDecimal(r.get(columnId)) > toBigDecimal(value); } /** * Create a predicate that checks if the var is lower than a value * * @param node The filter node * @param columnId The column id * @param value The compare value * @return The lt predicate */ private Predicate<DataSetRow> createLowerThanPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> isBigDecimal(r.get(columnId)) // && isBigDecimal(value) // && toBigDecimal(r.get(columnId)) < toBigDecimal(value); } /** * Create a predicate that checks if the var is greater than or equals to a value * * @param node The filter node * @param columnId The column id * @param value The compare value * @return The gte predicate */ private Predicate<DataSetRow> createGreaterOrEqualsPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> isBigDecimal(r.get(columnId)) // && isBigDecimal(value) // && toBigDecimal(r.get(columnId)) >= toBigDecimal(value); } /** * Create a predicate that checks if the var is lower than or equals to a value * * @param node The filter node * @param columnId The column id * @param value The compare value * @return The lte predicate */ private Predicate<DataSetRow> createLowerOrEqualsPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> isBigDecimal(r.get(columnId)) // && isBigDecimal(value) // && toBigDecimal(r.get(columnId)) <= toBigDecimal(value); } /** * Create a predicate that checks if the var contains a value * * @param node The filter node * @param columnId The column id * @param value The contained value * @return The contains predicate */ private Predicate<DataSetRow> createContainsPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> StringUtils.containsIgnoreCase(r.get(columnId), value); } /** * Create a predicate that checks if the var match a value * * @param node The filter node * @param columnId The column id * @param value The value to match * @return The match predicate */ private Predicate<DataSetRow> createMatchesPredicate(final JsonNode node, final String columnId, final String value) { checkValidValue(node, value); return r -> matches(r.get(columnId), value); } /** * Create a predicate that checks if the value is invalid * * @param columnId The column id * @return The invalid value predicate */ private Predicate<DataSetRow> createInvalidPredicate(final String columnId) { return r -> r.isInvalid(columnId); } /** * Create a predicate that checks if the value is value (not empty and not invalid) * * @param columnId The column id * @return The valid value predicate */ private Predicate<DataSetRow> createValidPredicate(final String columnId) { return r -> !r.isInvalid(columnId) && !isEmpty(r.get(columnId)); } /** * Create a predicate that checks if the value is empty * * @param columnId The column id * @return The empty value predicate */ private Predicate<DataSetRow> createEmptyPredicate(final String columnId) { return r -> isEmpty(r.get(columnId)); } /** * Create a predicate that checks if the value is within a range [min, max[ * * @param columnId The column id * @param nodeContent The node content that contains min/max values * @return The range predicate */ private Predicate<DataSetRow> createRangePredicate(final String columnId, final JsonNode nodeContent, final RowMetadata rowMetadata) { final String start = nodeContent.get("start").asText(); final String end = nodeContent.get("end").asText(); return r -> { final String columnType = rowMetadata.getById(columnId).getType(); Type parsedType = Type.get(columnType); if (Type.DATE.isAssignableFrom(parsedType)) { return createDateRangePredicate(columnId, start, end, rowMetadata).test(r); } else { // Assume range can be parsed as number (may happen if column is currently marked as string, but will // contain some numbers). return createNumberRangePredicate(columnId, start, end).test(r); } }; } /** * Create a predicate that checks if the date value is within a range [min, max[ * * @param columnId The column id * @param start The start value * @param end The end value * @return The date range predicate */ private Predicate<DataSetRow> createDateRangePredicate(final String columnId, final String start, final String end, final RowMetadata rowMetadata) { try { final long minTimestamp = Long.parseLong(start); final long maxTimestamp = Long.parseLong(end); final LocalDateTime minDate = dateManipulator.fromEpochMillisecondsWithSystemOffset(minTimestamp); final LocalDateTime maxDate = dateManipulator.fromEpochMillisecondsWithSystemOffset(maxTimestamp); return safeDate(r -> { final ColumnMetadata columnMetadata = rowMetadata.getById(columnId); final LocalDateTime columnValue = getDateParser().parse(r.get(columnId), columnMetadata); return minDate.compareTo(columnValue) == 0 || (minDate.isBefore(columnValue) && maxDate.isAfter(columnValue)); }); } catch (Exception e) { LOGGER.debug("Unable to create date range predicate.", e); throw new IllegalArgumentException( "Unsupported query, malformed date 'range' (expected timestamps in min and max properties)."); } } private synchronized DateParser getDateParser() { if (dateParser == null) { dateParser = new DateParser(Providers.get(AnalyzerService.class)); } return dateParser; } // Intentionally left with package modifier since only used by unit test (in same package) void setDateParser(final DateParser dateParser) { this.dateParser = dateParser; } /** * Create a predicate that checks if the number value is within a range [min, max[ * * @param columnId The column id * @param start The start value * @param end The end value * @return The number range predicate */ private Predicate<DataSetRow> createNumberRangePredicate(final String columnId, final String start, final String end) { try { final double min = toBigDecimal(start); final double max = toBigDecimal(end); return r -> { final String value = r.get(columnId); if (NumericHelper.isBigDecimal(value)) { final double columnValue = toBigDecimal(value); return NumberUtils.compare(columnValue, min) == 0 || (columnValue > min && columnValue < max); } else { return false; } }; } catch (Exception e) { LOGGER.debug("Unable to create number range predicate.", e); throw new IllegalArgumentException( "Unsupported query, malformed 'range' (expected number min and max properties)."); } } /** * check if the node has a non null value * * @param node The node to test * @param value The node 'value' property * @throws IllegalArgumentException If the node has not a 'value' property */ private void checkValidValue(final JsonNode node, final String value) { if (value == null) { throw new UnsupportedOperationException( "Unsupported query, the filter needs a value : " + node.toString()); } } /** * Check if the node has exactly 2 children. Used to safe check binary operator (and, or) * * @param nodeContent The node content * @throws IllegalArgumentException If the node has not exactly 2 children */ private void checkValidMultiPredicate(final JsonNode nodeContent) { if (nodeContent.size() != 2) { throw new IllegalArgumentException("Unsupported query, malformed 'and' (expected 2 children)."); } } /** * Test a string value against a pattern returned during value analysis. * * @param value A string value. May be null. * @param pattern A pattern as returned in value analysis. * @return <code>true</code> if value matches, <code>false</code> otherwise. */ private boolean matches(String value, String pattern) { if (value == null && pattern == null) { return true; } if (value == null) { return false; } // Character based patterns if (StringUtils.containsAny(pattern, new char[] { 'A', 'a', '9' })) { if (value.length() != pattern.length()) { return false; } final char[] valueArray = value.toCharArray(); final char[] patternArray = pattern.toCharArray(); for (int i = 0; i < valueArray.length; i++) { if (patternArray[i] == 'A') { if (!Character.isUpperCase(valueArray[i])) { return false; } } else if (patternArray[i] == 'a') { if (!Character.isLowerCase(valueArray[i])) { return false; } } else if (patternArray[i] == '9') { if (!Character.isDigit(valueArray[i])) { return false; } } else { if (valueArray[i] != patternArray[i]) { return false; } } } } else { final DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern); try { formatter.toFormat().parseObject(value); } catch (ParseException e) { return false; } } return true; } /** * Simple wrapper to call BigDecimalParser to simplify code above. */ private double toBigDecimal(String value) { return BigDecimalParser.toBigDecimal(value).doubleValue(); } }