org.apache.phoenix.pherf.rules.RulesApplier.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.phoenix.pherf.rules.RulesApplier.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 *   or more contributor license agreements.  See the NOTICE file
 *   distributed with this work for additional information
 *   regarding copyright ownership.  The ASF licenses this file
 *   to you under the Apache License, Version 2.0 (the
 *   "License"); you may not use this file except in compliance
 *   with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

package org.apache.phoenix.pherf.rules;

import com.google.common.base.Preconditions;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.math3.random.RandomDataGenerator;
import org.apache.phoenix.pherf.PherfConstants;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang3.RandomUtils;
import org.apache.phoenix.pherf.configuration.*;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

public class RulesApplier {
    private static final Logger logger = LoggerFactory.getLogger(RulesApplier.class);
    private static final AtomicLong COUNTER = new AtomicLong(100);

    // Used to bail out of random distribution if it takes too long
    // This should never happen when distributions add up to 100
    private static final int OH_SHIT_LIMIT = 1000;

    private final Random rndNull;
    private final Random rndVal;
    private final RandomDataGenerator randomDataGenerator;

    private final XMLConfigParser parser;
    private final List<Map> modelList;

    public RulesApplier(XMLConfigParser parser) {
        this(parser, System.currentTimeMillis());
    }

    public RulesApplier(XMLConfigParser parser, long seed) {
        this.parser = parser;
        this.modelList = new ArrayList<Map>();
        this.rndNull = new Random(seed);
        this.rndVal = new Random(seed);
        this.randomDataGenerator = new RandomDataGenerator();
        populateModelList();
    }

    public List<Map> getModelList() {
        return Collections.unmodifiableList(this.modelList);
    }

    /**
     * Get a data value based on rules.
     *
     * @param scenario      {@link org.apache.phoenix.pherf.configuration.Scenario} We are getting data for
     * @param phxMetaColumn {@link org.apache.phoenix.pherf.configuration.Column}
     *                      From Phoenix MetaData that are
     *                      generating data for. It defines the
     *                      type we are trying to match.
     * @return
     * @throws Exception
     */
    public DataValue getDataForRule(Scenario scenario, Column phxMetaColumn) throws Exception {
        // TODO Make a Set of Rules that have already been applied so that so we don't generate for every value

        List<Scenario> scenarios = parser.getScenarios();
        DataValue value = null;
        if (scenarios.contains(scenario)) {
            logger.debug("We found a correct Scenario");
            // Assume the first rule map
            Map<DataTypeMapping, List> ruleMap = modelList.get(0);
            List<Column> ruleList = ruleMap.get(phxMetaColumn.getType());

            // Make sure Column from Phoenix Metadata matches a rule column
            if (ruleList.contains(phxMetaColumn)) {
                // Generate some random data based on this rule
                logger.debug("We found a correct column rule");
                Column columnRule = getColumnForRule(ruleList, phxMetaColumn);

                value = getDataValue(columnRule);
            } else {
                logger.warn("Attempted to apply rule to data, but could not find a rule to match type:"
                        + phxMetaColumn.getType());
            }

        }
        return value;
    }

    /**
     * Get data value based on the supplied rule
     *
     * @param column {@link org.apache.phoenix.pherf.configuration.Column} Column rule to get data for
     * @return {@link org.apache.phoenix.pherf.rules.DataValue} Container Type --> Value mapping
     */
    public DataValue getDataValue(Column column) throws Exception {
        DataValue data = null;
        String prefix = "";
        int length = column.getLength();
        int nullChance = column.getNullChance();
        List<DataValue> dataValues = column.getDataValues();

        // Return an empty value if we fall within the configured probability of null
        if ((nullChance != Integer.MIN_VALUE) && (isValueNull(nullChance))) {
            return new DataValue(column.getType(), "");
        }

        if (column.getPrefix() != null) {
            prefix = column.getPrefix();
        }

        if ((prefix.length() >= length) && (length > 0)) {
            logger.warn("You are attempting to generate data with a prefix (" + prefix + ") "
                    + "That is longer than expected overall field length (" + length + "). "
                    + "This will certainly lead to unexpected data values.");
        }

        switch (column.getType()) {
        case VARCHAR:
            // Use the specified data values from configs if they exist
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                Preconditions.checkArgument(length > 0, "length needs to be > 0");
                if (column.getDataSequence() == DataSequence.SEQUENTIAL) {
                    data = getSequentialDataValue(column);
                } else {
                    data = getRandomDataValue(column);
                }
            }
            break;
        case CHAR:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                Preconditions.checkArgument(length > 0, "length needs to be > 0");
                if (column.getDataSequence() == DataSequence.SEQUENTIAL) {
                    data = getSequentialDataValue(column);
                } else {
                    data = getRandomDataValue(column);
                }
            }
            break;
        case DECIMAL:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                int precision = column.getPrecision();
                double minDbl = column.getMinValue();
                Preconditions.checkArgument((precision > 0) && (precision <= 18),
                        "Precision must be between 0 and 18");
                Preconditions.checkArgument(minDbl >= 0, "minvalue must be set in configuration");
                Preconditions.checkArgument(column.getMaxValue() > 0, "maxValue must be set in configuration");
                StringBuilder maxValueStr = new StringBuilder();

                for (int i = 0; i < precision; i++) {
                    maxValueStr.append(9);
                }

                double maxDbl = Math.min(column.getMaxValue(), Double.parseDouble(maxValueStr.toString()));
                final double dbl = RandomUtils.nextDouble(minDbl, maxDbl);
                data = new DataValue(column.getType(), String.valueOf(dbl));
            }
            break;
        case INTEGER:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                int minInt = column.getMinValue();
                int maxInt = column.getMaxValue();
                Preconditions.checkArgument((minInt > 0) && (maxInt > 0),
                        "min and max values need to be set in configuration");
                int intVal = RandomUtils.nextInt(minInt, maxInt);
                data = new DataValue(column.getType(), String.valueOf(intVal));
            }
            break;
        case DATE:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
                // Check if date has right format or not
                data.setValue(checkDatePattern(data.getValue()));
            } else if (column.getUseCurrentDate() != true) {
                int minYear = column.getMinValue();
                int maxYear = column.getMaxValue();
                Preconditions.checkArgument((minYear > 0) && (maxYear > 0),
                        "min and max values need to be set in configuration");

                String dt = generateRandomDate(minYear, maxYear);
                data = new DataValue(column.getType(), dt);
                data.setMaxValue(String.valueOf(minYear));
                data.setMinValue(String.valueOf(maxYear));
            } else {
                String dt = getCurrentDate();
                data = new DataValue(column.getType(), dt);
            }
            break;
        default:
            break;
        }
        Preconditions.checkArgument(data != null,
                "Data value could not be generated for some reason. Please check configs");
        return data;
    }

    // Convert years into standard date format yyyy-MM-dd HH:mm:ss.SSS z
    public String generateRandomDate(int min, int max) throws Exception {
        String mindt = min + "-01-01 00:00:00.000"; // set min date as starting of min year
        String maxdt = max + "-12-31 23:59:59.999"; // set max date as end of max year
        return generateRandomDate(mindt, maxdt);
    }

    public String generateRandomDate(String min, String max) throws Exception {
        DateTimeFormatter fmtr = DateTimeFormat.forPattern(PherfConstants.DEFAULT_DATE_PATTERN)
                .withZone(DateTimeZone.UTC);
        DateTime minDt;
        DateTime maxDt;
        DateTime dt;

        minDt = fmtr.parseDateTime(checkDatePattern(min));
        maxDt = fmtr.parseDateTime(checkDatePattern(max));

        // Get Ms Date between min and max
        synchronized (randomDataGenerator) {
            //Make sure date generated is exactly between the passed limits
            long rndLong = randomDataGenerator.nextLong(minDt.getMillis() + 1, maxDt.getMillis() - 1);
            dt = new DateTime(rndLong, PherfConstants.DEFAULT_TIME_ZONE);
        }

        return fmtr.print(dt);
    }

    public String getCurrentDate() {
        DateTimeFormatter fmtr = DateTimeFormat.forPattern(PherfConstants.DEFAULT_DATE_PATTERN)
                .withZone(DateTimeZone.UTC);
        ;
        DateTime dt = new DateTime(PherfConstants.DEFAULT_TIME_ZONE);
        return fmtr.print(dt);
    }

    /**
     * Given an int chance [0-100] inclusive, this method will return true if a winner is selected, otherwise false.
     *
     * @param chance Percentage as an int while number.
     * @return boolean if we pick a number within range
     */
    private boolean isValueNull(int chance) {
        return (rndNull.nextInt(100) < chance);
    }

    private DataValue pickDataValueFromList(List<DataValue> values) throws Exception {
        DataValue generatedDataValue = null;
        int sum = 0, count = 0;

        // Verify distributions add up to 100 if they exist
        for (DataValue value : values) {
            int dist = value.getDistribution();
            sum += dist;
        }
        Preconditions.checkArgument((sum == 100) || (sum == 0),
                "Distributions need to add up to 100 or not exist.");

        // Spin the wheel until we get a value.
        while (generatedDataValue == null) {

            // Give an equal chance at picking any one rule to test
            // This prevents rules at the beginning of the list from getting more chances to get picked
            int rndIndex = rndVal.nextInt(values.size());
            DataValue valueRule = values.get(rndIndex);

            generatedDataValue = pickDataValueFromList(valueRule);

            // While it's possible to get here if you have a bunch of really small distributions,
            // It's just really unlikely. This is just a safety just so we actually pick a value.
            if (count++ == OH_SHIT_LIMIT) {
                logger.info("We generated a value from hitting our OH_SHIT_LIMIT: " + OH_SHIT_LIMIT);
                generatedDataValue = valueRule;
            }

        }
        return generatedDataValue;
    }

    private DataValue pickDataValueFromList(final DataValue valueRule) throws Exception {
        DataValue retValue = new DataValue(valueRule);

        // Path taken when configuration specifies a specific value to be taken with the <value> tag
        if (valueRule.getValue() != null) {
            int chance = (valueRule.getDistribution() == 0) ? 100 : valueRule.getDistribution();
            return (rndVal.nextInt(100) <= chance) ? retValue : null;
        }

        // Path taken when configuration specifies to use current date
        if (valueRule.getUseCurrentDate() == true) {
            int chance = (valueRule.getDistribution() == 0) ? 100 : valueRule.getDistribution();
            retValue.setValue(getCurrentDate());
            return (rndVal.nextInt(100) <= chance) ? retValue : null;
        }

        // Later we can add support fo other data types if needed.Right now, we just do this for dates
        Preconditions.checkArgument((retValue.getMinValue() != null) || (retValue.getMaxValue() != null),
                "Both min/maxValue tags must be set if value tag is not used");
        Preconditions.checkArgument((retValue.getType() == DataTypeMapping.DATE),
                "Currently on DATE is supported for ranged random values");

        retValue.setValue(generateRandomDate(retValue.getMinValue(), retValue.getMaxValue()));

        retValue.setValue(generateRandomDate(retValue.getMinValue(), retValue.getMaxValue()));
        retValue.setMinValue(checkDatePattern(valueRule.getMinValue()));
        retValue.setMaxValue(checkDatePattern(valueRule.getMaxValue()));
        return retValue;
    }

    // Checks if date is in defult pattern
    public String checkDatePattern(String date) {
        DateTimeFormatter fmtr = DateTimeFormat.forPattern(PherfConstants.DEFAULT_DATE_PATTERN)
                .withZone(DateTimeZone.UTC);
        ;
        DateTime parsedDate = fmtr.parseDateTime(date);
        return fmtr.print(parsedDate);
    }

    /**
     * Top level {@link java.util.List} {@link java.util.Map}. This will likely only have one entry until we have
     * multiple files.
     * <p/>
     * <p/>
     * Each Map entry in the List is:
     * {@link java.util.Map} of
     * {@link org.apache.phoenix.pherf.configuration.DataTypeMapping} -->
     * List of {@link org.apache.phoenix.pherf.configuration.Column
     * Build the initial Map with all the general rules.
     * These are contained in:
     * <datamode><datamapping><column>...</column></datamapping></datamode>
     * <p/>
     * <p/>
     * Unsupported until V2
     * Build the overrides by appending them to the list of rules that match the column type
     */
    private void populateModelList() {
        if (!modelList.isEmpty()) {
            return;
        }

        // Support for multiple models, but rules are only relevant each model
        for (DataModel model : parser.getDataModels()) {

            // Step 1
            final Map<DataTypeMapping, List> ruleMap = new HashMap<DataTypeMapping, List>();
            for (Column column : model.getDataMappingColumns()) {
                List<Column> cols;
                DataTypeMapping type = column.getType();
                if (ruleMap.containsKey(type)) {
                    ruleMap.get(type).add(column);
                } else {
                    cols = new LinkedList<Column>();
                    cols.add(column);
                    ruleMap.put(type, cols);
                }
            }

            this.modelList.add(ruleMap);
        }
    }

    public Column getRule(Column phxMetaColumn) {
        // Assume the first rule map
        Map<DataTypeMapping, List> ruleMap = modelList.get(0);

        List<Column> ruleList = ruleMap.get(phxMetaColumn.getType());
        return getColumnForRule(ruleList, phxMetaColumn);
    }

    private Column getColumnForRule(List<Column> ruleList, Column phxMetaColumn) {

        // Column pointer to head of list
        Column ruleAppliedColumn = new Column(ruleList.get(0));

        // Then we apply each rule override as a mutation to the column
        for (Column columnRule : ruleList) {

            // Check if user defined column rules match the column data type we are generating
            // We don't want to apply the rule if name doesn't match the column from Phoenix
            if (columnRule.isUserDefined() && !columnRule.getName().equals(phxMetaColumn.getName())) {
                continue;
            }
            ruleAppliedColumn.mutate(columnRule);
        }

        return ruleAppliedColumn;
    }

    /**
     * Add a numerically increasing counter onto the and of a random string.
     * Incremented counter should be thread safe.
     *
     * @param column {@link org.apache.phoenix.pherf.configuration.Column}
     * @return {@link org.apache.phoenix.pherf.rules.DataValue}
     */
    private DataValue getSequentialDataValue(Column column) {
        DataValue data = null;
        long inc = COUNTER.getAndIncrement();
        String strInc = String.valueOf(inc);
        String varchar = RandomStringUtils.randomAlphanumeric(column.getLength() - strInc.length());
        varchar = (column.getPrefix() != null) ? column.getPrefix() + strInc + varchar : strInc + varchar;

        // Truncate string back down if it exceeds length
        varchar = StringUtils.left(varchar, column.getLength());
        data = new DataValue(column.getType(), varchar);
        return data;
    }

    private DataValue getRandomDataValue(Column column) {
        String varchar = RandomStringUtils.randomAlphanumeric(column.getLength());
        varchar = (column.getPrefix() != null) ? column.getPrefix() + varchar : varchar;

        // Truncate string back down if it exceeds length
        varchar = StringUtils.left(varchar, column.getLength());
        return new DataValue(column.getType(), varchar);
    }
}