Example usage for org.apache.commons.math3.distribution ZipfDistribution probability

List of usage examples for org.apache.commons.math3.distribution ZipfDistribution probability

Introduction

On this page you can find example usages of the probability method of org.apache.commons.math3.distribution.ZipfDistribution.

Prototype

public double probability(final int x) 

Source Link

Usage

From source file:io.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java

/**
 * Builds the value distribution selected by {@code schema.getDistributionType()}, stores it in
 * {@code distribution}, and reseeds its random generator with {@code seed}.
 *
 * <p>For the enumerated-style distributions (DISCRETE_UNIFORM, ZIPF, ENUMERATED) a list of
 * (value, weight) pairs is assembled and handed to an {@code EnumeratedTreeDistribution}.
 *
 * @throws UnsupportedOperationException if the schema's distribution type is not handled here
 */
private void initDistribution() {
    BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();

    switch (distributionType) {
    case SEQUENTIAL:
        // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
        distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(),
                schema.getEnumeratedValues());
        break;
    case UNIFORM:
        distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
        break;
    case DISCRETE_UNIFORM:
        if (enumeratedValues == null) {
            // no explicit values: use every integer in [startInt, endInt), converted to the column type
            enumeratedValues = new ArrayList<>();
            for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                Object val = convertType(i, type);
                enumeratedValues.add(val);
            }
        }
        // give them all equal probability, the library will normalize probabilities to sum to 1.0
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), 0.1));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case NORMAL:
        distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        break;
    case ROUNDED_NORMAL:
        NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        distribution = new RealRoundingDistribution(normalDist);
        break;
    case ZIPF:
        int cardinality;
        if (enumeratedValues == null) {
            Integer startInt = schema.getStartInt();
            cardinality = schema.getEndInt() - startInt;
            ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                // Zipf ranks are 1-based (probability(x) is 0 for x <= 0), so the i-th value
                // takes the mass of rank i + 1; using rank i would give the first value weight 0.
                probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i + 1)));
            }
        } else {
            cardinality = enumeratedValues.size();
            ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                // same 1-based rank mapping as above
                probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i + 1)));
            }
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case ENUMERATED:
        // pair each enumerated value with the probability at the same index
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;

    default:
        throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }

    // reseed whichever kind of distribution was built; other types are left untouched
    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof EnumeratedDistribution) {
        ((EnumeratedDistribution<?>) distribution).reseedRandomGenerator(seed);
    }
}

From source file:org.apache.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java

/**
 * Builds the value distribution selected by {@code schema.getDistributionType()}, stores it in
 * {@code distribution}, and reseeds its random generator with {@code seed}.
 *
 * <p>For the enumerated-style distributions (DISCRETE_UNIFORM, ZIPF, ENUMERATED) a list of
 * (value, weight) pairs is assembled and handed to an {@code EnumeratedTreeDistribution}.
 *
 * @throws UnsupportedOperationException if the schema's distribution type is not handled here
 */
private void initDistribution() {
    BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();

    switch (distributionType) {
    case SEQUENTIAL:
        // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
        distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(),
                schema.getEnumeratedValues());
        break;
    case UNIFORM:
        distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
        break;
    case DISCRETE_UNIFORM:
        if (enumeratedValues == null) {
            // no explicit values: use every integer in [startInt, endInt), converted to the column type
            enumeratedValues = new ArrayList<>();
            for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                Object val = convertType(i, type);
                enumeratedValues.add(val);
            }
        }
        // give them all equal probability, the library will normalize probabilities to sum to 1.0
        for (Object enumeratedValue : enumeratedValues) {
            probabilities.add(new Pair<>(enumeratedValue, 0.1));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case NORMAL:
        distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        break;
    case ROUNDED_NORMAL:
        NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        distribution = new RealRoundingDistribution(normalDist);
        break;
    case ZIPF:
        int cardinality;
        if (enumeratedValues == null) {
            Integer startInt = schema.getStartInt();
            cardinality = schema.getEndInt() - startInt;
            ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                // Zipf ranks are 1-based (probability(x) is 0 for x <= 0), so the i-th value
                // takes the mass of rank i + 1; using rank i would give the first value weight 0.
                probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i + 1)));
            }
        } else {
            cardinality = enumeratedValues.size();
            ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                // same 1-based rank mapping as above
                probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i + 1)));
            }
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case ENUMERATED:
        // pair each enumerated value with the probability at the same index
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;

    default:
        throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }

    // reseed the distribution; anything that is neither an integer nor a real distribution
    // is cast to EnumeratedDistribution unconditionally, as in the original
    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else {
        ((EnumeratedDistribution<?>) distribution).reseedRandomGenerator(seed);
    }
}