Usage examples for org.apache.commons.math3.distribution.NormalDistribution.cumulativeProbability
public double cumulativeProbability(double x)
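Before the examples from real projects below, here is a minimal, self-contained sketch of the method (class name and values chosen for illustration): cumulativeProbability(x) returns P(X <= x) for the configured normal distribution, so the standard normal returns 0.5 at x = 0.

import org.apache.commons.math3.distribution.NormalDistribution;

public class CumulativeProbabilityDemo {
    public static void main(String[] args) {
        // Standard normal: mean 0, standard deviation 1
        NormalDistribution standardNormal = new NormalDistribution(0, 1);
        System.out.println(standardNormal.cumulativeProbability(0.0));   // 0.5
        System.out.println(standardNormal.cumulativeProbability(1.96));  // ~0.975

        // Non-standard normal: P(X <= 12) for X ~ N(10, 2^2)
        NormalDistribution n = new NormalDistribution(10, 2);
        System.out.println(n.cumulativeProbability(12.0));               // ~0.841
    }
}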
From source file: com.mapr.synth.samplers.VectorSamplerTest.java

private boolean isNormal(double[] vx, double mean, double sd) {
    Arrays.sort(vx);
    NormalDistribution n = new NormalDistribution(mean, sd);
    // Largest deviation between the empirical CDF and the reference normal
    // CDF (a Kolmogorov-Smirnov style statistic).
    double diff = 0;
    for (int i = 0; i < vx.length; i++) {
        double q = (double) i / (vx.length - 1);
        diff = Math.max(diff, Math.abs(q - n.cumulativeProbability(vx[i])));
    }
    return diff < 5.0 / Math.sqrt(vx.length);
}
From source file: com.mapr.synth.TermGeneratorTest.java

@Test
public void distinctVocabularies() {
    TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k1 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k1.add(x1.sample());
    }

    TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k2 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k2.add(x2.sample());
    }

    // Convert root log-likelihood ratio scores to probabilities via the
    // standard normal CDF, then sort them.
    final NormalDistribution normal = new NormalDistribution();
    List<Double> scores = Ordering.natural()
            .sortedCopy(Iterables.transform(k1.elementSet(), new Function<String, Double>() {
                public Double apply(String s) {
                    return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(
                            k1.count(s), 50000 - k1.count(s),
                            k2.count(s), 50000 - k2.count(s)));
                }
            }));
    int n = scores.size();
    // System.out.printf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f", scores.get(0),
    //         scores.get((int) (0.05 * n)), scores.get(n / 4), scores.get(n / 2),
    //         scores.get(3 * n / 4), scores.get((int) (0.95 * n)), scores.get(n - 1));
    int i = 0;
    for (Double score : scores) {
        if (i % 10 == 0) {
            System.out.printf("%.6f\t%.6f\n", (double) i / n, score);
        }
        i++;
    }
}
From source file: com.mapr.synth.samplers.RandomWalkSamplerTest.java

@Test
public void testBasics() throws IOException {
    // this sampler has four variables
    // g1 is gamma distributed with alpha = 0.2, beta = 0.2
    // v1 is unit normal
    // v2 is normal with mean = 0, sd = 2
    // v3 is gamma-normal with dof=2, mean = 0.
    SchemaSampler s = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());
    TDigest tdG1 = new AVLTreeDigest(500);
    TDigest tdG2 = new AVLTreeDigest(500);
    TDigest td1 = new AVLTreeDigest(500);
    TDigest td2 = new AVLTreeDigest(500);
    TDigest td3 = new AVLTreeDigest(500);
    double x1 = 0;
    double x2 = 0;
    double x3 = 0;
    for (int i = 0; i < 1000000; i++) {
        JsonNode r = s.sample();
        tdG1.add(r.get("g1").asDouble());
        tdG2.add(r.get("g2").asDouble());

        double step1 = r.get("v1").get("step").asDouble();
        td1.add(step1);
        x1 += step1;
        assertEquals(x1, r.get("v1").get("value").asDouble(), 0);
        assertEquals(x1, r.get("v1-bare").asDouble(), 0);

        double step2 = r.get("v2").get("step").asDouble();
        td2.add(step2);
        x2 += step2;
        assertEquals(x2, r.get("v2").get("value").asDouble(), 0);

        double step3 = r.get("v3").get("step").asDouble();
        td3.add(step3);
        x3 += step3;
        assertEquals(x3, r.get("v3").get("value").asDouble(), 0);
    }

    // now compare against reference distributions to test accuracy of the
    // observed step distributions
    NormalDistribution normalDistribution = new NormalDistribution();
    GammaDistribution gd1 = new GammaDistribution(0.2, 5);
    GammaDistribution gd2 = new GammaDistribution(1, 1);
    TDistribution tDistribution = new TDistribution(2);
    for (double q : new double[] { 0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99, 0.99 }) {
        double uG1 = gd1.cumulativeProbability(tdG1.quantile(q));
        assertEquals(q, uG1, (1 - q) * q * 10e-2);

        double uG2 = gd2.cumulativeProbability(tdG2.quantile(q));
        assertEquals(q, uG2, (1 - q) * q * 10e-2);

        double u1 = normalDistribution.cumulativeProbability(td1.quantile(q));
        assertEquals(q, u1, (1 - q) * q * 10e-2);

        double u2 = normalDistribution.cumulativeProbability(td2.quantile(q) / 2);
        assertEquals(q, u2, (1 - q) * q * 10e-2);

        double u3 = tDistribution.cumulativeProbability(td3.quantile(q));
        assertEquals(q, u3, (1 - q) * q * 10e-2);
    }
}
From source file: jasima.core.experiment.OCBAExperiment.java

protected double[] calcPCSPriosPerConfiguration() {
    final SummaryStat best = stats[currBest];
    final double bestMean = best.mean();
    double bestNormVariance = best.variance() / best.numObs();
    double[] prodTerms = new double[stats.length];

    // z-like statistics comparing the current best configuration to each alternative
    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;
        SummaryStat vs = stats[i];
        prodTerms[i] = (bestMean - vs.mean())
                / Math.sqrt(bestNormVariance + vs.variance() / vs.numObs());
    }

    // convert each statistic to a probability via the standard normal CDF
    NormalDistribution normalDist = new NormalDistribution();
    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;
        prodTerms[i] = normalDist.cumulativeProbability(prodTerms[i]);
        if (getProblemType() == ProblemType.MINIMIZE)
            prodTerms[i] = 1.0 - prodTerms[i];
    }

    return prodTerms;
}
From source file: com.addthis.hydra.data.tree.prop.DataReservoir.java

// probability that a sample drawn from N(mean, stddev^2) is negative (<= 0)
private double gaussianNegativeProbability(double mean, double stddev) {
    NormalDistribution distribution = new NormalDistribution(mean, stddev);
    return distribution.cumulativeProbability(0.0);
}
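As a side note (not part of the Hydra source above), the same quantity follows from standardizing: P(X <= 0) = Phi(-mean/stddev), where Phi is the standard normal CDF. A minimal equivalent sketch, with a hypothetical helper name:

import org.apache.commons.math3.distribution.NormalDistribution;

// Equivalent formulation: P(X <= 0) for X ~ N(mean, stddev^2)
// equals Phi(-mean / stddev).
static double gaussianNegativeProbabilityViaPhi(double mean, double stddev) {
    NormalDistribution standardNormal = new NormalDistribution(0, 1);
    return standardNormal.cumulativeProbability(-mean / stddev);
}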
From source file: net.demilich.metastone.game.behaviour.decicionTreeBheavior.DecisionDataBase.java

private synchronized void makeNewStat(List<Example> featureTrue, int feature, double totalGood,
        NormalDistribution norm, double numSamples, int value) {
    double numGoodTrue = (double) featureTrue.stream().filter(e -> e.classification == 2).count();
    double pValue = getZScore(numSamples, totalGood, featureTrue.size(), numGoodTrue);
    pValue = norm.cumulativeProbability(pValue);
    //if (pValue < .5) {
    //    pValue = 1.0 - pValue;
    //}
    //if (numGoodTrue / featureTrue.size() > .5) {
    //    pValue = pValue + .02;
    //}
    //System.err.println("stats is " + numGoodTrue + " size is " + featureTrue.size());
    //public FeatureStats(int feature, double pValue, double percentGood, int value, String featureName){
    stats.add(new FeatureStats(feature, pValue, numGoodTrue / featureTrue.size(), value,
            ((double) featureTrue.size()) / numSamples, featureNames[feature], this.percentGood));
}
From source file: edu.cmu.tetrad.util.StatUtils.java

public static double getZForAlpha(double alpha) {
    // Bisection search for the two-sided critical value z such that
    // 2 * (1 - Phi(|z|)) == alpha.
    double low = 0.0;
    double high = 20.0;
    double mid = 5.0;
    NormalDistribution dist = new NormalDistribution(0, 1);

    while (high - low > 1e-4) {
        mid = (high + low) / 2.0;
        double _alpha = 2.0 * (1.0 - dist.cumulativeProbability(Math.abs(mid)));

        if (_alpha > alpha) {
            low = mid;
        } else {
            high = mid;
        }
    }

    return mid;
}
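An aside, not from the Tetrad source: commons-math3 also exposes the inverse CDF directly, so the same critical value can be obtained without a bisection loop (to numerical precision). A sketch with a hypothetical method name:

import org.apache.commons.math3.distribution.NormalDistribution;

// Two-sided critical value: z such that 2 * (1 - Phi(z)) = alpha,
// i.e. z = Phi^{-1}(1 - alpha / 2).
static double getZForAlphaDirect(double alpha) {
    NormalDistribution dist = new NormalDistribution(0, 1);
    return dist.inverseCumulativeProbability(1.0 - alpha / 2.0);
}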
From source file: com.addthis.hydra.data.tree.prop.DataReservoir.java

@VisibleForTesting
List<DataTreeNode> modelFitAnomalyDetection(long targetEpoch, int numObservations,
        boolean doubleToLongBits, boolean raw, double percentile, int minMeasurement) {
    int measurement;
    int count = 0;
    int min = Integer.MAX_VALUE;
    if (targetEpoch < 0) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (numObservations <= 0) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (reservoir == null) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (targetEpoch < minEpoch) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (targetEpoch >= minEpoch + reservoir.length) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (numObservations > (reservoir.length - 1)) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    }

    /**
     * Fitting to a geometric distribution uses the mean value of the sample.
     *
     * Fitting to a normal distribution uses the Apache Commons Math implementation.
     */
    double mean = 0.0;
    double m2 = 0.0;
    double stddev;
    double gaussianNegative = -1.0;
    Map<Integer, Integer> frequencies = new HashMap<>();
    double threshold;
    double measurePercentile = -100.0;

    int index = reservoir.length - 1;
    long currentEpoch = minEpoch + index;

    while (currentEpoch != targetEpoch) {
        index--;
        currentEpoch--;
    }

    measurement = reservoir[index--];
    currentEpoch--;

    while (count < numObservations && index >= 0) {
        int value = reservoir[index--];
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    while (count < numObservations) {
        int value = 0;
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    if (count < 2) {
        stddev = 0.0;
    } else {
        stddev = Math.sqrt(m2 / count);
    }

    int mode = -1;
    int modeCount = -1;
    for (Map.Entry<Integer, Integer> entry : frequencies.entrySet()) {
        int key = entry.getKey();
        int value = entry.getValue();
        if (value > modeCount || (value == modeCount && key > mode)) {
            mode = key;
            modeCount = value;
        }
    }

    if (mean > 0.0 && stddev > 0.0) {
        gaussianNegative = gaussianNegativeProbability(mean, stddev);
    }

    if (mean == 0.0) {
        threshold = 0.0;
    } else if (stddev == 0.0) {
        threshold = mean;
    } else if (mean > 1.0) {
        NormalDistribution distribution = new NormalDistribution(mean, stddev);
        double badProbability = distribution.cumulativeProbability(0.0);
        double goodProbability = badProbability + (1.0 - badProbability) * (percentile / 100.0);
        threshold = distribution.inverseCumulativeProbability(goodProbability);
        measurePercentile = distribution.probability(0.0, measurement) / (1.0 - badProbability) * 100.0;
    } else {
        double p = 1.0 / (1.0 + mean);
        GeometricDistribution distribution = new GeometricDistribution(p);
        threshold = distribution.inverseCumulativeProbability(percentile / 100.0);
        measurePercentile = distribution.cumulativeProbability(measurement) * 100.0;
    }

    List<DataTreeNode> result = new ArrayList<>();
    VirtualTreeNode vchild, vparent;

    if (measurement >= minMeasurement && (measurement > threshold || percentile == 0.0)) {
        vchild = new VirtualTreeNode("gaussianNegative", doubleToLong(gaussianNegative, doubleToLongBits));
        vparent = new VirtualTreeNode("percentile", doubleToLong(measurePercentile, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mode", mode, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("stddev", doubleToLong(stddev, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mean", doubleToLong(mean, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("measurement", measurement, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("delta", doubleToLong(measurement - threshold, doubleToLongBits),
                generateSingletonArray(vchild));
        result.add(vparent);
        if (raw) {
            addRawObservations(result, targetEpoch, numObservations);
        }
    } else {
        makeDefaultNodes(raw, targetEpoch, numObservations);
    }
    return result;
}
From source file: eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java

public boolean isSimilarProportion(double[] valuesA, double[] valuesB) {
    double alpha = 0.05;

    // Change data a bit for avoiding issues with booleans 0/1
    /*for (int i = 0; i < valuesA.length; i++) {
        valuesA[i] = valuesA[i] + 1.0;
    }
    for (int i = 0; i < valuesB.length; i++) {
        valuesB[i] = valuesB[i] + 1.0;
    }*/

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZLeft = -1 * Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));

    logger.debug("Boundaries: " + myZLeft + " to " + myZRight);

    // Calculate proportion for valuesA dataset
    int nA = valuesA.length;
    double successA = 0;
    for (int i = 0; i < nA; i++) {
        successA = successA + valuesA[i];
    }
    logger.debug("Success number for dataset A: " + successA);
    logger.debug("Number of records for A: " + nA);
    double pA = successA / nA;

    // Calculate proportion for valuesB dataset
    int nB = valuesB.length;
    double successB = 0;
    for (int i = 0; i < nB; i++) {
        successB = successB + valuesB[i];
    }
    logger.debug("Success number for dataset B: " + successB);
    logger.debug("Number of records for B: " + nB);
    double pB = successB / nB;

    // Calculate proportion similarity
    double pPool = (nA * pA + nB * pB) / (nA + nB);
    double zComp = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));

    logger.debug("pPooled = " + pPool);
    logger.debug("Z value = " + zComp);
    logger.debug("p-value = " + (1.0 - myNormal.cumulativeProbability(zComp)) * 2);

    // Determine if z score is in the region of acceptance
    return (myZLeft <= zComp) && (zComp <= myZRight);
}
From source file: org.apache.solr.client.solrj.io.stream.eval.CumulativeProbabilityEvaluatorTest.java

@Test
public void test() throws IOException {
    values.clear();
    values.put("l1", 3);
    values.put("l2", 7);

    NormalDistribution actual = new NormalDistribution(3, 7);
    Assert.assertEquals(actual.cumulativeProbability(2),
            factory.constructEvaluator("prob(norm(l1,l2),2)").evaluate(new Tuple(values)));
}