Example usage for org.apache.commons.math3.distribution NormalDistribution cumulativeProbability

Introduction

This page collects example usages of org.apache.commons.math3.distribution.NormalDistribution.cumulativeProbability.

Prototype

public double cumulativeProbability(double x) 

Document

If x is more than 40 standard deviations from the mean, 0 or 1 is returned, as in these cases the actual value is within Double.MIN_VALUE of 0 or 1.
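
For illustration, here is a minimal standalone sketch of that clamping behavior; the class name and probe values are ours, not part of the library documentation:

import org.apache.commons.math3.distribution.NormalDistribution;

public class TailDemo {
    public static void main(String[] args) {
        NormalDistribution standard = new NormalDistribution(); // mean 0, sd 1
        System.out.println(standard.cumulativeProbability(0.0));   // 0.5
        System.out.println(standard.cumulativeProbability(1.96));  // ~0.975
        // More than 40 standard deviations from the mean, the result is clamped.
        System.out.println(standard.cumulativeProbability(41.0));  // 1.0
        System.out.println(standard.cumulativeProbability(-41.0)); // 0.0
    }
}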

Usage

From source file:com.mapr.synth.samplers.VectorSamplerTest.java

private boolean isNormal(double[] vx, double mean, double sd) {
    Arrays.sort(vx);
    NormalDistribution n = new NormalDistribution(mean, sd);
    double diff = 0;
    for (int i = 0; i < vx.length; i++) {
        double q = (double) i / (vx.length - 1);
        diff = Math.max(diff, Math.abs(q - n.cumulativeProbability(vx[i])));
    }

    return diff < 5.0 / Math.sqrt(vx.length);
}
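
This loop compares the empirical CDF of the sorted sample against the theoretical normal CDF at every order statistic, a Kolmogorov-Smirnov-style check with a deliberately loose 5/sqrt(n) bound. A hypothetical call, assuming isNormal is in scope; the seed and sample size are illustrative:

import java.util.Random;

Random rand = new Random(42);
double[] samples = new double[10000];
for (int i = 0; i < samples.length; i++) {
    samples[i] = 3.0 + 2.0 * rand.nextGaussian(); // draw from N(mean 3, sd 2)
}
System.out.println(isNormal(samples, 3.0, 2.0)); // true with overwhelming probability
System.out.println(isNormal(samples, 0.0, 1.0)); // false: wrong mean and sd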

From source file:com.mapr.synth.TermGeneratorTest.java

@Test
public void distinctVocabularies() {
    TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k1 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k1.add(x1.sample());
    }

    TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k2 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k2.add(x2.sample());
    }

    final NormalDistribution normal = new NormalDistribution();
    List<Double> scores = Ordering.natural()
            .sortedCopy(Iterables.transform(k1.elementSet(), new Function<String, Double>() {
                public Double apply(String s) {
                    return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(k1.count(s),
                            50000 - k1.count(s), k2.count(s), 50000 - k2.count(s)));
                }
            }));
    int n = scores.size();
    //        System.out.printf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f", scores.get(0), scores.get((int) (0.05*n)), scores.get(n / 4), scores.get(n / 2), scores.get(3 * n / 4), scores.get((int) (0.95 * n)), scores.get(n - 1));
    int i = 0;
    for (Double score : scores) {
        if (i % 10 == 0) {
            System.out.printf("%.6f\t%.6f\n", (double) i / n, score);
        }

        i++;
    }
}
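
Passing each signed root log-likelihood ratio through the standard normal CDF works because, under the null hypothesis that both generators share one vocabulary distribution, the root-LLR is approximately standard normal, so the resulting scores should look roughly uniform on [0, 1] in the printed quantile ramp. A minimal sketch of that conversion, with an illustrative score:

import org.apache.commons.math3.distribution.NormalDistribution;

NormalDistribution normal = new NormalDistribution();
double z = 1.7; // a signed root-LLR value, illustrative only
double u = normal.cumulativeProbability(z);  // ~0.955; uniform under the null
double pTwoSided = 2.0 * (1.0 - normal.cumulativeProbability(Math.abs(z))); // ~0.089
System.out.printf("u = %.4f, two-sided p = %.4f%n", u, pTwoSided);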

From source file:com.mapr.synth.samplers.RandomWalkSamplerTest.java

@Test
public void testBasics() throws IOException {
    // this sampler has five variables
    // g1 is gamma distributed with alpha = 0.2, beta = 0.2
    // g2 is gamma distributed with alpha = 1, beta = 1
    // v1 is unit normal
    // v2 is normal with mean = 0, sd = 2
    // v3 is gamma-normal with dof = 2, mean = 0 (its steps are t-distributed)
    SchemaSampler s = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());

    TDigest tdG1 = new AVLTreeDigest(500);
    TDigest tdG2 = new AVLTreeDigest(500);
    TDigest td1 = new AVLTreeDigest(500);
    TDigest td2 = new AVLTreeDigest(500);
    TDigest td3 = new AVLTreeDigest(500);

    double x1 = 0;
    double x2 = 0;
    double x3 = 0;

    for (int i = 0; i < 1000000; i++) {
        JsonNode r = s.sample();
        tdG1.add(r.get("g1").asDouble());
        tdG2.add(r.get("g2").asDouble());

        double step1 = r.get("v1").get("step").asDouble();
        td1.add(step1);
        x1 += step1;
        assertEquals(x1, r.get("v1").get("value").asDouble(), 0);
        assertEquals(x1, r.get("v1-bare").asDouble(), 0);

        double step2 = r.get("v2").get("step").asDouble();
        td2.add(step2);
        x2 += step2;
        assertEquals(x2, r.get("v2").get("value").asDouble(), 0);

        double step3 = r.get("v3").get("step").asDouble();
        td3.add(step3);
        x3 += step3;
        assertEquals(x3, r.get("v3").get("value").asDouble(), 0);
    }

    // now compare against reference distributions to test accuracy of the observed step distributions
    NormalDistribution normalDistribution = new NormalDistribution();
    GammaDistribution gd1 = new GammaDistribution(0.2, 5);
    GammaDistribution gd2 = new GammaDistribution(1, 1);
    TDistribution tDistribution = new TDistribution(2);
    for (double q : new double[] { 0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99, 0.999 }) {
        double uG1 = gd1.cumulativeProbability(tdG1.quantile(q));
        assertEquals(q, uG1, (1 - q) * q * 10e-2);

        double uG2 = gd2.cumulativeProbability(tdG2.quantile(q));
        assertEquals(q, uG2, (1 - q) * q * 10e-2);

        double u1 = normalDistribution.cumulativeProbability(td1.quantile(q));
        assertEquals(q, u1, (1 - q) * q * 10e-2);

        double u2 = normalDistribution.cumulativeProbability(td2.quantile(q) / 2);
        assertEquals(q, u2, (1 - q) * q * 10e-2);

        double u3 = tDistribution.cumulativeProbability(td3.quantile(q));
        assertEquals(q, u3, (1 - q) * q * 10e-2);
    }
}
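
The assertions implement a quantile-based goodness-of-fit check: each empirical quantile from the digest is pushed through the reference CDF and should land back near q, with a tolerance that tightens toward the tails. The same idea on a plain sorted array, without t-digest; the seed and sample size are illustrative:

import java.util.Arrays;
import java.util.Random;
import org.apache.commons.math3.distribution.NormalDistribution;

Random rand = new Random(1);
double[] data = new double[100000];
for (int i = 0; i < data.length; i++) {
    data[i] = rand.nextGaussian();
}
Arrays.sort(data);
NormalDistribution ref = new NormalDistribution();
for (double q : new double[] { 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999 }) {
    double u = ref.cumulativeProbability(data[(int) (q * (data.length - 1))]);
    System.out.printf("q = %.3f -> u = %.4f%n", q, u); // u should be close to q
}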

From source file:jasima.core.experiment.OCBAExperiment.java

protected double[] calcPCSPriosPerConfiguration() {
    final SummaryStat best = stats[currBest];
    final double bestMean = best.mean();

    double bestNormVariance = best.variance() / best.numObs();

    double[] prodTerms = new double[stats.length];
    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;

        SummaryStat vs = stats[i];
        prodTerms[i] = (bestMean - vs.mean()) / Math.sqrt(bestNormVariance + vs.variance() / vs.numObs());
    }

    NormalDistribution normalDist = new NormalDistribution();

    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;

        prodTerms[i] = normalDist.cumulativeProbability(prodTerms[i]);
        if (getProblemType() == ProblemType.MINIMIZE)
            prodTerms[i] = 1.0 - prodTerms[i];
    }

    return prodTerms;
}
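
Each prodTerms[i] ends up as Phi of the standardized difference between the current best and configuration i, i.e. an approximate probability that the best really beats i when sample means are treated as normal. A self-contained sketch with made-up summary statistics:

import org.apache.commons.math3.distribution.NormalDistribution;

// Hypothetical summaries: mean, variance and observation count per configuration.
double bestMean = 10.0, bestVar = 4.0;
int bestN = 30;
double otherMean = 9.2, otherVar = 5.0;
int otherN = 30;

double d = (bestMean - otherMean) / Math.sqrt(bestVar / bestN + otherVar / otherN);
// Approximate P(best > other) under a maximization objective.
double p = new NormalDistribution().cumulativeProbability(d);
System.out.printf("standardized difference = %.3f, P(best beats other) = %.4f%n", d, p);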

From source file:com.addthis.hydra.data.tree.prop.DataReservoir.java

private double gaussianNegativeProbability(double mean, double stddev) {
    NormalDistribution distribution = new NormalDistribution(mean, stddev);
    return distribution.cumulativeProbability(0.0);
}
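
Since the normal CDF is location-scale invariant, this equals the standard normal CDF evaluated at -mean/stddev. A quick check with illustrative numbers:

import org.apache.commons.math3.distribution.NormalDistribution;

double mean = 10.0, stddev = 5.0; // illustrative parameters
double direct = new NormalDistribution(mean, stddev).cumulativeProbability(0.0);
double standardized = new NormalDistribution().cumulativeProbability(-mean / stddev);
System.out.println(direct);       // ~0.02275, i.e. Phi(-2)
System.out.println(standardized); // same value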

From source file:net.demilich.metastone.game.behaviour.decicionTreeBheavior.DecisionDataBase.java

private synchronized void makeNewStat(List<Example> featureTrue, int feature, double totalGood,
        NormalDistribution norm, double numSamples, int value) {
    double numGoodTrue = (double) featureTrue.stream().filter(e -> e.classification == 2).count();

    double pValue = getZScore(numSamples, totalGood, featureTrue.size(), numGoodTrue);
    pValue = norm.cumulativeProbability(pValue);
    //if(pValue<.5){
    //    pValue = 1.0-pValue;
    // }
    //if(numGoodTrue/featureTrue.size() > .5){
    //   pValue = pValue+.02;
    // }
    //System.err.println("stats is " + numGoodTrue+ " size is " + featureTrue.size());
    //public FeatureStats(int feature, double pValue, double percentGood, int value, String featureName){
    stats.add(new FeatureStats(feature, pValue, numGoodTrue / featureTrue.size(), value,
            ((double) featureTrue.size()) / numSamples, featureNames[feature], this.percentGood));
}

From source file:edu.cmu.tetrad.util.StatUtils.java

public static double getZForAlpha(double alpha) {
    double low = 0.0;
    double high = 20.0;
    double mid = 5.0;
    NormalDistribution dist = new NormalDistribution(0, 1);

    while (high - low > 1e-4) {
        mid = (high + low) / 2.0;
        double _alpha = 2.0 * (1.0 - dist.cumulativeProbability(Math.abs(mid)));

        if (_alpha > alpha) {
            low = mid;
        } else {
            high = mid;
        }
    }
    return mid;
}
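
The bisection converges to the z that solves 2 * (1 - Phi(z)) = alpha, i.e. the two-sided critical value. Commons Math can produce the same number in closed form through the inverse CDF; a sketch comparing the two, assuming getZForAlpha is in scope:

import org.apache.commons.math3.distribution.NormalDistribution;

double alpha = 0.05;
NormalDistribution dist = new NormalDistribution(0, 1);
double zDirect = dist.inverseCumulativeProbability(1.0 - alpha / 2.0);
System.out.println(zDirect);             // ~1.95996
System.out.println(getZForAlpha(alpha)); // agrees to about 1e-4, the bisection tolerance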

From source file:com.addthis.hydra.data.tree.prop.DataReservoir.java

@VisibleForTesting
List<DataTreeNode> modelFitAnomalyDetection(long targetEpoch, int numObservations, boolean doubleToLongBits,
        boolean raw, double percentile, int minMeasurement) {
    int measurement;
    int count = 0;
    int min = Integer.MAX_VALUE;

    if (targetEpoch < 0 || numObservations <= 0 || reservoir == null || targetEpoch < minEpoch
            || targetEpoch >= minEpoch + reservoir.length
            || numObservations > (reservoir.length - 1)) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    }

    /*
     * Fitting to a geometric distribution uses the mean value of the sample.
     * Fitting to a normal distribution uses the Apache Commons Math implementation.
     */
    double mean = 0.0;
    double m2 = 0.0;
    double stddev;
    double gaussianNegative = -1.0;
    Map<Integer, Integer> frequencies = new HashMap<>();
    double threshold;
    double measurePercentile = -100.0;

    int index = reservoir.length - 1;
    long currentEpoch = minEpoch + index;

    while (currentEpoch != targetEpoch) {
        index--;
        currentEpoch--;
    }

    measurement = reservoir[index--];
    currentEpoch--;

    while (count < numObservations && index >= 0) {
        int value = reservoir[index--];
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    while (count < numObservations) {
        int value = 0;
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    if (count < 2) {
        stddev = 0.0;
    } else {
        stddev = Math.sqrt(m2 / count);
    }

    int mode = -1;
    int modeCount = -1;

    for (Map.Entry<Integer, Integer> entry : frequencies.entrySet()) {
        int key = entry.getKey();
        int value = entry.getValue();
        if (value > modeCount || (value == modeCount && key > mode)) {
            mode = key;
            modeCount = value;
        }
    }

    if (mean > 0.0 && stddev > 0.0) {
        gaussianNegative = gaussianNegativeProbability(mean, stddev);
    }

    if (mean == 0.0) {
        threshold = 0.0;
    } else if (stddev == 0.0) {
        threshold = mean;
    } else if (mean > 1.0) {
        NormalDistribution distribution = new NormalDistribution(mean, stddev);
        double badProbability = distribution.cumulativeProbability(0.0);
        double goodProbability = badProbability + (1.0 - badProbability) * (percentile / 100.0);
        threshold = distribution.inverseCumulativeProbability(goodProbability);
        measurePercentile = distribution.probability(0.0, measurement) / (1.0 - badProbability) * 100.0;
    } else {
        double p = 1.0 / (1.0 + mean);
        GeometricDistribution distribution = new GeometricDistribution(p);
        threshold = distribution.inverseCumulativeProbability(percentile / 100.0);
        measurePercentile = distribution.cumulativeProbability(measurement) * 100.0;
    }

    List<DataTreeNode> result = new ArrayList<>();
    VirtualTreeNode vchild, vparent;

    if (measurement >= minMeasurement && (measurement > threshold || percentile == 0.0)) {
        vchild = new VirtualTreeNode("gaussianNegative", doubleToLong(gaussianNegative, doubleToLongBits));
        vparent = new VirtualTreeNode("percentile", doubleToLong(measurePercentile, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mode", mode, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("stddev", doubleToLong(stddev, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mean", doubleToLong(mean, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("measurement", measurement, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("delta", doubleToLong(measurement - threshold, doubleToLongBits),
                generateSingletonArray(vchild));
        result.add(vparent);
        if (raw) {
            addRawObservations(result, targetEpoch, numObservations);
        }
    } else {
        result = makeDefaultNodes(raw, targetEpoch, numObservations);
    }
    return result;
}
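
For small means the method switches from a normal model to a geometric one with p = 1 / (1 + mean); that is the moment-matching fit for Commons Math's GeometricDistribution, which counts failures before the first success and therefore has mean (1 - p) / p. A small sketch of that fallback in isolation; the mean, percentile, and measurement are illustrative:

import org.apache.commons.math3.distribution.GeometricDistribution;

double mean = 0.4;              // observed sample mean
double p = 1.0 / (1.0 + mean);  // moment match: (1 - p) / p == mean
GeometricDistribution geom = new GeometricDistribution(p);
int threshold = geom.inverseCumulativeProbability(0.99); // alert threshold at the 99th percentile
double pct = geom.cumulativeProbability(3) * 100.0;      // percentile of a measurement of 3
System.out.printf("p = %.4f, threshold = %d, percentile = %.2f%%%n", p, threshold, pct);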

From source file:eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java

public boolean isSimilarProportion(double[] valuesA, double[] valuesB) {
    double alpha = 0.05;

    // Change the data a bit to avoid issues with 0/1 booleans
    /*for (int i=0; i<valuesA.length; i++)
    {
       valuesA[i] = valuesA[i] + 1.0;
    }
    for (int i=0; i<valuesB.length; i++)
    {
       valuesB[i] = valuesB[i] + 1.0;
    }*/

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZLeft = -1 * Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));

    logger.debug("Boundaries: " + myZLeft + " to " + myZRight);

    // Calculate proportion for valuesA dataset
    int nA = valuesA.length;
    double successA = 0;
    for (int i = 0; i < nA; i++) {
        successA = successA + valuesA[i];
    }

    logger.debug("Success number for dataset A: " + successA);
    logger.debug("Number of records for A: " + nA);

    double pA = successA / nA;

    // Calculate proportion for valuesB dataset
    int nB = valuesB.length;
    double successB = 0;
    for (int i = 0; i < nB; i++) {
        successB = successB + valuesB[i];
    }

    logger.debug("Success number for dataset B: " + successB);
    logger.debug("Number of records for B: " + nB);

    double pB = successB / nB;

    // Calculate proportion similarity
    double pPool = (nA * pA + nB * pB) / (nA + nB);
    double zComp = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));

    logger.debug("pPooled = " + pPool);
    logger.debug("Z value = " + zComp);
    logger.debug("p-value = " + (1.0 - myNormal.cumulativeProbability(zComp)) * 2);

    // Determine if z score is in the region of acceptance
    // Determine whether the z score is in the region of acceptance
    return (myZLeft <= zComp) && (zComp <= myZRight);
}
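
This is the classic pooled two-proportion z-test; accepting when z lies between the critical values is equivalent to requiring that the two-sided p-value logged above exceed alpha. A compact standalone sketch with illustrative counts:

import org.apache.commons.math3.distribution.NormalDistribution;

// Illustrative data: 40 successes out of 100 versus 55 out of 120.
int nA = 100, nB = 120;
double pA = 40.0 / nA, pB = 55.0 / nB;
double pPool = (nA * pA + nB * pB) / (nA + nB);
double z = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));
double pValue = 2.0 * (1.0 - new NormalDistribution().cumulativeProbability(Math.abs(z)));
System.out.printf("z = %.4f, p = %.4f, similar = %b%n", z, pValue, pValue > 0.05);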

From source file:org.apache.solr.client.solrj.io.stream.eval.CumulativeProbabilityEvaluatorTest.java

@Test
public void test() throws IOException {
    values.clear();
    values.put("l1", 3);
    values.put("l2", 7);

    NormalDistribution actual = new NormalDistribution(3, 7);
    Assert.assertEquals(actual.cumulativeProbability(2),
            factory.constructEvaluator("prob(norm(l1,l2),2)").evaluate(new Tuple(values)));
}