Usage examples for org.apache.commons.math3.distribution.NormalDistribution.cumulativeProbability
public double cumulativeProbability(double x)
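Before the examples from real projects below, here is a minimal, self-contained sketch of the method (class name and values chosen for illustration): cumulativeProbability(x) returns P(X <= x) for the configured normal distribution, so the standard normal returns 0.5 at x = 0.

import org.apache.commons.math3.distribution.NormalDistribution;

public class CumulativeProbabilityDemo {
    public static void main(String[] args) {
        // Standard normal: mean 0, standard deviation 1
        NormalDistribution standardNormal = new NormalDistribution(0, 1);
        System.out.println(standardNormal.cumulativeProbability(0.0));   // 0.5
        System.out.println(standardNormal.cumulativeProbability(1.96));  // ~0.975

        // Non-standard normal: P(X <= 12) for X ~ N(10, 2^2)
        NormalDistribution n = new NormalDistribution(10, 2);
        System.out.println(n.cumulativeProbability(12.0));               // ~0.841
    }
}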
From source file: com.mapr.synth.samplers.VectorSamplerTest.java

private boolean isNormal(double[] vx, double mean, double sd) {
    Arrays.sort(vx);
    NormalDistribution n = new NormalDistribution(mean, sd);
    // Largest deviation between the empirical CDF and the reference normal
    // CDF (a Kolmogorov-Smirnov style statistic).
    double diff = 0;
    for (int i = 0; i < vx.length; i++) {
        double q = (double) i / (vx.length - 1);
        diff = Math.max(diff, Math.abs(q - n.cumulativeProbability(vx[i])));
    }
    return diff < 5.0 / Math.sqrt(vx.length);
}
From source file: com.mapr.synth.TermGeneratorTest.java

@Test
public void distinctVocabularies() {
    TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k1 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k1.add(x1.sample());
    }

    TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8);
    final Multiset<String> k2 = HashMultiset.create();
    for (int i = 0; i < 50000; i++) {
        k2.add(x2.sample());
    }

    // Convert root log-likelihood ratio scores to probabilities via the
    // standard normal CDF, then sort them.
    final NormalDistribution normal = new NormalDistribution();
    List<Double> scores = Ordering.natural()
            .sortedCopy(Iterables.transform(k1.elementSet(), new Function<String, Double>() {
                public Double apply(String s) {
                    return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(
                            k1.count(s), 50000 - k1.count(s),
                            k2.count(s), 50000 - k2.count(s)));
                }
            }));
    int n = scores.size();
    // System.out.printf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f", scores.get(0),
    //         scores.get((int) (0.05 * n)), scores.get(n / 4), scores.get(n / 2),
    //         scores.get(3 * n / 4), scores.get((int) (0.95 * n)), scores.get(n - 1));
    int i = 0;
    for (Double score : scores) {
        if (i % 10 == 0) {
            System.out.printf("%.6f\t%.6f\n", (double) i / n, score);
        }
        i++;
    }
}
From source file: com.mapr.synth.samplers.RandomWalkSamplerTest.java

@Test
public void testBasics() throws IOException {
    // this sampler has four variables
    // g1 is gamma distributed with alpha = 0.2, beta = 0.2
    // v1 is unit normal
    // v2 is normal with mean = 0, sd = 2
    // v3 is gamma-normal with dof=2, mean = 0.
    SchemaSampler s = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());
    TDigest tdG1 = new AVLTreeDigest(500);
    TDigest tdG2 = new AVLTreeDigest(500);
    TDigest td1 = new AVLTreeDigest(500);
    TDigest td2 = new AVLTreeDigest(500);
    TDigest td3 = new AVLTreeDigest(500);
    double x1 = 0;
    double x2 = 0;
    double x3 = 0;
    for (int i = 0; i < 1000000; i++) {
        JsonNode r = s.sample();
        tdG1.add(r.get("g1").asDouble());
        tdG2.add(r.get("g2").asDouble());

        double step1 = r.get("v1").get("step").asDouble();
        td1.add(step1);
        x1 += step1;
        assertEquals(x1, r.get("v1").get("value").asDouble(), 0);
        assertEquals(x1, r.get("v1-bare").asDouble(), 0);

        double step2 = r.get("v2").get("step").asDouble();
        td2.add(step2);
        x2 += step2;
        assertEquals(x2, r.get("v2").get("value").asDouble(), 0);

        double step3 = r.get("v3").get("step").asDouble();
        td3.add(step3);
        x3 += step3;
        assertEquals(x3, r.get("v3").get("value").asDouble(), 0);
    }

    // now compare against reference distributions to test accuracy of the
    // observed step distributions
    NormalDistribution normalDistribution = new NormalDistribution();
    GammaDistribution gd1 = new GammaDistribution(0.2, 5);
    GammaDistribution gd2 = new GammaDistribution(1, 1);
    TDistribution tDistribution = new TDistribution(2);
    for (double q : new double[] { 0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99, 0.99 }) {
        double uG1 = gd1.cumulativeProbability(tdG1.quantile(q));
        assertEquals(q, uG1, (1 - q) * q * 10e-2);

        double uG2 = gd2.cumulativeProbability(tdG2.quantile(q));
        assertEquals(q, uG2, (1 - q) * q * 10e-2);

        double u1 = normalDistribution.cumulativeProbability(td1.quantile(q));
        assertEquals(q, u1, (1 - q) * q * 10e-2);

        double u2 = normalDistribution.cumulativeProbability(td2.quantile(q) / 2);
        assertEquals(q, u2, (1 - q) * q * 10e-2);

        double u3 = tDistribution.cumulativeProbability(td3.quantile(q));
        assertEquals(q, u3, (1 - q) * q * 10e-2);
    }
}
From source file: jasima.core.experiment.OCBAExperiment.java

protected double[] calcPCSPriosPerConfiguration() {
    final SummaryStat best = stats[currBest];
    final double bestMean = best.mean();
    double bestNormVariance = best.variance() / best.numObs();
    double[] prodTerms = new double[stats.length];

    // z-like statistics comparing the current best configuration to each alternative
    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;
        SummaryStat vs = stats[i];
        prodTerms[i] = (bestMean - vs.mean())
                / Math.sqrt(bestNormVariance + vs.variance() / vs.numObs());
    }

    // convert each statistic to a probability via the standard normal CDF
    NormalDistribution normalDist = new NormalDistribution();
    for (int i = 0; i < stats.length; i++) {
        if (i == currBest)
            continue;
        prodTerms[i] = normalDist.cumulativeProbability(prodTerms[i]);
        if (getProblemType() == ProblemType.MINIMIZE)
            prodTerms[i] = 1.0 - prodTerms[i];
    }

    return prodTerms;
}
From source file: com.addthis.hydra.data.tree.prop.DataReservoir.java

// probability that a sample drawn from N(mean, stddev^2) is negative (<= 0)
private double gaussianNegativeProbability(double mean, double stddev) {
    NormalDistribution distribution = new NormalDistribution(mean, stddev);
    return distribution.cumulativeProbability(0.0);
}
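As a side note (not part of the Hydra source above), the same quantity follows from standardizing: P(X <= 0) = Phi(-mean/stddev), where Phi is the standard normal CDF. A minimal equivalent sketch, with a hypothetical helper name:

import org.apache.commons.math3.distribution.NormalDistribution;

// Equivalent formulation: P(X <= 0) for X ~ N(mean, stddev^2)
// equals Phi(-mean / stddev).
static double gaussianNegativeProbabilityViaPhi(double mean, double stddev) {
    NormalDistribution standardNormal = new NormalDistribution(0, 1);
    return standardNormal.cumulativeProbability(-mean / stddev);
}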
From source file: net.demilich.metastone.game.behaviour.decicionTreeBheavior.DecisionDataBase.java

private synchronized void makeNewStat(List<Example> featureTrue, int feature, double totalGood,
        NormalDistribution norm, double numSamples, int value) {
    double numGoodTrue = (double) featureTrue.stream().filter(e -> e.classification == 2).count();
    double pValue = getZScore(numSamples, totalGood, featureTrue.size(), numGoodTrue);
    pValue = norm.cumulativeProbability(pValue);
    //if (pValue < .5) {
    //    pValue = 1.0 - pValue;
    //}
    //if (numGoodTrue / featureTrue.size() > .5) {
    //    pValue = pValue + .02;
    //}
    //System.err.println("stats is " + numGoodTrue + " size is " + featureTrue.size());
    //public FeatureStats(int feature, double pValue, double percentGood, int value, String featureName){
    stats.add(new FeatureStats(feature, pValue, numGoodTrue / featureTrue.size(), value,
            ((double) featureTrue.size()) / numSamples, featureNames[feature], this.percentGood));
}
From source file: edu.cmu.tetrad.util.StatUtils.java

public static double getZForAlpha(double alpha) {
    // Bisection search for the two-sided critical value z such that
    // 2 * (1 - Phi(|z|)) == alpha.
    double low = 0.0;
    double high = 20.0;
    double mid = 5.0;
    NormalDistribution dist = new NormalDistribution(0, 1);

    while (high - low > 1e-4) {
        mid = (high + low) / 2.0;
        double _alpha = 2.0 * (1.0 - dist.cumulativeProbability(Math.abs(mid)));

        if (_alpha > alpha) {
            low = mid;
        } else {
            high = mid;
        }
    }

    return mid;
}
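An aside, not from the Tetrad source: commons-math3 also exposes the inverse CDF directly, so the same critical value can be obtained without a bisection loop (to numerical precision). A sketch with a hypothetical method name:

import org.apache.commons.math3.distribution.NormalDistribution;

// Two-sided critical value: z such that 2 * (1 - Phi(z)) = alpha,
// i.e. z = Phi^{-1}(1 - alpha / 2).
static double getZForAlphaDirect(double alpha) {
    NormalDistribution dist = new NormalDistribution(0, 1);
    return dist.inverseCumulativeProbability(1.0 - alpha / 2.0);
}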
From source file: com.addthis.hydra.data.tree.prop.DataReservoir.java

@VisibleForTesting
List<DataTreeNode> modelFitAnomalyDetection(long targetEpoch, int numObservations,
        boolean doubleToLongBits, boolean raw, double percentile, int minMeasurement) {
    int measurement;
    int count = 0;
    int min = Integer.MAX_VALUE;
    if (targetEpoch < 0) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (numObservations <= 0) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (reservoir == null) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (targetEpoch < minEpoch) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (targetEpoch >= minEpoch + reservoir.length) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    } else if (numObservations > (reservoir.length - 1)) {
        return makeDefaultNodes(raw, targetEpoch, numObservations);
    }

    /**
     * Fitting to a geometric distribution uses the mean value of the sample.
     *
     * Fitting to a normal distribution uses the Apache Commons Math implementation.
     */
    double mean = 0.0;
    double m2 = 0.0;
    double stddev;
    double gaussianNegative = -1.0;
    Map<Integer, Integer> frequencies = new HashMap<>();
    double threshold;
    double measurePercentile = -100.0;

    int index = reservoir.length - 1;
    long currentEpoch = minEpoch + index;

    while (currentEpoch != targetEpoch) {
        index--;
        currentEpoch--;
    }

    measurement = reservoir[index--];
    currentEpoch--;

    while (count < numObservations && index >= 0) {
        int value = reservoir[index--];
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    while (count < numObservations) {
        int value = 0;
        if (value < min) {
            min = value;
        }
        updateFrequencies(frequencies, value);
        count++;
        double delta = value - mean;
        mean += delta / count;
        m2 += delta * (value - mean);
    }

    if (count < 2) {
        stddev = 0.0;
    } else {
        stddev = Math.sqrt(m2 / count);
    }

    int mode = -1;
    int modeCount = -1;
    for (Map.Entry<Integer, Integer> entry : frequencies.entrySet()) {
        int key = entry.getKey();
        int value = entry.getValue();
        if (value > modeCount || (value == modeCount && key > mode)) {
            mode = key;
            modeCount = value;
        }
    }

    if (mean > 0.0 && stddev > 0.0) {
        gaussianNegative = gaussianNegativeProbability(mean, stddev);
    }

    if (mean == 0.0) {
        threshold = 0.0;
    } else if (stddev == 0.0) {
        threshold = mean;
    } else if (mean > 1.0) {
        NormalDistribution distribution = new NormalDistribution(mean, stddev);
        double badProbability = distribution.cumulativeProbability(0.0);
        double goodProbability = badProbability + (1.0 - badProbability) * (percentile / 100.0);
        threshold = distribution.inverseCumulativeProbability(goodProbability);
        measurePercentile = distribution.probability(0.0, measurement) / (1.0 - badProbability) * 100.0;
    } else {
        double p = 1.0 / (1.0 + mean);
        GeometricDistribution distribution = new GeometricDistribution(p);
        threshold = distribution.inverseCumulativeProbability(percentile / 100.0);
        measurePercentile = distribution.cumulativeProbability(measurement) * 100.0;
    }

    List<DataTreeNode> result = new ArrayList<>();
    VirtualTreeNode vchild, vparent;

    if (measurement >= minMeasurement && (measurement > threshold || percentile == 0.0)) {
        vchild = new VirtualTreeNode("gaussianNegative", doubleToLong(gaussianNegative, doubleToLongBits));
        vparent = new VirtualTreeNode("percentile", doubleToLong(measurePercentile, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mode", mode, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("stddev", doubleToLong(stddev, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("mean", doubleToLong(mean, doubleToLongBits),
                generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("measurement", measurement, generateSingletonArray(vchild));
        vchild = vparent;
        vparent = new VirtualTreeNode("delta", doubleToLong(measurement - threshold, doubleToLongBits),
                generateSingletonArray(vchild));
        result.add(vparent);
        if (raw) {
            addRawObservations(result, targetEpoch, numObservations);
        }
    } else {
        makeDefaultNodes(raw, targetEpoch, numObservations);
    }
    return result;
}
From source file: eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java

public boolean isSimilarProportion(double[] valuesA, double[] valuesB) {
    double alpha = 0.05;

    // Change data a bit for avoiding issues with booleans 0/1
    /*for (int i = 0; i < valuesA.length; i++) {
        valuesA[i] = valuesA[i] + 1.0;
    }
    for (int i = 0; i < valuesB.length; i++) {
        valuesB[i] = valuesB[i] + 1.0;
    }*/

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZLeft = -1 * Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));

    logger.debug("Boundaries: " + myZLeft + " to " + myZRight);

    // Calculate proportion for valuesA dataset
    int nA = valuesA.length;
    double successA = 0;
    for (int i = 0; i < nA; i++) {
        successA = successA + valuesA[i];
    }
    logger.debug("Success number for dataset A: " + successA);
    logger.debug("Number of records for A: " + nA);
    double pA = successA / nA;

    // Calculate proportion for valuesB dataset
    int nB = valuesB.length;
    double successB = 0;
    for (int i = 0; i < nB; i++) {
        successB = successB + valuesB[i];
    }
    logger.debug("Success number for dataset B: " + successB);
    logger.debug("Number of records for B: " + nB);
    double pB = successB / nB;

    // Calculate proportion similarity
    double pPool = (nA * pA + nB * pB) / (nA + nB);
    double zComp = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));

    logger.debug("pPooled = " + pPool);
    logger.debug("Z value = " + zComp);
    logger.debug("p-value = " + (1.0 - myNormal.cumulativeProbability(zComp)) * 2);

    // Determine if z score is in the region of acceptance
    return (myZLeft <= zComp) && (zComp <= myZRight);
}
From source file: org.apache.solr.client.solrj.io.stream.eval.CumulativeProbabilityEvaluatorTest.java

@Test
public void test() throws IOException {
    values.clear();
    values.put("l1", 3);
    values.put("l2", 7);

    NormalDistribution actual = new NormalDistribution(3, 7);
    Assert.assertEquals(actual.cumulativeProbability(2),
            factory.constructEvaluator("prob(norm(l1,l2),2)").evaluate(new Tuple(values)));
}