List of usage examples for the org.apache.commons.math3.distribution.NormalDistribution constructor
public NormalDistribution(double mean, double sd) throws NotStrictlyPositiveException
From source file:com.addthis.hydra.data.tree.prop.DataReservoir.java
private double gaussianNegativeProbability(double mean, double stddev) { NormalDistribution distribution = new NormalDistribution(mean, stddev); return distribution.cumulativeProbability(0.0); }
From source file:edu.jhuapl.bsp.detector.OpenMath.java
public static double normcdf(double stat, double m, double s) { double result = 0; NormalDistribution normdist = new NormalDistribution(m, s); result = normdist.cumulativeProbability(stat); return result; }
From source file:com.addthis.hydra.data.tree.prop.DataReservoir.java
@VisibleForTesting List<DataTreeNode> modelFitAnomalyDetection(long targetEpoch, int numObservations, boolean doubleToLongBits, boolean raw, double percentile, int minMeasurement) { int measurement; int count = 0; int min = Integer.MAX_VALUE; if (targetEpoch < 0) { return makeDefaultNodes(raw, targetEpoch, numObservations); } else if (numObservations <= 0) { return makeDefaultNodes(raw, targetEpoch, numObservations); } else if (reservoir == null) { return makeDefaultNodes(raw, targetEpoch, numObservations); } else if (targetEpoch < minEpoch) { return makeDefaultNodes(raw, targetEpoch, numObservations); } else if (targetEpoch >= minEpoch + reservoir.length) { return makeDefaultNodes(raw, targetEpoch, numObservations); } else if (numObservations > (reservoir.length - 1)) { return makeDefaultNodes(raw, targetEpoch, numObservations); }//from ww w. ja v a 2 s .c o m /** * Fitting to a geometric distribution uses the mean value of the sample. * * Fitting to a normal distribution uses the Apache Commons Math implementation. 
*/ double mean = 0.0; double m2 = 0.0; double stddev; double gaussianNegative = -1.0; Map<Integer, Integer> frequencies = new HashMap<>(); double threshold; double measurePercentile = -100.0; int index = reservoir.length - 1; long currentEpoch = minEpoch + index; while (currentEpoch != targetEpoch) { index--; currentEpoch--; } measurement = reservoir[index--]; currentEpoch--; while (count < numObservations && index >= 0) { int value = reservoir[index--]; if (value < min) { min = value; } updateFrequencies(frequencies, value); count++; double delta = value - mean; mean += delta / count; m2 += delta * (value - mean); } while (count < numObservations) { int value = 0; if (value < min) { min = value; } updateFrequencies(frequencies, value); count++; double delta = value - mean; mean += delta / count; m2 += delta * (value - mean); } if (count < 2) { stddev = 0.0; } else { stddev = Math.sqrt(m2 / count); } int mode = -1; int modeCount = -1; for (Map.Entry<Integer, Integer> entry : frequencies.entrySet()) { int key = entry.getKey(); int value = entry.getValue(); if (value > modeCount || (value == modeCount && key > mode)) { mode = key; modeCount = value; } } if (mean > 0.0 && stddev > 0.0) { gaussianNegative = gaussianNegativeProbability(mean, stddev); } if (mean == 0.0) { threshold = 0.0; } else if (stddev == 0.0) { threshold = mean; } else if (mean > 1.0) { NormalDistribution distribution = new NormalDistribution(mean, stddev); double badProbability = distribution.cumulativeProbability(0.0); double goodProbability = badProbability + (1.0 - badProbability) * (percentile / 100.0); threshold = distribution.inverseCumulativeProbability(goodProbability); measurePercentile = distribution.probability(0.0, measurement) / (1.0 - badProbability) * 100.0; } else { double p = 1.0 / (1.0 + mean); GeometricDistribution distribution = new GeometricDistribution(p); threshold = distribution.inverseCumulativeProbability(percentile / 100.0); measurePercentile = 
distribution.cumulativeProbability(measurement) * 100.0; } List<DataTreeNode> result = new ArrayList<>(); VirtualTreeNode vchild, vparent; if (measurement >= minMeasurement && (measurement > threshold || percentile == 0.0)) { vchild = new VirtualTreeNode("gaussianNegative", doubleToLong(gaussianNegative, doubleToLongBits)); vparent = new VirtualTreeNode("percentile", doubleToLong(measurePercentile, doubleToLongBits), generateSingletonArray(vchild)); vchild = vparent; vparent = new VirtualTreeNode("mode", mode, generateSingletonArray(vchild)); vchild = vparent; vparent = new VirtualTreeNode("stddev", doubleToLong(stddev, doubleToLongBits), generateSingletonArray(vchild)); vchild = vparent; vparent = new VirtualTreeNode("mean", doubleToLong(mean, doubleToLongBits), generateSingletonArray(vchild)); vchild = vparent; vparent = new VirtualTreeNode("measurement", measurement, generateSingletonArray(vchild)); vchild = vparent; vparent = new VirtualTreeNode("delta", doubleToLong(measurement - threshold, doubleToLongBits), generateSingletonArray(vchild)); result.add(vparent); if (raw) { addRawObservations(result, targetEpoch, numObservations); } } else { makeDefaultNodes(raw, targetEpoch, numObservations); } return result; }
From source file:jeplus.JEPlusProject.java
/**
 * Latin-Hypercube-style sampling: draws {@code n} stratified samples from the distribution
 * described by {@code funcstr} and returns them as strings.
 *
 * Supported forms (parsed from the text between the parentheses of {@code funcstr}):
 *   @sample(gaussian, mean, sd, N)        - aliases 'normal'/'n'
 *   @sample(lognormal, mean, sd, N)       - alias 'ln'
 *   @sample(exponential, mean, N)         - alias 'e'
 *   @sample(uniform, lb, ub, N)           - alias 'u'
 *   @sample(triangular, lb, mode, ub, N)  - alias 'tr'
 *   @sample(discrete, opt1, p1, opt2, p2, ..., N) - alias 'd'
 *
 * Each sample i is drawn uniformly within the i-th of n equal-probability bins, so the
 * n samples jointly cover the distribution (one per stratum).
 *
 * @param n         number of samples to draw
 * @param funcstr   the @sample(...) specification; only the text inside ( ) is parsed
 * @param type      ParameterItem.DOUBLE or ParameterItem.INTEGER output formatting
 * @param randomsrc randomness source for the within-bin draws
 * @return n sample values rendered as strings (empty for unrecognized/custom distributions)
 */
private String[] defaultLHSdistributionSample(int n, String funcstr, int type, Random randomsrc) {
    // Trim off brackets: keep only the argument list between '(' and ')'.
    int start = funcstr.indexOf("(") + 1;
    int end = funcstr.indexOf(")");
    funcstr = funcstr.substring(start, end).trim();
    ArrayList<String> list = new ArrayList<>();
    String[] params = funcstr.split("\\s*,\\s*");
    String distribution = params[0].toLowerCase();
    switch (distribution) {
    case "uniform":
    case "u":
        // requires lb, ub, n
        double lb = Double.parseDouble(params[1]);
        double ub = Double.parseDouble(params[2]);
        for (int i = 0; i < n; i++) {
            if (type == ParameterItem.DOUBLE) {
                // Bin i spans [lb + i*bin, lb + (i+1)*bin); draw uniformly within it.
                double bin = (ub - lb) / n;
                double v = randomsrc.nextDouble() * bin + lb + i * bin;
                list.add(Double.toString(v));
            } else if (type == ParameterItem.INTEGER) {
                // Widen the range by 1 so that flooring yields ub with nonzero probability.
                double bin = (ub + 1. - lb) / n;
                double v = randomsrc.nextDouble() * bin + lb + i * bin;
                list.add(Integer.toString((int) Math.floor(v)));
            }
        }
        break;
    case "gaussian":
    case "normal":
    case "n": {
        // requires mean, sd, n
        double mean = Double.parseDouble(params[1]);
        double sd = Double.parseDouble(params[2]);
        NormalDistribution Dist = new NormalDistribution(mean, sd);
        double bin = 1.0 / n;
        for (int i = 0; i < n; i++) {
            // lb of each bin; the first bin's lower edge is nudged off 0 (quantile at 0
            // is -infinity), using bin/10 as the lower tail cut-off.
            double a = Dist.inverseCumulativeProbability((i == 0) ? bin / 10 : i * bin);
            // ub of each bin; the last bin's upper edge is cut at 1 - bin/n for the same reason.
            double b = Dist.inverseCumulativeProbability((i == n - 1) ? 1. - bin / n : (i + 1) * bin);
            double v = randomsrc.nextDouble() * (b - a) + a;
            if (type == ParameterItem.DOUBLE) {
                list.add(Double.toString(v));
            } else if (type == ParameterItem.INTEGER) {
                // Warning: for integer, binomial distribution should be used.
                // The following function is provided just for convenience.
                list.add(Long.toString(Math.round(v)));
            }
        }
        break;
    }
    case "lognormal":
    case "ln": {
        // requires mean, sd, n (parameters of the underlying normal - scale and shape)
        double mean = Double.parseDouble(params[1]);
        double sd = Double.parseDouble(params[2]);
        LogNormalDistribution Dist = new LogNormalDistribution(mean, sd);
        double bin = 1.0 / n;
        for (int i = 0; i < n; i++) {
            double a = Dist.inverseCumulativeProbability((i == 0) ? bin / 10 : i * bin); // lb of each bin
            double b = Dist.inverseCumulativeProbability((i == n - 1) ? 1. - bin / n : (i + 1) * bin); // ub of each bin
            double v = randomsrc.nextDouble() * (b - a) + a;
            if (type == ParameterItem.DOUBLE) {
                list.add(Double.toString(v));
            } else if (type == ParameterItem.INTEGER) {
                // Warning: for integer, binomial distribution should be used.
                // The following function is provided just for convenience.
                list.add(Long.toString(Math.round(v)));
            }
        }
        break;
    }
    case "exponential":
    case "e": {
        // requires mean, n
        double mean = Double.parseDouble(params[1]);
        ExponentialDistribution Dist = new ExponentialDistribution(mean);
        double bin = 1.0 / n;
        for (int i = 0; i < n; i++) {
            double a = Dist.inverseCumulativeProbability((i == 0) ? bin / 10 : i * bin); // lb of each bin
            double b = Dist.inverseCumulativeProbability((i == n - 1) ? 1. - bin / n : (i + 1) * bin); // ub of each bin
            double v = randomsrc.nextDouble() * (b - a) + a;
            if (type == ParameterItem.DOUBLE) {
                list.add(Double.toString(v));
            } else if (type == ParameterItem.INTEGER) {
                // Warning: for integer, binomial distribution should be used.
                // The following function is provided just for convenience.
                list.add(Long.toString(Math.round(v)));
            }
        }
        break;
    }
    case "triangular":
    case "tr": {
        // requires a(lb), c(mode), b(ub), n
        double a = Double.parseDouble(params[1]);
        double c = Double.parseDouble(params[2]);
        double b = Double.parseDouble(params[3]);
        TriangularDistribution Dist = new TriangularDistribution(a, c, b);
        double bin = 1.0 / n;
        for (int i = 0; i < n; i++) {
            // Triangular quantiles are finite at 0 and 1, so no tail cut-off is needed.
            a = Dist.inverseCumulativeProbability(i * bin); // lb of each bin
            b = Dist.inverseCumulativeProbability((i + 1) * bin); // ub of each bin
            double v = randomsrc.nextDouble() * (b - a) + a;
            if (type == ParameterItem.DOUBLE) {
                list.add(Double.toString(v));
            } else if (type == ParameterItem.INTEGER) {
                // Warning: for integer, user defined discrete distribution should be used.
                // The following function is provided just for convenience.
                list.add(Long.toString(Math.round(v)));
            }
        }
        break;
    }
    case "discrete":
    case "d": {
        // requires op1, prob1, op2, prob2, ..., n
        // params holds the distribution name, nOptions (option, probability) pairs, and N.
        int nOptions = params.length / 2 - 1;
        String[] options = new String[nOptions];
        double[] probabilities = new double[nOptions];
        double sum = 0;
        for (int i = 0; i < nOptions; i++) {
            options[i] = params[2 * i + 1];
            try {
                probabilities[i] = Double.parseDouble(params[2 * i + 2]);
            } catch (NumberFormatException nfe) {
                // Unparseable weights fall back to a small default rather than aborting.
                probabilities[i] = 0.1;
            }
            sum += probabilities[i];
        }
        RouletteWheel Wheel = new RouletteWheel(probabilities, randomsrc);
        // Weights need not sum to 1; bins partition the total weight instead.
        double bin = sum / n;
        for (int i = 0; i < n; i++) {
            double a = i * bin; // lb of each bin
            double b = (i + 1) * bin; // ub of each bin
            int sel = Wheel.spin(a, b);
            list.add(options[sel]);
        }
        break;
    }
    case "custom":
        // Not implemented; falls through to return an empty array.
        break;
    }
    return list.toArray(new String[0]);
}
From source file:edu.cmu.tetrad.util.StatUtils.java
public static double getZForAlpha(double alpha) { double low = 0.0; double high = 20.0; double mid = 5.0; NormalDistribution dist = new NormalDistribution(0, 1); while (high - low > 1e-4) { mid = (high + low) / 2.0;//w w w. j a v a 2s . c o m double _alpha = 2.0 * (1.0 - dist.cumulativeProbability(Math.abs(mid))); if (_alpha > alpha) { low = mid; } else { high = mid; } } return mid; }
From source file:org.apache.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
/**
 * Initializes the {@code distribution} field from the column schema's distribution type,
 * then reseeds it with this generator's seed for reproducible benchmark data.
 */
private void initDistribution() {
    BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();
    switch (distributionType) {
    case SEQUENTIAL:
        // Not random: just cycle through numbers from start to end, or cycle through
        // enumerated values if provided.
        distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(),
                schema.getEnumeratedValues());
        break;
    case UNIFORM:
        distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
        break;
    case DISCRETE_UNIFORM:
        if (enumeratedValues == null) {
            // No explicit values: enumerate the integer range [startInt, endInt).
            enumeratedValues = new ArrayList<>();
            for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                Object val = convertType(i, type);
                enumeratedValues.add(val);
            }
        }
        // Give them all equal probability; the library will normalize probabilities
        // to sum to 1.0.
        for (Object enumeratedValue : enumeratedValues) {
            probabilities.add(new Pair<>(enumeratedValue, 0.1));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case NORMAL:
        distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        break;
    case ROUNDED_NORMAL:
        // Wraps a real-valued normal in a distribution that rounds samples to integers.
        NormalDistribution normalDist = new NormalDistribution(schema.getMean(),
                schema.getStandardDeviation());
        distribution = new RealRoundingDistribution(normalDist);
        break;
    case ZIPF:
        int cardinality;
        // NOTE(review): ZipfDistribution's support is {1..N}, so probability(0) is 0 —
        // the first enumerated value appears to get zero weight while rank N is never
        // queried. Confirm whether probability(i + 1) was intended.
        if (enumeratedValues == null) {
            Integer startInt = schema.getStartInt();
            cardinality = schema.getEndInt() - startInt;
            ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i)));
            }
        } else {
            cardinality = enumeratedValues.size();
            ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(),
                    schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i)));
            }
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case ENUMERATED:
        // Explicit value/probability pairs supplied by the schema.
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    default:
        throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }
    // Reseed whichever distribution hierarchy was instantiated so runs are reproducible.
    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else {
        ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed);
    }
}
From source file:org.apache.solr.client.solrj.io.eval.NormalDistributionEvaluator.java
@Override public Object doWork(Object first, Object second) throws IOException { if (null == first) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory))); }/*from ww w .j av a 2 s . c o m*/ if (null == second) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory))); } Number mean = (Number) first; Number standardDeviation = (Number) second; return new NormalDistribution(mean.doubleValue(), standardDeviation.doubleValue()); }
From source file:org.apache.solr.client.solrj.io.stream.eval.CumulativeProbabilityEvaluatorTest.java
@Test public void test() throws IOException { values.clear();//from w ww . jav a 2 s . c o m values.put("l1", 3); values.put("l2", 7); NormalDistribution actual = new NormalDistribution(3, 7); Assert.assertEquals(actual.cumulativeProbability(2), factory.constructEvaluator("prob(norm(l1,l2),2)").evaluate(new Tuple(values))); }
From source file:org.apache.solr.client.solrj.io.stream.eval.NormalDistributionEvaluatorTest.java
@Test public void test() throws IOException { values.clear();//from w w w .j a v a2 s . co m values.put("l1", 3); values.put("l2", 7); NormalDistribution dist = new NormalDistribution(3, 7); Assert.assertEquals(dist.getNumericalMean(), ((NormalDistribution) factory.constructEvaluator("norm(l1,l2)").evaluate(new Tuple(values))) .getNumericalMean()); }
From source file:org.apache.solr.client.solrj.io.stream.StreamExpressionTest.java
@Test public void fakeTest() { NormalDistribution a = new NormalDistribution(10, 2); NormalDistribution c = new NormalDistribution(100, 6); double[] d = c.sample(250); KolmogorovSmirnovTest ks = new KolmogorovSmirnovTest(); double pv = ks.kolmogorovSmirnovStatistic(a, d); String s = ""; }