List of usage examples for org.apache.commons.math3.distribution.NormalDistribution
public NormalDistribution(double mean, double sd) throws NotStrictlyPositiveException
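A minimal, self-contained sketch of the constructor in use (the parameter values are illustrative; per the signature above, sd must be strictly positive or NotStrictlyPositiveException is thrown):

import org.apache.commons.math3.distribution.NormalDistribution;

public class NormalDistributionQuickStart {
    public static void main(String[] args) {
        NormalDistribution dist = new NormalDistribution(10.0, 2.0); // mean = 10, sd = 2

        double x = dist.sample();                             // one random draw
        double pdf = dist.density(10.0);                      // density at the mean
        double cdf = dist.cumulativeProbability(12.0);        // P(X <= 12)
        double q95 = dist.inverseCumulativeProbability(0.95); // 95% quantile

        System.out.printf("sample=%f pdf=%f cdf=%f q95=%f%n", x, pdf, cdf, q95);
    }
}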
From source file:eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java
public boolean calculateRunsTest(double[] values) {
    double alpha = 0.05;
    double n1 = 0.0;
    double n2 = 0.0;
    double runs = 1.0;
    double median = StatUtils.percentile(values, 50);
    boolean positive = true; // Starting variable for calculating runs (positive or negative)

    if (values[0] < median) {
        positive = false;
        n2++;
    } else {
        positive = true;
        n1++;
    }

    // Look for runs and count positive/negative values
    for (int i = 1; i < values.length; i++) {
        if (values[i] < median) {
            n2++;
            if (positive) {
                runs++;
                positive = false;
            }
        } else {
            n1++;
            if (!positive) {
                runs++;
                positive = true;
            }
        }
    }

    // Calculate Z value
    double expectedRuns = (2.0 * n1 * n2 / (n1 + n2)) + 1.0;
    double sR = Math.sqrt((2.0 * n1 * n2 * (2.0 * n1 * n2 - n1 - n2))
            / (Math.pow((n1 + n2), 2) * (n1 + n2 - 1.0)));
    double Z = (runs - expectedRuns) / sR;

    logger.debug("Runs = " + runs);
    logger.debug("Positive values = " + n1);
    logger.debug("Negative values = " + n2);
    logger.debug("Expected Runs = " + expectedRuns);
    logger.debug("sR = " + sR);
    logger.debug("Z score = " + Z);

    if ((runs - expectedRuns) == 0.0) {
        // H1 -> Data was not produced in a random manner (because expected runs are ok)
        logger.debug("Runs = Expected Runs --> Not random data");
        return false;
    }

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(1 - alpha / 2));
    logger.debug("Reject H0 if |Z|> " + myZRight);

    if (Math.abs(Z) > myZRight) {
        // H1 -> Data was not produced in a random manner
        return false;
    }
    // H0 -> Data was produced in a random manner
    return true;
}
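The acceptance region above comes from the standard normal quantile: for a two-sided test at alpha = 0.05 the critical value is roughly 1.96. A minimal sketch of just that step:

NormalDistribution stdNormal = new NormalDistribution(0, 1);
double alpha = 0.05;
double zCrit = Math.abs(stdNormal.inverseCumulativeProbability(1 - alpha / 2)); // ~1.96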
From source file:mtsar.processors.answer.KOSAggregator.java
private Map<Integer, Double> converge(Table<Integer, Integer, Short> graph, int kMax) {
    final RealDistribution distribution = new NormalDistribution(1, 1);

    Table<Integer, Integer, Double> ys = HashBasedTable.create(graph.rowKeySet().size(),
            graph.columnKeySet().size());
    for (final Table.Cell<Integer, Integer, Short> cell : graph.cellSet()) {
        ys.put(cell.getRowKey(), cell.getColumnKey(), distribution.sample());
    }

    for (int k = 1; k <= kMax; k++) {
        final Table<Integer, Integer, Double> xs = tasksUpdate(graph, ys);
        if (k < kMax)
            ys = workersUpdate(graph, xs);
    }

    final Map<Integer, Double> estimations = new HashMap<>();
    for (final Integer taskId : graph.rowKeySet()) {
        double sumProduct = 0.0;
        final Map<Integer, Double> workers = ys.row(taskId);
        for (final Map.Entry<Integer, Double> worker : workers.entrySet()) {
            sumProduct += graph.get(taskId, worker.getKey()) * worker.getValue();
        }
        estimations.put(taskId, sumProduct);
    }
    return estimations;
}
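Since converge() initializes its messages with random draws from NormalDistribution(1, 1), results differ from run to run. The RealDistribution interface offers reseedRandomGenerator for reproducibility; a small sketch, with an arbitrary seed:

RealDistribution init = new NormalDistribution(1, 1);
init.reseedRandomGenerator(42L); // fixed seed -> repeatable message initialization
double y0 = init.sample();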
From source file:com.mapr.synth.samplers.VectorSamplerTest.java
private boolean isNormal(double[] vx, double mean, double sd) {
    Arrays.sort(vx);
    NormalDistribution n = new NormalDistribution(mean, sd);
    double diff = 0;
    for (int i = 0; i < vx.length; i++) {
        double q = (double) i / (vx.length - 1);
        diff = Math.max(diff, Math.abs(q - n.cumulativeProbability(vx[i])));
    }
    return diff < 5.0 / Math.sqrt(vx.length);
}
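The loop computes a Kolmogorov-Smirnov-style maximum gap between the empirical and theoretical CDFs by hand. Assuming Commons Math 3.3 or later, the built-in test in org.apache.commons.math3.stat.inference does the same job; a hedged sketch:

KolmogorovSmirnovTest ks = new KolmogorovSmirnovTest();
NormalDistribution n = new NormalDistribution(0.0, 1.0);
double[] vx = n.sample(1000);                     // illustrative data
double d = ks.kolmogorovSmirnovStatistic(n, vx);  // max |F_empirical - F|
double p = ks.kolmogorovSmirnovTest(n, vx);       // p-value for H0: vx ~ n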
From source file:gedi.util.math.stat.distributions.NormalMixtureDistribution.java
public static NormalMixtureDistribution fit(NormalMixtureDistribution initialMixture, double[] data,
        final int maxIterations, final double threshold) {
    if (maxIterations < 1) {
        throw new NotStrictlyPositiveException(maxIterations);
    }
    if (threshold < Double.MIN_VALUE) {
        throw new NotStrictlyPositiveException(threshold);
    }

    final int n = data.length;
    final int k = initialMixture.getNumComponents();

    if (k == 1)
        return new NormalMixtureDistribution(new NormalDistribution[] {
                new NormalDistribution(new Mean().evaluate(data), new StandardDeviation().evaluate(data)) },
                new double[] { 1 });

    int numIterations = 0;
    double previousLogLikelihood = 0d;
    double logLikelihood = Double.NEGATIVE_INFINITY;

    // Initialize model to fit to initial mixture.
    NormalMixtureDistribution fittedModel = new NormalMixtureDistribution(initialMixture.components,
            initialMixture.mixing);

    while (numIterations++ <= maxIterations
            && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        previousLogLikelihood = logLikelihood;
        logLikelihood = 0d;

        // E-step: compute the data dependent parameters of the expectation function.
        // The percentage of row's total density between a row and a component
        final double[][] gamma = new double[n][k];
        // Sum of gamma for each component
        final double[] gammaSums = new double[k];

        for (int i = 0; i < n; i++) {
            final double rowDensity = fittedModel.density(data[i]);
            logLikelihood += FastMath.log(rowDensity);
            for (int j = 0; j < k; j++) {
                gamma[i][j] = fittedModel.mixing[j] * fittedModel.components[j].density(data[i]) / rowDensity;
                gammaSums[j] += gamma[i][j];
            }
        }
        logLikelihood /= n;
        // System.out.println(logLikelihood);

        // M-step: compute the new parameters based on the expectation function.
        final double[] newWeights = gammaSums.clone();
        ArrayUtils.mult(newWeights, 1.0 / n);

        NormalDistribution[] comp = new NormalDistribution[k];
        for (int j = 0; j < k; j++) {
            double m = 0;
            for (int i = 0; i < n; i++) {
                m += gamma[i][j] * data[i];
            }
            m /= gammaSums[j];

            double var = 0;
            for (int i = 0; i < n; i++) {
                double d = m - data[i];
                var += gamma[i][j] * d * d;
            }
            var /= gammaSums[j];

            comp[j] = new NormalDistribution(m, Math.sqrt(var));
        }

        // Update current model
        fittedModel = new NormalMixtureDistribution(comp, newWeights);
    }

    if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        // Did not converge before the maximum number of iterations
        throw new ConvergenceException();
    }

    return fittedModel;
}
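A hypothetical invocation of fit, using the same component-array constructor the method itself uses for the k == 1 case; the starting components, data, and tolerances are all illustrative:

double[] data = new NormalDistribution(0, 1).sample(500);
NormalMixtureDistribution init = new NormalMixtureDistribution(
        new NormalDistribution[] { new NormalDistribution(-1, 1), new NormalDistribution(1, 1) },
        new double[] { 0.5, 0.5 });
NormalMixtureDistribution fitted = NormalMixtureDistribution.fit(init, data, 1000, 1e-6);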
From source file:io.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
private void initDistribution() {
    BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();

    switch (distributionType) {
    case SEQUENTIAL:
        // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
        distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(),
                schema.getEnumeratedValues());
        break;
    case UNIFORM:
        distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
        break;
    case DISCRETE_UNIFORM:
        if (enumeratedValues == null) {
            enumeratedValues = new ArrayList<>();
            for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                Object val = convertType(i, type);
                enumeratedValues.add(val);
            }
        }
        // give them all equal probability, the library will normalize probabilities to sum to 1.0
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), 0.1));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case NORMAL:
        distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        break;
    case ROUNDED_NORMAL:
        NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        distribution = new RealRoundingDistribution(normalDist);
        break;
    case ZIPF:
        int cardinality;
        if (enumeratedValues == null) {
            Integer startInt = schema.getStartInt();
            cardinality = schema.getEndInt() - startInt;
            ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i)));
            }
        } else {
            cardinality = enumeratedValues.size();
            ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i)));
            }
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case ENUMERATED:
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    default:
        throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }

    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof EnumeratedDistribution) {
        ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed);
    }
}
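The method reseeds each distribution after construction; NormalDistribution can also be seeded up front through the RandomGenerator constructor overload (available since Commons Math 3.1). A brief sketch, with an illustrative seed:

RandomGenerator rng = new Well19937c(1234L);      // org.apache.commons.math3.random
NormalDistribution seeded = new NormalDistribution(rng, 0.0, 1.0);
double v = seeded.sample();                       // reproducible for a fixed seed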
From source file:eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java
public boolean isSimilarProportion(double[] valuesA, double[] valuesB) {
    double alpha = 0.05;

    // Change data a bit for avoiding issues with booleans 0/1
    /*
    for (int i = 0; i < valuesA.length; i++) {
        valuesA[i] = valuesA[i] + 1.0;
    }
    for (int i = 0; i < valuesB.length; i++) {
        valuesB[i] = valuesB[i] + 1.0;
    }
    */

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZLeft = -1 * Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    logger.debug("Boundaries: " + myZLeft + " to " + myZRight);

    // Calculate proportion for valuesA dataset
    int nA = valuesA.length;
    double successA = 0;
    for (int i = 0; i < nA; i++) {
        successA = successA + valuesA[i];
    }
    logger.debug("Success number for dataset A: " + successA);
    logger.debug("Number of records for A: " + nA);
    double pA = successA / nA;

    // Calculate proportion for valuesB dataset
    int nB = valuesB.length;
    double successB = 0;
    for (int i = 0; i < nB; i++) {
        successB = successB + valuesB[i];
    }
    logger.debug("Success number for dataset B: " + successB);
    logger.debug("Number of records for B: " + nB);
    double pB = successB / nB;

    // Calculate proportion similarity
    double pPool = (nA * pA + nB * pB) / (nA + nB);
    double zComp = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));
    logger.debug("pPooled = " + pPool);
    logger.debug("Z value = " + zComp);
    logger.debug("p-value = " + (1.0 - myNormal.cumulativeProbability(zComp)) * 2);

    // Determine if z score is in the region of acceptance
    if ((myZLeft <= zComp) && (zComp <= myZRight)) {
        return true;
    }
    return false;
}
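One detail worth flagging: the p-value debug line computes 2 * (1 - Phi(zComp)), which exceeds 1 whenever zComp is negative. Taking the absolute value first keeps the two-sided p-value in [0, 1]; a corrected sketch (an adjustment, not from the original source):

NormalDistribution stdNormal = new NormalDistribution(0, 1);
double z = -1.7;                                  // illustrative z score
double pValue = 2.0 * (1.0 - stdNormal.cumulativeProbability(Math.abs(z)));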
From source file:edu.byu.nlp.al.ActiveMeasurementSelector.java
public Collection<FlatInstance<SparseFeatureVector, Integer>> selectNext(int batchSize) {
    State currentModel = modelTrainedOn(dataset, trainingOperations, null);
    // ClassificationMeasurementModelExpectations expectations = ClassificationMeasurementModelExpectations.from(currentModel);
    ArgMinMaxTracker<Double, FlatInstance<SparseFeatureVector, Integer>> candidateTracker =
            new ArgMinMaxTracker<>(rnd, batchSize);

    int candidatesConsidered = 0;
    while (candidatesConsidered < minCandidates) {
        for (FlatInstance<SparseFeatureVector, Integer> candidate : candidates) {
            // don't repeat an answer we already have
            if (candidateTracker.argmax().contains(candidate)) {
                continue;
            }
            // skip a random subset of the available candidates (ensuring we evaluate SOMEONE)
            if (rnd.nextDouble() > thinningRate) {
                continue;
            }
            candidatesConsidered += 1;

            int annotatorIndex = candidate.getMeasurement().getAnnotator();
            String rawAnnotator = dataset.getInfo().getAnnotatorIdIndexer().get(annotatorIndex);
            MeasurementExpectation<Integer> candExpectation = ClassificationMeasurementExpectations
                    .fromMeasurement(candidate.getMeasurement(), dataset,
                            currentModel.getInstanceIndices(), currentModel.getLogNuY());

            // calculate parameters to p(tau|x,y,w)
            double mean_jk = candExpectation.sumOfExpectedValuesOfSigma();
            double alpha = currentModel.getNuSigma2()[annotatorIndex][0],
                    beta = currentModel.getNuSigma2()[annotatorIndex][1];
            double var_jk = beta / (alpha - 1); // point estimate (ignoring uncertainty in w)

            double mean_utility_jk = 0;
            for (int t = 0; t < numSamples; t++) {
                // double var_jk = 1.0/new GammaDistribution(alpha, beta).sample(); // sample variance (integrating over w). note: probably incorrect
                double tau_jkt = new NormalDistribution(mean_jk, Math.sqrt(var_jk)).sample();

                MeasurementPojo speculativeMeasurementPojo = candidate.getMeasurement().getPojo().copy();
                speculativeMeasurementPojo.value = tau_jkt;
                Measurement speculativeMeasurement = ClassificationMeasurementParser.pojoToMeasurement(
                        speculativeMeasurementPojo, rawAnnotator, candidate.getSource(),
                        candidate.getStartTimestamp(), candidate.getEndTimestamp(),
                        dataset.getInfo().getIndexers());
                FlatInstance<SparseFeatureVector, Integer> speculativeMeasurementInst =
                        new BasicFlatInstance<SparseFeatureVector, Integer>(
                                candidate.getInstanceId(), candidate.getSource(), annotatorIndex,
                                candidate.getAnnotation(), speculativeMeasurement,
                                candidate.getStartTimestamp(), candidate.getEndTimestamp());

                // add the speculative measurement and train
                Datasets.addAnnotationToDataset(dataset, speculativeMeasurementInst);
                State model = modelTrainedOn(dataset, CANDIDATE_TRAINING_OPS, currentModel);
                // remove the speculative measurement
                Datasets.removeAnnotationFromDataset(dataset, speculativeMeasurementInst);

                // calculate utility U=R-C of this model (where reward = accuracy or equivalently,
                // negative hamming loss) and cost is constant
                double[][] logNuY = model.getLogNuY();
                for (int i = 0; i < logNuY.length; i++) {
                    mean_utility_jk += Math.exp(DoubleArrays.max(logNuY[i]));
                }
            }
            mean_utility_jk /= numSamples;
            candidateTracker.offer(mean_utility_jk, candidate);
        }
    }

    // return top k (and remove from future candidates)
    logger.info("\n**********************************************************\n"
            + "******* Selected batch of size " + candidateTracker.argmax().size() + " *******\n"
            + "**********************************************************\n");
    candidates.removeAll(candidateTracker.argmax());
    return candidateTracker.argmax();
}
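The inner sampling loop builds a fresh NormalDistribution for every draw of tau even though mean_jk and var_jk are fixed within it; the distribution can be hoisted out and sampled repeatedly. A sketch with stand-in parameter values:

double meanJk = 0.3, varJk = 0.04;                // stand-ins for mean_jk and var_jk
NormalDistribution tauDist = new NormalDistribution(meanJk, Math.sqrt(varJk));
for (int t = 0; t < 10; t++) {
    double tauJkt = tauDist.sample();             // one construction, many draws
}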
From source file:edu.cmu.tetrad.search.IndTestRegressionAD.java
/**
 * Determines whether variable x is independent of variable y given a list of conditioning variables z.
 *
 * @param xVar  the one variable being compared.
 * @param yVar  the second variable being compared.
 * @param zList the list of conditioning variables.
 * @return true iff x _||_ y | z.
 * @throws RuntimeException if a matrix singularity is encountered.
 */
public boolean isIndependent(Node xVar, Node yVar, List<Node> zList) {
    if (zList == null) {
        throw new NullPointerException();
    }

    for (Node node : zList) {
        if (node == null) {
            throw new NullPointerException();
        }
    }

    TetradVector v1, v2;

    try {
        List<Node> regressors = new ArrayList<Node>();
        regressors.add(dataSet.getVariable(yVar.getName()));

        for (Node zVar : zList) {
            regressors.add(dataSet.getVariable(zVar.getName()));
        }

        RegressionDataset regression = new RegressionDataset(dataSet);
        RegressionResult result = regression.regress(xVar, regressors);
        v1 = result.getResiduals();

        v2 = regression.getResidualsWithoutFirstRegressor();

        // regressors.remove(dataSet.getVariable(yVar.getName()));
        // regression = new RegressionDataset(dataSet);
        // result = regression.regress(xVar, regressors);
        // v2 = result.getResiduals();
    } catch (Exception e) {
        throw e;
    }

    List<Double> d1 = new ArrayList<>();
    for (int i = 0; i < v1.size(); i++)
        d1.add(v1.get(i));

    List<Double> d2 = new ArrayList<>();
    double[] f2 = new double[v2.size()];
    for (int i = 0; i < v2.size(); i++) {
        d2.add(v2.get(i));
        f2[i] = v2.get(i);
    }

    double sd = StatUtils.sd(f2);
    // RealDistribution c2 = new EmpiricalCdf(d2);
    RealDistribution c2 = new NormalDistribution(0, sd);

    GeneralAndersonDarlingTest test = new GeneralAndersonDarlingTest(d1, c2);
    double aSquaredStar = test.getASquaredStar();

    System.out.println("A squared star = " + aSquaredStar + " p = " + test.getP());

    double p = test.getP();
    double aa2 = 1 - tanh(aSquaredStar);

    boolean independent = p > alpha;

    this.pvalue = aa2;

    if (independent) {
        TetradLogger.getInstance().log("independencies",
                SearchLogUtils.independenceFactMsg(xVar, yVar, zList, 0.));
    } else {
        TetradLogger.getInstance().log("dependencies",
                SearchLogUtils.dependenceFactMsg(xVar, yVar, zList, 0.));
    }

    return independent;
}
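A caveat on the new NormalDistribution(0, sd) line: per the constructor signature at the top of this page, sd must be strictly positive, so constant residuals (sd == 0) would make it throw NotStrictlyPositiveException. A guard one might add (an assumption, not part of the original test):

double[] residuals = { 0.1, -0.2, 0.05 };                 // stands in for f2 above
double sd = new StandardDeviation().evaluate(residuals);  // o.a.c.m.stat.descriptive.moment
if (sd <= 0) {
    throw new IllegalArgumentException("residuals have zero variance");
}
RealDistribution c2 = new NormalDistribution(0, sd);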
From source file:es.csic.iiia.planes.generator.Generator.java
private void addTasks(DProblem p) {
    ArrayList<DTask> tasks = new ArrayList<DTask>();

    // Create the tasks, randomly located
    for (int i = 0; i < config.getNum_tasks(); i++) {
        DTask t = new DTask();
        t.setX(r.nextInt(p.getWidth()));
        t.setY(r.nextInt(p.getHeight()));
        tasks.add(t);
        p.getOperators().get(r.nextInt(config.getNum_operators())).getTasks().add(t);
    }

    // Set task times. Use the crisis model for now.
    // How is it done?
    // 1.a Create a "base" uniform distribution between 0 and duration
    RealDistribution[] timeDistributions = new RealDistribution[config.getNum_crisis()];
    timeDistributions[0] = new UniformRealDistribution(0, config.getDuration());
    timeDistributions[0].reseedRandomGenerator(r.nextLong());

    // 1.b Create a "base" uniform distribution for the 2d space
    MultivariateRealDistribution[] spaceDistributions =
            new MultivariateRealDistribution[config.getNum_crisis()];
    spaceDistributions[0] = new MultivariateUniformDistribution(new double[] { 0, 0 },
            new double[] { p.getWidth(), p.getHeight() });
    spaceDistributions[0].reseedRandomGenerator(r.nextLong());

    // 2.a Create one gaussian distribution for each crisis, trying to
    // spread them out through time.
    for (int i = 1; i < config.getNum_crisis(); i++) {
        double mean = r.nextDouble() * config.getDuration();
        double std = (config.getDuration() / (double) config.getNum_crisis()) * 0.05;
        timeDistributions[i] = new NormalDistribution(mean, std);
        timeDistributions[i].reseedRandomGenerator(r.nextLong());
    }

    // 2.b Create one distribution for each crisis
    for (int i = 1; i < config.getNum_crisis(); i++) {
        spaceDistributions[i] = config.getTaskDistributionFactory().buildDistribution(config, r);
    }

    // 3. Uniformly sample tasks from these distributions
    int i = 0;
    for (DTask t : tasks) {
        final int j = (int) (r.nextDouble() * (config.getNum_crisis()));
        t.setnCrisis(j);

        // Time sampling
        /** UNCOMMENT TO MAKE TIMES RANDOMLY DISTRIBUTED
        long time = (long) timeDistributions[i].sample();
        while (time < 0 || time > config.getDuration()) {
            time = (long) timeDistributions[i].sample();
        }
        */
        // Set all tasks to appear at the start of the simulation. To change
        // this, delete the 0 and replace it with the long variable "time"
        t.setTime(0);

        // Divide simulation space into (a x a) sized blocks
        final Location[][] blocks = Location.buildBlocks(config.getBlockSize(),
                config.getWidthRegions(), config.getHeightRegions());

        // Position sampling
        double[] position = spaceDistributions[j].sample();

        /*
         * Sample a point until its position is not conflicting with
         * any previous point positions (i.e. it is not located in the same block
         * as a previously assigned point), AND it is a valid position
         * that falls inside the simulation space.
         */
        while (invalidPosition(position[0], position[1], p)) {
            //|| blockConflict(blocks, position[0], position[1], tasks, i)) {
            position = spaceDistributions[j].sample();
        }

        // int k = 0;
        // for (DTask t2 : tasks) {
        //     if (k < i) {
        //         // Check if the position sampled is within the simulation space
        //         while (invalidPosition(position[0], position[1], p)
        //                 || sameBlocks(blocks, position[0], position[1], t2)) {
        //             position = spaceDistributions[j].sample();
        //         }
        //     } else {
        //         while (invalidPosition(position[0], position[1], p)) {
        //             position = spaceDistributions[j].sample();
        //         }
        //     }
        //     k++;
        // }

        // while (invalidPosition(position[0], position[1], p)) {
        //     position = spaceDistributions[j].sample();
        // }

        // int posX;
        // int posY;
        // if (i < blocks[0].length) {
        //     posX = (int) blocks[0][i].getX();
        //     posY = (int) blocks[0][i].getY();
        // } else {
        //     posX = (int) blocks[1][0].getX();
        //     posY = (int) blocks[1][0].getY();
        // }

        t.setX((int) position[0]);
        t.setY((int) position[1]);
        // t.setX(posX);
        // t.setY(posY);

        i++;
    }

    // 4. Debug stuff
    //printTaskHistogram(tasks);
}
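The commented-out time-sampling block above sketches rejection sampling from the per-crisis Gaussians; a standalone version of that idea, with the mean, standard deviation, and duration bound all illustrative:

NormalDistribution timeDist = new NormalDistribution(500.0, 25.0);
long duration = 1000L;
long time = (long) timeDist.sample();
while (time < 0 || time > duration) {             // reject draws outside the window
    time = (long) timeDist.sample();
}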
From source file:com.itemanalysis.psychometrics.factoranalysis.GPArotation.java
private RealMatrix randomStart(int ncol) {
    NormalDistribution norm = new NormalDistribution(0.0, 1.0);
    RealMatrix T = new Array2DRowRealMatrix(ncol, ncol);
    for (int i = 0; i < ncol; i++) {
        for (int j = 0; j < ncol; j++) {
            T.setEntry(i, j, norm.sample());
        }
    }
    QRDecomposition qr = new QRDecomposition(T);
    return qr.getQ();
}
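The Q factor of a QR decomposition is orthonormal, which is what makes this a valid random rotation start. A quick standalone sanity check of that property:

NormalDistribution norm = new NormalDistribution(0.0, 1.0);
RealMatrix T = new Array2DRowRealMatrix(3, 3);
for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) {
        T.setEntry(i, j, norm.sample());
    }
}
RealMatrix Q = new QRDecomposition(T).getQ();
RealMatrix shouldBeIdentity = Q.transpose().multiply(Q); // approximately the 3x3 identity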