Example usage for org.apache.commons.math3.distribution NormalDistribution NormalDistribution

Introduction

This page collects example usages of the NormalDistribution(double mean, double sd) constructor from org.apache.commons.math3.distribution.

Prototype

public NormalDistribution(double mean, double sd) throws NotStrictlyPositiveException 

Document

Create a normal distribution using the given mean and standard deviation.
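
Below is a minimal sketch of the constructor in direct use. The class name and the sample values are illustrative, not taken from the examples that follow; the API calls are the standard Commons Math 3 ones.

import org.apache.commons.math3.distribution.NormalDistribution;

public class NormalDistributionDemo {
    public static void main(String[] args) {
        // Standard normal: mean 0, standard deviation 1
        NormalDistribution standard = new NormalDistribution(0, 1);

        // A custom distribution: mean 100, standard deviation 15
        NormalDistribution scores = new NormalDistribution(100, 15);

        System.out.println(standard.density(0.0));                        // pdf at x = 0
        System.out.println(scores.cumulativeProbability(115.0));          // P(X <= 115)
        System.out.println(standard.inverseCumulativeProbability(0.975)); // ~1.96
        System.out.println(scores.sample());                              // one random draw

        // A non-positive standard deviation is rejected:
        // new NormalDistribution(0, -1) throws NotStrictlyPositiveException
    }
}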

Usage

From source file:eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java
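
This example uses a standard normal distribution (mean 0, sd 1) to obtain the two-sided critical value for a Wald-Wolfowitz runs test of randomness: the null hypothesis of random data is rejected when |Z| exceeds the (1 - alpha/2) quantile.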

public boolean calculateRunsTest(double[] values) {
    double alpha = 0.05;
    double n1 = 0.0;
    double n2 = 0.0;
    double runs = 1.0;
    double median = StatUtils.percentile(values, 50);
    boolean positive = true;

    // Classify the first value as above (positive) or below (negative) the median
    if (values[0] < median) {
        positive = false;
        n2++;
    } else {
        positive = true;
        n1++;
    }

    // Look for runs and count positive/negative values
    for (int i = 1; i < values.length; i++) {
        if (values[i] < median) {
            n2++;
            if (positive) {
                runs++;
                positive = false;
            }
        } else {
            n1++;
            if (!positive) {
                runs++;
                positive = true;
            }
        }
    }

    // Calculate Z value
    double expectedRuns = (2.0 * n1 * n2 / (n1 + n2)) + 1.0;
    double sR = Math
            .sqrt((2.0 * n1 * n2 * (2.0 * n1 * n2 - n1 - n2)) / (Math.pow((n1 + n2), 2) * (n1 + n2 - 1.0)));
    double Z = (runs - expectedRuns) / sR;

    logger.debug("Runs = " + runs);
    logger.debug("Positive values = " + n1);
    logger.debug("Negative values = " + n2);
    logger.debug("Expected Runs = " + expectedRuns);
    logger.debug("sR = " + sR);
    logger.debug("Z score = " + Z);

    if ((runs - expectedRuns) == 0.0) {
        //H1 -> Data was not produced in a random manner (observed runs exactly equal the expected count)
        logger.debug("Runs = Expected Runs --> Not random data");
        return false;
    }

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(1 - alpha / 2));

    logger.debug("Reject H0 if |Z|> " + myZRight);

    if (Math.abs(Z) > myZRight) {
        //H1 -> Data was not produced in a random manner
        return false;
    }

    //H0 -> Data was produced in a random manner
    return true;
}

From source file:mtsar.processors.answer.KOSAggregator.java
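
Here NormalDistribution(1, 1) initializes the per-edge messages of an iterative answer-aggregation algorithm: every task/worker cell of the graph starts from a random draw before the alternating task and worker update steps.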

private Map<Integer, Double> converge(Table<Integer, Integer, Short> graph, int kMax) {
    final RealDistribution distribution = new NormalDistribution(1, 1);

    Table<Integer, Integer, Double> ys = HashBasedTable.create(graph.rowKeySet().size(),
            graph.columnKeySet().size());

    for (final Table.Cell<Integer, Integer, Short> cell : graph.cellSet()) {
        ys.put(cell.getRowKey(), cell.getColumnKey(), distribution.sample());
    }

    for (int k = 1; k <= kMax; k++) {
        final Table<Integer, Integer, Double> xs = tasksUpdate(graph, ys);
        if (k < kMax)
            ys = workersUpdate(graph, xs);
    }

    final Map<Integer, Double> estimations = new HashMap<>();

    for (final Integer taskId : graph.rowKeySet()) {
        double sumProduct = 0.0;

        final Map<Integer, Double> workers = ys.row(taskId);
        for (final Map.Entry<Integer, Double> worker : workers.entrySet()) {
            sumProduct += graph.get(taskId, worker.getKey()) * worker.getValue();
        }

        estimations.put(taskId, sumProduct);
    }

    return estimations;
}

From source file:com.mapr.synth.samplers.VectorSamplerTest.java
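
A normality check for test assertions: the sorted sample's empirical quantiles are compared point by point against the theoretical CDF of NormalDistribution(mean, sd), and the maximum deviation must stay below 5 / sqrt(n).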

private boolean isNormal(double[] vx, double mean, double sd) {
    Arrays.sort(vx);
    NormalDistribution n = new NormalDistribution(mean, sd);
    double diff = 0;
    for (int i = 0; i < vx.length; i++) {
        double q = (double) i / (vx.length - 1);
        diff = Math.max(diff, Math.abs(q - n.cumulativeProbability(vx[i])));
    }

    return diff < 5.0 / Math.sqrt(vx.length);
}

From source file:gedi.util.math.stat.distributions.NormalMixtureDistribution.java
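
EM fitting of a normal mixture: a single component is fitted directly from the sample mean and standard deviation, while the general case rebuilds each component in every M-step as a new NormalDistribution from its responsibility-weighted mean and variance.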

public static NormalMixtureDistribution fit(NormalMixtureDistribution initialMixture, double[] data,
        final int maxIterations, final double threshold) {

    if (maxIterations < 1) {
        throw new NotStrictlyPositiveException(maxIterations);
    }

    if (threshold < Double.MIN_VALUE) {
        throw new NotStrictlyPositiveException(threshold);
    }

    final int n = data.length;

    final int k = initialMixture.getNumComponents();

    if (k == 1)
        return new NormalMixtureDistribution(new NormalDistribution[] {
                new NormalDistribution(new Mean().evaluate(data), new StandardDeviation().evaluate(data)) },
                new double[] { 1 });

    int numIterations = 0;
    double previousLogLikelihood = 0d;

    double logLikelihood = Double.NEGATIVE_INFINITY;

    // Initialize model to fit to initial mixture.
    NormalMixtureDistribution fittedModel = new NormalMixtureDistribution(initialMixture.components,
            initialMixture.mixing);

    while (numIterations++ <= maxIterations
            && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        previousLogLikelihood = logLikelihood;
        logLikelihood = 0d;

        // E-step: compute the data dependent parameters of the expectation
        // function.
        // The percentage of row's total density between a row and a
        // component
        final double[][] gamma = new double[n][k];
        // Sum of gamma for each component
        final double[] gammaSums = new double[k];

        for (int i = 0; i < n; i++) {
            final double rowDensity = fittedModel.density(data[i]);
            logLikelihood += FastMath.log(rowDensity);

            for (int j = 0; j < k; j++) {
                gamma[i][j] = fittedModel.mixing[j] * fittedModel.components[j].density(data[i]) / rowDensity;
                gammaSums[j] += gamma[i][j];
            }
        }
        logLikelihood /= n;
        //         System.out.println(logLikelihood);

        // M-step: compute the new parameters based on the expectation
        // function.
        final double[] newWeights = gammaSums.clone();
        ArrayUtils.mult(newWeights, 1.0 / n);

        NormalDistribution[] comp = new NormalDistribution[k];
        for (int j = 0; j < k; j++) {
            double m = 0;
            for (int i = 0; i < n; i++) {
                m += gamma[i][j] * data[i];
            }
            m /= gammaSums[j];

            double var = 0;
            for (int i = 0; i < n; i++) {
                double d = m - data[i];
                var += gamma[i][j] * d * d;
            }
            var /= gammaSums[j];

            comp[j] = new NormalDistribution(m, Math.sqrt(var));
        }

        // Update current model
        fittedModel = new NormalMixtureDistribution(comp, newWeights);
    }

    if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        // Did not converge before the maximum number of iterations
        throw new ConvergenceException();
    }

    return fittedModel;
}

From source file:io.druid.benchmark.datagen.BenchmarkColumnValueGenerator.java
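
A benchmark data generator that maps a column schema's distribution type to a concrete distribution; the NORMAL and ROUNDED_NORMAL cases build a NormalDistribution from the schema's mean and standard deviation.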

private void initDistribution() {
    BenchmarkColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();

    switch (distributionType) {
    case SEQUENTIAL:
        // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
        distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(),
                schema.getEnumeratedValues());
        break;
    case UNIFORM:
        distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
        break;
    case DISCRETE_UNIFORM:
        if (enumeratedValues == null) {
            enumeratedValues = new ArrayList<>();
            for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                Object val = convertType(i, type);
                enumeratedValues.add(val);
            }
        }
        // give them all equal probability; the library will normalize probabilities to sum to 1.0
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), 0.1));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case NORMAL:
        distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        break;
    case ROUNDED_NORMAL:
        NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
        distribution = new RealRoundingDistribution(normalDist);
        break;
    case ZIPF:
        int cardinality;
        if (enumeratedValues == null) {
            Integer startInt = schema.getStartInt();
            cardinality = schema.getEndInt() - startInt;
            ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i)));
            }
        } else {
            cardinality = enumeratedValues.size();
            ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
            for (int i = 0; i < cardinality; i++) {
                probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i)));
            }
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;
    case ENUMERATED:
        for (int i = 0; i < enumeratedValues.size(); i++) {
            probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
        }
        distribution = new EnumeratedTreeDistribution<>(probabilities);
        break;

    default:
        throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }

    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof EnumeratedDistribution) {
        ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed);
    }
}

From source file:eu.betaas.taas.securitymanager.taastrustmanager.taastrustcalculator.StatisticsCalculator.java
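
A two-proportion z-test: the standard normal's inverse CDF defines the acceptance region at significance level alpha, and the pooled-proportion z statistic for the two datasets is checked against it.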

public boolean isSimilarProportion(double[] valuesA, double[] valuesB) {
    double alpha = 0.05;

    // Shift the data to avoid issues with boolean 0/1 values (currently disabled)
    /*for (int i=0; i<valuesA.length; i++)
    {
       valuesA[i] = valuesA[i] + 1.0;
    }
    for (int i=0; i<valuesB.length; i++)
    {
       valuesB[i] = valuesB[i] + 1.0;
    }*/

    // Calculate region of acceptance
    NormalDistribution myNormal = new NormalDistribution(0, 1);
    double myZLeft = -1 * Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));
    double myZRight = Math.abs(myNormal.inverseCumulativeProbability(alpha / 2));

    logger.debug("Boundaries: " + myZLeft + " to " + myZRight);

    // Calculate proportion for valuesA dataset
    int nA = valuesA.length;
    double successA = 0;
    for (int i = 0; i < nA; i++) {
        successA = successA + valuesA[i];
    }

    logger.debug("Success number for dataset A: " + successA);
    logger.debug("Number of records for A: " + nA);

    double pA = successA / nA;

    // Calculate proportion for valuesB dataset
    int nB = valuesB.length;
    double successB = 0;
    for (int i = 0; i < nB; i++) {
        successB = successB + valuesB[i];
    }

    logger.debug("Success number for dataset B: " + successB);
    logger.debug("Number of records for B: " + nB);

    double pB = successB / nB;

    // Calculate proportion similarity
    double pPool = (nA * pA + nB * pB) / (nA + nB);
    double zComp = (pA - pB) / Math.sqrt(pPool * (1.0 - pPool) * (1.0 / nA + 1.0 / nB));

    logger.debug("pPooled = " + pPool);
    logger.debug("Z value = " + zComp);
    logger.debug("p-value = " + (1.0 - myNormal.cumulativeProbability(zComp)) * 2);

    // Determine if z score is in the region of acceptance
    if ((myZLeft <= zComp) && (zComp <= myZRight)) {
        return true;
    }

    return false;
}

From source file:edu.byu.nlp.al.ActiveMeasurementSelector.java
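
Active selection of measurements: for each candidate, speculative measurement values tau are drawn from NormalDistribution(mean_jk, sqrt(var_jk)), the model is retrained with each draw, and the resulting utilities are averaged over numSamples to score the candidate.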

public Collection<FlatInstance<SparseFeatureVector, Integer>> selectNext(int batchSize) {
    State currentModel = modelTrainedOn(dataset, trainingOperations, null);
    //    ClassificationMeasurementModelExpectations expectations = ClassificationMeasurementModelExpectations.from(currentModel);
    ArgMinMaxTracker<Double, FlatInstance<SparseFeatureVector, Integer>> candidateTracker = new ArgMinMaxTracker<>(
            rnd, batchSize);

    int candidatesConsidered = 0;
    while (candidatesConsidered < minCandidates) {
        for (FlatInstance<SparseFeatureVector, Integer> candidate : candidates) {
            // don't repeat an answer we already have
            if (candidateTracker.argmax().contains(candidate)) {
                continue;
            }
            // skip a random subset of the available candidates (ensuring we evaluate SOMEONE) 
            if (rnd.nextDouble() > thinningRate) {
                continue;
            }
            candidatesConsidered += 1;

            int annotatorIndex = candidate.getMeasurement().getAnnotator();
            String rawAnnotator = dataset.getInfo().getAnnotatorIdIndexer().get(annotatorIndex);
            MeasurementExpectation<Integer> candExpectation = ClassificationMeasurementExpectations
                    .fromMeasurement(candidate.getMeasurement(), dataset, currentModel.getInstanceIndices(),
                            currentModel.getLogNuY());

            // calculate parameters to p(tau|x,y,w)
            double mean_jk = candExpectation.sumOfExpectedValuesOfSigma();
            double alpha = currentModel.getNuSigma2()[annotatorIndex][0],
                    beta = currentModel.getNuSigma2()[annotatorIndex][1];
            double var_jk = beta / (alpha - 1); // point estimate (ignoring uncertainty in w)

            double mean_utility_jk = 0;
            for (int t = 0; t < numSamples; t++) {
                //        double var_jk = 1.0/new GammaDistribution(alpha, beta).sample(); // sample variance (integrating over w). note: probably incorrect
                double tau_jkt = new NormalDistribution(mean_jk, Math.sqrt(var_jk)).sample();
                MeasurementPojo speculativeMeasurementPojo = candidate.getMeasurement().getPojo().copy();
                speculativeMeasurementPojo.value = tau_jkt;
                Measurement speculativeMeasurement = ClassificationMeasurementParser.pojoToMeasurement(
                        speculativeMeasurementPojo, rawAnnotator, candidate.getSource(),
                        candidate.getStartTimestamp(), candidate.getEndTimestamp(),
                        dataset.getInfo().getIndexers());
                FlatInstance<SparseFeatureVector, Integer> speculativeMeasurementInst = new BasicFlatInstance<SparseFeatureVector, Integer>(
                        candidate.getInstanceId(), candidate.getSource(), annotatorIndex,
                        candidate.getAnnotation(), speculativeMeasurement, candidate.getStartTimestamp(),
                        candidate.getEndTimestamp());

                // add the speculative measurement and train
                Datasets.addAnnotationToDataset(dataset, speculativeMeasurementInst);
                State model = modelTrainedOn(dataset, CANDIDATE_TRAINING_OPS, currentModel);
                // remove the speculative measurement
                Datasets.removeAnnotationFromDataset(dataset, speculativeMeasurementInst);

                // calculate utility U=R-C of this model (where reward = accuracy or equivalently, negative hamming loss)
                // and cost is constant
                double[][] logNuY = model.getLogNuY();
                for (int i = 0; i < logNuY.length; i++) {
                    mean_utility_jk += Math.exp(DoubleArrays.max(logNuY[i]));
                }
            }
            mean_utility_jk /= numSamples;
            candidateTracker.offer(mean_utility_jk, candidate);

        }
    }

    // return top k (and remove from future candidates)
    logger.info("\n**********************************************************\n"
            + "******* Selected batch of size " + candidateTracker.argmax().size() + " *******\n"
            + "**********************************************************\n");
    candidates.removeAll(candidateTracker.argmax());
    return candidateTracker.argmax();
}

From source file:edu.cmu.tetrad.search.IndTestRegressionAD.java
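
A regression-based conditional-independence test: the residuals from regressing x on {y, z} are compared, via a general Anderson-Darling test, against a zero-mean NormalDistribution whose standard deviation is estimated from the residuals obtained without the first regressor.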

/**
 * Determines whether variable x is independent of variable y given a list of conditioning variables z.
 *
 * @param xVar  the one variable being compared.
 * @param yVar  the second variable being compared.
 * @param zList the list of conditioning variables.
 * @return true iff x _||_ y | z.
 * @throws RuntimeException if a matrix singularity is encountered.
 */
public boolean isIndependent(Node xVar, Node yVar, List<Node> zList) {
    if (zList == null) {
        throw new NullPointerException();
    }

    for (Node node : zList) {
        if (node == null) {
            throw new NullPointerException();
        }
    }

    TetradVector v1, v2;

    try {
        List<Node> regressors = new ArrayList<Node>();
        regressors.add(dataSet.getVariable(yVar.getName()));

        for (Node zVar : zList) {
            regressors.add(dataSet.getVariable(zVar.getName()));
        }

        RegressionDataset regression = new RegressionDataset(dataSet);
        RegressionResult result = regression.regress(xVar, regressors);
        v1 = result.getResiduals();

        v2 = regression.getResidualsWithoutFirstRegressor();

        //            regressors.remove(dataSet.getVariable(yVar.getName()));
        //            regression = new RegressionDataset(dataSet);
        //            result = regression.regress(xVar, regressors);
        //            v2 = result.getResiduals();
    } catch (Exception e) {
        throw e;
    }

    List<Double> d1 = new ArrayList<>();
    for (int i = 0; i < v1.size(); i++)
        d1.add(v1.get(i));

    List<Double> d2 = new ArrayList<>();
    double[] f2 = new double[v2.size()];
    for (int i = 0; i < v2.size(); i++) {
        d2.add(v2.get(i));
        f2[i] = v2.get(i);
    }

    double sd = StatUtils.sd(f2);

    //        RealDistribution c2 = new EmpiricalCdf(d2);
    RealDistribution c2 = new NormalDistribution(0, sd);

    GeneralAndersonDarlingTest test = new GeneralAndersonDarlingTest(d1, c2);
    double aSquaredStar = test.getASquaredStar();
    System.out.println("A squared star = " + aSquaredStar + " p = " + test.getP());
    double p = test.getP();
    double aa2 = 1 - tanh(aSquaredStar);
    boolean independent = p > alpha;

    this.pvalue = aa2;

    if (independent) {
        TetradLogger.getInstance().log("independencies",
                SearchLogUtils.independenceFactMsg(xVar, yVar, zList, 0.));
    } else {
        TetradLogger.getInstance().log("dependencies", SearchLogUtils.dependenceFactMsg(xVar, yVar, zList, 0.));
    }

    return independent;
}

From source file:es.csic.iiia.planes.generator.Generator.java
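
A scenario generator: each crisis after the first gets a Gaussian time distribution whose mean is drawn uniformly over the simulation duration and whose standard deviation is 5% of the duration divided by the number of crises.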

private void addTasks(DProblem p) {
    ArrayList<DTask> tasks = new ArrayList<DTask>();

    // Create the tasks, randomly located
    for (int i = 0; i < config.getNum_tasks(); i++) {
        DTask t = new DTask();
        t.setX(r.nextInt(p.getWidth()));
        t.setY(r.nextInt(p.getHeight()));
        tasks.add(t);
        p.getOperators().get(r.nextInt(config.getNum_operators())).getTasks().add(t);
    }

    // Set task times. Use the crisis model for now.

    // How is it done?

    // 1.a Create a "base" uniform distribution between 0 and duration
    RealDistribution[] timeDistributions = new RealDistribution[config.getNum_crisis()];
    timeDistributions[0] = new UniformRealDistribution(0, config.getDuration());
    timeDistributions[0].reseedRandomGenerator(r.nextLong());

    // 1.b Create a "base" uniform distribution for the 2d space
    MultivariateRealDistribution[] spaceDistributions = new MultivariateRealDistribution[config
            .getNum_crisis()];
    spaceDistributions[0] = new MultivariateUniformDistribution(new double[] { 0, 0 },
            new double[] { p.getWidth(), p.getHeight() });
    spaceDistributions[0].reseedRandomGenerator(r.nextLong());

    // 2.a Create one gaussian distribution for each crisis, trying to
    //    spread them out through time.
    for (int i = 1; i < config.getNum_crisis(); i++) {
        double mean = r.nextDouble() * config.getDuration();
        double std = (config.getDuration() / (double) config.getNum_crisis()) * 0.05;
        timeDistributions[i] = new NormalDistribution(mean, std);
        timeDistributions[i].reseedRandomGenerator(r.nextLong());
    }

    // 2.b Create one distribution for each crisis
    for (int i = 1; i < config.getNum_crisis(); i++) {
        spaceDistributions[i] = config.getTaskDistributionFactory().buildDistribution(config, r);
    }

    // 3. Uniformly sample tasks from these distributions
    int i = 0;
    for (DTask t : tasks) {
        final int j = (int) (r.nextDouble() * (config.getNum_crisis()));
        t.setnCrisis(j);

        // Time sampling
        /** UNCOMMENT TO MAKE TIMES RANDOMLY DISTRIBUTED
        long time = (long)timeDistributions[i].sample();
        while (time < 0 || time > config.getDuration()) {
        time = (long)timeDistributions[i].sample();
        }
        */
        // Set all tasks to appear at the start of the simulation. To change
        // this, delete the 0 and replace it with the long variable "time"
        t.setTime(0);

        // Divide simulation space into (a x a) sized blocks
        final Location[][] blocks = Location.buildBlocks(config.getBlockSize(), config.getWidthRegions(),
                config.getHeightRegions());

        // Position sampling
        double[] position = spaceDistributions[j].sample();
        /*
        * Sample a point until its position is not conflicting with
        * any previous point positions (i.e. it is not located in the same block
        * as a previously assigned point), AND it is a valid position
        * that falls inside the simulation space.
        */

        while (invalidPosition(position[0], position[1], p)) {
            //|| blockConflict(blocks, position[0], position[1], tasks, i)) {
            position = spaceDistributions[j].sample();
        }
        //            int k = 0;
        //            for (DTask t2: tasks) {
        //                if(k < i) {
        //                    // Check if the position sampled is within the simulation space
        //                    while (invalidPosition(position[0], position[1], p)
        //                            || sameBlocks(blocks, position[0], position[1], t2)) {
        //                        position = spaceDistributions[j].sample();
        //                    }
        //                }
        //                else {
        //                    while (invalidPosition(position[0], position[1], p)) {
        //                        position = spaceDistributions[j].sample();
        //                    }
        //                }
        //                k++;
        //            }
        //            while (invalidPosition(position[0], position[1], p)) {
        //                position = spaceDistributions[j].sample();
        //            }

        //            int posX;
        //            int posY;
        //            if (i < blocks[0].length) {
        //                posX = (int)blocks[0][i].getX();
        //                posY = (int)blocks[0][i].getY();
        //            }
        //            else {
        //                posX = (int)blocks[1][0].getX();
        //                posY = (int)blocks[1][0].getY();
        //            }

        t.setX((int) position[0]);
        t.setY((int) position[1]);

        //            t.setX(posX);
        //            t.setY(posY);
        i++;
    }

    // 4. Debug stuff
    //printTaskHistogram(tasks);
}

From source file:com.itemanalysis.psychometrics.factoranalysis.GPArotation.java
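
A random orthogonal starting rotation for factor analysis: the matrix is filled with standard normal draws and the Q factor of its QR decomposition is returned.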

private RealMatrix randomStart(int ncol) {
    NormalDistribution norm = new NormalDistribution(0.0, 1.0);
    RealMatrix T = new Array2DRowRealMatrix(ncol, ncol);
    for (int i = 0; i < ncol; i++) {
        for (int j = 0; j < ncol; j++) {
            T.setEntry(i, j, norm.sample());
        }
    }
    QRDecomposition qr = new QRDecomposition(T);
    return qr.getQ();
}