Example usage for org.apache.commons.math.distribution.ChiSquaredDistributionImpl (constructor)

Introduction

This page collects example usages of the org.apache.commons.math.distribution.ChiSquaredDistributionImpl constructor.

Prototype

public ChiSquaredDistributionImpl(double df) 

Document

Create a Chi-Squared distribution with the given degrees of freedom.
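
A minimal usage sketch (Commons Math 2.x; the class name, degrees of freedom and evaluation points below are arbitrary example values):

import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;

public class ChiSquaredExample {
    public static void main(String[] args) throws MathException {
        // chi-squared distribution with 3 degrees of freedom (example value)
        ChiSquaredDistributionImpl chi = new ChiSquaredDistributionImpl(3.0);

        // P(X <= 7.81), used e.g. to turn a test statistic into a p-value
        double cdf = chi.cumulativeProbability(7.81);
        double pValue = 1.0 - cdf;

        // 95% quantile, used e.g. as a critical value or distance threshold
        double critical = chi.inverseCumulativeProbability(0.95);

        System.out.println("p-value: " + pValue + ", critical value: " + critical);
    }
}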

Usage

From source file:geogebra.common.kernel.statistics.AlgoChiSquaredTest.java

/**
 * @param df
 *            degree of freedom
 * @return implementation of ChiSquaredDistribution for given degree of
 *         freedom
 */
ChiSquaredDistribution getChiSquaredDistribution(double df) {
    if (chisquared == null || chisquared.getDegreesOfFreedom() != df)
        chisquared = new ChiSquaredDistributionImpl(df);

    return chisquared;
}
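
This getter lazily builds the distribution and re-creates it only when the requested degrees of freedom differ from the cached instance, so repeated evaluations with the same df reuse a single ChiSquaredDistributionImpl object.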

From source file:mlflex.EvaluationMetrics.java

private double CalculateLogRankStatisticTwoGroups(SurvivalGroups survivalsList) throws Exception {
    ArrayList<Double> allDiff1 = new ArrayList<Double>();
    ArrayList<Double> allVar = new ArrayList<Double>();

    for (double time : new ArrayList<Double>(
            Lists.Sort(new ArrayList(new HashSet<Double>(survivalsList.GetObservedTimes()))))) {
        double n = Lists.GreaterThan(survivalsList.GetAllTimes(), time, true).size();
        double n1 = Lists.GreaterThan(survivalsList.GetGroup(0).GetAllTimes(), time, true).size();
        double n2 = Lists.GreaterThan(survivalsList.GetGroup(1).GetAllTimes(), time, true).size();
        double e = Lists.GetNumEqualTo(survivalsList.GetObservedTimes(), time);
        double e1 = Lists.GetNumEqualTo(survivalsList.GetGroup(0).GetObservedTimes(), time);
        double exp1 = e * (n1 / n);
        double var = n <= 1.0 ? 0.0 : (n1 * n2 * e * (n - e)) / (n * n * (n - 1));

        allDiff1.add(e1 - exp1);
        allVar.add(var);
    }

    double logRankStatistic = Math.pow(MathUtility.Sum(allDiff1), 2.0) / MathUtility.Sum(allVar);

    org.apache.commons.math.distribution.ChiSquaredDistributionImpl chi = new ChiSquaredDistributionImpl(
            survivalsList.Size() - 1);
    double chiSquareP = 1 - chi.cumulativeProbability(logRankStatistic);

    return chiSquareP;
}
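
Here the distribution turns the log-rank statistic into a p-value: the statistic is the squared sum of observed-minus-expected events in the first group divided by the summed variances, it is referred to a chi-squared distribution with survivalsList.Size() - 1 degrees of freedom, and the returned value is the upper-tail probability 1 - CDF at that statistic.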

From source file:edu.utexas.cs.tactex.servercustomers.factoredcustomer.ProbabilityDistribution.java

ProbabilityDistribution(FactoredCustomerService service, Element xml) {
    if (null == randomSeedRepo)
        randomSeedRepo = (RandomSeedRepo) SpringApplicationContext.getBean("randomSeedRepo");

    type = Enum.valueOf(DistType.class, xml.getAttribute("distribution"));
    switch (type) {
    case POINTMASS:
    case DEGENERATE:
        param1 = Double.parseDouble(xml.getAttribute("value"));
        sampler = new DegenerateSampler(param1);
        break;
    case UNIFORM:
        param1 = Double.parseDouble(xml.getAttribute("low"));
        param2 = Double.parseDouble(xml.getAttribute("high"));
        sampler = new UniformSampler(param1, param2);
        break;
    case INTERVAL:
        param1 = Double.parseDouble(xml.getAttribute("mean"));
        param2 = Double.parseDouble(xml.getAttribute("stdDev"));
        param3 = Double.parseDouble(xml.getAttribute("low"));
        param4 = Double.parseDouble(xml.getAttribute("high"));
        sampler = new IntervalSampler(param1, param2, param3, param4);
        break;
    case NORMAL:
    case GAUSSIAN:
        param1 = Double.parseDouble(xml.getAttribute("mean"));
        param2 = Double.parseDouble(xml.getAttribute("stdDev"));
        sampler = new ContinuousSampler(new NormalDistributionImpl(param1, param2));
        break;
    case STDNORMAL:
        param1 = 0;
        param2 = 1;
        sampler = new ContinuousSampler(new NormalDistributionImpl(param1, param2));
        break;
    case LOGNORMAL:
        param1 = Double.parseDouble(xml.getAttribute("expMean"));
        param2 = Double.parseDouble(xml.getAttribute("expStdDev"));
        sampler = new LogNormalSampler(param1, param2);
        break;
    case CAUCHY:
        param1 = Double.parseDouble(xml.getAttribute("median"));
        param2 = Double.parseDouble(xml.getAttribute("scale"));
        sampler = new ContinuousSampler(new CauchyDistributionImpl(param1, param2));
        break;
    case BETA:
        param1 = Double.parseDouble(xml.getAttribute("alpha"));
        param2 = Double.parseDouble(xml.getAttribute("beta"));
        sampler = new ContinuousSampler(new BetaDistributionImpl(param1, param2));
        break;
    case BINOMIAL:
        param1 = Double.parseDouble(xml.getAttribute("trials"));
        param2 = Double.parseDouble(xml.getAttribute("success"));
        sampler = new DiscreteSampler(new BinomialDistributionImpl((int) param1, param2));
        break;
    case POISSON:
        param1 = Double.parseDouble(xml.getAttribute("lambda"));
        sampler = new DiscreteSampler(new PoissonDistributionImpl(param1));
        break;
    case CHISQUARED:
        param1 = Double.parseDouble(xml.getAttribute("dof"));
        sampler = new ContinuousSampler(new ChiSquaredDistributionImpl(param1));
        break;
    case EXPONENTIAL:
        param1 = Double.parseDouble(xml.getAttribute("mean"));
        sampler = new ContinuousSampler(new ExponentialDistributionImpl(param1));
        break;
    case GAMMA:
        param1 = Double.parseDouble(xml.getAttribute("alpha"));
        param2 = Double.parseDouble(xml.getAttribute("beta"));
        sampler = new ContinuousSampler(new GammaDistributionImpl(param1, param2));
        break;
    case WEIBULL:
        param1 = Double.parseDouble(xml.getAttribute("alpha"));
        param2 = Double.parseDouble(xml.getAttribute("beta"));
        sampler = new ContinuousSampler(new WeibullDistributionImpl(param1, param2));
        break;
    case STUDENT:
        param1 = Double.parseDouble(xml.getAttribute("dof"));
        sampler = new ContinuousSampler(new TDistributionImpl(param1));
        break;
    case SNEDECOR:
        param1 = Double.parseDouble(xml.getAttribute("d1"));
        param2 = Double.parseDouble(xml.getAttribute("d2"));
        sampler = new ContinuousSampler(new FDistributionImpl(param1, param2));
        break;
    default:
        throw new Error("Invalid probability distribution type!");
    }
    sampler.reseedRandomGenerator(service.getRandomSeedRepo()
            .getRandomSeed("factoredcustomer.ProbabilityDistribution", SeedIdGenerator.getId(), "Sampler")
            .getValue());
}
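
In the CHISQUARED case above, the degrees of freedom are read from the element's dof attribute and the resulting ChiSquaredDistributionImpl is wrapped in a ContinuousSampler; like every other sampler created in this constructor, it is then reseeded from the random seed repository.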

From source file:geogebra.kernel.statistics.AlgoDistribution.java

ChiSquaredDistribution getChiSquaredDistribution(double param) {
    if (chisquared == null)
        chisquared = new ChiSquaredDistributionImpl(param);
    else {
        chisquared.setDegreesOfFreedom(param);
    }
    return chisquared;
}

From source file:de.dfki.madm.anomalydetection.evaluator.cluster_based.CMGOSEvaluator.java

/**
 * Main Algorithm
 * @throws OperatorException 
 */
public double[] evaluate() throws OperatorException {
    // remove small clusters
    boolean[] removed_cluster = new boolean[this.centroids.length];
    double limit = percentage * points.length / centroids.length;
    removed_cluster = this.reassignPoints(removed_cluster, limit);

    int TotalNumberOfPoints = points.length;
    int NumberOfCluster = this.centroids.length;
    int PointDimension = this.points[0].length;

    // remove clusters with less points than dimensions
    removed_cluster = this.reassignPoints(removed_cluster, PointDimension);
    int[][] remove = new int[NumberOfCluster][PointDimension];

    // assign distance limit -1 for error
    double DistanceLimit = -1;
    ChiSquaredDistributionImpl chi = new ChiSquaredDistributionImpl(points[0].length);
    try {
        DistanceLimit = chi.inverseCumulativeProbability(this.probability);
    } catch (MathException e) {
        System.out.println(e);
    }

    /* compute anomaly score */
    double[] result = new double[TotalNumberOfPoints];

    int[] workBelongsToCluster = this.belongsToCluster.clone();
    int[] workClusterSize = this.clusterSize.clone();

    double[] DistanceLimitPerCluster = new double[NumberOfCluster];
    Arrays.fill(DistanceLimitPerCluster, DistanceLimit);

    this.CovariancematrixPerCluster = new CovarianceMatrix[NumberOfCluster];

    // in the case of fastMCD, make sure we don't remove any outliers and recompute
    // sanity check from user interface
    if (this.red == METHOD_COV_MCD)
        this.removeRuns = 0;

    for (int rem = 0; rem <= this.removeRuns; rem++) {

        // Associate instances to a cluster
        double[][][] ClusterWithPointsAssociation = new double[NumberOfCluster][][];
        int[] nextId = new int[NumberOfCluster];
        for (int ClusterId = 0; ClusterId < NumberOfCluster; ClusterId++) {
            ClusterWithPointsAssociation[ClusterId] = new double[workClusterSize[ClusterId]][PointDimension];
        }

        for (int PointId = 0; PointId < TotalNumberOfPoints; PointId++) {
            int ClusterId = workBelongsToCluster[PointId];
            if (ClusterId < NumberOfCluster) {
                ClusterWithPointsAssociation[ClusterId][nextId[ClusterId]] = this.points[PointId];
                nextId[ClusterId]++;
            }
        }

        // Subtract mean from all
        if (rem == 0) {
            for (int ClusterId = 0; ClusterId < NumberOfCluster; ClusterId++) {
                double[] erw = new double[PointDimension];

                for (int PointId = 0; PointId < ClusterWithPointsAssociation[ClusterId].length; PointId++) {
                    for (int PointAttribute = 0; PointAttribute < ClusterWithPointsAssociation[ClusterId][PointId].length; PointAttribute++) {
                        erw[PointAttribute] += ClusterWithPointsAssociation[ClusterId][PointId][PointAttribute];
                    }
                }
                for (int j1 = 0; j1 < erw.length; j1++) {
                    erw[j1] = 1.0 / ClusterWithPointsAssociation[ClusterId].length * erw[j1];
                }

                for (int PointId = 0; PointId < ClusterWithPointsAssociation[ClusterId].length; PointId++) {
                    for (int j1 = 0; j1 < ClusterWithPointsAssociation[ClusterId][PointId].length; j1++) {
                        ClusterWithPointsAssociation[ClusterId][PointId][j1] -= erw[j1];
                    }
                }

            }
        }

        // Calculate covariance for each cluster
        for (int ClusterId = 0; ClusterId < NumberOfCluster; ClusterId++) {
            if (workClusterSize[ClusterId] > 0) {
                double[][] data = null;
                // use all data instances
                if (this.red == METHOD_COV_MCD || this.cov_sampling == -1
                        || this.cov_sampling > ClusterWithPointsAssociation[ClusterId].length) {
                    data = ClusterWithPointsAssociation[ClusterId];
                }
                // sample data
                else {
                    data = new double[this.cov_sampling][ClusterWithPointsAssociation[ClusterId][0].length];
                    int i = 0;
                    for (Integer index : generator.nextIntSetWithRange(0,
                            ClusterWithPointsAssociation[ClusterId].length, this.cov_sampling)) {
                        data[i] = ClusterWithPointsAssociation[ClusterId][index];
                        i++;
                    }
                }
                // in the case of MCD, do it
                if (this.red == METHOD_COV_MCD) {
                    // we compute h from the normal probability
                    if (this.h == -1)
                        this.h = (int) Math.ceil(this.probability * (float) data.length);
                    CovariancematrixPerCluster[ClusterId] = fastMDC(data, this.h);
                }
                // Regularization and Reduction
                else {
                    if (CovariancematrixPerCluster[ClusterId] == null || rem < this.removeRuns) {
                        boolean change = false;
                        int count = 0;
                        // Reduction Method
                        if (this.red == METHOD_COV_REDUCTION) {
                            do {
                                change = false;
                                int ind = -1;
                                // look for attribute with only one value
                                for (int i = 0; i < data[0].length; i++) {
                                    change = true;
                                    ind = i;
                                    for (int j = 0; j < data.length; j++) {
                                        if (data[j][ind] != data[0][ind]) {
                                            change = false;
                                            ind = -1;
                                            break;
                                        }
                                    }
                                    if (change)
                                        break;
                                }
                                if (change) {
                                    // store which attribute to remove in which cluster
                                    remove[ClusterId][ind + count] = 1;
                                    count++;
                                    double[][] dataNew = new double[data.length][data[0].length - 1];
                                    for (int i = 0; i < data.length; i++) {
                                        System.arraycopy(data[i], 0, dataNew[i], 0, ind);
                                        System.arraycopy(data[i], ind + 1, dataNew[i], ind,
                                                data[0].length - (ind + 1));
                                    }
                                    data = dataNew;
                                }
                            } while (change);

                            // calculate new distancelimit using new number of dimension
                            chi = new ChiSquaredDistributionImpl(data[0].length);
                            try {
                                DistanceLimitPerCluster[ClusterId] = chi
                                        .inverseCumulativeProbability(this.probability);
                            } catch (MathException e) {
                                System.out.println(e);
                            }

                        }
                        CovariancematrixPerCluster[ClusterId] = new CovarianceMatrix(data, numberOfThreads);
                    }
                }
            }
        }

        // REGULARIZATION
        // S is the summed covariance matrix over all clusters (QDA)
        double[][] S = null;
        boolean thereisone = false;
        if (this.red == METHOD_COV_REGULARIZE) {
            int id = 0;
            for (boolean b : removed_cluster) {
                if (!b) {
                    thereisone = true;
                    break;
                }
                id++;
            }
            if (!thereisone) {
                throw new OperatorException(
                        "No cluster left. This is a problem. Try not to remove small clusters or reduce number of clusters.");
            }
            S = new double[CovariancematrixPerCluster[id]
                    .getCovMat().length][CovariancematrixPerCluster[id].getCovMat()[0].length];
            for (int ClusterId = 0; ClusterId < NumberOfCluster; ClusterId++) {
                if (!removed_cluster[ClusterId] && CovariancematrixPerCluster[ClusterId] != null) {
                    double[][] d = CovariancematrixPerCluster[ClusterId].getCovMat();
                    for (int i = 0; i < d.length; i++) {
                        for (int j = 0; j < d[i].length; j++) {
                            S[i][j] += d[i][j];
                        }
                    }
                }
            }
        }

        // reset Point-association
        if (rem == this.removeRuns) {
            workClusterSize = this.clusterSize.clone();
            workBelongsToCluster = this.belongsToCluster.clone();
        }
        for (int ClusterId = 0; ClusterId < NumberOfCluster; ClusterId++) {
            if (workClusterSize[ClusterId] > 0) {
                Matrix mh = new Matrix(CovariancematrixPerCluster[ClusterId].getCovMat());
                if (this.red == METHOD_COV_REDUCTION && mh.det() == 0) {
                    CovariancematrixPerCluster[ClusterId].addMinimum();
                    mh = new Matrix(CovariancematrixPerCluster[ClusterId].getCovMat());
                } else if (this.red == METHOD_COV_REGULARIZE) {
                    Matrix mS = new Matrix(S);
                    mS = mS.times(this.regularizedLambda / this.points.length);
                    mh = mh.times((1.0 - this.regularizedLambda));
                    mh = mh.plus(mS);
                }
                // This shouldn't happen ...
                if (mh.det() == 0) {
                    CovariancematrixPerCluster[ClusterId].addMinimum();
                    mh = new Matrix(CovariancematrixPerCluster[ClusterId].getCovMat());
                }

                mh = mh.inverse();

                for (int PointId = 0; PointId < points.length; PointId++) {
                    if (workBelongsToCluster[PointId] == ClusterId) {

                        int sum = 0;
                        for (int i : remove[ClusterId])
                            sum += i;

                        double[] point = new double[points[PointId].length - sum];

                        int count = 0;
                        for (int ind = 0; ind < remove[ClusterId].length; ind++) {
                            if (remove[ClusterId][ind] == 1)
                                count++;
                            int newid = ind - count;
                            if (newid < 0)
                                continue;
                            point[newid] = this.points[PointId][newid];
                        }

                        double mahaDist;
                        if (this.red == 0)
                            mahaDist = mahalanobisDistance(point, mh);
                        else
                            mahaDist = mahalanobisDistance(this.points[PointId], mh);
                        result[PointId] = mahaDist / DistanceLimit;

                        // remove association for minimum covariance
                        // determinant
                        if (rem != this.removeRuns && mahaDist > DistanceLimitPerCluster[ClusterId]) {
                            workBelongsToCluster[PointId] = NumberOfCluster;
                            workClusterSize[ClusterId]--;
                        }
                    }
                }
            }
        }
    }

    return result;
}
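
The evaluator obtains its distance limits from inverseCumulativeProbability: under a multivariate normal assumption, the squared Mahalanobis distance of a d-dimensional point is chi-squared distributed with d degrees of freedom, so the quantile at the chosen probability serves as an outlier cutoff. A minimal, self-contained sketch of just that thresholding step (the class name, dimension and probability below are arbitrary example values):

import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;

public class MahalanobisThreshold {
    // Squared-distance cutoff for d-dimensional data at the given probability level.
    static double distanceLimit(int dimensions, double probability) throws MathException {
        ChiSquaredDistributionImpl chi = new ChiSquaredDistributionImpl(dimensions);
        return chi.inverseCumulativeProbability(probability);
    }

    public static void main(String[] args) throws MathException {
        // e.g. 5-dimensional points, 97.5% of normal points expected inside the limit
        double limit = distanceLimit(5, 0.975);
        System.out.println("Squared Mahalanobis distance limit: " + limit);
    }
}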

From source file:geogebra.common.kernel.statistics.AlgoDistribution.java

/**
 * @param param
 *            degrees of freedom
 * @return chi squared distribution
 */
protected ChiSquaredDistribution getChiSquaredDistribution(double param) {
    if (chisquared == null || chisquared.getDegreesOfFreedom() != param)
        chisquared = new ChiSquaredDistributionImpl(param);

    return chisquared;
}

From source file:de.dfki.madm.anomalydetection.operator.statistical_based.RobustPCAOperator.java

@Override
public void doWork() throws OperatorException {
    // check whether all attributes are numerical
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    Tools.onlyNonMissingValues(exampleSet, "PCA");
    Tools.onlyNumericalAttributes(exampleSet, "PCA");

    // Get normal probability.
    double normProb = getParameterAsDouble(PARAMETER_OUTLIER_PROBABILITY);
    int olInst = exampleSet.size() - (int) Math.floor(exampleSet.size() * normProb);
    log("Ignoring " + olInst + " anomalyous instances for robustness.");

    // The robust estimate is based on removing top outliers first based on Mahalanobis distance (MD).
    // Since MD is the same as the outlier score when using all PCs, the PCA is done twice:
    // First with all examples, second with top-outliers removed (robust)

    // First PCA for outlier removal
    // create covariance matrix
    Matrix covarianceMatrix = CovarianceMatrix.getCovarianceMatrix(exampleSet);

    // EigenVector and EigenValues of the covariance matrix
    EigenvalueDecomposition eigenvalueDecomposition = covarianceMatrix.eig();

    // create and deliver results
    double[] eigenvalues = eigenvalueDecomposition.getRealEigenvalues();
    Matrix eigenvectorMatrix = eigenvalueDecomposition.getV();
    double[][] eigenvectors = eigenvectorMatrix.getArray();

    PCAModel model = new PCAModel(exampleSet, eigenvalues, eigenvectors);

    // Perform transformation
    ExampleSet res = model.apply((ExampleSet) exampleSet.clone());

    // Compute simple list with MDs and sort according to MD.
    List<double[]> l = new LinkedList<double[]>();
    double eIdx = 0;
    for (Example example : res) {
        double md = 0.0;
        int aNr = 0;
        for (Attribute attr : example.getAttributes()) {
            double pcscore = example.getValue(attr);
            md += (pcscore * pcscore) / model.getEigenvalue(aNr);
            aNr++;
        }
        double[] x = { md, eIdx };
        l.add(x);
        eIdx++;
    }
    Collections.sort(l, new Comparator<double[]>() {
        public int compare(double[] first, double[] second) {
            return Double.compare(second[0], first[0]);
        }
    });
    // Out of the list, create array with outlier-indexes and array (mapping) with good instances. 
    Iterator<double[]> iter = l.iterator();
    int[] olMapping = new int[olInst];
    for (int i = 0; i < olInst; i++) {
        olMapping[i] = (int) ((double[]) iter.next())[1];
    }
    Arrays.sort(olMapping);
    int[] mapping = new int[exampleSet.size() - olInst];
    int olc = 0;
    int ctr = 0;
    for (int i = 0; i < exampleSet.size(); i++) {
        if (olc == olInst) { // Add last elements after last outlier
            mapping[ctr++] = i;
            continue;
        }
        if (olMapping[olc] != i) {
            mapping[ctr++] = i;
        } else {
            olc++;
        }
    }
    ExampleSet robustExampleSet = new MappedExampleSet(exampleSet, mapping); // creates a new example set without the top outliers.

    // ---
    // Second PCA (robust)
    covarianceMatrix = CovarianceMatrix.getCovarianceMatrix(robustExampleSet);
    eigenvalueDecomposition = covarianceMatrix.eig();

    // create and deliver results
    eigenvalues = eigenvalueDecomposition.getRealEigenvalues();
    eigenvectorMatrix = eigenvalueDecomposition.getV();
    eigenvectors = eigenvectorMatrix.getArray();

    // Apply on original set
    model = new PCAModel(exampleSet, eigenvalues, eigenvectors);

    // Perform transformation
    res = model.apply((ExampleSet) exampleSet.clone());

    // Sort eigenvalues
    Arrays.sort(eigenvalues);
    ArrayUtils.reverse(eigenvalues);

    // if necessary reduce nbr of dimensions ...
    int reductionType = getParameterAsInt(PARAMETER_REDUCTION_TYPE);
    List<Integer> pcList = new ArrayList<Integer>();
    if (reductionType == PCS_ALL) {
        for (int i = 0; i < exampleSet.getAttributes().size(); i++) {
            pcList.add(i);
        }
    }
    if (reductionType == PCS_TOP || reductionType == PCS_BOTH) {
        //top
        switch (getParameterAsInt(PARAMETER_TOP_METHODS)) {
        case PCS_TOP_FIX:
            for (int i = 0; i < getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS_TOP); i++) {
                pcList.add(i);
            }
            break;
        case PCS_TOP_VAR:
            double var = getParameterAsDouble(PARAMETER_VARIANCE_THRESHOLD);
            boolean last = false;
            for (int i = 0; i < exampleSet.getAttributes().size(); i++) {
                if (model.getCumulativeVariance(i) < var) {
                    pcList.add(i);
                } else if (!last) { // we need to add another PC to meet the minimum requirement.
                    last = true;
                    pcList.add(i);
                }
            }
            break;
        }
    }
    if (reductionType == PCS_LOWER || reductionType == PCS_BOTH) {
        //lower
        switch (getParameterAsInt(PARAMETER_LOW_METHODS)) {
        case PCS_LOW_FIX:
            for (int i = exampleSet.getAttributes().size()
                    - getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS_LOW); i < exampleSet.getAttributes()
                            .size(); i++) {
                pcList.add(i);
            }
            break;
        case PCS_LOW_VAL:
            double val = getParameterAsDouble(PARAMETER_VALUE_THRESHOLD);
            for (int i = 0; i < eigenvalues.length; i++) {
                if (eigenvalues[i] <= val) {
                    if (pcList.size() == 0) {
                        pcList.add(i);
                    } else if (pcList.get(pcList.size() - 1).intValue() < i) {
                        pcList.add(i);
                    }
                }
            }
            break;
        }
    }
    int[] opcs = ArrayUtils.toPrimitive(pcList.toArray(new Integer[pcList.size()]));

    if (opcs.length == 0) {
        throw new UserError(this,
                "The selected parameter thresholds did not match any principal component. Lower the variance threshold or increase the eigenvalue threshold.");
    }
    if (opcs.length == exampleSet.getAttributes().size()) {
        log("Using all PCs for score.");
    } else {
        log("Using following PCs for score: " + Arrays.toString(opcs));
    }

    // Normalize by Chi-Dist with d degrees of freedom
    double scoreNormalizer = 1.0;
    ChiSquaredDistributionImpl chi = new ChiSquaredDistributionImpl(opcs.length);
    try {
        scoreNormalizer = chi.inverseCumulativeProbability(normProb);
    } catch (MathException e) {
        System.err.println(e);
    }
    log("Normalizing score with chi cumulative propability: " + scoreNormalizer);

    // compute scores
    Attribute scoreAttr = AttributeFactory.createAttribute("outlier", Ontology.REAL);
    exampleSet.getExampleTable().addAttribute(scoreAttr);
    exampleSet.getAttributes().setOutlier(scoreAttr);
    for (int exNr = 0; exNr < exampleSet.size(); exNr++) {
        Example orig = exampleSet.getExample(exNr);
        Example pc = res.getExample(exNr);
        double oscore = 0.0;
        int aNr = 0;
        ctr = 0;
        for (Attribute attr : pc.getAttributes()) {
            if (ctr < opcs.length && opcs[ctr] != aNr) { // we skip this dimension
                aNr++;
                continue;
            }
            double pcscore = pc.getValue(attr);
            oscore += (pcscore * pcscore) / model.getEigenvalue(aNr);
            aNr++;
            ctr++;
        }
        orig.setValue(scoreAttr, oscore / scoreNormalizer);
    }
    exampleSetOutput.deliver(exampleSet);
}
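
The chi-squared quantile computed above acts as a score normalizer: each example's outlier score is the sum of its squared, eigenvalue-scaled component values over the selected principal components, and dividing by the quantile at the normal probability means that, under a normality assumption, scores above roughly 1.0 fall outside that probability mass.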

From source file:desmoj.core.statistic.Histogram.java

/**
 * Performs Pearson's Chi-square test on given frequencies, degrees of freedom and desired probability.
 * The frequencies are given in an array either including under- and overflow cells or not.
 * On error the Chi-squared test is not performed and false is returned.
 * Details on errors are given out in the error message log.
 * The result is true if the null hypothesis can not be rejected.
 *
 * @param values
 *          long[]: Array of assumed frequencies for each cell.
 * @param degFreedom
 *          int: Degrees of freedom of the test.
 * @param confidence
 *          double: (1-alpha) probability level.
 *
 * @return boolean : <code>true</code> if the null hypothesis can not be rejected.
 *                   <code>false</code> on error or if the null hypothesis has to be rejected.
 */
public boolean chiSquareTest(long[] values, int degFreedom, double confidence) {

    // check if number of cells is valid
    if (!((values.length == this.getCells() + 2) || (values.length == this.getCells())
            || (values.length > 1))) {
        sendWarning("Attempt to perform a Chi-squared test on an invalid number of cells!  ",
                "chiSquareTest(long[], int, double) : ", "Too few or too many cells. ",
                "Make sure to have a valid number of cells " + "when calling the chiSquareTest() method.  ");
        return false;
    }

    // check if we have a reasonable amount of observations
    else if (this.getObservations() < 3) {
        sendWarning(
                "Attempt to perform a Chi-squared test on an insufficient data amount,  "
                        + "there are less than three observations!  ",
                "chiSquareTest(long[], int, double) : ", "Too few observations. ",
                "Make sure to have a sufficient amount of observations "
                        + "when calling the chiSquareTest() method.  ");
        return false;
    }

    // check if we have a reasonable probability
    else if (confidence < 0 || confidence > 1) {
        sendWarning("Attempt to perform a Chi-squared test with an illegal desired probability!  ",
                "chiSquareTest(long[], int, double) : ", "Illegal desired probability. ",
                "Make sure to have a valid desired probability "
                        + "when calling the chiSquareTest() method.  ");
        return false;
    }

    // check if degree of freedom is valid
    else if (degFreedom <= 0 || degFreedom >= values.length) {
        sendWarning("Attempt to perform a Chi-squared test with an illegal degree of freedom!  ",
                "chiSquareTest(long[], int, double) : ", "Illegal degree of freedom. ",
                "Make sure to have a valid degree of freedom " + "when calling the chiSquareTest() method.  ");
        return false;
    }

    else {
        long sumValuesObserved = 0;
        long sumValuesExpected = 0;

        // summarize expected values
        double[] expectedEntries = new double[values.length];
        for (double val : values) {
            if (val == 0) { // exit on 0 to avoid zero division
                sendWarning("Attempt to perform a Chi-squared test with an expected value of 0!  ",
                        "chiSquareTest(long[], int, double) : ", "Invalid expected value. ",
                        "Make sure to have a set of valid expected values "
                                + "when calling the chiSquareTest() method.  ");
                return false;
            } else {
                sumValuesExpected += val;
            }
        }
        // summarize observed values
        int cell;
        if (values.length == this.getCells()) { // without under- and overflow
            cell = 1;
        } else {
            cell = 0; // with under- and overflow
        }
        for (int i = cell; i < values.length + cell; i++) {
            sumValuesObserved += this.getObservationsInCell(i);
        }
        // expected frequency
        for (int i = 0; i < values.length; i++) {
            expectedEntries[i] = (double) (values[i]) / (double) (sumValuesExpected) * sumValuesObserved;
        }
        // calculation of chiSquared
        double testStat = 0;
        for (int i = 0; i < values.length; i++) {
            testStat += Math.pow((this.getObservationsInCell(cell)) - expectedEntries[i], 2)
                    / expectedEntries[i];
            cell++;
        }
        // chiSquared for degrees of freedom and  probability level
        ChiSquaredDistributionImpl chiSquared = new ChiSquaredDistributionImpl(degFreedom);

        // comparison
        boolean result = false;
        try {
            result = !(testStat > chiSquared.inverseCumulativeProbability(confidence));
        } catch (MathException e) {
            e.printStackTrace();
        }
        // a trace note does not make much sense as the test is performed after the simulation has finished.
        // sendTraceNote("result of chi-squared test for " + this.getQuotedName() +  "is being returned. ");
        return result;
    }
}
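
After the input checks, the method computes Pearson's statistic from the observed cell counts and the expected frequencies derived from values, then compares it against the chi-squared quantile at the requested confidence level; it returns true only when the statistic does not exceed that critical value, i.e. when the null hypothesis can not be rejected.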

From source file:org.apache.mahout.freqtermsets.fpgrowth.FPGrowth.java

/**
 * Internal TopKFrequentPattern Generation algorithm, which represents the
 * A's as integers and transforms features to use only integers
 *
 * @param transactions
 *          Transaction database Iterator
 * @param attributeFrequency
 *          array representing the Frequency of the corresponding
 *          attribute id
 * @param minSupport
 *          minimum support of the pattern to be mined
 * @param k
 *          Max value of the Size of the Max-Heap in which Patterns are
 *          held
 * @param featureSetSize
 *          number of features
 * @param returnFeatures
 *          the id's of the features for which Top K patterns have to be
 *          mined
 * @param topKPatternsOutputCollector
 *          the outputCollector which transforms the given Pattern in
 *          integer format to the corresponding A Format
 */
private void generateTopKFrequentPatterns(
        // Iterator<Pair<int[], Long>> transactions,
        TransactionTree cTree, OpenObjectIntHashMap<A> attributeIdMapping, long[] attributeFrequency,
        long minSupport, int k, int featureSetSize, Collection<Integer> returnFeatures,
        TopKPatternsOutputConverter<A> topKPatternsOutputCollector, StatusUpdater updater) throws IOException {
    // YA: BONSAAAAAAAII {
    // FPTree tree = new FPTree(featureSetSize);
    FPTree tree = null;
    boolean change = true;
    int pruneIters = 0;
    IntArrayList pruneByContingencyCount = new IntArrayList();
    IntArrayList pruneBySpreadCount = new IntArrayList();

    while (change) {
        pruneByContingencyCount.add(0);
        pruneBySpreadCount.add(0);

        change = false;
        tree = new FPTree(featureSetSize);
        OpenIntLongHashMap[] childJointFreq;
        long[] sumChildSupport;
        if (BONSAI_PRUNE) {
            childJointFreq = new OpenIntLongHashMap[featureSetSize];
            sumChildSupport = new long[featureSetSize];
        }
        double supportGrandTotal = 0;
        // } YA: BONSAAAAAAAII

        for (int i = 0; i < featureSetSize; i++) {
            tree.addHeaderCount(i, attributeFrequency[i]);

            // YA: BONSAAAAAAAII {
            if (attributeFrequency[i] < 0) {
                continue; // this is an attribute not satisfying the
                // monotone constraint
            }
            if (BONSAI_PRUNE) {
                childJointFreq[i] = new OpenIntLongHashMap();
                supportGrandTotal += attributeFrequency[i];
            }
            // } YA: Bonsai
        }

        // Constructing initial FPTree from the list of transactions
        // YA Bonsai : To pass the tree itself the iterator now would work
        // only with ints.. the A type argument is
        // not checked in the constructor. TODO: remove the type argument and
        // force using ints only
        Iterator<Pair<int[], Long>> transactions = new IntTransactionIterator(cTree.iterator(),
                attributeIdMapping);

        int nodecount = 0;
        // int attribcount = 0;
        int i = 0;
        while (transactions.hasNext()) {
            Pair<int[], Long> transaction = transactions.next();
            Arrays.sort(transaction.getFirst());
            // attribcount += transaction.length;
            // YA: Bonsai {
            // nodecount += treeAddCount(tree, transaction.getFirst(),
            // transaction.getSecond(), minSupport, attributeFrequency);
            int temp = FPTree.ROOTNODEID;
            boolean addCountMode = true;
            for (int attribute : transaction.getFirst()) {
                if (attributeFrequency[attribute] < 0) {
                    continue; // this is an attribute not satisfying the
                    // monotone constraint
                }
                if (attributeFrequency[attribute] < minSupport) {
                    break;
                }
                if (BONSAI_PRUNE && tree.attribute(temp) != -1) { // Root node
                    childJointFreq[tree.attribute(temp)].put(attribute,
                            childJointFreq[tree.attribute(temp)].get(attribute) + transaction.getSecond());
                    sumChildSupport[tree.attribute(temp)] += transaction.getSecond();
                }
                int child;
                if (addCountMode) {
                    child = tree.childWithAttribute(temp, attribute);
                    if (child == -1) {
                        addCountMode = false;
                    } else {
                        tree.addCount(child, transaction.getSecond());
                        temp = child;
                    }
                }
                if (!addCountMode) {
                    child = tree.createNode(temp, attribute, transaction.getSecond());
                    temp = child;
                    nodecount++;
                }
            }
            // } YA Bonsai
            i++;
            if (i % 10000 == 0) {
                log.info("FPTree Building: Read {} Transactions", i);
            }
        }

        log.info("Number of Nodes in the FP Tree: {}", nodecount);

        // YA: BONSAAAAAAAII {
        if (BONSAI_PRUNE) {
            if (log.isTraceEnabled())
                log.info("Bonsai prunining tree: {}", tree.toString());

            for (int a = 0; a < tree.getHeaderTableCount(); ++a) {
                int attr = tree.getAttributeAtIndex(a);

                if (attributeFrequency[attr] < 0) {
                    continue; // this is an attribute not satisfying the
                    // monotone constraint
                }
                if (attributeFrequency[attr] < minSupport) {
                    break;
                }
                // if (sumChildSupport[attr] < attributeFrequency[attr]) {
                // // the case of . (full stop) as the next child
                // childJointFreq[attr]
                // .put(-1,
                // (long) (attributeFrequency[attr] - sumChildSupport[attr]));
                // }
                float numChildren = childJointFreq[attr].size();

                // if (numChildren < LEAST_NUM_CHILDREN_TO_VOTE_FOR_NOISE) {
                // continue;
                // }
                if (log.isTraceEnabled()) {
                    log.trace("Voting for noisiness of attribute {} with number of children: {}", attr,
                            numChildren);
                    log.trace("Attribute support: {} - Total Children support: {}", attributeFrequency[attr],
                            sumChildSupport[attr]);
                }
                // EMD and the such.. the threshold isn't easy to define, and it
                // also doesn't take into account the weights of children.
                // // double uniformProb = 1.0 / numChildren;
                // // double uniformProb = sumChildSupport[attr] /
                // supportGrandTotal;
                // double uniformFreq = attributeFrequency[attr] / numChildren;
                // IntArrayList childAttrArr = childJointFreq[attr].keys();
                // // IntArrayList childAttrArr = new IntArrayList();
                // // childJointFreq[attr].keysSortedByValue(childAttrArr);
                // double totalDifference = 0;
                // double sumOfWeights = 0;
                // // double emd = 0;
                // for (int c = childAttrArr.size() - 1; c >=0 ; --c) {
                // int childAttr = childAttrArr.get(c);
                // double childJF = childJointFreq[attr].get(childAttr);
                // double childWeight = attributeFrequency[childAttr];
                // totalDifference += childWeight * Math.abs(childJF -
                // uniformFreq);
                // sumOfWeights += childWeight;
                //
                // // double jointProb = childJF /
                // // supportGrandTotal;
                // // double childProb = attributeFrequency[childAttr] /
                // // supportGrandTotal;
                // // double childConditional = childJF /
                // attributeFrequency[attr];
                // // emd = childConditional + emd - uniformProb;
                // // emd = childJF + emd - uniformFreq;
                // // totalDifference += Math.abs(emd);
                // }
                // // Probability (D > observed ) = QKS Ne + 0.12 + 0.11/ Ne D
                // // double pNotUniform = totalDifference / attrSupport;
                // // double threshold = (numChildren * (numChildren - 1) * 1.0)
                // // / (2.0 * attributeFrequency[attr]);
                // double weightedDiff = totalDifference / sumOfWeights;
                // double threshold = sumOfWeights / 2.0; // each child can be
                // up to
                // // 1 over or below the
                // // uniform freq
                // boolean noise = weightedDiff < threshold;
                // log.info("EMD: {} - Threshold: {}", weightedDiff, threshold);
                // ///////////////////////////////////
                // Log odds.. this is my hartala, and it needs ot be shifted
                // according to the number of children
                // // // if there is one child then the prob of random choice
                // // will be
                // // // 1, so anything would be
                // // // noise
                // // // and if there are few then the probability that this is
                // // // actually noise declines
                // // if (numChildren >= LEAST_NUM_CHILDREN_TO_VOTE_FOR_NOISE)
                // // {
                // // log.info(
                // //
                // "Voting for noisiness of attribute {} with number of children: {}",
                // // currentAttribute, numChildren);
                // // log.info(
                // // "Attribute support: {} - Total Children support: {}",
                // // attrSupport, sumOfChildSupport);
                // // int noiseVotes = 0;
                // // double randomSelectionLogOdds = 1.0 / numChildren;
                // // randomSelectionLogOdds = Math.log(randomSelectionLogOdds
                // // / (1 - randomSelectionLogOdds));
                // // randomSelectionLogOdds =
                // // Math.abs(randomSelectionLogOdds);
                // //
                // // IntArrayList childAttrArr = childJointFreq.keys();
                // // for (int c = 0; c < childAttrArr.size(); ++c) {
                // // double childConditional = 1.0
                // // * childJointFreq.get(childAttrArr.get(c))
                // // / sumOfChildSupport; // attrSupport;
                // // double childLogOdds = Math.log(childConditional
                // // / (1 - childConditional));
                // // if (Math.abs(childLogOdds) <= randomSelectionLogOdds) {
                // // // probability of the child given me is different
                // // // than
                // // // probability of choosing the
                // // // child randomly
                // // // from among my children.. using absolute log odds
                // // // because they are symmetric
                // // ++noiseVotes;
                // // }
                // // }
                // // log.info("Noisy if below: {} - Noise votes: {}",
                // // randomSelectionLogOdds, noiseVotes);
                // // noise = noiseVotes == numChildren;
                // ////////////////////////////////////////////////////

                // // Kullback-liebler divergence from the uniform distribution
                // double randomChild = 1.0 / numChildren;
                // IntArrayList childAttrArr = childJointFreq[attr].keys();
                //
                // double klDivergence = 0;
                // for (int c = 0; c < childAttrArr.size(); ++c) {
                // double childConditional = 1.0
                // * childJointFreq[attr].get(childAttrArr.get(c))
                // / attributeFrequency[attr];
                // if (childConditional == 0) {
                // continue; // a7a!
                // }
                // klDivergence += childConditional
                // * Math.log(childConditional / randomChild);
                // }
                //
                // boolean noise = Math.abs(klDivergence) < 0.05;
                // log.info("KL-Divergence: {} - Noise less than: {}",
                // klDivergence, 0.05);
                // //////////////////////////////////////
                // Pair wise metric with different children
                SummaryStatistics metricSummary = new SummaryStatistics();
                // double[] metric = new double[(int) numChildren];

                // SummaryStatistics spreadSummary = new SummaryStatistics();
                // double uniformSpread = attributeFrequency[attr] /
                // numChildren;
                double goodnessOfFit = 0.0;
                // If I don't take the . into account: sumChildSupport[attr] /
                // numChildren;

                double sumOfWeights = 0;
                IntArrayList childAttrArr = childJointFreq[attr].keys();
                for (int c = 0; c < childAttrArr.size(); ++c) {
                    int childAttr = childAttrArr.get(c);
                    double[][] contingencyTable = new double[2][2];
                    if (childAttr == -1) {
                        // this is meaningless, as yuleq will just be 1
                        contingencyTable[1][1] = childJointFreq[attr].get(childAttr);
                        contingencyTable[1][0] = sumChildSupport[attr];
                        // equals attributeFrequency[attr] -
                        // contingencyTable[1][1];
                        contingencyTable[0][1] = 0;
                        contingencyTable[0][0] = supportGrandTotal - attributeFrequency[attr];
                    } else {
                        contingencyTable[1][1] = childJointFreq[attr].get(childAttr);
                        contingencyTable[1][0] = attributeFrequency[attr] - contingencyTable[1][1];
                        contingencyTable[0][1] = attributeFrequency[childAttr] - contingencyTable[1][1];
                        contingencyTable[0][0] = supportGrandTotal - attributeFrequency[attr]
                                - attributeFrequency[childAttr] + contingencyTable[1][1];
                        // because of the meaninglessness of yuleq in case of . }
                        double ad = contingencyTable[0][0] * contingencyTable[1][1];
                        double bc = contingencyTable[0][1] * contingencyTable[1][0];
                        double yuleq = (ad - bc) / (ad + bc);
                        double weight = attributeFrequency[childAttr];
                        sumOfWeights += weight;
                        metricSummary.addValue(Math.abs(yuleq * weight));
                        // metricSummary.addValue(yuleq * yuleq * weight);
                    }
                    // spreadSummary.addValue(Math.abs(uniformSpread
                    // - contingencyTable[1][1])
                    // / numChildren);
                    // spreadSummary.addValue(contingencyTable[1][1]); // *
                    // weight
                    goodnessOfFit += contingencyTable[1][1] * contingencyTable[1][1];
                }
                // double weightedquadraticMean =
                // Math.sqrt(metricSummary.getSum() / sumOfWeights);
                double weightedMean = (metricSummary.getSum() / sumOfWeights);

                boolean noise = false;
                // if (weightedMean < 0.5) {
                // pruneByContingencyCount.set(pruneIters, pruneByContingencyCount.get(pruneIters) + 1);
                // noise = true;
                // } else if (weightedMean < 0.95) {
                if (numChildren > 1) {
                    double n = sumChildSupport[attr]; // attributeFrequency[attr];
                    goodnessOfFit /= (n / numChildren);
                    goodnessOfFit -= n;
                    ChiSquaredDistributionImpl chisqDist = new ChiSquaredDistributionImpl(numChildren - 1);
                    double criticalPoint = -1;
                    try {
                        criticalPoint = chisqDist.inverseCumulativeProbability(1.0 - SIGNIFICANCE / 2.0);
                    } catch (MathException e) {
                        log.error(e.getMessage(), e);
                    }
                    if (goodnessOfFit < criticalPoint) {
                        pruneBySpreadCount.set(pruneIters, pruneBySpreadCount.get(pruneIters) + 1);
                        noise = true;
                    }
                    // // double spreadCentraltendency = (spreadSummary.getMax()
                    // -
                    // // spreadSummary.getMin()) / 2.0;
                    // // spreadSummary.getMean();
                    // // double uniformSpread = sumChildSupport[attr] /
                    // // numChildren;
                    //
                    // // noise = Math.abs(spreadCentraltendency -
                    // uniformSpread) <
                    // // 1e-4;
                    //
                    // double spreadCentraltendency = spreadSummary.getMean();
                    // // (spreadSummary.getMax() -
                    // // spreadSummary.getMin()) / 2.0;
                    // if(spreadCentraltendency < 1e-6){
                    // noise = true;
                    // }
                    //
                    // if (!noise && numChildren > 0) {
                    // // see if the difference is statistically significant
                    // double spreadCI = getConfidenceIntervalHalfWidth(
                    // spreadSummary, SIGNIFICANCE);
                    // spreadCentraltendency -= spreadCI;
                    // if (spreadCentraltendency < 0) {
                    // noise = true;
                    // }
                    // // // noise if the CI contains the uniform spread
                    // // threshold
                    // // if (spreadCentraltendency > uniformSpread) {
                    // // noise = (spreadCentraltendency - spreadCI) <
                    // // uniformSpread;
                    // // } else {
                    // // noise = (spreadCentraltendency + spreadCI) >
                    // // uniformSpread;
                    // // }
                    // }
                }
                change |= noise;

                if (noise) {
                    if (log.isTraceEnabled())
                        log.info("Pruning attribute {} with child joint freq {}", attr, childJointFreq[attr]);
                    returnFeatures.remove(attr);
                    attributeFrequency[attr] = -1;

                }
            }
        }
        ++pruneIters;
    }
    if (log.isTraceEnabled()) {
        log.info("Pruned tree: {}", tree.toString());
        log.info("Prune by contingency: {} - Prune by spread: {}", pruneByContingencyCount.toString(),
                pruneBySpreadCount.toString());
    }
    // } YA: Bonsai
    fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
}

From source file:org.caleydo.view.tourguide.impl.Statistics.java

public static double chiSquaredProbability(double x, int df) {
    // return weka.core.Statistics.chiSquaredProbability(x, df);
    ChiSquaredDistribution d;
    if (df == 1) {
        d = chiSquare1 != null ? chiSquare1.get() : null;
        if (d == null) {
            d = new ChiSquaredDistributionImpl(1);
            chiSquare1 = new SoftReference<>(d);
        }
    } else {
        d = new ChiSquaredDistributionImpl(df);
    }
    try {
        return 1.0 - d.cumulativeProbability(x);
    } catch (MathException e) {
        log.error("can't compute chiSquaredProbability of " + x + " with df: " + df, e);
    }
    return Float.NaN;
}
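
The helper returns the upper-tail probability 1 - CDF and keeps the df = 1 distribution in a SoftReference, presumably because that case is queried most often. A hypothetical call (the numbers are only illustrative):

double p = Statistics.chiSquaredProbability(3.84, 1); // upper-tail probability with 1 degree of freedom, roughly 0.05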