Example usage for org.apache.commons.math3.distribution HypergeometricDistribution probability

List of usage examples for org.apache.commons.math3.distribution HypergeometricDistribution probability

Introduction

In this page you can find the example usage for org.apache.commons.math3.distribution HypergeometricDistribution probability.

Prototype

public double probability(int x) 

Source Link

Usage

From source file:org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils.java

/**
 * Computes a two-sided p-Value for a Fisher's exact test on the contingency table, after normalizing counts so that the sum does not exceed {@value org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils#TARGET_TABLE_SIZE}
 * @param originalTable/*  w  w w  .  j ava  2  s . co  m*/
 * @return
 */
public static Double FisherExactPValueForContingencyTable(int[][] originalTable) {
    final int[][] normalizedTable = normalizeContingencyTable(originalTable);

    int[][] table = copyContingencyTable(normalizedTable);

    int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
    int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
    int N = rowSums[0] + rowSums[1];
    int sampleSize = colSums[0];
    int numberOfNonSuccesses = rowSums[1];
    int numberOfSuccesses = rowSums[0];
    /*
     * The lowest possible number of successes you can sample is what's left of your sample size after
     * drawing every non success in the urn. If the number of non successes in the urn is greater than the sample
     * size then the minimum number of drawn successes is 0.
     */
    int lo = Math.max(0, sampleSize - numberOfNonSuccesses);
    /*
     * The highest possible number of successes you can draw is either the total sample size or the number of
     * successes in the urn. (Whichever is smaller)
     */
    int hi = Math.min(sampleSize, numberOfSuccesses);

    /**
     * If the support of the distribution is only one value, creating the HypergeometricDistribution
     * doesn't make sense. There would be only one possible observation so the p-value has to be 1.
     */
    if (lo == hi) {
        return 1.0;
    }

    /**
     * For the hypergeometric distribution from which to calculate the probabilities:
     * The population (N = a+b+c+d) is the sum of all four numbers in the contingency table. Then the number of
     * "successes" (K = a+b) is the sum of the top row, and the sample size (n = a+c) is the sum of the first column.
     */
    final HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize);

    //Then we determine a given probability with the sampled successes (k = a) from the first entry in the table.
    double pCutoff = dist.probability(table[0][0]);

    double pValue = 0.0;
    /**
     * Now cycle through all possible k's and add those whose probabilities are smaller than our observed k
     * to the p-value, since this is a two-sided test
     */
    for (int i = lo; i <= hi; i++) {
        double pValuePiece = dist.probability(i);

        if (pValuePiece <= pCutoff) {
            pValue += pValuePiece;
        }
    }

    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(pValue, 1.0);
}

From source file:org.deidentifier.arx.risk.ModelZayatz.java

/**
 * Estimates the probability that an equivalence class of size 1 in the
 * sample was chosen from an equivalence class of size 1 in the population
 * /*from w ww .ja  va  2  s  . c  o  m*/
 * @param classes
 * @param populationSize
 * @param sampleSize
 * @param numClasses
 * @return
 */
private double computeConditionalUniqueness(int[] classes, double populationSize, double sampleSize,
        double numClasses) {

    int numClassesOfSize1 = classes[0] == 1 ? classes[1] : 0;
    double temp = 0;

    int param1 = (int) populationSize;
    if (populationSize > Integer.MAX_VALUE) {
        param1 = Integer.MAX_VALUE; // TODO: This is an error: overflow
    }
    int param2 = (int) sampleSize;
    if (sampleSize > Integer.MAX_VALUE) {
        param2 = Integer.MAX_VALUE; // TODO: This is an error: overflow
    }
    for (int i = 0; i < classes.length; i += 2) {
        int size = classes[i];
        int count = classes[i + 1];

        HypergeometricDistribution distribution = new HypergeometricDistribution(param1, size, param2);
        temp += (count / ((double) numClasses)) * distribution.probability(1);
        checkInterrupt();
    }

    HypergeometricDistribution distribution = new HypergeometricDistribution(param1, 1, param2);
    return (((double) numClassesOfSize1 / ((double) numClasses)) * (distribution.probability(1))) / temp;
}