List of usage examples for org.apache.commons.math3.distribution HypergeometricDistribution probability
public double probability(int x)
From source file:org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils.java
/** * Computes a two-sided p-Value for a Fisher's exact test on the contingency table, after normalizing counts so that the sum does not exceed {@value org.broadinstitute.gatk.tools.walkers.annotator.StrandBiasTableUtils#TARGET_TABLE_SIZE} * @param originalTable/* w w w . j ava 2 s . co m*/ * @return */ public static Double FisherExactPValueForContingencyTable(int[][] originalTable) { final int[][] normalizedTable = normalizeContingencyTable(originalTable); int[][] table = copyContingencyTable(normalizedTable); int[] rowSums = { sumRow(table, 0), sumRow(table, 1) }; int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) }; int N = rowSums[0] + rowSums[1]; int sampleSize = colSums[0]; int numberOfNonSuccesses = rowSums[1]; int numberOfSuccesses = rowSums[0]; /* * The lowest possible number of successes you can sample is what's left of your sample size after * drawing every non success in the urn. If the number of non successes in the urn is greater than the sample * size then the minimum number of drawn successes is 0. */ int lo = Math.max(0, sampleSize - numberOfNonSuccesses); /* * The highest possible number of successes you can draw is either the total sample size or the number of * successes in the urn. (Whichever is smaller) */ int hi = Math.min(sampleSize, numberOfSuccesses); /** * If the support of the distribution is only one value, creating the HypergeometricDistribution * doesn't make sense. There would be only one possible observation so the p-value has to be 1. */ if (lo == hi) { return 1.0; } /** * For the hypergeometric distribution from which to calculate the probabilities: * The population (N = a+b+c+d) is the sum of all four numbers in the contingency table. Then the number of * "successes" (K = a+b) is the sum of the top row, and the sample size (n = a+c) is the sum of the first column. */ final HypergeometricDistribution dist = new HypergeometricDistribution(N, numberOfSuccesses, sampleSize); //Then we determine a given probability with the sampled successes (k = a) from the first entry in the table. double pCutoff = dist.probability(table[0][0]); double pValue = 0.0; /** * Now cycle through all possible k's and add those whose probabilities are smaller than our observed k * to the p-value, since this is a two-sided test */ for (int i = lo; i <= hi; i++) { double pValuePiece = dist.probability(i); if (pValuePiece <= pCutoff) { pValue += pValuePiece; } } // min is necessary as numerical precision can result in pValue being slightly greater than 1.0 return Math.min(pValue, 1.0); }
From source file:org.deidentifier.arx.risk.ModelZayatz.java
/** * Estimates the probability that an equivalence class of size 1 in the * sample was chosen from an equivalence class of size 1 in the population * /*from w ww .ja va 2 s . c o m*/ * @param classes * @param populationSize * @param sampleSize * @param numClasses * @return */ private double computeConditionalUniqueness(int[] classes, double populationSize, double sampleSize, double numClasses) { int numClassesOfSize1 = classes[0] == 1 ? classes[1] : 0; double temp = 0; int param1 = (int) populationSize; if (populationSize > Integer.MAX_VALUE) { param1 = Integer.MAX_VALUE; // TODO: This is an error: overflow } int param2 = (int) sampleSize; if (sampleSize > Integer.MAX_VALUE) { param2 = Integer.MAX_VALUE; // TODO: This is an error: overflow } for (int i = 0; i < classes.length; i += 2) { int size = classes[i]; int count = classes[i + 1]; HypergeometricDistribution distribution = new HypergeometricDistribution(param1, size, param2); temp += (count / ((double) numClasses)) * distribution.probability(1); checkInterrupt(); } HypergeometricDistribution distribution = new HypergeometricDistribution(param1, 1, param2); return (((double) numClassesOfSize1 / ((double) numClasses)) * (distribution.probability(1))) / temp; }