List of usage examples for the org.apache.commons.math.stat.inference.ChiSquareTestImpl constructor ChiSquareTestImpl()
public ChiSquareTestImpl()
From source file:edu.cornell.med.icb.goby.algorithmic.algorithm.dmr.ChiSquareTestAdaptor.java
@Override public double calculateNoCovariate(int... a) { final int cma = a[0]; final int ca = a[1]; final int cmb = a[2]; final int cb = a[3]; if (cma == 0 || ca == 0 || cmb == 0 || cb == 0) { setIgnorePair(true);//from w w w . ja v a 2s . co m return -StrictMath.log10(1.0); } expectedCounts[0] = cma; expectedCounts[1] = ca; observedCounts[0] = cmb; observedCounts[1] = cb; double pValue = Double.NaN; final ChiSquareTest chisquare = new ChiSquareTestImpl(); try { final double pValueRaw = chisquare.chiSquareTest(expectedCounts, observedCounts); // math commons can return negative p-values? pValue = Math.abs(pValueRaw); } catch (MaxIterationsExceededException e) { LOG.error("expected:" + DoubleArrayList.wrap(expectedCounts).toString()); LOG.error("observed:" + LongArrayList.wrap(observedCounts).toString()); LOG.error(e); pValue = 1.0; setIgnorePair(true); } catch (MathException e) { e.printStackTrace(); setIgnorePair(true); } setIgnorePair(false); return -StrictMath.log10(pValue); }
From source file:edu.cornell.med.icb.goby.stats.ChiSquareTestCalculator.java
@Override public DifferentialExpressionInfo evaluate( final DifferentialExpressionCalculator differentialExpressionCalculator, final NormalizationMethod method, final DifferentialExpressionResults results, final DifferentialExpressionInfo info, final String... group) { // expected counts in each group, assuming the counts for the DE are spread among the groups according to sample // global count proportions final double[] expectedCounts = new double[group.length]; // counts observed in each group: final long[] observedCounts = new long[group.length]; final double[] groupProportions = new double[group.length]; final int chiSquarePValuesStatIndex = defineStatisticId(results, "chi-square-test", method, group); int i = 0;// w w w . ja v a 2s . com double pValue = 1; // estimate the sumOfCountsForDE of counts over all the samples included in any group compared. final long sumOfCountsForDE = 0; int numSamples = 0; double sumObservedCounts = 0; for (final String oneGroupId : group) { final ObjectArraySet<String> samplesForGroup = differentialExpressionCalculator.getSamples(oneGroupId); for (final String sample : samplesForGroup) { final long observedCount = differentialExpressionCalculator.getOverlapCount(sample, info.getElementId()); final double sampleProportion = differentialExpressionCalculator.getSampleProportion(sample); observedCounts[i] += observedCount; groupProportions[i] += sampleProportion; sumObservedCounts += observedCount; numSamples++; } if (observedCounts[i] == 0) { // Chi Square is not defined if any observed counts are zero. 
info.statistics.size(results.getNumberOfStatistics()); info.statistics.set(chiSquarePValuesStatIndex, Double.NaN); return info; } ++i; } i = 0; final double nGroups = group.length; for (int groupIndex = 0; groupIndex < nGroups; groupIndex++) { expectedCounts[groupIndex] += groupProportions[groupIndex] * sumObservedCounts; } final ChiSquareTest chisquare = new ChiSquareTestImpl(); try { final double pValueRaw = chisquare.chiSquareTest(expectedCounts, observedCounts); // math commons can return negative p-values? pValue = Math.abs(pValueRaw); } catch (MaxIterationsExceededException e) { LOG.error("elementId:" + info.getElementId()); LOG.error("expected:" + DoubleArrayList.wrap(expectedCounts).toString()); LOG.error("observed:" + LongArrayList.wrap(observedCounts).toString()); LOG.error(e); pValue = 1; } catch (MathException e) { e.printStackTrace(); } info.statistics.size(results.getNumberOfStatistics()); info.statistics.set(chiSquarePValuesStatIndex, pValue); return info; }
From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java
@Test public void testFisher() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); }/*from w ww . j av a 2 s .co m*/ /** * Encode the following table in two genes: Fisher's Exact Test http://www.langsrud.com/fisher.htm ------------------------------------------ TABLE = [ 10 , 20 , 30 , 40 ] Left : p-value = 0.2533310713617698 Right : p-value = 0.8676419647894328 2-Tail : p-value = 0.5044757698516504 ------------------------------------------ */ deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FisherExactTestCalculator fisher = new FisherExactTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001); final Fisher fisherTest = new Fisher(); final int totalCountInA = 1700; final int totalCountInB = 170; // equal total in each group final int sumCountInA = 90; final int sumCountInB = 45; // half the counts in sample B fisherTest.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB); 
final double pValue = fisherTest.getTwotail(); final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB)); final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB)); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double nGroups = 2; final double[] expected = { divide(sumCountInA + sumCountInB, nGroups) * proportionTotalA * nGroups, divide(sumCountInA + sumCountInB, nGroups) * proportionTotalB * nGroups }; final long[] observed = { sumCountInA, sumCountInB }; final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed)); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to the chi-square result // (results should be comparable since the counts in each cell are large) // assertTrue("pValue: " + pValue, pValue < 0.001); }
From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java
@Test public void testFisherExact() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); }//from ww w. jav a2s. c o m /** * Encode the following table in two genes: Fisher's Exact Test http://www.langsrud.com/fisher.htm ------------------------------------------ TABLE = [ 10 , 20 , 30 , 40 ] Left : p-value = 0.2533310713617698 Right : p-value = 0.8676419647894328 2-Tail : p-value = 0.5044757698516504 ------------------------------------------ */ deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FisherExactRCalculator fisher = new FisherExactRCalculator(results); if (fisher.installed()) { final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001); final int totalCountInA = 1700; final int totalCountInB = 170; // equal total in each group final int sumCountInA = 90; final int sumCountInB = 45; // half the counts in sample B final int sumCountNotInA = totalCountInA - sumCountInA; final int sumCountNotInB = totalCountInB - sumCountInB; final 
FisherExact.Result result = FisherExact.fexact(sumCountInA, sumCountNotInA, sumCountInB, sumCountNotInB); final double pValue = result.getPValue(); final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB)); final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB)); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double nGroups = 2; final double[] expected = { divide(sumCountInA + sumCountInB, nGroups) * proportionTotalA * nGroups, divide(sumCountInA + sumCountInB, nGroups) * proportionTotalB * nGroups }; final long[] observed = { sumCountInA, sumCountInB }; final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed)); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to // the chi-square result // (results should be comparable since the counts in each cell are large) assertTrue("pValue: " + pValue, pValue < 0.001); } }
From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java
@Test public void testChiSquare() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); }/*ww w .j av a 2 s . c om*/ deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final ChiSquareTestCalculator calc = new ChiSquareTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); calc.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("chi square test equal expected result", 0.456056540250256, results.getStatistic(info, calc.statisticIds.get(0)), 0.001); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double[] expected = { 30, 12 }; final long[] observed = { 0, 100 }; final double chiPValue = chisquare.chiSquareTest(expected, observed); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to the chi-square result // (results should be comparable since the counts in each cell are large) // assertTrue("pValue: " + pValue, pValue < 0.001); }
From source file:org.broadinstitute.gatk.engine.recalibration.RecalDatumNode.java
/** * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node. * * The chi^2 value indicates the degree of independence of the implied error rates among the * immediate subnodes//w ww .j a va 2s. co m * * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { if (isLeaf() || freeToMerge()) return 0.0; else if (subnodes.size() == 1) // only one value, so its free to merge away return 0.0; else { final long[][] counts = new long[subnodes.size()][2]; int i = 0; for (final RecalDatumNode<T> subnode : subnodes) { // use the yates correction to help avoid all zeros => NaN counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L; counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L; i++; } try { final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts); final double penalty = -10.0 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE)); // make sure things are reasonable and fail early if not if (Double.isInfinite(penalty) || Double.isNaN(penalty)) throw new ReviewedGATKException("chi2 value is " + chi2PValue + " at " + getRecalDatum()); return penalty; } catch (MathException e) { throw new ReviewedGATKException("Failed in calculating chi2 value", e); } } }
From source file:org.broadinstitute.sting.utils.recalibration.RecalDatumNode.java
/** * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node. * * The chi^2 value indicates the degree of independence of the implied error rates among the * immediate subnodes/*from w w w. ja va 2 s. c o m*/ * * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { if (isLeaf() || freeToMerge()) return 0.0; else if (subnodes.size() == 1) // only one value, so its free to merge away return 0.0; else { final long[][] counts = new long[subnodes.size()][2]; int i = 0; for (final RecalDatumNode<T> subnode : subnodes) { // use the yates correction to help avoid all zeros => NaN counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L; counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L; i++; } try { final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts); final double penalty = -10.0 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE)); // make sure things are reasonable and fail early if not if (Double.isInfinite(penalty) || Double.isNaN(penalty)) throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum()); return penalty; } catch (MathException e) { throw new ReviewedStingException("Failed in calculating chi2 value", e); } } }
From source file:org.rascalmpl.library.analysis.statistics.Inferences.java
/**
 * Passes {@code dataValues} to {@code makeChi}, then returns the chi-square statistic computed
 * from the {@code expected} and {@code observed} fields, wrapped as a real value.
 *
 * @param dataValues the input list handed to {@code makeChi}
 * @return the chi-square statistic as a real value
 */
public IValue chiSquare(IList dataValues) {
    makeChi(dataValues);
    final double statistic = new ChiSquareTestImpl().chiSquare(expected, observed);
    return values.real(statistic);
}
From source file:org.rascalmpl.library.analysis.statistics.Inferences.java
/**
 * Passes {@code dataValues} to {@code makeChi}, then returns the chi-square test p-value
 * computed from the {@code expected} and {@code observed} fields, wrapped as a real value.
 *
 * <p>An {@code IllegalArgumentException} or {@code MathException} raised while computing or
 * wrapping the result is rethrown as the illegal-argument exception produced by
 * {@code RuntimeExceptionFactory}, carrying the original message.
 *
 * @param dataValues the input list handed to {@code makeChi}
 * @return the p-value as a real value
 */
public IValue chiSquareTest(IList dataValues) {
    makeChi(dataValues);
    try {
        final double pValue = new ChiSquareTestImpl().chiSquareTest(expected, observed);
        return values.real(pValue);
    } catch (IllegalArgumentException e) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, e.getMessage());
    } catch (MathException e) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, e.getMessage());
    }
}
From source file:org.rascalmpl.library.analysis.statistics.Inferences.java
/**
 * Passes {@code dataValues} to {@code makeChi}, then runs the chi-square test on the
 * {@code expected} and {@code observed} fields at significance level {@code alpha} and returns
 * the boolean outcome wrapped as a value.
 *
 * <p>An {@code IllegalArgumentException} or {@code MathException} raised while computing or
 * wrapping the result is rethrown as the illegal-argument exception produced by
 * {@code RuntimeExceptionFactory}, carrying the original message.
 *
 * @param dataValues the input list handed to {@code makeChi}
 * @param alpha the significance level, read through {@code doubleValue()}
 * @return the boolean test outcome as a value
 */
public IValue chiSquareTest(IList dataValues, IReal alpha) {
    makeChi(dataValues);
    try {
        final boolean outcome =
                new ChiSquareTestImpl().chiSquareTest(expected, observed, alpha.doubleValue());
        return values.bool(outcome);
    } catch (IllegalArgumentException e) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, e.getMessage());
    } catch (MathException e) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, e.getMessage());
    }
}