Example usage for the org.apache.commons.math.stat.inference.ChiSquareTestImpl constructor ChiSquareTestImpl()

List of usage examples for the org.apache.commons.math.stat.inference.ChiSquareTestImpl constructor ChiSquareTestImpl()

Introduction

On this page you can find example usage for the org.apache.commons.math.stat.inference.ChiSquareTestImpl constructor ChiSquareTestImpl().

Prototype

public ChiSquareTestImpl() 

Source Link

Document

Construct a ChiSquareTestImpl

Usage

From source file:edu.cornell.med.icb.goby.algorithmic.algorithm.dmr.ChiSquareTestAdaptor.java

/**
 * Computes {@code -log10(p)} for a chi-square test over two pairs of counts.
 *
 * <p>{@code a[0]} and {@code a[1]} are stored as the expected counts, {@code a[2]} and
 * {@code a[3]} as the observed counts. The ignore-pair flag is set to {@code true} when any of
 * the four counts is zero or when the test cannot be computed, and to {@code false} only when
 * the test completes. In every ignored case the p-value is treated as 1.0.
 *
 * @param a at least four counts, in the order described above
 * @return {@code -log10} of the absolute value of the p-value returned by the chi-square test
 */
@Override
public double calculateNoCovariate(int... a) {
    final int cma = a[0];
    final int ca = a[1];
    final int cmb = a[2];
    final int cb = a[3];

    if (cma == 0 || ca == 0 || cmb == 0 || cb == 0) {
        setIgnorePair(true);
        return -StrictMath.log10(1.0);
    }
    expectedCounts[0] = cma;
    expectedCounts[1] = ca;
    observedCounts[0] = cmb;
    observedCounts[1] = cb;

    // Track the outcome locally and publish it once at the end, so a failure branch
    // can no longer be overwritten by an unconditional reset of the flag.
    boolean ignorePair = false;
    double pValue;
    final ChiSquareTest chisquare = new ChiSquareTestImpl();
    try {
        final double pValueRaw = chisquare.chiSquareTest(expectedCounts, observedCounts);
        // The raw value has been defensively wrapped in abs() in case it comes back negative.
        pValue = Math.abs(pValueRaw);
    } catch (MaxIterationsExceededException e) {
        LOG.error("expected:" + DoubleArrayList.wrap(expectedCounts).toString());
        LOG.error("observed:" + LongArrayList.wrap(observedCounts).toString());
        LOG.error(e);
        pValue = 1.0;
        ignorePair = true;
    } catch (MathException e) {
        // Same neutral result as the other failure paths instead of leaking NaN to the caller.
        LOG.error("chi-square test could not be computed", e);
        pValue = 1.0;
        ignorePair = true;
    }

    setIgnorePair(ignorePair);
    return -StrictMath.log10(pValue);
}

From source file:edu.cornell.med.icb.goby.stats.ChiSquareTestCalculator.java

/**
 * Runs a chi-square test on the per-group overlap counts of the element described by
 * {@code info} and stores the resulting p-value in {@code info.statistics}.
 *
 * <p>Observed counts are the overlap counts summed over the samples of each group. Expected
 * counts distribute the total observed count across groups according to the summed sample
 * proportions of each group. If any group has an observed count of zero the statistic is set
 * to {@code NaN}. If the test fails to compute, the p-value stays at its default of 1.
 *
 * @param differentialExpressionCalculator source of samples, overlap counts and sample proportions
 * @param method  normalization method, used only to define the statistic id
 * @param results results container that determines the size of the statistics list
 * @param info    element being evaluated; updated in place
 * @param group   identifiers of the groups to compare
 * @return the same {@code info} instance that was passed in
 */
@Override
public DifferentialExpressionInfo evaluate(
        final DifferentialExpressionCalculator differentialExpressionCalculator,
        final NormalizationMethod method, final DifferentialExpressionResults results,
        final DifferentialExpressionInfo info, final String... group) {

    final int groupCount = group.length;
    // expected counts in each group, assuming the counts for the element are spread among the
    // groups according to the global count proportions of their samples
    final double[] expectedCounts = new double[groupCount];
    // counts observed in each group
    final long[] observedCounts = new long[groupCount];
    final double[] groupProportions = new double[groupCount];

    final int chiSquarePValuesStatIndex = defineStatisticId(results, "chi-square-test", method, group);

    double pValue = 1;
    double sumObservedCounts = 0;

    for (int groupIndex = 0; groupIndex < groupCount; groupIndex++) {
        final ObjectArraySet<String> samplesForGroup =
                differentialExpressionCalculator.getSamples(group[groupIndex]);

        for (final String sample : samplesForGroup) {
            final long observedCount = differentialExpressionCalculator.getOverlapCount(sample,
                    info.getElementId());
            final double sampleProportion = differentialExpressionCalculator.getSampleProportion(sample);
            observedCounts[groupIndex] += observedCount;
            groupProportions[groupIndex] += sampleProportion;
            sumObservedCounts += observedCount;
        }
        if (observedCounts[groupIndex] == 0) {
            // Chi Square is not defined if any observed counts are zero.
            info.statistics.size(results.getNumberOfStatistics());
            info.statistics.set(chiSquarePValuesStatIndex, Double.NaN);
            return info;
        }
    }

    for (int groupIndex = 0; groupIndex < groupCount; groupIndex++) {
        expectedCounts[groupIndex] = groupProportions[groupIndex] * sumObservedCounts;
    }

    final ChiSquareTest chisquare = new ChiSquareTestImpl();

    try {
        final double pValueRaw = chisquare.chiSquareTest(expectedCounts, observedCounts);
        // The raw value has been defensively wrapped in abs() in case it comes back negative.
        pValue = Math.abs(pValueRaw);
    } catch (MaxIterationsExceededException e) {
        LOG.error("elementId:" + info.getElementId());
        LOG.error("expected:" + DoubleArrayList.wrap(expectedCounts).toString());
        LOG.error("observed:" + LongArrayList.wrap(observedCounts).toString());
        LOG.error(e);
        pValue = 1;
    } catch (MathException e) {
        // pValue keeps its default of 1; report through the logger like the branch above.
        LOG.error("chi-square test could not be computed for elementId:" + info.getElementId(), e);
    }

    info.statistics.size(results.getNumberOfStatistics());
    info.statistics.set(chiSquarePValuesStatIndex, pValue);
    return info;
}

From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java

/**
 * Checks {@code FisherExactTestCalculator} against a reference 2x2 table, then checks that a
 * chi-square test on a strongly unbalanced pair of counts gives a p-value below 0.001.
 */
@Test
public void testFisher() throws MathException {
    final int numReplicates = 2;
    final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator();
    deCalc.defineElement("id-1");
    deCalc.defineElement("id-2");
    deCalc.defineGroup("A");
    deCalc.defineGroup("B");
    deCalc.reserve(2, numReplicates * 2);

    for (int replicate = 1; replicate <= numReplicates; replicate++) {
        final String suffix = "-" + replicate;
        deCalc.associateSampleToGroup("A" + suffix, "A");
        deCalc.associateSampleToGroup("B" + suffix, "B");
    }

    /*
     * The two elements encode the table below (reference values from
     * http://www.langsrud.com/fisher.htm):
     *
     *   TABLE = [ 10 , 20 , 30 , 40 ]
     *   Left   : p-value = 0.2533310713617698
     *   Right  : p-value = 0.8676419647894328
     *   2-Tail : p-value = 0.5044757698516504
     */
    final String[] samples = { "A-1", "A-2", "B-1", "B-2" };
    final int[] countsForId1 = { 7, 3, 15, 5 };    // A: 7+3 = 10, B: 15+5 = 20
    final int[] countsForId2 = { 15, 15, 20, 20 }; // A: 15+15 = 30, B: 20+20 = 40
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-1", countsForId1[s]);
    }
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-2", countsForId2[s]);
    }

    final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1");
    final DifferentialExpressionResults results = new DifferentialExpressionResults();
    final FisherExactTestCalculator fisher = new FisherExactTestCalculator(results);
    final NormalizationMethod normalizationMethod = new AlignedCountNormalization();
    fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B");

    final double twoTailExpected = 0.5044757698516504;
    assertEquals("fisher test equal expected result", twoTailExpected,
            results.getStatistic(info, fisher.statisticIds.get(0)), 0.001);

    // Second scenario: group B has a tenth of group A's total but half of its count.
    final int totalCountInA = 1700;
    final int totalCountInB = 170;
    final int sumCountInA = 90;
    final int sumCountInB = 45;
    final int grandTotal = totalCountInA + totalCountInB;
    final int combinedCount = sumCountInA + sumCountInB;

    final Fisher fisherTest = new Fisher();
    fisherTest.fisher(totalCountInA, sumCountInA, grandTotal, combinedCount);
    // Retrieved but not asserted: the assertion on this value is disabled because this Fisher
    // implementation is reported to return 1 here.
    final double fisherPValue = fisherTest.getTwotail();

    final double proportionTotalA = divide(totalCountInA, grandTotal);
    final double proportionTotalB = divide(totalCountInB, grandTotal);
    final double nGroups = 2;
    final double expectedInA = divide(combinedCount, nGroups) * proportionTotalA * nGroups;
    final double expectedInB = divide(combinedCount, nGroups) * proportionTotalB * nGroups;
    final double[] expected = { expectedInA, expectedInB };
    final long[] observed = { sumCountInA, sumCountInB };

    final ChiSquareTest chisquare = new ChiSquareTestImpl();
    final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed));

    assertTrue("pValue: " + chiPValue, chiPValue < 0.001);
}

From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java

/**
 * Checks {@code FisherExactRCalculator} against a reference 2x2 table and then checks that both
 * {@code FisherExact.fexact} and a chi-square test give p-values below 0.001 for a strongly
 * unbalanced pair of counts. All assertions are skipped when {@code fisher.installed()} is false.
 */
@Test
public void testFisherExact() throws MathException {
    final int numReplicates = 2;
    final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator();
    deCalc.defineElement("id-1");
    deCalc.defineElement("id-2");
    deCalc.defineGroup("A");
    deCalc.defineGroup("B");
    deCalc.reserve(2, numReplicates * 2);

    for (int replicate = 1; replicate <= numReplicates; replicate++) {
        final String suffix = "-" + replicate;
        deCalc.associateSampleToGroup("A" + suffix, "A");
        deCalc.associateSampleToGroup("B" + suffix, "B");
    }

    /*
     * The two elements encode the table below (reference values from
     * http://www.langsrud.com/fisher.htm):
     *
     *   TABLE = [ 10 , 20 , 30 , 40 ]
     *   Left   : p-value = 0.2533310713617698
     *   Right  : p-value = 0.8676419647894328
     *   2-Tail : p-value = 0.5044757698516504
     */
    final String[] samples = { "A-1", "A-2", "B-1", "B-2" };
    final int[] countsForId1 = { 7, 3, 15, 5 };    // A: 7+3 = 10, B: 15+5 = 20
    final int[] countsForId2 = { 15, 15, 20, 20 }; // A: 15+15 = 30, B: 20+20 = 40
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-1", countsForId1[s]);
    }
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-2", countsForId2[s]);
    }

    final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1");
    final DifferentialExpressionResults results = new DifferentialExpressionResults();

    final FisherExactRCalculator fisher = new FisherExactRCalculator(results);
    if (!fisher.installed()) {
        // Nothing to verify when the calculator reports that it is not installed.
        return;
    }

    final NormalizationMethod normalizationMethod = new AlignedCountNormalization();
    fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B");
    final double twoTailExpected = 0.5044757698516504;
    assertEquals("fisher test equal expected result", twoTailExpected,
            results.getStatistic(info, fisher.statisticIds.get(0)), 0.001);

    // Second scenario: group B has a tenth of group A's total but half of its count.
    final int totalCountInA = 1700;
    final int totalCountInB = 170;
    final int sumCountInA = 90;
    final int sumCountInB = 45;

    final int sumCountNotInA = totalCountInA - sumCountInA;
    final int sumCountNotInB = totalCountInB - sumCountInB;

    final FisherExact.Result result = FisherExact.fexact(sumCountInA, sumCountNotInA, sumCountInB,
            sumCountNotInB);
    final double pValue = result.getPValue();

    final int grandTotal = totalCountInA + totalCountInB;
    final int combinedCount = sumCountInA + sumCountInB;
    final double proportionTotalA = divide(totalCountInA, grandTotal);
    final double proportionTotalB = divide(totalCountInB, grandTotal);
    final double nGroups = 2;
    final double expectedInA = divide(combinedCount, nGroups) * proportionTotalA * nGroups;
    final double expectedInB = divide(combinedCount, nGroups) * proportionTotalB * nGroups;
    final double[] expected = { expectedInA, expectedInB };
    final long[] observed = { sumCountInA, sumCountInB };

    final ChiSquareTest chisquare = new ChiSquareTestImpl();
    final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed));

    // With counts this large the two tests are expected to agree on a very small p-value.
    assertTrue("pValue: " + chiPValue, chiPValue < 0.001);
    assertTrue("pValue: " + pValue, pValue < 0.001);
}

From source file:edu.cornell.med.icb.goby.stats.TestStatistics.java

/**
 * Checks {@code ChiSquareTestCalculator} against a known p-value for two groups, then checks
 * that a direct chi-square test on clearly mismatched expected/observed counts gives a p-value
 * below 0.001.
 */
@Test
public void testChiSquare() throws MathException {
    final int numReplicates = 2;
    final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator();
    deCalc.defineElement("id-1");
    deCalc.defineElement("id-2");
    deCalc.defineGroup("A");
    deCalc.defineGroup("B");
    deCalc.reserve(2, numReplicates * 2);

    for (int replicate = 1; replicate <= numReplicates; replicate++) {
        final String suffix = "-" + replicate;
        deCalc.associateSampleToGroup("A" + suffix, "A");
        deCalc.associateSampleToGroup("B" + suffix, "B");
    }

    final String[] samples = { "A-1", "A-2", "B-1", "B-2" };
    final int[] countsForId1 = { 7, 3, 15, 5 };    // A: 7+3 = 10, B: 15+5 = 20
    final int[] countsForId2 = { 15, 15, 20, 20 }; // A: 15+15 = 30, B: 20+20 = 40
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-1", countsForId1[s]);
    }
    for (int s = 0; s < samples.length; s++) {
        deCalc.observe(samples[s], "id-2", countsForId2[s]);
    }

    final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1");
    final DifferentialExpressionResults results = new DifferentialExpressionResults();

    final ChiSquareTestCalculator calc = new ChiSquareTestCalculator(results);
    final NormalizationMethod normalizationMethod = new AlignedCountNormalization();
    calc.evaluate(deCalc, normalizationMethod, results, info, "A", "B");

    final double expectedPValue = 0.456056540250256;
    assertEquals("chi square test equal expected result", expectedPValue,
            results.getStatistic(info, calc.statisticIds.get(0)), 0.001);

    // Direct use of the test: all observations fall in the second cell.
    final double[] expected = { 30, 12 };
    final long[] observed = { 0, 100 };
    final ChiSquareTest chisquare = new ChiSquareTestImpl();
    final double chiPValue = chisquare.chiSquareTest(expected, observed);

    assertTrue("pValue: " + chiPValue, chiPValue < 0.001);
}

From source file:org.broadinstitute.gatk.engine.recalibration.RecalDatumNode.java

/**
 * Computes the phred-scaled p-value of a chi^2 test for independence among the immediate
 * subnodes of this node.
 *
 * <p>Each subnode contributes one row of counts: its rounded number of mismatches plus one, and
 * its number of observations plus two. The p-value is floored at {@code SMALLEST_CHI2_PVALUE}
 * before being converted to {@code -10 * log10(p)}.
 *
 * @return the phred-scaled p-value for the chi2 penalty, or 0.0 when this node is a leaf, is
 *         free to merge, or has exactly one subnode
 */
private double calcPenalty() {
    // Leaves, free merges and single-subnode nodes carry no penalty.
    if (isLeaf() || freeToMerge() || subnodes.size() == 1) {
        return 0.0;
    }

    final long[][] counts = new long[subnodes.size()][2];
    int row = 0;
    for (final RecalDatumNode<T> child : subnodes) {
        // the added pseudo-counts help avoid all-zero rows, which would lead to NaN
        final long[] cells = counts[row];
        cells[0] = Math.round(child.getRecalDatum().getNumMismatches()) + 1L;
        cells[1] = child.getRecalDatum().getNumObservations() + 2L;
        row++;
    }

    final double chi2PValue;
    try {
        chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts);
    } catch (MathException e) {
        throw new ReviewedGATKException("Failed in calculating chi2 value", e);
    }

    final double flooredPValue = Math.max(chi2PValue, SMALLEST_CHI2_PVALUE);
    final double penalty = -10.0 * Math.log10(flooredPValue);

    // fail early if the penalty is not a usable number
    if (Double.isNaN(penalty) || Double.isInfinite(penalty)) {
        throw new ReviewedGATKException("chi2 value is " + chi2PValue + " at " + getRecalDatum());
    }
    return penalty;
}

From source file:org.broadinstitute.sting.utils.recalibration.RecalDatumNode.java

/**
 * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node.
 *
 * The chi^2 value indicates the degree of independence of the implied error rates among the
 * immediate subnodes/*from  w  w  w.  ja va  2 s.  c o m*/
 *
 * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated
 */
private double calcPenalty() {
    if (isLeaf() || freeToMerge())
        return 0.0;
    else if (subnodes.size() == 1)
        // only one value, so its free to merge away
        return 0.0;
    else {
        final long[][] counts = new long[subnodes.size()][2];

        int i = 0;
        for (final RecalDatumNode<T> subnode : subnodes) {
            // use the yates correction to help avoid all zeros => NaN
            counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L;
            counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L;
            i++;
        }

        try {
            final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts);
            final double penalty = -10.0 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE));

            // make sure things are reasonable and fail early if not
            if (Double.isInfinite(penalty) || Double.isNaN(penalty))
                throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum());

            return penalty;
        } catch (MathException e) {
            throw new ReviewedStingException("Failed in calculating chi2 value", e);
        }
    }
}

From source file:org.rascalmpl.library.analysis.statistics.Inferences.java

/**
 * Returns the chi-square statistic for the given data as a real value.
 *
 * @param dataValues input handed to {@code makeChi}; presumably this fills the
 *                   {@code expected} and {@code observed} fields (confirm against makeChi)
 * @return the statistic computed from {@code expected} and {@code observed}
 */
public IValue chiSquare(IList dataValues) {
    makeChi(dataValues);
    final ChiSquareTestImpl test = new ChiSquareTestImpl();
    final double statistic = test.chiSquare(expected, observed);
    return values.real(statistic);
}

From source file:org.rascalmpl.library.analysis.statistics.Inferences.java

/**
 * Returns the p-value of a chi-square test for the given data as a real value.
 *
 * <p>Both {@code IllegalArgumentException} and {@code MathException} raised while computing or
 * wrapping the result are reported as an illegal-argument error on {@code dataValues}.
 *
 * @param dataValues input handed to {@code makeChi}; presumably this fills the
 *                   {@code expected} and {@code observed} fields (confirm against makeChi)
 * @return the p-value computed from {@code expected} and {@code observed}
 */
public IValue chiSquareTest(IList dataValues) {
    makeChi(dataValues);
    try {
        final ChiSquareTestImpl test = new ChiSquareTestImpl();
        final double pValue = test.chiSquareTest(expected, observed);
        // Wrapping stays inside the try so its failures are translated the same way.
        return values.real(pValue);
    } catch (IllegalArgumentException badInput) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, badInput.getMessage());
    } catch (MathException mathFailure) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, mathFailure.getMessage());
    }
}

From source file:org.rascalmpl.library.analysis.statistics.Inferences.java

/**
 * Runs a chi-square test for the given data at significance level {@code alpha} and returns the
 * boolean outcome.
 *
 * <p>Both {@code IllegalArgumentException} and {@code MathException} raised while computing or
 * wrapping the result are reported as an illegal-argument error on {@code dataValues}.
 *
 * @param dataValues input handed to {@code makeChi}; presumably this fills the
 *                   {@code expected} and {@code observed} fields (confirm against makeChi)
 * @param alpha      significance level passed to the test as a double
 * @return the boolean result of the test for {@code expected}, {@code observed} and {@code alpha}
 */
public IValue chiSquareTest(IList dataValues, IReal alpha) {
    makeChi(dataValues);
    try {
        final ChiSquareTestImpl test = new ChiSquareTestImpl();
        final boolean outcome = test.chiSquareTest(expected, observed, alpha.doubleValue());
        // Wrapping stays inside the try so its failures are translated the same way.
        return values.bool(outcome);
    } catch (IllegalArgumentException badInput) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, badInput.getMessage());
    } catch (MathException mathFailure) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null, mathFailure.getMessage());
    }
}