Example usage for org.apache.commons.math3.stat.regression SimpleRegression getR

List of usage examples for org.apache.commons.math3.stat.regression SimpleRegression getR

Introduction

This page collects usage examples for the getR method of org.apache.commons.math3.stat.regression.SimpleRegression.

Prototype

public double getR() 

Source Link

Document

Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">Pearson's product-moment correlation coefficient</a>, usually denoted r.

Usage

From source file:nl.systemsgenetics.functionenrichmentoftransqtls.CorrelateSumChi2ToPathways.java

/**
 * Correlates every trait column of the sum-chi2 matrix with every selected pathway column and
 * saves the resulting signed z-scores as a pathway-by-trait matrix.
 *
 * <p>Only genes (rows) present in both input matrices are used, and only the pathway columns
 * listed in the significant-terms file are tested. For each trait/pathway pair the Pearson
 * correlation {@code r} is converted to a t statistic, its lower-tail probability is looked up in
 * a Student-t distribution, and that probability is mapped to a z-score carrying the sign of
 * {@code t}.
 *
 * @param args the command line arguments, in order: pathway matrix file, significant terms file,
 *             sum-chi2 matrix file, output file for the z-score matrix
 * @throws IOException declared for the file loading and saving steps
 */
public static void main(String[] args) throws IOException {

    final File pathwayFile = new File(args[0]);
    final File termsFile = new File(args[1]);
    final File sumChi2File = new File(args[2]);
    final File outputFile = new File(args[3]);

    System.out.println("Pathway file: " + pathwayFile.getPath());
    System.out.println("Pathway significant terms file: " + termsFile.getPath());
    System.out.println("SumChi2 file: " + sumChi2File.getPath());
    System.out.println("Output file: " + outputFile.getPath());

    final LinkedHashSet<String> termsToTest = loadSignificantTerms(termsFile);

    DoubleMatrixDataset<String, String> pathways = DoubleMatrixDataset
            .loadDoubleData(pathwayFile.getPath());
    DoubleMatrixDataset<String, String> sumChi2 = DoubleMatrixDataset
            .loadDoubleData(sumChi2File.getPath());

    // Keep the pathway matrix's row order while retaining only genes known to both datasets.
    final LinkedHashSet<String> sharedGenes = new LinkedHashSet<String>();
    for (String gene : pathways.getHashRows().keySet()) {
        if (!sumChi2.containsRow(gene)) {
            continue;
        }
        sharedGenes.add(gene);
    }

    pathways = pathways.viewColSelection(termsToTest).viewRowSelection(sharedGenes);

    // Result layout: one row per tested pathway, one column per trait.
    final DoubleMatrixDataset<String, String> zScores = new DoubleMatrixDataset<String, String>(
            pathways.getHashCols(), sumChi2.getHashCols());
    sumChi2 = sumChi2.viewRowSelection(sharedGenes);

    System.out.println("Genes in both datasets: " + sharedGenes.size());
    System.out.println("Pathways to test: " + pathways.columns());

    final SimpleRegression regression = new SimpleRegression();
    final DoubleRandomEngine randomEngine = new DRand();
    // NOTE(review): degrees of freedom are rows / 2 - 2 using integer division - confirm the
    // halving (and its truncation for odd row counts) is intended.
    final StudentT tDistribution = new StudentT(sumChi2.rows() / 2 - 2, randomEngine);

    // Tail probabilities are floored at this value before being passed to the inverse normal.
    final double pValueFloor = 2.0E-323;

    for (String trait : sumChi2.getColObjects()) {

        System.out.println("Trait: " + trait);

        final DoubleMatrix1D traitValues = sumChi2.getCol(trait);
        // Same integer-division expression as used for the t distribution above.
        final double degreesOfFreedom = (double) (traitValues.size() / 2 - 2);

        for (String pathway : pathways.getColObjects()) {

            final DoubleMatrix1D pathwayValues = pathways.getCol(pathway);

            // The regression object is reused; reset it before feeding the next pair of columns.
            regression.clear();
            for (int row = 0; row < traitValues.size(); row++) {
                regression.addData(traitValues.get(row), pathwayValues.get(row));
            }

            final double r = regression.getR();
            final double t = r / Math.sqrt((1 - r * r) / degreesOfFreedom);

            // Always evaluate the lower tail (at t when negative, at -t otherwise) and restore
            // the sign afterwards.
            final boolean negative = t < 0;
            double tailProbability = tDistribution.cdf(negative ? t : -t);
            if (tailProbability < pValueFloor) {
                tailProbability = pValueFloor;
            }
            final double quantile = Probability.normalInverse(tailProbability);

            zScores.setElement(pathway, trait, negative ? quantile : -quantile);
        }
    }

    zScores.save(outputFile);

}

From source file:org.apache.solr.client.solrj.io.eval.RegressionEvaluator.java

/**
 * Fits a simple linear regression over two lists of numbers and returns its summary statistics.
 *
 * <p>Values are paired by index, with {@code first} supplying the x values and {@code second}
 * the y values. Only the first {@code first.size()} entries of {@code second} are used, so the
 * second list may be longer than the first but not shorter.
 *
 * @param first  list of numbers used as the x values; must not be null
 * @param second list of numbers used as the y values; must not be null and must hold at least
 *               as many values as {@code first}
 * @return a {@code RegressionTuple} wrapping the fitted regression and a map of its statistics
 *         (slope, intercept, R, N, RSquare, regressionSumSquares, slopeConfidenceInterval,
 *         interceptStdErr, totalSumSquares, significance, meanSquareError)
 * @throws IOException if either argument is null or not a list, or if the first list has more
 *                     values than the second
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (null == first) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - null found for the first value", toExpression(constructingFactory)));
    }
    if (null == second) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - null found for the second value", toExpression(constructingFactory)));
    }
    if (!(first instanceof List<?>)) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for the first value, expecting a list of numbers",
                toExpression(constructingFactory), first.getClass().getSimpleName()));
    }
    if (!(second instanceof List<?>)) {
        // Report the type of the offending argument (second), not of the first one.
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for the second value, expecting a list of numbers",
                toExpression(constructingFactory), second.getClass().getSimpleName()));
    }

    List<?> l1 = (List<?>) first;
    List<?> l2 = (List<?>) second;

    if (l2.size() < l1.size()) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - first list (%d) has more values than the second list (%d)",
                toExpression(constructingFactory), l1.size(), l2.size()));
    }

    SimpleRegression regression = new SimpleRegression();
    for (int idx = 0; idx < l1.size(); ++idx) {
        // Cast to Number rather than BigDecimal so any numeric element type is accepted.
        regression.addData(((Number) l1.get(idx)).doubleValue(), ((Number) l2.get(idx)).doubleValue());
    }

    Map<String, Number> map = new HashMap<>();
    map.put("slope", regression.getSlope());
    map.put("intercept", regression.getIntercept());
    map.put("R", regression.getR());
    map.put("N", regression.getN());
    map.put("RSquare", regression.getRSquare());
    map.put("regressionSumSquares", regression.getRegressionSumSquares());
    map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval());
    map.put("interceptStdErr", regression.getInterceptStdErr());
    map.put("totalSumSquares", regression.getTotalSumSquares());
    map.put("significance", regression.getSignificance());
    map.put("meanSquareError", regression.getMeanSquareError());

    return new RegressionTuple(regression, map);
}

From source file:org.apache.solr.client.solrj.io.stream.RegressionEvaluator.java

/**
 * Evaluates the two column sub-evaluators against the given tuple and fits a simple linear
 * regression over the resulting number lists.
 *
 * <p>Values are paired by index, with the first column supplying the x values and the second
 * column the y values. Entries of the second column beyond the length of the first are ignored.
 *
 * @param tuple the tuple passed to both sub-evaluators
 * @return a {@code RegressionTuple} wrapping the fitted regression and a map of its statistics
 *         (slope, intercept, R, N, regressionSumSquares, slopeConfidenceInterval,
 *         interceptStdErr, totalSumSquares, significance, meanSquareError)
 * @throws IOException if there are not exactly two sub-evaluators, or if the first column has
 *                     more values than the second
 */
public Tuple evaluate(Tuple tuple) throws IOException {

    if (subEvaluators.size() != 2) {
        throw new IOException("Regress expects 2 columns as parameters");
    }

    // Unchecked: both sub-evaluators are expected to produce lists of numbers.
    @SuppressWarnings("unchecked")
    final List<Number> xValues = (List<Number>) subEvaluators.get(0).evaluate(tuple);
    @SuppressWarnings("unchecked")
    final List<Number> yValues = (List<Number>) subEvaluators.get(1).evaluate(tuple);

    // Every x needs a matching y; fail with a descriptive error instead of an index exception.
    if (yValues.size() < xValues.size()) {
        throw new IOException("Regress expects the first column (" + xValues.size()
                + " values) to have no more values than the second column (" + yValues.size()
                + " values)");
    }

    final SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < xValues.size(); i++) {
        regression.addData(xValues.get(i).doubleValue(), yValues.get(i).doubleValue());
    }

    final Map<String, Number> map = new HashMap<String, Number>();
    map.put("slope", regression.getSlope());
    map.put("intercept", regression.getIntercept());
    map.put("R", regression.getR());
    map.put("N", regression.getN());
    map.put("regressionSumSquares", regression.getRegressionSumSquares());
    map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval());
    map.put("interceptStdErr", regression.getInterceptStdErr());
    map.put("totalSumSquares", regression.getTotalSumSquares());
    map.put("significance", regression.getSignificance());
    map.put("meanSquareError", regression.getMeanSquareError());
    return new RegressionTuple(regression, map);
}

From source file:org.openmainframe.ade.core.statistics.BasicStatistics.java

/**
 * Computes the correlation coefficient between two vectors of equal length by feeding their
 * paired elements into a {@code SimpleRegression} and returning its {@code getR()} value.
 *
 * @param x the first vector, used as the x values
 * @param y the second vector, used as the y values
 * @return the value of {@code SimpleRegression.getR()} for the paired data
 * @throws AdeCoreIllegalArgumentException if the vectors differ in length or hold fewer than
 *                                         two elements
 */
public static double correlation(IDoubleVector x, IDoubleVector y) {
    final int n = x.getLength();
    if (y.getLength() != n) {
        throw new AdeCoreIllegalArgumentException("Mismatching lengths");
    }
    if (n <= 1) {
        throw new AdeCoreIllegalArgumentException("Vectors must have length >=2");
    }

    final SimpleRegression fit = new SimpleRegression();
    int idx = 0;
    while (idx < n) {
        fit.addData(x.get(idx), y.get(idx));
        idx++;
    }
    return fit.getR();
}