Usage examples for the getR() method of org.apache.commons.math3.stat.regression.SimpleRegression
public double getR()
From source file:nl.systemsgenetics.functionenrichmentoftransqtls.CorrelateSumChi2ToPathways.java
/** * @param args the command line arguments *//*from w ww . jav a2 s . c o m*/ public static void main(String[] args) throws IOException { final File pathwayMatrixFile = new File(args[0]); final File significantTermsFile = new File(args[1]); final File sumChi2MatrixFile = new File(args[2]); final File transQtlEnrichmentsMatrixFile = new File(args[3]); System.out.println("Pathway file: " + pathwayMatrixFile.getPath()); System.out.println("Pathway significant terms file: " + significantTermsFile.getPath()); System.out.println("SumChi2 file: " + sumChi2MatrixFile.getPath()); System.out.println("Output file: " + transQtlEnrichmentsMatrixFile.getPath()); LinkedHashSet<String> significantTerms = loadSignificantTerms(significantTermsFile); DoubleMatrixDataset<String, String> pathwayMatrix = DoubleMatrixDataset .loadDoubleData(pathwayMatrixFile.getPath()); DoubleMatrixDataset<String, String> sumChi2Matrix = DoubleMatrixDataset .loadDoubleData(sumChi2MatrixFile.getPath()); LinkedHashSet<String> genesInBoth = new LinkedHashSet<String>(); for (String gene : pathwayMatrix.getHashRows().keySet()) { if (sumChi2Matrix.containsRow(gene)) { genesInBoth.add(gene); } } pathwayMatrix = pathwayMatrix.viewColSelection(significantTerms); pathwayMatrix = pathwayMatrix.viewRowSelection(genesInBoth); DoubleMatrixDataset<String, String> transQtlEnrichmentsMatrix = new DoubleMatrixDataset<String, String>( pathwayMatrix.getHashCols(), sumChi2Matrix.getHashCols()); sumChi2Matrix = sumChi2Matrix.viewRowSelection(genesInBoth); System.out.println("Genes in both datasets: " + genesInBoth.size()); System.out.println("Pathways to test: " + pathwayMatrix.columns()); final SimpleRegression regression = new SimpleRegression(); final DoubleRandomEngine randomEngine = new DRand(); StudentT tDistColt = new StudentT(sumChi2Matrix.rows() / 2 - 2, randomEngine); for (String trait : sumChi2Matrix.getColObjects()) { System.out.println("Trait: " + trait); DoubleMatrix1D traitSumChi2 = 
sumChi2Matrix.getCol(trait); for (String pathway : pathwayMatrix.getColObjects()) { DoubleMatrix1D pathwayScores = pathwayMatrix.getCol(pathway); regression.clear(); for (int i = 0; i < traitSumChi2.size(); ++i) { //System.out.println(traitSumChi2.get(i) + " & " + pathwayScores.get(i)); regression.addData(traitSumChi2.get(i), pathwayScores.get(i)); } double r = regression.getR(); //System.out.println(trait + " " + pathway + " " + r); double t = r / (Math.sqrt((1 - r * r) / (double) (traitSumChi2.size() / 2 - 2))); double pValue; double zScore; if (t < 0) { pValue = tDistColt.cdf(t); if (pValue < 2.0E-323) { pValue = 2.0E-323; } zScore = Probability.normalInverse(pValue); } else { pValue = tDistColt.cdf(-t); if (pValue < 2.0E-323) { pValue = 2.0E-323; } zScore = -Probability.normalInverse(pValue); } pValue *= 2; transQtlEnrichmentsMatrix.setElement(pathway, trait, zScore); } } transQtlEnrichmentsMatrix.save(transQtlEnrichmentsMatrixFile); }
From source file:org.apache.solr.client.solrj.io.eval.RegressionEvaluator.java
@Override public Object doWork(Object first, Object second) throws IOException { if (null == first) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory))); }//from w ww. j a v a 2s . c o m if (null == second) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory))); } if (!(first instanceof List<?>)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a list of numbers", toExpression(constructingFactory), first.getClass().getSimpleName())); } if (!(second instanceof List<?>)) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a list of numbers", toExpression(constructingFactory), first.getClass().getSimpleName())); } List<?> l1 = (List<?>) first; List<?> l2 = (List<?>) second; if (l2.size() < l1.size()) { throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - first list (%d) has more values than the second list (%d)", toExpression(constructingFactory), l1.size(), l2.size())); } SimpleRegression regression = new SimpleRegression(); for (int idx = 0; idx < l1.size(); ++idx) { regression.addData(((BigDecimal) l1.get(idx)).doubleValue(), ((BigDecimal) l2.get(idx)).doubleValue()); } Map<String, Number> map = new HashMap<>(); map.put("slope", regression.getSlope()); map.put("intercept", regression.getIntercept()); map.put("R", regression.getR()); map.put("N", regression.getN()); map.put("RSquare", regression.getRSquare()); map.put("regressionSumSquares", regression.getRegressionSumSquares()); map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval()); map.put("interceptStdErr", regression.getInterceptStdErr()); map.put("totalSumSquares", regression.getTotalSumSquares()); map.put("significance", 
regression.getSignificance()); map.put("meanSquareError", regression.getMeanSquareError()); return new RegressionTuple(regression, map); }
From source file:org.apache.solr.client.solrj.io.stream.RegressionEvaluator.java
/**
 * Evaluates the two sub-evaluators against the tuple, fits a simple linear regression
 * with the first column as x and the second column as y, and returns the fit together
 * with its summary statistics.
 *
 * <p>Only the first {@code numbers1.size()} values of the second column are used; the
 * second column may be longer than the first but not shorter.
 *
 * @param tuple the tuple passed to both sub-evaluators
 * @return a {@code RegressionTuple} wrapping the regression and a map of its statistics
 * @throws IOException if there are not exactly two sub-evaluators, if either one does
 *                     not produce a list, or if the first column has more values than
 *                     the second
 */
public Tuple evaluate(Tuple tuple) throws IOException {
    if (subEvaluators.size() != 2) {
        throw new IOException("Regress expects 2 columns as parameters");
    }

    StreamEvaluator colEval1 = subEvaluators.get(0);
    StreamEvaluator colEval2 = subEvaluators.get(1);

    final Object first = colEval1.evaluate(tuple);
    final Object second = colEval2.evaluate(tuple);

    // instanceof is false for null, so this also rejects missing columns.
    if (!(first instanceof List<?>) || !(second instanceof List<?>)) {
        throw new IOException("Regress expects both parameters to evaluate to lists of numbers");
    }

    @SuppressWarnings("unchecked") // element type is verified lazily when doubleValue() is called
    final List<Number> numbers1 = (List<Number>) first;
    @SuppressWarnings("unchecked")
    final List<Number> numbers2 = (List<Number>) second;

    // Fail with a clear message instead of running off the end of the second column.
    if (numbers2.size() < numbers1.size()) {
        throw new IOException("Regress expects the first column (" + numbers1.size()
                + ") to have no more values than the second column (" + numbers2.size() + ")");
    }

    SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < numbers1.size(); i++) {
        regression.addData(numbers1.get(i).doubleValue(), numbers2.get(i).doubleValue());
    }

    // Raw Map kept on purpose: the RegressionTuple constructor signature is not visible here.
    Map map = new HashMap();
    map.put("slope", regression.getSlope());
    map.put("intercept", regression.getIntercept());
    map.put("R", regression.getR());
    map.put("N", regression.getN());
    map.put("regressionSumSquares", regression.getRegressionSumSquares());
    map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval());
    map.put("interceptStdErr", regression.getInterceptStdErr());
    map.put("totalSumSquares", regression.getTotalSumSquares());
    map.put("significance", regression.getSignificance());
    map.put("meanSquareError", regression.getMeanSquareError());

    return new RegressionTuple(regression, map);
}
From source file:org.openmainframe.ade.core.statistics.BasicStatistics.java
public static double correlation(IDoubleVector x, IDoubleVector y) { final int len = x.getLength(); if (len != y.getLength()) { throw new AdeCoreIllegalArgumentException("Mismatching lengths"); }//from w w w.ja v a 2s . c om if (len < 2) { throw new AdeCoreIllegalArgumentException("Vectors must have length >=2"); } final SimpleRegression regression = new SimpleRegression(); for (int i = 0; i < len; i++) { regression.addData(x.get(i), y.get(i)); } return regression.getR(); }