List of usage examples for org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression#calculateRSquared()
public double calculateRSquared() throws MathIllegalArgumentException
R^2 = 1 - SSR / SSTO, where SSR is the sum of squared residuals (see #calculateResidualSumOfSquares()) and SSTO is the total sum of squares (see #calculateTotalSumOfSquares()).
From source file:modelcreation.ModelCreation.java
public static void printRegressionStatistics(OLSMultipleLinearRegression regression) { System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared()); System.out.println("R^2 = " + regression.calculateRSquared()); System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares()); System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares()); double[] standardErrors = regression.estimateRegressionParametersStandardErrors(); double[] residuals = regression.estimateResiduals(); double[] parameters = regression.estimateRegressionParameters(); int residualdf = residuals.length - parameters.length; for (int i = 0; i < parameters.length; i++) { double coeff = parameters[i]; double tstat = parameters[i] / regression.estimateRegressionParametersStandardErrors()[i]; double pvalue = new TDistribution(residualdf).cumulativeProbability(-FastMath.abs(tstat)) * 2; System.out.println("Coefficient(" + i + ") : " + coeff); System.out.println("Standard Error(" + i + ") : " + standardErrors[i]); System.out.println("t-stats(" + i + ") : " + tstat); System.out.println("p-value(" + i + ") : " + pvalue); }//from ww w . j a v a 2s . c o m }
From source file:modelcreation.ModelCreation.java
public static double evaluateModel(OLSMultipleLinearRegression regression, double[][] subXTest, double[] subYTest) { System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared()); System.out.println("R^2 = " + regression.calculateRSquared()); System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares()); System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares()); double[] parameters = regression.estimateRegressionParameters(); double[] predictions = new double[subYTest.length]; for (int i = 0; i < subYTest.length; i++) { double prediction = parameters[0] + (parameters[1] * subXTest[i][0]) + (parameters[2] * subXTest[i][1]); predictions[i] = prediction;//from w w w.ja va 2s.c om } double meanSquaredError = calculateMeanSquaredError(subYTest, predictions); System.out.println("Mean Squared Error = " + meanSquaredError); return meanSquaredError; }
From source file:lu.lippmann.cdb.lab.regression.Regression.java
/** * Constructor.//from www.j a v a 2 s .c o m */ public Regression(final Instances ds, final int idx) throws Exception { this.newds = WekaDataProcessingUtil.buildDataSetSortedByAttribute(ds, idx); //System.out.println("Regression -> "+newds.toSummaryString()); final int N = this.newds.numInstances(); final int M = this.newds.numAttributes(); final double[][] x = new double[N][M - 1]; final double[] y = new double[N]; for (int i = 0; i < N; i++) { y[i] = this.newds.instance(i).value(0); } for (int i = 0; i < N; i++) { for (int j = 1; j < M; j++) { x[i][j - 1] = this.newds.instance(i).value(j); } } final OLSMultipleLinearRegression reg = new OLSMultipleLinearRegression(); //reg.setNoIntercept(true); reg.newSampleData(y, x); this.r2 = reg.calculateRSquared(); //this.r2=-1d; this.coe = reg.estimateRegressionParameters(); this.estims = calculateEstimations(x, y, coe); }
From source file:hms.hwestra.interactionrebuttal2.InteractionRebuttal2.java
private void iterativelyIncreaseNumberOfPCsInCellCountPredictionModel(String pcFile, String cellcountFile, String pheno) throws IOException { DoubleMatrixDataset<String, String> pcs = new DoubleMatrixDataset<String, String>(pcFile); // samples on rows, pcs on cols? DoubleMatrixDataset<String, String> cellcounts = new DoubleMatrixDataset<String, String>(cellcountFile); // samples on rows, celltype on cols Integer phenoId = cellcounts.hashCols.get(pheno); boolean[] includeRow = new boolean[pcs.nrRows]; int shared = 0; for (int i = 0; i < pcs.nrRows; i++) { String sample = pcs.rowObjects.get(i); if (cellcounts.hashRows.containsKey(sample)) { shared++;/*w w w. j av a2 s . c o m*/ includeRow[i] = true; } } // order the samples of the cell count in the order of the pcs double[] olsY = new double[shared]; //Ordinary least squares: cell count int ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { String sample = pcs.rowObjects.get(i); Integer sampleId = cellcounts.hashRows.get(sample); if (sampleId != null) { olsY[ctr] = cellcounts.rawData[sampleId][phenoId]; ctr++; } } org.apache.commons.math3.distribution.FDistribution fDist = null; cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null; cern.jet.random.tdouble.StudentT tDistColt = null; OLSMultipleLinearRegression previousFullModel = null; for (int col = 0; col < pcs.nrCols; col++) { OLSMultipleLinearRegression regressionFullModel = new OLSMultipleLinearRegression(); OLSMultipleLinearRegression regressionOrigModel = new OLSMultipleLinearRegression(); int nrPcs = col + 1; double[][] olsX = new double[shared][nrPcs]; double[][] olsXN = new double[shared][1]; for (int inc = 0; inc < col + 1; inc++) { ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { if (includeRow[i]) { olsX[ctr][inc] = pcs.rawData[i][inc]; ctr++; } } } double[] pc = new double[shared]; ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { if (includeRow[i]) { pc[ctr] = pcs.rawData[i][col]; olsXN[ctr][0] = pcs.rawData[i][0]; ctr++; } } double corr = 
JSci.maths.ArrayMath.correlation(pc, olsY); Correlation.correlationToZScore(olsY.length); double z = Correlation.convertCorrelationToZScore(olsY.length, corr); double p = ZScores.zToP(z); regressionFullModel.newSampleData(olsY, olsX); regressionOrigModel.newSampleData(olsY, olsXN); double rsquaredadj = regressionFullModel.calculateAdjustedRSquared(); double rsquared = regressionFullModel.calculateRSquared(); double rse = regressionOrigModel.estimateRegressionStandardError(); double rsefull = regressionFullModel.estimateRegressionStandardError(); double rss1 = regressionOrigModel.calculateResidualSumOfSquares(); double rss2 = regressionFullModel.calculateResidualSumOfSquares(); double F = ((rss1 - rss2) / (3 - 2)) / (rss2 / (olsY.length - 3)); int numParams1 = 1; // regressor + intercept int numParams2 = nrPcs; // regressors + intercept if (nrPcs > 1) { double F2 = ((rss1 - rss2) / (numParams2 - numParams1)) / (rss2 / (olsY.length - numParams2)); double rss3 = previousFullModel.calculateResidualSumOfSquares(); int numParams3 = nrPcs - 1; double FPrevious = ((rss3 - rss2) / (numParams2 - numParams3)) / (rss2 / (olsY.length - numParams2)); // pf(f, m1$df.residual-m2$df.residual, m2$df.residual, lower.tail = FALSE) // (double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom) fDist = new org.apache.commons.math3.distribution.FDistribution((numParams2 - numParams1), olsY.length - numParams2); FDistribution fDistPrev = new FDistribution((numParams2 - numParams3), olsY.length - numParams2); double anovaFTestP = -1; double anovaFTestP2 = -1; try { anovaFTestP = 1 - fDist.cumulativeProbability(F2); anovaFTestP2 = 1 - fDist.cumulativeProbability(FPrevious); if (anovaFTestP < 1E-160) { anovaFTestP = 1E-16; } if (anovaFTestP2 < 1E-160) { anovaFTestP2 = 1E-16; } } catch (Exception err) { } System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams2 + "\t" + F2 + "\t" + FPrevious + "\t" + anovaFTestP + "\t" + anovaFTestP2); } 
else { System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams1); } previousFullModel = regressionFullModel; } ArrayList<String> colNames = new ArrayList<String>(); colNames.add("CellCount"); double[][] data = new double[shared][pcs.nrCols + 1]; for (int i = 0; i < olsY.length; i++) { data[i][0] = olsY[i]; } ArrayList<String> rowNames = new ArrayList<String>(); for (int col = 0; col < pcs.nrCols; col++) { ctr = 0; colNames.add(pcs.colObjects.get(col)); for (int row = 0; row < pcs.nrRows; row++) { if (includeRow[row]) { data[ctr][col + 1] = pcs.rawData[row][col]; ctr++; } } } for (int row = 0; row < pcs.nrRows; row++) { if (includeRow[row]) { rowNames.add("Sample_" + pcs.rowObjects.get(row)); } } DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>(); dsout.rawData = data; dsout.rowObjects = rowNames; dsout.colObjects = colNames; dsout.recalculateHashMaps(); dsout.save(pcFile + "-mergedWCellCount.txt"); }
From source file:org.apache.solr.client.solrj.io.eval.OLSRegressionEvaluator.java
@Override public Object doWork(Object... values) throws IOException { Matrix observations = null;/*from ww w .ja va 2 s.com*/ List<Number> outcomes = null; if (values[0] instanceof Matrix) { observations = (Matrix) values[0]; } else { throw new IOException("The first parameter for olsRegress should be the observation matrix."); } if (values[1] instanceof List) { outcomes = (List) values[1]; } else { throw new IOException("The second parameter for olsRegress should be outcome array. "); } double[][] observationData = observations.getData(); double[] outcomeData = new double[outcomes.size()]; for (int i = 0; i < outcomeData.length; i++) { outcomeData[i] = outcomes.get(i).doubleValue(); } OLSMultipleLinearRegression multipleLinearRegression = (OLSMultipleLinearRegression) regress( observationData, outcomeData); Map map = new HashMap(); map.put("regressandVariance", multipleLinearRegression.estimateRegressandVariance()); map.put("regressionParameters", list(multipleLinearRegression.estimateRegressionParameters())); map.put("RSquared", multipleLinearRegression.calculateRSquared()); map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared()); map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares()); try { map.put("regressionParametersStandardErrors", list(multipleLinearRegression.estimateRegressionParametersStandardErrors())); map.put("regressionParametersVariance", new Matrix(multipleLinearRegression.estimateRegressionParametersVariance())); } catch (Exception e) { //Exception is thrown if the matrix is singular } return new MultipleRegressionTuple(multipleLinearRegression, map); }
From source file:org.easotope.shared.math.MultiLineCommonSlopeRegression.java
/**
 * Fits the regression once and caches slope, per-group intercepts and R^2.
 *
 * <p>Does nothing when {@code intercepts} is already set or fewer than two y values are
 * present. The design matrix holds x in column 0 and a 1.0 marker in column {@code c} for
 * points whose group maps to column {@code c != 0}; points of the group mapped to column 0
 * get no marker. If estimating the parameters fails, the failure is logged and the cached
 * values are left untouched.
 */
private void calculate() {
    if (intercepts != null || yList.size() < 2) {
        return;
    }

    double[] responses = new double[yList.size()];
    for (int row = 0; row < yList.size(); row++) {
        responses[row] = yList.get(row);
    }

    double[][] design = new double[xList.size()][groups.size()];

    // Fix an ordering of the groups and build the reverse lookup group -> column.
    Vector<Integer> columnToGroup = new Vector<Integer>(groups);
    HashMap<Integer, Integer> groupToColumn = new HashMap<Integer, Integer>();
    for (int col = 0; col < columnToGroup.size(); col++) {
        groupToColumn.put(columnToGroup.get(col), col);
    }

    for (int row = 0; row < xList.size(); row++) {
        int group = groupList.get(row);
        int groupColumn = groupToColumn.get(group);
        double x = xList.get(row);

        design[row][0] = x;
        if (groupColumn != 0) {
            design[row][groupColumn] = 1.0d;
        }
    }

    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(responses, design);

    double[] beta;
    try {
        beta = regression.estimateRegressionParameters();
    } catch (Exception e) {
        Log.getInstance().log(Level.INFO, this, "regression failed", e);
        return;
    }

    // beta[0] is stored as the first group's intercept, beta[1] as the common slope, and
    // beta[i] for i >= 2 as the offset added to beta[0] for the group in column i - 1.
    slope = beta[1];

    intercepts = new HashMap<Integer, Double>();
    intercepts.put(columnToGroup.get(0), beta[0]);
    for (int k = 2; k < beta.length; k++) {
        intercepts.put(columnToGroup.get(k - 1), beta[0] + beta[k]);
    }

    r2 = regression.calculateRSquared();
}
From source file:org.meteoinfo.math.fitting.OLSTrendLine.java
@Override public void setValues(Array y, Array x) { if (x.getSize() != y.getSize()) { throw new IllegalArgumentException(String .format("The numbers of y and x values must be equal (%d != %d)", y.getSize(), x.getSize())); }/*from w w w. j a va2s . c o m*/ double[][] xData = new double[(int) x.getSize()][]; for (int i = 0; i < x.getSize(); i++) { // the implementation determines how to produce a vector of predictors from a single x xData[i] = xVector(x.getDouble(i)); } double[] yy = new double[(int) y.getSize()]; if (logY()) { // in some models we are predicting ln y, so we replace each y with ln y for (int i = 0; i < yy.length; i++) { if (i < x.getSize()) yy[i] = Math.log(y.getDouble(i)); else yy[i] = y.getDouble(i); } } else { for (int i = 0; i < yy.length; i++) { yy[i] = y.getDouble(i); } } // double[] yy = (double[])y.copyTo1DJavaArray(); // if(logY()) { // in some models we are predicting ln y, so we replace each y with ln y // yy = Arrays.copyOf(yy, yy.length); // user might not be finished with the array we were given // for (int i = 0; i < x.getSize(); i++) { // yy[i] = Math.log(yy[i]); // } // } OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.setNoIntercept(true); // let the implementation include a constant in xVector if desired ols.newSampleData(yy, xData); // provide the data to the model coef = MatrixUtils.createColumnRealMatrix(ols.estimateRegressionParameters()); // get our coefs rs = ols.calculateRSquared(); }
From source file:org.opentestsystem.airose.regression.ols.OLSRegressionModeller.java
protected AbstractModel customProcessData(double[] y, double[][] x) { OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(y, x);// w ww . java2s.c om double[] beta = regression.estimateRegressionParameters(); double[] residuals = regression.estimateResiduals(); double[][] parametersVariance = regression.estimateRegressionParametersVariance(); double regressandVariance = regression.estimateRegressandVariance(); double rSquared = regression.calculateRSquared(); double sigma = regression.estimateRegressionStandardError(); OLSModel olsModel = new OLSModel(beta, residuals, parametersVariance, regressandVariance, rSquared, sigma, y, x); return olsModel; }