Example usage for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression calculateAdjustedRSquared

List of usage examples for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression calculateAdjustedRSquared

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression.calculateAdjustedRSquared().

Prototype

public double calculateAdjustedRSquared() throws MathIllegalArgumentException 

Source Link

Document

Returns the adjusted R-squared statistic, defined by the formula

 $$R^2_{adj} = 1 - \frac{SSR\,(n - 1)}{SSTO\,(n - p)}$$
where SSR is the sum of squared residuals (see #calculateResidualSumOfSquares()), SSTO is the total sum of squares (see #calculateTotalSumOfSquares()), n is the number of observations and p is the number of parameters estimated (including the intercept).

If the regression is estimated without an intercept term, what is returned is

  $$1 - (1 - R^2)\,\frac{n}{n - p}$$ where $R^2$ is the value returned by #calculateRSquared().

Usage

From source file:modelcreation.ModelCreation.java

/**
 * Prints goodness-of-fit statistics and a per-coefficient summary of a fitted
 * regression to standard output.
 *
 * <p>For every estimated coefficient the method prints the coefficient, its
 * standard error, the t-statistic (coefficient / standard error) and a
 * two-sided p-value taken from a Student t distribution whose degrees of
 * freedom are {@code residuals.length - parameters.length}.
 *
 * @param regression the regression whose statistics are printed; sample data
 *                   must already have been loaded into it
 */
public static void printRegressionStatistics(OLSMultipleLinearRegression regression) {
    System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared());
    System.out.println("R^2 = " + regression.calculateRSquared());
    System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares());
    System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares());

    double[] standardErrors = regression.estimateRegressionParametersStandardErrors();
    double[] residuals = regression.estimateResiduals();
    double[] parameters = regression.estimateRegressionParameters();

    int residualdf = residuals.length - parameters.length;
    // The degrees of freedom do not change per coefficient, so one distribution serves all of them.
    TDistribution tDistribution = new TDistribution(residualdf);
    for (int i = 0; i < parameters.length; i++) {
        double coeff = parameters[i];
        // Reuse the standard errors estimated above instead of re-estimating them on every iteration.
        double tstat = coeff / standardErrors[i];
        // Two-sided p-value: twice the lower-tail probability of -|t|.
        double pvalue = tDistribution.cumulativeProbability(-FastMath.abs(tstat)) * 2;

        System.out.println("Coefficient(" + i + ") : " + coeff);
        System.out.println("Standard Error(" + i + ") : " + standardErrors[i]);
        System.out.println("t-stats(" + i + ") : " + tstat);
        System.out.println("p-value(" + i + ") : " + pvalue);
    }
}

From source file:modelcreation.ModelCreation.java

/**
 * Prints the fit statistics of a regression, predicts the outcome for every
 * test row and returns the mean squared error of those predictions.
 *
 * <p>Each prediction is {@code parameters[0] + sum(parameters[j] * subXTest[i][j - 1])}
 * over all remaining coefficients, so the method works for any number of
 * regressors rather than exactly two. This assumes the regression was fitted
 * with an intercept term, so that {@code parameters[0]} is the intercept
 * (NOTE(review): confirm against the code that fits the model).
 *
 * @param regression the fitted regression supplying the coefficients
 * @param subXTest   test observations, one row per sample with one column per
 *                   regressor
 * @param subYTest   observed outcomes for the test samples; its length
 *                   determines how many rows of {@code subXTest} are used
 * @return the mean squared error between {@code subYTest} and the predictions
 */
public static double evaluateModel(OLSMultipleLinearRegression regression, double[][] subXTest,
        double[] subYTest) {
    System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared());
    System.out.println("R^2 = " + regression.calculateRSquared());
    System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares());
    System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares());

    double[] parameters = regression.estimateRegressionParameters();
    double[] predictions = new double[subYTest.length];

    for (int i = 0; i < subYTest.length; i++) {
        // Start from the first coefficient and add one term per regressor, left to right,
        // so the two-regressor case yields exactly the same floating-point result as before.
        double prediction = parameters[0];
        for (int j = 1; j < parameters.length; j++) {
            prediction += parameters[j] * subXTest[i][j - 1];
        }
        predictions[i] = prediction;
    }

    double meanSquaredError = calculateMeanSquaredError(subYTest, predictions);
    System.out.println("Mean Squared Error = " + meanSquaredError);
    return meanSquaredError;
}

From source file:hms.hwestra.interactionrebuttal2.InteractionRebuttal2.java

/**
 * Fits a series of OLS models that predict one cell-count phenotype from an
 * increasing number of principal components and prints a comparison line for
 * each model size.
 *
 * <p>Only samples present in both input matrices are used. For every
 * {@code nrPcs} from 1 to the number of PC columns the method fits
 * <ul>
 *   <li>a "full" model on the first {@code nrPcs} PC columns, and</li>
 *   <li>an "original" model on the first PC column only,</li>
 * </ul>
 * and prints a tab-separated line to standard output containing: nrPcs, the
 * correlation between the newest PC and the cell count, its z-score and
 * p-value, the R-squared of the full model and the parameter count. For
 * {@code nrPcs > 1} the line additionally contains the F statistic of the
 * full model against the original model, the F statistic against the previous
 * full model, and the corresponding upper-tail p-values ({@code -1} when they
 * could not be computed).
 *
 * <p>Finally the cell counts and all PC columns of the shared samples are
 * written to {@code pcFile + "-mergedWCellCount.txt"}.
 *
 * @param pcFile        path of the PC matrix (samples on rows)
 * @param cellcountFile path of the cell-count matrix (samples on rows)
 * @param pheno         name of the cell-count column to predict
 * @throws IOException declared by the signature; presumably raised while
 *                     reading or writing the matrices
 */
private void iterativelyIncreaseNumberOfPCsInCellCountPredictionModel(String pcFile, String cellcountFile,
        String pheno) throws IOException {

    DoubleMatrixDataset<String, String> pcs = new DoubleMatrixDataset<String, String>(pcFile); // samples on rows, pcs on cols?
    DoubleMatrixDataset<String, String> cellcounts = new DoubleMatrixDataset<String, String>(cellcountFile); // samples on rows, celltype on cols

    Integer phenoId = cellcounts.hashCols.get(pheno);

    // Mark the PC rows whose sample also occurs in the cell-count matrix.
    boolean[] includeRow = new boolean[pcs.nrRows];
    int shared = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        if (cellcounts.hashRows.containsKey(sample)) {
            shared++;
            includeRow[i] = true;
        }
    }

    // order the samples of the cell count in the order of the pcs
    double[] olsY = new double[shared]; //Ordinary least squares: cell count
    int ctr = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        Integer sampleId = cellcounts.hashRows.get(sample);
        if (sampleId != null) {
            olsY[ctr] = cellcounts.rawData[sampleId][phenoId];
            ctr++;
        }
    }

    OLSMultipleLinearRegression previousFullModel = null;

    for (int col = 0; col < pcs.nrCols; col++) {
        OLSMultipleLinearRegression regressionFullModel = new OLSMultipleLinearRegression();
        OLSMultipleLinearRegression regressionOrigModel = new OLSMultipleLinearRegression();

        // Full model design matrix: the first nrPcs PC columns of the shared samples.
        int nrPcs = col + 1;
        double[][] olsX = new double[shared][nrPcs];
        double[][] olsXN = new double[shared][1];
        for (int inc = 0; inc < col + 1; inc++) {
            ctr = 0;
            for (int i = 0; i < pcs.nrRows; i++) {
                if (includeRow[i]) {
                    olsX[ctr][inc] = pcs.rawData[i][inc];
                    ctr++;
                }
            }
        }

        // pc holds the newest PC column; olsXN (original model) always holds the first PC column.
        double[] pc = new double[shared];
        ctr = 0;
        for (int i = 0; i < pcs.nrRows; i++) {
            if (includeRow[i]) {
                pc[ctr] = pcs.rawData[i][col];
                olsXN[ctr][0] = pcs.rawData[i][0];
                ctr++;
            }
        }

        double corr = JSci.maths.ArrayMath.correlation(pc, olsY);
        // NOTE(review): presumably prepares state that convertCorrelationToZScore relies on — confirm.
        Correlation.correlationToZScore(olsY.length);
        double z = Correlation.convertCorrelationToZScore(olsY.length, corr);
        double p = ZScores.zToP(z);

        regressionFullModel.newSampleData(olsY, olsX);
        regressionOrigModel.newSampleData(olsY, olsXN);

        double rsquared = regressionFullModel.calculateRSquared();

        double rss1 = regressionOrigModel.calculateResidualSumOfSquares();
        double rss2 = regressionFullModel.calculateResidualSumOfSquares();

        // NOTE(review): these counts equal the number of regressors only. If the models are
        // fitted with an intercept, the residual degrees of freedom below are one too large;
        // the differences between counts (numerator df) are unaffected. Confirm before changing.
        int numParams1 = 1; // regressor + intercept
        int numParams2 = nrPcs; // regressors + intercept
        if (nrPcs > 1) {

            // Full model versus the single-PC original model.
            double F2 = ((rss1 - rss2) / (numParams2 - numParams1)) / (rss2 / (olsY.length - numParams2));

            // Full model versus the full model of the previous iteration (one PC fewer).
            double rss3 = previousFullModel.calculateResidualSumOfSquares();
            int numParams3 = nrPcs - 1;
            double FPrevious = ((rss3 - rss2) / (numParams2 - numParams3))
                    / (rss2 / (olsY.length - numParams2));

            // pf(f, m1$df.residual-m2$df.residual, m2$df.residual, lower.tail = FALSE)
            // (double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom)
            FDistribution fDist = new FDistribution((numParams2 - numParams1), olsY.length - numParams2);
            FDistribution fDistPrev = new FDistribution((numParams2 - numParams3), olsY.length - numParams2);

            // -1 marks a p-value that could not be computed.
            double anovaFTestP = -1;
            double anovaFTestP2 = -1;
            try {
                anovaFTestP = 1 - fDist.cumulativeProbability(F2);
                // FPrevious must be evaluated against the distribution built with its own
                // numerator degrees of freedom, not the one used for F2.
                anovaFTestP2 = 1 - fDistPrev.cumulativeProbability(FPrevious);
                // Floor vanishing p-values at 1E-16.
                if (anovaFTestP < 1E-160) {
                    anovaFTestP = 1E-16;
                }

                if (anovaFTestP2 < 1E-160) {
                    anovaFTestP2 = 1E-16;
                }
            } catch (Exception err) {
                // Best effort: keep whatever was computed and report the failure instead of hiding it.
                System.err.println("Could not compute ANOVA F-test p-values for " + nrPcs + " PCs: " + err);
            }

            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams2
                    + "\t" + F2 + "\t" + FPrevious + "\t" + anovaFTestP + "\t" + anovaFTestP2);
        } else {
            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams1);
        }

        previousFullModel = regressionFullModel;

    }

    // Build the merged output matrix: column 0 is the cell count, followed by every PC column.
    ArrayList<String> colNames = new ArrayList<String>();
    colNames.add("CellCount");
    double[][] data = new double[shared][pcs.nrCols + 1];
    for (int i = 0; i < olsY.length; i++) {
        data[i][0] = olsY[i];
    }

    ArrayList<String> rowNames = new ArrayList<String>();
    for (int col = 0; col < pcs.nrCols; col++) {
        ctr = 0;
        colNames.add(pcs.colObjects.get(col));
        for (int row = 0; row < pcs.nrRows; row++) {
            if (includeRow[row]) {
                data[ctr][col + 1] = pcs.rawData[row][col];
                ctr++;
            }

        }
    }

    for (int row = 0; row < pcs.nrRows; row++) {
        if (includeRow[row]) {
            rowNames.add("Sample_" + pcs.rowObjects.get(row));
        }
    }

    DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>();
    dsout.rawData = data;
    dsout.rowObjects = rowNames;
    dsout.colObjects = colNames;
    dsout.recalculateHashMaps();
    dsout.save(pcFile + "-mergedWCellCount.txt");

}

From source file:org.apache.solr.client.solrj.io.eval.OLSRegressionEvaluator.java

/**
 * Runs an OLS regression of the outcomes on the observation matrix and
 * returns the fitted model together with its summary statistics.
 *
 * @param values {@code values[0]} must be the observation {@code Matrix} and
 *               {@code values[1]} a {@code List} of numeric outcomes
 * @return a {@code MultipleRegressionTuple} wrapping the regression and a map
 *         of its statistics
 * @throws IOException if either argument has the wrong type
 */
@Override
public Object doWork(Object... values) throws IOException {

    // Validate and unpack the observation matrix first, then the outcomes.
    if (!(values[0] instanceof Matrix)) {
        throw new IOException("The first parameter for olsRegress should be the observation matrix.");
    }
    Matrix observations = (Matrix) values[0];

    if (!(values[1] instanceof List)) {
        throw new IOException("The second parameter for olsRegress should be outcome array. ");
    }
    List<Number> outcomes = (List) values[1];

    double[][] observationData = observations.getData();

    // Unbox the outcomes into a primitive array for the regression.
    int outcomeCount = outcomes.size();
    double[] outcomeData = new double[outcomeCount];
    for (int idx = 0; idx < outcomeCount; idx++) {
        Number outcome = outcomes.get(idx);
        outcomeData[idx] = outcome.doubleValue();
    }

    OLSMultipleLinearRegression ols = (OLSMultipleLinearRegression) regress(observationData, outcomeData);

    Map stats = new HashMap();
    stats.put("regressandVariance", ols.estimateRegressandVariance());
    stats.put("regressionParameters", list(ols.estimateRegressionParameters()));
    stats.put("RSquared", ols.calculateRSquared());
    stats.put("adjustedRSquared", ols.calculateAdjustedRSquared());
    stats.put("residualSumSquares", ols.calculateResidualSumOfSquares());

    try {
        stats.put("regressionParametersStandardErrors",
                list(ols.estimateRegressionParametersStandardErrors()));
        stats.put("regressionParametersVariance",
                new Matrix(ols.estimateRegressionParametersVariance()));
    } catch (Exception ignored) {
        // Thrown when the matrix is singular; these optional entries are then left out.
    }

    return new MultipleRegressionTuple(ols, stats);
}