Example usage for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression calculateResidualSumOfSquares

List of usage examples for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression calculateResidualSumOfSquares

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression calculateResidualSumOfSquares.

Prototype

public double calculateResidualSumOfSquares() 

Source Link

Document

Returns the sum of squared residuals.

Usage

From source file:modelcreation.ModelCreation.java

public static void printRegressionStatistics(OLSMultipleLinearRegression regression) {
    System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared());
    System.out.println("R^2 = " + regression.calculateRSquared());
    System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares());
    System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares());

    double[] standardErrors = regression.estimateRegressionParametersStandardErrors();
    double[] residuals = regression.estimateResiduals();
    double[] parameters = regression.estimateRegressionParameters();

    int residualdf = residuals.length - parameters.length;
    for (int i = 0; i < parameters.length; i++) {
        double coeff = parameters[i];
        double tstat = parameters[i] / regression.estimateRegressionParametersStandardErrors()[i];
        double pvalue = new TDistribution(residualdf).cumulativeProbability(-FastMath.abs(tstat)) * 2;

        System.out.println("Coefficient(" + i + ") : " + coeff);
        System.out.println("Standard Error(" + i + ") : " + standardErrors[i]);
        System.out.println("t-stats(" + i + ") : " + tstat);
        System.out.println("p-value(" + i + ") : " + pvalue);
    }/*from  w ww .  jav a  2 s  .com*/
}

From source file:modelcreation.ModelCreation.java

public static double evaluateModel(OLSMultipleLinearRegression regression, double[][] subXTest,
        double[] subYTest) {
    System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared());
    System.out.println("R^2 = " + regression.calculateRSquared());
    System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares());
    System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares());

    double[] parameters = regression.estimateRegressionParameters();
    double[] predictions = new double[subYTest.length];

    for (int i = 0; i < subYTest.length; i++) {
        double prediction = parameters[0] + (parameters[1] * subXTest[i][0]) + (parameters[2] * subXTest[i][1]);
        predictions[i] = prediction;/* w  w  w .  ja  va  2 s .  co  m*/
    }

    double meanSquaredError = calculateMeanSquaredError(subYTest, predictions);
    System.out.println("Mean Squared Error = " + meanSquaredError);
    return meanSquaredError;
}

From source file:hms.hwestra.interactionrebuttal2.InteractionRebuttal2.java

private void iterativelyIncreaseNumberOfPCsInCellCountPredictionModel(String pcFile, String cellcountFile,
        String pheno) throws IOException {

    DoubleMatrixDataset<String, String> pcs = new DoubleMatrixDataset<String, String>(pcFile); // samples on rows, pcs on cols?
    DoubleMatrixDataset<String, String> cellcounts = new DoubleMatrixDataset<String, String>(cellcountFile); // samples on rows, celltype on cols

    Integer phenoId = cellcounts.hashCols.get(pheno);

    boolean[] includeRow = new boolean[pcs.nrRows];
    int shared = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        if (cellcounts.hashRows.containsKey(sample)) {
            shared++;//ww  w . j  a  v a 2s  . c  o m
            includeRow[i] = true;
        }
    }

    // order the samples of the cell count in the order of the pcs
    double[] olsY = new double[shared]; //Ordinary least squares: cell count
    int ctr = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        Integer sampleId = cellcounts.hashRows.get(sample);
        if (sampleId != null) {
            olsY[ctr] = cellcounts.rawData[sampleId][phenoId];
            ctr++;
        }
    }

    org.apache.commons.math3.distribution.FDistribution fDist = null;
    cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null;
    cern.jet.random.tdouble.StudentT tDistColt = null;

    OLSMultipleLinearRegression previousFullModel = null;

    for (int col = 0; col < pcs.nrCols; col++) {
        OLSMultipleLinearRegression regressionFullModel = new OLSMultipleLinearRegression();
        OLSMultipleLinearRegression regressionOrigModel = new OLSMultipleLinearRegression();

        int nrPcs = col + 1;
        double[][] olsX = new double[shared][nrPcs];
        double[][] olsXN = new double[shared][1];
        for (int inc = 0; inc < col + 1; inc++) {
            ctr = 0;
            for (int i = 0; i < pcs.nrRows; i++) {
                if (includeRow[i]) {
                    olsX[ctr][inc] = pcs.rawData[i][inc];
                    ctr++;
                }
            }
        }

        double[] pc = new double[shared];
        ctr = 0;
        for (int i = 0; i < pcs.nrRows; i++) {
            if (includeRow[i]) {
                pc[ctr] = pcs.rawData[i][col];
                olsXN[ctr][0] = pcs.rawData[i][0];
                ctr++;
            }
        }

        double corr = JSci.maths.ArrayMath.correlation(pc, olsY);
        Correlation.correlationToZScore(olsY.length);
        double z = Correlation.convertCorrelationToZScore(olsY.length, corr);
        double p = ZScores.zToP(z);

        regressionFullModel.newSampleData(olsY, olsX);
        regressionOrigModel.newSampleData(olsY, olsXN);

        double rsquaredadj = regressionFullModel.calculateAdjustedRSquared();
        double rsquared = regressionFullModel.calculateRSquared();

        double rse = regressionOrigModel.estimateRegressionStandardError();
        double rsefull = regressionFullModel.estimateRegressionStandardError();

        double rss1 = regressionOrigModel.calculateResidualSumOfSquares();
        double rss2 = regressionFullModel.calculateResidualSumOfSquares();
        double F = ((rss1 - rss2) / (3 - 2)) / (rss2 / (olsY.length - 3));
        int numParams1 = 1; // regressor + intercept
        int numParams2 = nrPcs; // regressors + intercept
        if (nrPcs > 1) {

            double F2 = ((rss1 - rss2) / (numParams2 - numParams1)) / (rss2 / (olsY.length - numParams2));

            double rss3 = previousFullModel.calculateResidualSumOfSquares();
            int numParams3 = nrPcs - 1;
            double FPrevious = ((rss3 - rss2) / (numParams2 - numParams3))
                    / (rss2 / (olsY.length - numParams2));

            // pf(f, m1$df.residual-m2$df.residual, m2$df.residual, lower.tail = FALSE)
            // (double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom)
            fDist = new org.apache.commons.math3.distribution.FDistribution((numParams2 - numParams1),
                    olsY.length - numParams2);
            FDistribution fDistPrev = new FDistribution((numParams2 - numParams3), olsY.length - numParams2);

            double anovaFTestP = -1;
            double anovaFTestP2 = -1;
            try {
                anovaFTestP = 1 - fDist.cumulativeProbability(F2);
                anovaFTestP2 = 1 - fDist.cumulativeProbability(FPrevious);
                if (anovaFTestP < 1E-160) {
                    anovaFTestP = 1E-16;
                }

                if (anovaFTestP2 < 1E-160) {
                    anovaFTestP2 = 1E-16;
                }
            } catch (Exception err) {
            }

            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams2
                    + "\t" + F2 + "\t" + FPrevious + "\t" + anovaFTestP + "\t" + anovaFTestP2);
        } else {
            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams1);
        }

        previousFullModel = regressionFullModel;

    }

    ArrayList<String> colNames = new ArrayList<String>();
    colNames.add("CellCount");
    double[][] data = new double[shared][pcs.nrCols + 1];
    for (int i = 0; i < olsY.length; i++) {
        data[i][0] = olsY[i];
    }

    ArrayList<String> rowNames = new ArrayList<String>();
    for (int col = 0; col < pcs.nrCols; col++) {
        ctr = 0;
        colNames.add(pcs.colObjects.get(col));
        for (int row = 0; row < pcs.nrRows; row++) {
            if (includeRow[row]) {
                data[ctr][col + 1] = pcs.rawData[row][col];
                ctr++;
            }

        }
    }

    for (int row = 0; row < pcs.nrRows; row++) {
        if (includeRow[row]) {
            rowNames.add("Sample_" + pcs.rowObjects.get(row));
        }
    }

    DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>();
    dsout.rawData = data;
    dsout.rowObjects = rowNames;
    dsout.colObjects = colNames;
    dsout.recalculateHashMaps();
    dsout.save(pcFile + "-mergedWCellCount.txt");

}

From source file:eqtlmappingpipeline.interactionanalysis.InteractionPlotter.java

public InteractionPlotter(String interactionFile, String genotypeDir, String expressionDataFile,
        String covariateDataFile, String gteFile, String outdir) throws IOException {
    outdir = Gpio.formatAsDirectory(outdir);
    Gpio.createDir(outdir);//  w w  w .ja v  a  2  s. c  o  m
    Map<String, String> gte = null;
    if (gteFile != null) {
        TextFile tf = new TextFile(gteFile, TextFile.R);
        gte = tf.readAsHashMap(0, 1);
        tf.close();
    }

    HashSet<String> expressionProbes = new HashSet<String>();
    TextFile tf = new TextFile(interactionFile, TextFile.R);
    String[] elems = tf.readLineElems(TextFile.tab);
    ArrayList<Triple<String, String, String>> triples = new ArrayList<Triple<String, String, String>>();
    while (elems != null) {
        if (elems.length == 2) {
            String snp = elems[0];
            String probe = elems[1];
            expressionProbes.add(probe);
            triples.add(new Triple<String, String, String>(snp, null, probe));
        } else if (elems.length == 3) {
            String snp = elems[0];
            String covariate = elems[1];
            String probe = elems[2];
            expressionProbes.add(probe);
            triples.add(new Triple<String, String, String>(snp, covariate, probe));
        }
        elems = tf.readLineElems(TextFile.tab);
    }
    tf.close();
    System.out.println(triples.size() + " SNP - covariate - probe combinations read from: " + interactionFile);

    DoubleMatrixDataset<String, String> expressionData = new DoubleMatrixDataset<String, String>(
            expressionDataFile, expressionProbes);
    DoubleMatrixDataset<String, String> covariateData = new DoubleMatrixDataset<String, String>(
            covariateDataFile);

    int samplesHaveCovariatesOnCols = 0;
    int samplesHaveCovariatesOnRows = 0;
    for (int i = 0; i < expressionData.colObjects.size(); i++) {
        String expSample = expressionData.colObjects.get(i);
        Integer id1 = covariateData.hashCols.get(expSample);
        Integer id2 = covariateData.hashRows.get(expSample);
        if (id1 != null) {
            samplesHaveCovariatesOnCols++;
        }
        if (id2 != null) {
            samplesHaveCovariatesOnRows++;
        }
    }
    if (samplesHaveCovariatesOnRows > samplesHaveCovariatesOnCols) {
        System.out.println("Rows contain covariate samples in covariate file. Transposing covariates.");
        covariateData.transposeDataset();
    }

    TriTyperGenotypeData geno = new TriTyperGenotypeData(genotypeDir);
    SNPLoader loader = geno.createSNPLoader();

    int[] genotypeToCovariate = new int[geno.getIndividuals().length];
    int[] genotypeToExpression = new int[geno.getIndividuals().length];
    String[] genoIndividuals = geno.getIndividuals();
    for (int i = 0; i < genotypeToCovariate.length; i++) {
        String genoSample = genoIndividuals[i];
        if (geno.getIsIncluded()[i] != null && geno.getIsIncluded()[i]) {
            if (gte != null) {
                genoSample = gte.get(genoSample);
            }

            Integer covariateSample = covariateData.hashCols.get(genoSample);
            Integer expressionSample = expressionData.hashCols.get(genoSample);
            if (genoSample != null && covariateSample != null && expressionSample != null) {
                genotypeToCovariate[i] = covariateSample;
                genotypeToExpression[i] = expressionSample;
            } else {
                genotypeToCovariate[i] = -9;
                genotypeToExpression[i] = -9;

            }
        } else {
            genotypeToCovariate[i] = -9;
            genotypeToExpression[i] = -9;
        }
    }

    OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression();
    cern.jet.random.tdouble.StudentT tDistColt = null;
    org.apache.commons.math3.distribution.FDistribution fDist = null;
    cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null;

    Color[] colorarray = new Color[3];
    colorarray[0] = new Color(171, 178, 114);
    colorarray[1] = new Color(98, 175, 255);
    colorarray[2] = new Color(204, 86, 78);

    DecimalFormat decFormat = new DecimalFormat("#.###");
    DecimalFormat decFormatSmall = new DecimalFormat("0.#E0");
    for (Triple<String, String, String> triple : triples) {

        String snp = triple.getLeft();
        String covariate = triple.getMiddle();
        String probe = triple.getRight();

        Integer snpId = geno.getSnpToSNPId().get(snp);

        Integer probeId = expressionData.hashRows.get(probe);

        int startCovariate = -1;
        int endCovariate = -1;

        if (covariate == null) {
            startCovariate = 0;
            endCovariate = covariateData.nrRows;
        } else {
            Integer covariateId = covariateData.hashRows.get(covariate);
            if (covariateId != null) {
                startCovariate = covariateId;
                endCovariate = covariateId + 1;
            }
        }

        if (snpId >= 0 && probeId != null && startCovariate >= 0) {

            SNP snpObj = geno.getSNPObject(snpId);
            loader.loadGenotypes(snpObj);
            if (loader.hasDosageInformation()) {
                loader.loadDosage(snpObj);
            }

            double signInteractionEffectDirection = 1;
            String[] genotypeDescriptions = new String[3];
            if (snpObj.getAlleles()[1] == snpObj.getMinorAllele()) {
                signInteractionEffectDirection = -1;
                genotypeDescriptions[2] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[0]);
                genotypeDescriptions[1] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[1]);
                genotypeDescriptions[0] = BaseAnnot.toString(snpObj.getAlleles()[1]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[1]);
            } else {
                genotypeDescriptions[0] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[0]);
                genotypeDescriptions[1] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[1]);
                genotypeDescriptions[2] = BaseAnnot.toString(snpObj.getAlleles()[1]) + ""
                        + BaseAnnot.toString(snpObj.getAlleles()[1]);

            }

            for (int q = startCovariate; q < endCovariate; q++) {
                System.out.println("Plotting: " + snp + "\t" + covariateData.rowObjects.get(q) + "\t" + probe);
                System.out.println(
                        "Individual\tAllele1\tAllele2\tGenotype\tGenotypeFlipped\tCovariate\tExpression");
                byte[] alleles1 = snpObj.getAllele1();
                byte[] alleles2 = snpObj.getAllele2();
                byte[] genotypes = snpObj.getGenotypes();
                ArrayList<Byte> genotypeArr = new ArrayList<Byte>();
                ArrayList<Double> covariateArr = new ArrayList<Double>();
                ArrayList<Double> expressionArr = new ArrayList<Double>();

                int nrCalled = 0;

                for (int i = 0; i < genoIndividuals.length; i++) {
                    if (genotypes[i] != -1 && genotypeToCovariate[i] != -9 && genotypeToExpression[i] != -9) {

                        if (!Double.isNaN(covariateData.rawData[q][genotypeToCovariate[i]])) {

                            int genotypeflipped = genotypes[i];
                            if (signInteractionEffectDirection == -1) {
                                genotypeflipped = 2 - genotypeflipped;
                            }

                            String output = genoIndividuals[i] + "\t" + BaseAnnot.toString(alleles1[i]) + "\t"
                                    + BaseAnnot.toString(alleles2[i]) + "\t" + genotypes[i] + "\t"
                                    + genotypeflipped + "\t" + covariateData.rawData[q][genotypeToCovariate[i]]
                                    + "\t" + expressionData.rawData[probeId][genotypeToExpression[i]];
                            System.out.println(output);

                            genotypeArr.add(genotypes[i]);

                            covariateArr.add(covariateData.rawData[q][genotypeToCovariate[i]]);
                            expressionArr.add(expressionData.rawData[probeId][genotypeToExpression[i]]);
                            nrCalled++;
                        }

                    }
                }
                System.out.println("");
                //Fill arrays with data in order to be able to perform the ordinary least squares analysis:
                double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression
                double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount
                int itr = 0;

                double[] dataExp = new double[nrCalled];
                double[] dataCov = new double[nrCalled];
                int[] dataGen = new int[nrCalled];

                for (int s = 0; s < nrCalled; s++) {
                    byte originalGenotype = genotypeArr.get(s);
                    int genotype = originalGenotype;
                    if (signInteractionEffectDirection == -1) {
                        genotype = 2 - genotype;
                    }

                    olsY[s] = expressionArr.get(s);

                    olsXFullWithInteraction[s][0] = genotype;
                    olsXFullWithInteraction[s][1] = covariateArr.get(s);
                    olsXFullWithInteraction[s][2] = olsXFullWithInteraction[s][0]
                            * olsXFullWithInteraction[s][1];

                    dataExp[s] = olsY[s];
                    dataGen[s] = genotype;
                    dataCov[s] = covariateArr.get(s);

                    itr++;
                }

                regressionFullWithInteraction.newSampleData(olsY, olsXFullWithInteraction);

                double rss2 = regressionFullWithInteraction.calculateResidualSumOfSquares();
                double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters();
                double[] regressionStandardErrors = regressionFullWithInteraction
                        .estimateRegressionParametersStandardErrors();

                double betaInteraction = regressionParameters[3];
                double seInteraction = regressionStandardErrors[3];
                double tInteraction = betaInteraction / seInteraction;
                double pValueInteraction = 1;
                double zScoreInteraction = 0;

                if (fDist == null) {
                    fDist = new org.apache.commons.math3.distribution.FDistribution((int) (3 - 2),
                            (int) (olsY.length - 3));
                    randomEngine = new cern.jet.random.tdouble.engine.DRand();
                    tDistColt = new cern.jet.random.tdouble.StudentT(olsY.length - 4, randomEngine);
                }

                if (tInteraction < 0) {
                    pValueInteraction = tDistColt.cdf(tInteraction);
                    if (pValueInteraction < 2.0E-323) {
                        pValueInteraction = 2.0E-323;
                    }
                    zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
                } else {
                    pValueInteraction = tDistColt.cdf(-tInteraction);
                    if (pValueInteraction < 2.0E-323) {
                        pValueInteraction = 2.0E-323;
                    }

                    zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
                }
                pValueInteraction *= 2;
                String pvalFormatted = "";
                if (pValueInteraction >= 0.001) {
                    pvalFormatted = decFormat.format(pValueInteraction);
                } else {
                    pvalFormatted = decFormatSmall.format(pValueInteraction);
                }
                ScatterPlot scatterPlot = new ScatterPlot(500, 500, dataCov, dataExp, dataGen,
                        genotypeDescriptions, colorarray, ScatterPlot.OUTPUTFORMAT.PDF,
                        "Interaction between SNP " + snp + ", probe " + probe + " and covariate "
                                + covariateData.rowObjects.get(q),
                        "Z: " + decFormat.format(zScoreInteraction) + " Pvalue: " + pvalFormatted + " n: "
                                + nrCalled,
                        outdir + snp + "-" + probe + "-" + covariateData.rowObjects.get(q) + ".pdf", false);

            }

            snpObj.clearGenotypes();
        }
    }

    loader.close();
}

From source file:org.apache.solr.client.solrj.io.eval.OLSRegressionEvaluator.java

@Override
public Object doWork(Object... values) throws IOException {

    Matrix observations = null;//w  w w  . ja va 2 s  . c  o  m
    List<Number> outcomes = null;

    if (values[0] instanceof Matrix) {
        observations = (Matrix) values[0];
    } else {
        throw new IOException("The first parameter for olsRegress should be the observation matrix.");
    }

    if (values[1] instanceof List) {
        outcomes = (List) values[1];
    } else {
        throw new IOException("The second parameter for olsRegress should be outcome array. ");
    }

    double[][] observationData = observations.getData();
    double[] outcomeData = new double[outcomes.size()];
    for (int i = 0; i < outcomeData.length; i++) {
        outcomeData[i] = outcomes.get(i).doubleValue();
    }

    OLSMultipleLinearRegression multipleLinearRegression = (OLSMultipleLinearRegression) regress(
            observationData, outcomeData);

    Map map = new HashMap();

    map.put("regressandVariance", multipleLinearRegression.estimateRegressandVariance());
    map.put("regressionParameters", list(multipleLinearRegression.estimateRegressionParameters()));
    map.put("RSquared", multipleLinearRegression.calculateRSquared());
    map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared());
    map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares());

    try {
        map.put("regressionParametersStandardErrors",
                list(multipleLinearRegression.estimateRegressionParametersStandardErrors()));
        map.put("regressionParametersVariance",
                new Matrix(multipleLinearRegression.estimateRegressionParametersVariance()));
    } catch (Exception e) {
        //Exception is thrown if the matrix is singular
    }

    return new MultipleRegressionTuple(multipleLinearRegression, map);
}