Example usage for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression estimateRegressionParametersStandardErrors

List of usage examples for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression estimateRegressionParametersStandardErrors

Introduction

This page collects usage examples for the method org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression.estimateRegressionParametersStandardErrors().

Prototype

public double[] estimateRegressionParametersStandardErrors() 

Source Link

Usage

From source file:modelcreation.ModelCreation.java

/**
 * Prints summary statistics of a regression to standard output: adjusted R^2, R^2, the residual
 * and total sums of squares, and then, for every regression parameter, its coefficient, standard
 * error, t-statistic and two-sided p-value.
 *
 * <p>The p-values use a Student-t distribution whose degrees of freedom are the number of
 * residuals minus the number of parameters.
 *
 * @param regression the regression whose statistics are printed
 */
public static void printRegressionStatistics(OLSMultipleLinearRegression regression) {
    System.out.println("Adjusted R^2 = " + regression.calculateAdjustedRSquared());
    System.out.println("R^2 = " + regression.calculateRSquared());
    System.out.println("Residual Sum Of Squares = " + regression.calculateResidualSumOfSquares());
    System.out.println("Total Sum of Squares = " + regression.calculateTotalSumOfSquares());

    double[] standardErrors = regression.estimateRegressionParametersStandardErrors();
    double[] residuals = regression.estimateResiduals();
    double[] parameters = regression.estimateRegressionParameters();

    int residualdf = residuals.length - parameters.length;
    // The distribution depends only on the degrees of freedom, so build it once for all parameters.
    TDistribution tDistribution = new TDistribution(residualdf);
    for (int i = 0; i < parameters.length; i++) {
        double coeff = parameters[i];
        // Reuse the standard errors estimated above instead of re-estimating them per parameter.
        double tstat = coeff / standardErrors[i];
        // Two-sided p-value: twice the lower-tail probability of -|t|.
        double pvalue = tDistribution.cumulativeProbability(-FastMath.abs(tstat)) * 2;

        System.out.println("Coefficient(" + i + ") : " + coeff);
        System.out.println("Standard Error(" + i + ") : " + standardErrors[i]);
        System.out.println("t-stats(" + i + ") : " + tstat);
        System.out.println("p-value(" + i + ") : " + pvalue);
    }
}

From source file:eqtlmappingpipeline.interactionanalysis.InteractionPlotter.java

/**
 * Reads SNP / covariate / probe combinations from {@code interactionFile}, fits the linear model
 * {@code expression ~ genotype + covariate + genotype * covariate} for every combination and
 * creates a PDF scatter plot per combination, titled with the Z-score and two-sided p-value of
 * the interaction term. The per-individual input values are also printed to standard output.
 *
 * @param interactionFile tab-separated file; lines with two columns are read as (SNP, probe) and
 *        are evaluated against every covariate row, lines with three columns are read as
 *        (SNP, covariate, probe); lines with any other number of columns are ignored
 * @param genotypeDir location handed to {@code TriTyperGenotypeData}
 * @param expressionDataFile expression matrix; only the probes named in {@code interactionFile}
 *        are requested from it
 * @param covariateDataFile covariate matrix; it is transposed when more expression sample names
 *        match its rows than its columns
 * @param gteFile optional file whose first two columns map genotype sample names to expression
 *        sample names; may be {@code null}, in which case genotype sample names are used as-is
 * @param outdir output directory; formatted and created through {@code Gpio}
 * @throws IOException if reading any of the inputs fails
 */
public InteractionPlotter(String interactionFile, String genotypeDir, String expressionDataFile,
        String covariateDataFile, String gteFile, String outdir) throws IOException {
    outdir = Gpio.formatAsDirectory(outdir);
    Gpio.createDir(outdir);
    Map<String, String> gte = null;
    if (gteFile != null) {
        TextFile gteReader = new TextFile(gteFile, TextFile.R);
        try {
            gte = gteReader.readAsHashMap(0, 1);
        } finally {
            // Close the reader even when parsing fails.
            gteReader.close();
        }
    }

    HashSet<String> expressionProbes = new HashSet<String>();
    ArrayList<Triple<String, String, String>> triples = new ArrayList<Triple<String, String, String>>();
    TextFile tf = new TextFile(interactionFile, TextFile.R);
    try {
        String[] elems = tf.readLineElems(TextFile.tab);
        while (elems != null) {
            if (elems.length == 2) {
                // No covariate given: a null middle element means "all covariates".
                String snp = elems[0];
                String probe = elems[1];
                expressionProbes.add(probe);
                triples.add(new Triple<String, String, String>(snp, null, probe));
            } else if (elems.length == 3) {
                String snp = elems[0];
                String covariate = elems[1];
                String probe = elems[2];
                expressionProbes.add(probe);
                triples.add(new Triple<String, String, String>(snp, covariate, probe));
            }
            elems = tf.readLineElems(TextFile.tab);
        }
    } finally {
        tf.close();
    }
    System.out.println(triples.size() + " SNP - covariate - probe combinations read from: " + interactionFile);

    DoubleMatrixDataset<String, String> expressionData = new DoubleMatrixDataset<String, String>(
            expressionDataFile, expressionProbes);
    DoubleMatrixDataset<String, String> covariateData = new DoubleMatrixDataset<String, String>(
            covariateDataFile);

    // Work out the orientation of the covariate matrix by counting how many expression sample
    // names appear among its column names versus its row names.
    int samplesHaveCovariatesOnCols = 0;
    int samplesHaveCovariatesOnRows = 0;
    for (int i = 0; i < expressionData.colObjects.size(); i++) {
        String expSample = expressionData.colObjects.get(i);
        Integer id1 = covariateData.hashCols.get(expSample);
        Integer id2 = covariateData.hashRows.get(expSample);
        if (id1 != null) {
            samplesHaveCovariatesOnCols++;
        }
        if (id2 != null) {
            samplesHaveCovariatesOnRows++;
        }
    }
    if (samplesHaveCovariatesOnRows > samplesHaveCovariatesOnCols) {
        System.out.println("Rows contain covariate samples in covariate file. Transposing covariates.");
        covariateData.transposeDataset();
    }

    TriTyperGenotypeData geno = new TriTyperGenotypeData(genotypeDir);
    SNPLoader loader = geno.createSNPLoader();
    try {
        // For every genotyped individual, the column index in the covariate and expression
        // matrices, or -9 when the individual is excluded or missing from either matrix.
        int[] genotypeToCovariate = new int[geno.getIndividuals().length];
        int[] genotypeToExpression = new int[geno.getIndividuals().length];
        String[] genoIndividuals = geno.getIndividuals();
        for (int i = 0; i < genotypeToCovariate.length; i++) {
            String genoSample = genoIndividuals[i];
            Boolean included = geno.getIsIncluded()[i];
            if (included != null && included) {
                if (gte != null) {
                    genoSample = gte.get(genoSample);
                }

                Integer covariateSample = covariateData.hashCols.get(genoSample);
                Integer expressionSample = expressionData.hashCols.get(genoSample);
                if (genoSample != null && covariateSample != null && expressionSample != null) {
                    genotypeToCovariate[i] = covariateSample;
                    genotypeToExpression[i] = expressionSample;
                } else {
                    genotypeToCovariate[i] = -9;
                    genotypeToExpression[i] = -9;
                }
            } else {
                genotypeToCovariate[i] = -9;
                genotypeToExpression[i] = -9;
            }
        }

        OLSMultipleLinearRegression regressionFullWithInteraction = new OLSMultipleLinearRegression();
        cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand();

        Color[] colorarray = new Color[3];
        colorarray[0] = new Color(171, 178, 114);
        colorarray[1] = new Color(98, 175, 255);
        colorarray[2] = new Color(204, 86, 78);

        DecimalFormat decFormat = new DecimalFormat("#.###");
        DecimalFormat decFormatSmall = new DecimalFormat("0.#E0");
        for (Triple<String, String, String> triple : triples) {

            String snp = triple.getLeft();
            String covariate = triple.getMiddle();
            String probe = triple.getRight();

            Integer snpId = geno.getSnpToSNPId().get(snp);

            Integer probeId = expressionData.hashRows.get(probe);

            // Half-open range [startCovariate, endCovariate) of covariate rows to plot;
            // startCovariate stays -1 when a named covariate is not present in the data.
            int startCovariate = -1;
            int endCovariate = -1;

            if (covariate == null) {
                startCovariate = 0;
                endCovariate = covariateData.nrRows;
            } else {
                Integer covariateId = covariateData.hashRows.get(covariate);
                if (covariateId != null) {
                    startCovariate = covariateId;
                    endCovariate = covariateId + 1;
                }
            }

            // The null check keeps an unknown SNP from failing on unboxing; it is skipped instead.
            if (snpId != null && snpId >= 0 && probeId != null && startCovariate >= 0) {

                SNP snpObj = geno.getSNPObject(snpId);
                loader.loadGenotypes(snpObj);
                if (loader.hasDosageInformation()) {
                    loader.loadDosage(snpObj);
                }

                // When the second allele is the minor allele, genotype codes are flipped
                // (2 - genotype) and the genotype labels are listed in reverse order.
                double signInteractionEffectDirection = 1;
                String[] genotypeDescriptions = new String[3];
                if (snpObj.getAlleles()[1] == snpObj.getMinorAllele()) {
                    signInteractionEffectDirection = -1;
                    genotypeDescriptions[2] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[0]);
                    genotypeDescriptions[1] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[1]);
                    genotypeDescriptions[0] = BaseAnnot.toString(snpObj.getAlleles()[1]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[1]);
                } else {
                    genotypeDescriptions[0] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[0]);
                    genotypeDescriptions[1] = BaseAnnot.toString(snpObj.getAlleles()[0]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[1]);
                    genotypeDescriptions[2] = BaseAnnot.toString(snpObj.getAlleles()[1]) + ""
                            + BaseAnnot.toString(snpObj.getAlleles()[1]);
                }

                for (int q = startCovariate; q < endCovariate; q++) {
                    System.out.println("Plotting: " + snp + "\t" + covariateData.rowObjects.get(q) + "\t" + probe);
                    System.out.println(
                            "Individual\tAllele1\tAllele2\tGenotype\tGenotypeFlipped\tCovariate\tExpression");
                    byte[] alleles1 = snpObj.getAllele1();
                    byte[] alleles2 = snpObj.getAllele2();
                    byte[] genotypes = snpObj.getGenotypes();
                    ArrayList<Byte> genotypeArr = new ArrayList<Byte>();
                    ArrayList<Double> covariateArr = new ArrayList<Double>();
                    ArrayList<Double> expressionArr = new ArrayList<Double>();

                    int nrCalled = 0;

                    for (int i = 0; i < genoIndividuals.length; i++) {
                        // Genotype code -1 is skipped (presumably a missing call - confirm), as
                        // are individuals without a covariate/expression column (-9).
                        if (genotypes[i] != -1 && genotypeToCovariate[i] != -9 && genotypeToExpression[i] != -9) {

                            if (!Double.isNaN(covariateData.rawData[q][genotypeToCovariate[i]])) {

                                int genotypeflipped = genotypes[i];
                                if (signInteractionEffectDirection == -1) {
                                    genotypeflipped = 2 - genotypeflipped;
                                }

                                String output = genoIndividuals[i] + "\t" + BaseAnnot.toString(alleles1[i]) + "\t"
                                        + BaseAnnot.toString(alleles2[i]) + "\t" + genotypes[i] + "\t"
                                        + genotypeflipped + "\t" + covariateData.rawData[q][genotypeToCovariate[i]]
                                        + "\t" + expressionData.rawData[probeId][genotypeToExpression[i]];
                                System.out.println(output);

                                // The unflipped genotype is stored; flipping is applied again below.
                                genotypeArr.add(genotypes[i]);

                                covariateArr.add(covariateData.rawData[q][genotypeToCovariate[i]]);
                                expressionArr.add(expressionData.rawData[probeId][genotypeToExpression[i]]);
                                nrCalled++;
                            }

                        }
                    }
                    System.out.println("");
                    //Fill arrays with data in order to be able to perform the ordinary least squares analysis:
                    double[] olsY = new double[nrCalled]; //Ordinary least squares: Our gene expression
                    double[][] olsXFullWithInteraction = new double[nrCalled][3]; //With interaction term, linear model: y ~ a * SNP + b * CellCount + c + d * SNP * CellCount

                    double[] dataExp = new double[nrCalled];
                    double[] dataCov = new double[nrCalled];
                    int[] dataGen = new int[nrCalled];

                    for (int s = 0; s < nrCalled; s++) {
                        byte originalGenotype = genotypeArr.get(s);
                        int genotype = originalGenotype;
                        if (signInteractionEffectDirection == -1) {
                            genotype = 2 - genotype;
                        }

                        olsY[s] = expressionArr.get(s);

                        olsXFullWithInteraction[s][0] = genotype;
                        olsXFullWithInteraction[s][1] = covariateArr.get(s);
                        olsXFullWithInteraction[s][2] = olsXFullWithInteraction[s][0]
                                * olsXFullWithInteraction[s][1];

                        dataExp[s] = olsY[s];
                        dataGen[s] = genotype;
                        dataCov[s] = covariateArr.get(s);
                    }

                    regressionFullWithInteraction.newSampleData(olsY, olsXFullWithInteraction);

                    double[] regressionParameters = regressionFullWithInteraction.estimateRegressionParameters();
                    double[] regressionStandardErrors = regressionFullWithInteraction
                            .estimateRegressionParametersStandardErrors();

                    // The design matrix has three columns, so index 3 presumes the regression
                    // prepends an intercept and the interaction term is the last parameter.
                    double betaInteraction = regressionParameters[3];
                    double seInteraction = regressionStandardErrors[3];
                    double tInteraction = betaInteraction / seInteraction;
                    double pValueInteraction = 1;
                    double zScoreInteraction = 0;

                    // The degrees of freedom depend on the number of samples used in THIS
                    // regression, which varies with missing genotypes and NaN covariates, so the
                    // distribution is rebuilt per regression rather than cached from the first one.
                    cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(
                            olsY.length - 4, randomEngine);

                    if (tInteraction < 0) {
                        pValueInteraction = tDistColt.cdf(tInteraction);
                        // Clamp so that the inverse normal below is not evaluated at zero.
                        if (pValueInteraction < 2.0E-323) {
                            pValueInteraction = 2.0E-323;
                        }
                        zScoreInteraction = cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
                    } else {
                        pValueInteraction = tDistColt.cdf(-tInteraction);
                        if (pValueInteraction < 2.0E-323) {
                            pValueInteraction = 2.0E-323;
                        }

                        zScoreInteraction = -cern.jet.stat.tdouble.Probability.normalInverse(pValueInteraction);
                    }
                    // One-sided tail probability to two-sided p-value.
                    pValueInteraction *= 2;
                    String pvalFormatted = "";
                    if (pValueInteraction >= 0.001) {
                        pvalFormatted = decFormat.format(pValueInteraction);
                    } else {
                        pvalFormatted = decFormatSmall.format(pValueInteraction);
                    }
                    // Constructed for its effect only; presumably the constructor renders the
                    // plot to the given file name - confirm against ScatterPlot.
                    new ScatterPlot(500, 500, dataCov, dataExp, dataGen,
                            genotypeDescriptions, colorarray, ScatterPlot.OUTPUTFORMAT.PDF,
                            "Interaction between SNP " + snp + ", probe " + probe + " and covariate "
                                    + covariateData.rowObjects.get(q),
                            "Z: " + decFormat.format(zScoreInteraction) + " Pvalue: " + pvalFormatted + " n: "
                                    + nrCalled,
                            outdir + snp + "-" + probe + "-" + covariateData.rowObjects.get(q) + ".pdf", false);

                }

                snpObj.clearGenotypes();
            }
        }
    } finally {
        // Release the loader even when a regression or plot fails part-way through.
        loader.close();
    }
}

From source file:org.apache.solr.client.solrj.io.eval.OLSRegressionEvaluator.java

/**
 * Runs an ordinary-least-squares regression of the outcomes on the observation matrix and
 * returns the fitted model together with a map of summary statistics.
 *
 * <p>The map always contains {@code regressandVariance}, {@code regressionParameters},
 * {@code RSquared}, {@code adjustedRSquared} and {@code residualSumSquares}. The entries
 * {@code regressionParametersStandardErrors} and {@code regressionParametersVariance} are added
 * only when they can be estimated without an exception.
 *
 * @param values the evaluated parameters: {@code values[0]} must be a {@code Matrix} of
 *        observations and {@code values[1]} a {@code List} of numeric outcomes
 * @return a {@code MultipleRegressionTuple} wrapping the regression and the statistics map
 * @throws IOException if fewer than two parameters are supplied or a parameter has the wrong type
 */
@Override
public Object doWork(Object... values) throws IOException {

    // Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException.
    if (values == null || values.length < 2) {
        throw new IOException(
                "olsRegress expects two parameters: the observation matrix and the outcome array.");
    }

    if (!(values[0] instanceof Matrix)) {
        throw new IOException("The first parameter for olsRegress should be the observation matrix.");
    }

    if (!(values[1] instanceof List)) {
        throw new IOException("The second parameter for olsRegress should be outcome array. ");
    }

    Matrix observations = (Matrix) values[0];
    // Element types cannot be checked at runtime; a non-Number element fails in the loop below.
    @SuppressWarnings("unchecked")
    List<Number> outcomes = (List<Number>) values[1];

    double[][] observationData = observations.getData();
    double[] outcomeData = new double[outcomes.size()];
    for (int i = 0; i < outcomeData.length; i++) {
        outcomeData[i] = outcomes.get(i).doubleValue();
    }

    OLSMultipleLinearRegression multipleLinearRegression = (OLSMultipleLinearRegression) regress(
            observationData, outcomeData);

    Map<String, Object> map = new HashMap<String, Object>();

    map.put("regressandVariance", multipleLinearRegression.estimateRegressandVariance());
    map.put("regressionParameters", list(multipleLinearRegression.estimateRegressionParameters()));
    map.put("RSquared", multipleLinearRegression.calculateRSquared());
    map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared());
    map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares());

    try {
        map.put("regressionParametersStandardErrors",
                list(multipleLinearRegression.estimateRegressionParametersStandardErrors()));
        map.put("regressionParametersVariance",
                new Matrix(multipleLinearRegression.estimateRegressionParametersVariance()));
    } catch (Exception ignored) {
        // Deliberately ignored: an exception is thrown if the matrix is singular, in which case
        // these optional entries are simply left out of the result.
    }

    return new MultipleRegressionTuple(multipleLinearRegression, map);
}

From source file:org.hawkular.datamining.forecast.stats.AugmentedDickeyFullerTest.java

/**
 * Fits the OLS regression behind the test and stores the resulting statistic in {@code adfStat}.
 *
 * <p>The response is the lag-1 differenced series with its first {@code maxLag} values dropped;
 * the regressors come from {@code explanatoryVariables(x, differences)}. The statistic is the
 * estimated gamma coefficient divided by its standard error.
 *
 * @param x the series to test
 */
private void calculateStatistics(double[] x) {
    final double[] diffs = TimeSeriesDifferencing.differencesAtLag(x, 1);
    final double[] response = Arrays.copyOfRange(diffs, maxLag, diffs.length);
    final double[][] regressors = explanatoryVariables(x, diffs);

    // Fit the OLS model; the intercept is dropped only for the no-intercept/no-trend variant.
    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    final boolean withoutIntercept = (type == Type.NoInterceptNoTimeTrend);
    ols.setNoIntercept(withoutIntercept);
    ols.newSampleData(response, regressors);

    final double[] coefficients = ols.estimateRegressionParameters();
    final double[] coefficientErrors = ols.estimateRegressionParametersStandardErrors();

    // With an intercept the first parameter is the intercept and gamma (the lagged x_t term)
    // is second; without an intercept gamma comes first.
    final int gammaPosition;
    if (withoutIntercept) {
        gammaPosition = 0;
    } else {
        gammaPosition = 1;
    }
    adfStat = coefficients[gammaPosition] / coefficientErrors[gammaPosition];
}