Example usage for org.apache.commons.math3.stat.regression SimpleRegression getSignificance

List of usage examples for org.apache.commons.math3.stat.regression SimpleRegression getSignificance

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.regression.SimpleRegression.getSignificance().

Prototype

public double getSignificance() 

Source Link

Document

Returns the significance level of the slope (equivalently, of the correlation).

Usage

From source file:nl.systemsgenetics.cellTypeSpecificAlleleSpecificExpression.CTSlinearRegression.java

/**
 * Builds the cell-type-specific linear regression for one SNP.
 *
 * The SNP identity (name, chromosome, position) is read from the first
 * element of {@code all_individuals}. The valid heterozygotes are then
 * isolated and their per-sample values collected. When enough data is
 * present, the allelic ratio (ref / (ref + alt)) is regressed on the cell
 * type proportion and the fit statistics are stored in the instance fields.
 *
 * @param all_individuals per-individual data for a single SNP; the first
 *                        element is read unconditionally
 */
public CTSlinearRegression(ArrayList<IndividualSnpData> all_individuals) {

    // The SNP identity is taken from the first record.
    IndividualSnpData firstRecord = all_individuals.get(0);
    snpName = firstRecord.getSnpName();
    chromosome = firstRecord.getChromosome();
    position = firstRecord.getPosition();

    // Only valid heterozygous individuals take part in the analysis.
    ArrayList<IndividualSnpData> het_individuals = UtilityMethods
            .isolateValidHeterozygotesFromIndividualSnpData(all_individuals);
    numberOfHets = het_individuals.size();

    hetSampleNames = new ArrayList<String>();
    asRef = new ArrayList<Integer>();
    asAlt = new ArrayList<Integer>();
    asNo = new ArrayList<Integer>();
    cellProp = new ArrayList<Double>();

    // Collect the raw per-sample values; no testing happens in this loop.
    // The summed ref + alt counts are compared against minReads below.
    int total_overlap = 0;
    for (IndividualSnpData het : het_individuals) {
        hetSampleNames.add(het.getSampleName());
        asRef.add(het.getRefNum());
        asAlt.add(het.getAltNum());
        asNo.add(het.getNoNum());
        cellProp.add(het.getCellTypeProp());

        total_overlap += het.getRefNum() + het.getAltNum();
    }

    // Skip the test unless there are enough reads and enough heterozygotes
    // (at least minHets, and never fewer than three).
    boolean enoughReads = total_overlap >= GlobalVariables.minReads;
    boolean enoughHets = (numberOfHets >= GlobalVariables.minHets) && (numberOfHets >= 3);
    if (!(enoughReads && enoughHets)) {
        return;
    }

    // A scatter plot is only produced when a plot directory is configured.
    final boolean makePlot = !GlobalVariables.plotDir.equals("");
    ASScatterPlot plotThis = makePlot ? new ASScatterPlot(400) : null;

    SimpleRegression thisRegression = new SimpleRegression();
    for (int i = 0; i < asRef.size(); i++) {
        int refReads = asRef.get(i);

        // With zero reference reads the ratio is defined as 0, which also
        // avoids a 0/0 division when the alternative count is zero as well.
        Double asRatio = (refReads != 0)
                ? ((double) refReads) / ((double) (refReads + asAlt.get(i)))
                : 0.0;
        Double phenoRatio = cellProp.get(i);

        // Cell proportion is the predictor (x), allelic ratio the response (y).
        thisRegression.addData(phenoRatio, asRatio);
        if (makePlot) {
            plotThis.plot(asRatio, phenoRatio);
        }
    }

    if (makePlot) {
        plotThis.draw(GlobalVariables.plotDir + "/" + snpName + "_ASratio_Pheno_Plot.png");
    }

    slope = thisRegression.getSlope();
    intercept = thisRegression.getIntercept();
    Rsquared = thisRegression.getRSquare();
    stdErrorIntercept = thisRegression.getInterceptStdErr();
    stdErrorSlope = thisRegression.getSlopeStdErr();
    pValue = thisRegression.getSignificance();

    if (GlobalVariables.verbosity >= 10) {
        System.out.println("\n--- Starting cell type specific linear regression ---");
        System.out.println("\tSlope:                   " + slope);
        System.out.println("\tStdError of Slope:       " + stdErrorSlope + "\n");
        System.out.println("\tIntercept:               " + intercept);
        System.out.println("\tStdError of Intercept:   " + stdErrorIntercept + "\n");
        System.out.println("\tP value:                 " + pValue);
        System.out.println("--------------------------------------------------------------");
    }

    testPerformed = true;
}

From source file:nl.systemsgenetics.eqtlinteractionanalyser.eqtlinteractionanalyser.PerformInteractionAnalysisPermutationTask.java

/**
 * Regresses the genotype values of one SNP on the values of the covariate
 * currently under test ({@code covToTest}) and returns the significance of
 * the fitted slope.
 *
 * @param snp row index into {@code datasetGenotypes.rawData}
 * @return the value of {@link SimpleRegression#getSignificance()} for the fit
 */
private double correlateCovariateWithGenotype(int snp) {
    double[] covariateValues = datasetCovariatesPCAForceNormal.rawData[covToTest];
    double[] genotypeValues = datasetGenotypes.rawData[snp];

    // Covariate is the predictor (x), genotype the response (y).
    SimpleRegression regression = new SimpleRegression();
    int sample = 0;
    while (sample < covariateValues.length) {
        regression.addData(covariateValues[sample], genotypeValues[sample]);
        sample++;
    }

    // Note: a former debug print for SNP/covariate pairs with identical probe
    // names was disabled once "_rs" suffixes were added next to the gene names.
    return regression.getSignificance();
}

From source file:org.apache.solr.client.solrj.io.eval.RegressionEvaluator.java

/**
 * Fits a simple linear regression over two lists of numbers, pairing the
 * elements by index (first list = x, second list = y).
 *
 * Only the first {@code l1.size()} elements of the second list are used; the
 * second list may be longer than the first but not shorter.
 *
 * @param first  the x values; must be a non-null {@code List} of {@link Number}s
 * @param second the y values; must be a non-null {@code List} of {@link Number}s
 *               with at least as many elements as {@code first}
 * @return a {@code RegressionTuple} wrapping the fitted regression and a map
 *         of its summary statistics
 * @throws IOException if either argument is null or not a list, or if the
 *                     first list has more values than the second
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (null == first) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - null found for the first value", toExpression(constructingFactory)));
    }
    if (null == second) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - null found for the second value", toExpression(constructingFactory)));
    }
    if (!(first instanceof List<?>)) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for the first value, expecting a list of numbers",
                toExpression(constructingFactory), first.getClass().getSimpleName()));
    }
    if (!(second instanceof List<?>)) {
        // Report the type of the offending (second) argument, not the first.
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for the second value, expecting a list of numbers",
                toExpression(constructingFactory), second.getClass().getSimpleName()));
    }

    List<?> l1 = (List<?>) first;
    List<?> l2 = (List<?>) second;

    if (l2.size() < l1.size()) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - first list (%d) has more values than the second list (%d)",
                toExpression(constructingFactory), l1.size(), l2.size()));
    }

    SimpleRegression regression = new SimpleRegression();
    for (int idx = 0; idx < l1.size(); ++idx) {
        // Accept any Number (BigDecimal included) rather than BigDecimal only,
        // matching the "list of numbers" contract stated in the error messages.
        regression.addData(((Number) l1.get(idx)).doubleValue(), ((Number) l2.get(idx)).doubleValue());
    }

    Map<String, Number> map = new HashMap<>();
    map.put("slope", regression.getSlope());
    map.put("intercept", regression.getIntercept());
    map.put("R", regression.getR());
    map.put("N", regression.getN());
    map.put("RSquare", regression.getRSquare());
    map.put("regressionSumSquares", regression.getRegressionSumSquares());
    map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval());
    map.put("interceptStdErr", regression.getInterceptStdErr());
    map.put("totalSumSquares", regression.getTotalSumSquares());
    map.put("significance", regression.getSignificance());
    map.put("meanSquareError", regression.getMeanSquareError());

    return new RegressionTuple(regression, map);
}

From source file:org.apache.solr.client.solrj.io.stream.RegressionEvaluator.java

/**
 * Evaluates the two sub-evaluators against {@code tuple}, treats their
 * results as lists of numbers, and fits a simple linear regression with the
 * first column as x and the second as y (paired by index).
 *
 * Only the first {@code numbers1.size()} elements of the second column are
 * used; the second column may be longer than the first but not shorter.
 *
 * @param tuple the tuple passed to both sub-evaluators
 * @return a {@code RegressionTuple} wrapping the fitted regression and a map
 *         of its summary statistics
 * @throws IOException if there are not exactly two sub-evaluators, or if the
 *                     first column has more values than the second
 */
public Tuple evaluate(Tuple tuple) throws IOException {

    if (subEvaluators.size() != 2) {
        throw new IOException("Regress expects 2 columns as parameters");
    }

    StreamEvaluator colEval1 = subEvaluators.get(0);
    StreamEvaluator colEval2 = subEvaluators.get(1);

    List<Number> numbers1 = (List<Number>) colEval1.evaluate(tuple);
    List<Number> numbers2 = (List<Number>) colEval2.evaluate(tuple);

    // Every x needs a matching y. Fail with a descriptive IOException instead
    // of an ArrayIndexOutOfBoundsException from the pairing loop.
    if (numbers2.size() < numbers1.size()) {
        throw new IOException("Regress expects the second column to have at least as many values as the first"
                + " - first column has " + numbers1.size() + ", second column has " + numbers2.size());
    }

    SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < numbers1.size(); i++) {
        regression.addData(numbers1.get(i).doubleValue(), numbers2.get(i).doubleValue());
    }

    // Raw Map kept deliberately: the RegressionTuple constructor's parameter
    // type is declared elsewhere.
    Map map = new HashMap();
    map.put("slope", regression.getSlope());
    map.put("intercept", regression.getIntercept());
    map.put("R", regression.getR());
    map.put("N", regression.getN());
    map.put("regressionSumSquares", regression.getRegressionSumSquares());
    map.put("slopeConfidenceInterval", regression.getSlopeConfidenceInterval());
    map.put("interceptStdErr", regression.getInterceptStdErr());
    map.put("totalSumSquares", regression.getTotalSumSquares());
    map.put("significance", regression.getSignificance());
    map.put("meanSquareError", regression.getMeanSquareError());
    return new RegressionTuple(regression, map);
}

From source file:uk.ac.babraham.SeqMonk.Pipelines.IntronRegressionPipeline.java

/**
 * Runs the intron regression pipeline in two phases.
 *
 * Phase 1 builds one probe per sufficiently long intron (the gap between
 * consecutive sub-locations of a split feature), de-duplicates the probes per
 * chromosome and installs them as the collection's probe set.
 *
 * Phase 2 visits every probe / data store pair. It counts the filtered reads
 * per position, and if the read density is high enough it regresses binned
 * counts on position. The stored value is the slope (scaled by 1,000,000 and
 * sign-flipped for reverse-strand probes) when the slope's p-value is within
 * the limit; otherwise NaN is stored.
 *
 * The method returns early, after calling {@code progressCancelled()},
 * whenever {@code cancel} is set.
 */
protected void startPipeline() {

    // Probes cover the whole intron regardless of where the feature splices.
    Vector<Probe> probes = new Vector<Probe>();

    int minDensity = optionsPanel.minDensity();
    int minLength = optionsPanel.minLength();
    double maxPValue = optionsPanel.maxPValue();
    int binSize = optionsPanel.measurementBinSize();
    QuantitationStrandType readFilter = optionsPanel.readFilter();

    Chromosome[] chrs = collection().genome().getAllChromosomes();

    // ---- Phase 1: build the intron probes, chromosome by chromosome ----
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        Chromosome chromosome = chrs[c];
        Vector<Probe> chromosomeProbes = new Vector<Probe>();

        progressUpdated("Making probes", c, chrs.length);

        for (Feature feature : getValidFeatures(chromosome)) {
            if (cancel) {
                progressCancelled();
                return;
            }

            // Features without a split location have no introns.
            if (!(feature.location() instanceof SplitLocation)) {
                continue;
            }

            Location[] subLocations = ((SplitLocation) feature.location()).subLocations();

            // TODO: Reverse the subLocations if its a reverse feature
            for (int intron = 1; intron < subLocations.length; intron++) {
                int intronStart = subLocations[intron - 1].end();
                int intronEnd = subLocations[intron].start();

                // Introns shorter than the configured minimum are ignored.
                // TODO: We could throw away any probes which didn't have enough reads in any feature
                if ((intronEnd - intronStart) + 1 >= minLength) {
                    chromosomeProbes.add(new Probe(chromosome, intronStart, intronEnd,
                            feature.location().strand(), feature.name() + "_" + intron));
                }
            }
        }

        // Sort so that probes with the same packed position are adjacent,
        // then keep only the first probe of each run.
        Probe[] sortedProbes = chromosomeProbes.toArray(new Probe[0]);
        Arrays.sort(sortedProbes);

        for (int i = 0; i < sortedProbes.length; i++) {
            boolean duplicate = i > 0
                    && sortedProbes[i].packedPosition() == sortedProbes[i - 1].packedPosition();
            if (!duplicate) {
                probes.add(sortedProbes[i]);
            }
        }
    }

    Probe[] allProbes = probes.toArray(new Probe[0]);

    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));

    // ---- Phase 2: quantitate every probe in every data store ----
    for (int p = 0; p < allProbes.length; p++) {

        if (cancel) {
            progressCancelled();
            return;
        }

        if (p % 1000 == 0) {
            progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p,
                    allProbes.length);
        }

        Probe probe = allProbes[p];

        for (int d = 0; d < data.length; d++) {
            long[] reads = data[d].getReadsForProbe(probe);

            // Per-position coverage within the probe, from filtered reads only.
            int[] countsPerSite = new int[probe.length()];
            int usableCounts = 0;

            for (long read : reads) {
                if (!readFilter.useRead(probe, read)) {
                    continue;
                }
                usableCounts++;

                // Clamp the read's extent to the probe before counting.
                for (int pos = Math.max(0, SequenceRead.start(read) - probe.start());
                        pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(read) - probe.start());
                        pos++) {
                    countsPerSite[pos]++;
                }
            }

            // Reads per 1000 positions. Kept in ">=" form so that a NaN
            // density (0 reads over a 0-length probe) counts as too sparse.
            boolean denseEnough = usableCounts / (probe.length() / 1000d) >= minDensity;
            if (!denseEnough) {
                data[d].setValueForProbe(probe, Float.NaN);
                continue;
            }

            // A linear regression (rather than a correlation) over binned
            // counts: each completed bin is added at the position where the
            // next bin starts. A trailing partial bin is never added.
            SimpleRegression regression = new SimpleRegression();

            int binCount = 0;
            for (int i = 0; i < countsPerSite.length; i++) {
                if (i > 0 && i % binSize == 0) {
                    regression.addData(i, binCount);
                    binCount = 0;
                }
                binCount += countsPerSite[i];
            }

            float slope = (float) (regression.getSlope() * 1000000);
            double pValue = regression.getSignificance();

            // Flip the sign for reverse-strand probes.
            if (probe.strand() == Location.REVERSE) {
                slope = 0 - slope;
            }

            // Slopes that fail the p-value filter (including a NaN p-value)
            // are stored as NaN.
            float storedValue = (pValue <= maxPValue) ? slope : Float.NaN;
            data[d].setValueForProbe(probe, storedValue);
        }
    }

    // Record the settings used so the quantitation can be identified later.
    StringBuilder quantitationDescription = new StringBuilder()
            .append("Intron regression pipeline quantitation ")
            .append(". Directionality was ")
            .append(optionsPanel.libraryTypeBox.getSelectedItem())
            .append(". Min intron length was ")
            .append(minLength)
            .append(". Min read density was ")
            .append(minDensity)
            .append(". Max slope p-value was ")
            .append(maxPValue);

    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());

    quantitatonComplete();

}