Example usage for org.apache.commons.math3.random EmpiricalDistribution getUpperBounds

List of usage examples for org.apache.commons.math3.random EmpiricalDistribution getUpperBounds

Introduction

On this page you can find example usages of the getUpperBounds method of org.apache.commons.math3.random.EmpiricalDistribution.

Prototype

public double[] getUpperBounds() 

Source Link

Document

Returns a fresh copy of the array of upper bounds for the bins.

Usage

From source file: uk.ac.ebi.phenotype.service.StatisticalResultService.java

/**
 * Builds the data for a stacked-bar histogram of per-colony plot ratios for one parameter.
 *
 * <p>Statistical results are fetched from Solr grouped by colony id (one document per group).
 * For each group a plot ratio is obtained from {@code OverviewRatio}; the ratios are binned with
 * an {@link EmpiricalDistribution} and each colony is then counted in its bin either as a
 * "phenotype mutant" (its marker accession id is in {@code genes}) or as a control.
 *
 * @param p                parameter whose stable id restricts the query
 * @param genes            marker accession ids that have the phenotype association; colonies
 *                         whose marker is in this list are counted as phenotype mutants
 * @param strains          strain accession ids to filter on; when {@code null} or empty no strain
 *                         filter is applied
 * @param biologicalSample not used by this method
 * @param center           phenotyping centers to filter on; ignored when {@code null} or empty
 * @param sex              when it holds exactly one value, restricts results to that sex
 *                         ("male", case-insensitive, or anything else for female)
 * @return the populated chart data, or {@code null} when no group yielded a plot ratio
 * @throws SolrServerException if the Solr query fails
 */
public StackedBarsData getUnidimensionalData(Parameter p, List<String> genes, ArrayList<String> strains,
        String biologicalSample, String[] center, String[] sex) throws SolrServerException {

    String urlParams = "";
    SolrQuery query = new SolrQuery()
            .addFilterQuery(StatisticalResultDTO.PARAMETER_STABLE_ID + ":" + p.getStableId());
    String q = "*:*";

    // Only filter by strain when at least one strain was supplied: reading strains.get(0)
    // from an empty list would throw before the query is even built.
    if (strains != null && !strains.isEmpty()) {
        query.addFilterQuery((strains.size() > 1)
                ? "(" + StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\""
                        + StringUtils.join(strains.toArray(),
                                "\" OR " + StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\"")
                        + "\")"
                : StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"");
        urlParams += "&strain=" + StringUtils.join(strains.toArray(), "&strain=");
    }

    if (center != null && center.length > 0) {
        query.addFilterQuery(
                "(" + ((center.length > 1)
                        ? StatisticalResultDTO.PHENOTYPING_CENTER + ":\""
                                + StringUtils.join(center,
                                        "\" OR " + StatisticalResultDTO.PHENOTYPING_CENTER + ":\"")
                                + "\""
                        : StatisticalResultDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"") + ")");
        urlParams += "&phenotyping_center=" + StringUtils.join(center, "&phenotyping_center=");
    }

    // With a single requested sex, require at least 4 control and 4 mutant animals of that sex.
    if (sex != null && sex.length == 1) {
        if (sex[0].equalsIgnoreCase("male")) {
            query.addFilterQuery(StatisticalResultDTO.MALE_CONTROL_COUNT + ":[4 TO 100000]");
            query.addFilterQuery(StatisticalResultDTO.MALE_MUTANT_COUNT + ":[4 TO 100000]");
        } else {
            query.addFilterQuery(StatisticalResultDTO.FEMALE_CONTROL_COUNT + ":[4 TO 100000]");
            query.addFilterQuery(StatisticalResultDTO.FEMALE_MUTANT_COUNT + ":[4 TO 100000]");
        }
    }

    query.setQuery(q);
    query.addFilterQuery("(" + StatisticalResultDTO.FEMALE_CONTROL_COUNT + ":[4 TO 100000] OR "
            + StatisticalResultDTO.MALE_CONTROL_COUNT + ":[4 TO 100000])");
    query.setRows(10000000);
    query.setFields(StatisticalResultDTO.MARKER_ACCESSION_ID, StatisticalResultDTO.FEMALE_CONTROL_MEAN,
            StatisticalResultDTO.MARKER_SYMBOL, StatisticalResultDTO.FEMALE_MUTANT_MEAN,
            StatisticalResultDTO.MALE_CONTROL_MEAN, StatisticalResultDTO.MALE_MUTANT_MEAN,
            StatisticalResultDTO.FEMALE_CONTROL_COUNT, StatisticalResultDTO.FEMALE_MUTANT_COUNT,
            StatisticalResultDTO.MALE_CONTROL_COUNT, StatisticalResultDTO.MALE_MUTANT_COUNT);
    // Group by colony and keep a single document per colony.
    query.set("group", true);
    query.set("group.field", StatisticalResultDTO.COLONY_ID);
    query.set("group.limit", 1);

    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();

    // Parallel arrays sized for the worst case; only the first `size` slots are filled.
    double[] meansArray = new double[groups.size()];
    String[] genesArray = new String[groups.size()];
    String[] geneSymbolArray = new String[groups.size()];
    int size = 0;

    for (Group gr : groups) {

        SolrDocumentList resDocs = gr.getResult();
        String sexToDisplay = null;
        OverviewRatio overviewRatio = new OverviewRatio();

        for (SolrDocument doc : resDocs) {
            sexToDisplay = getSexToDisplay(sex, sexToDisplay, doc);
            overviewRatio.add(doc);
        }

        // Groups without a displayable sex or without a ratio are skipped.
        if (sexToDisplay != null) {
            Double ratio = overviewRatio.getPlotRatio(sexToDisplay);
            if (ratio != null) {
                genesArray[size] = (String) resDocs.get(0).get(StatisticalResultDTO.MARKER_ACCESSION_ID);
                geneSymbolArray[size] = (String) resDocs.get(0).get(StatisticalResultDTO.MARKER_SYMBOL);
                meansArray[size] = ratio;
                size++;
            }
        }
    }

    if (size == 0) {
        // Nothing to plot; callers receive null in this case.
        return null;
    }

    // We do the binning for all the data but fill the bins after that to
    // keep track of phenotype associations.
    // At most 20 bins, roughly one per two groups, and never fewer than one: a bin count of
    // zero (fewer than two groups) is not a usable EmpiricalDistribution configuration.
    int binCount = Math.max(1, Math.min(groups.size() / 2, 20));
    ArrayList<String> mutantGenes = new ArrayList<String>();
    ArrayList<String> controlGenes = new ArrayList<String>();
    ArrayList<String> mutantGeneAcc = new ArrayList<String>();
    ArrayList<String> controlGeneAcc = new ArrayList<String>();
    ArrayList<Double> upperBounds = new ArrayList<Double>();

    EmpiricalDistribution distribution = new EmpiricalDistribution(binCount);
    // The end index of subarray is exclusive, so pass `size` to load every collected ratio.
    distribution.load(ArrayUtils.subarray(meansArray, 0, size));
    for (double bound : distribution.getUpperBounds()) {
        upperBounds.add(bound);
    }

    // We need to distribute the control mutants and the
    // phenotype-mutants in the bins.
    ArrayList<Double> controlM = new ArrayList<Double>();
    ArrayList<Double> phenMutants = new ArrayList<Double>();

    for (int j = 0; j < upperBounds.size(); j++) {
        controlM.add((double) 0);
        phenMutants.add((double) 0);
        controlGenes.add("");
        mutantGenes.add("");
        controlGeneAcc.add("");
        mutantGeneAcc.add("");
    }

    for (int j = 0; j < size; j++) {
        // find out the proper bin
        int binIndex = getBin(upperBounds, meansArray[j]);
        // NOTE(review): the de-duplication below is a substring test on the comma-joined symbol
        // list, so a symbol that is a substring of one already listed is treated as present.
        if (genes.contains(genesArray[j])) {
            phenMutants.set(binIndex, 1 + phenMutants.get(binIndex));
            String genesString = mutantGenes.get(binIndex);
            if (!genesString.contains(geneSymbolArray[j])) {
                if (genesString.equals("")) {
                    mutantGenes.set(binIndex, geneSymbolArray[j]);
                    mutantGeneAcc.set(binIndex, "accession=" + genesArray[j]);
                } else {
                    mutantGenes.set(binIndex, genesString + ", " + geneSymbolArray[j]);
                    mutantGeneAcc.set(binIndex,
                            mutantGeneAcc.get(binIndex) + "&accession=" + genesArray[j]);
                }
            }
        } else { // treat as control because they don't have this phenotype association
            String genesString = controlGenes.get(binIndex);
            if (!genesString.contains(geneSymbolArray[j])) {
                if (genesString.equalsIgnoreCase("")) {
                    controlGenes.set(binIndex, geneSymbolArray[j]);
                    controlGeneAcc.set(binIndex, "accession=" + genesArray[j]);
                } else {
                    controlGenes.set(binIndex, genesString + ", " + geneSymbolArray[j]);
                    controlGeneAcc.set(binIndex,
                            controlGeneAcc.get(binIndex) + "&accession=" + genesArray[j]);
                }
            }
            controlM.set(binIndex, 1 + controlM.get(binIndex));
        }
    }

    // add the rest of parameters to the graph urls
    for (int t = 0; t < controlGeneAcc.size(); t++) {
        controlGeneAcc.set(t, controlGeneAcc.get(t) + urlParams);
        mutantGeneAcc.set(t, mutantGeneAcc.get(t) + urlParams);
    }

    StackedBarsData data = new StackedBarsData();
    data.setUpperBounds(upperBounds);
    data.setControlGenes(controlGenes);
    data.setControlMutatns(controlM);
    data.setMutantGenes(mutantGenes);
    data.setPhenMutants(phenMutants);
    data.setControlGeneAccesionIds(controlGeneAcc);
    data.setMutantGeneAccesionIds(mutantGeneAcc);
    return data;
}

From source file: util.Statistics.java

/**
 * Computes summary statistics and a histogram for the given scores, then strips low outliers
 * from {@code list} and refreshes min, max, mean and standard deviation if any were removed.
 *
 * <p>Note that outliers are removed from the caller's {@code list} in place (through its
 * iterator), and each removed value is also recorded in {@code outliers}.
 *
 * @param list the integer scores to analyse; modified when outliers are found
 */
public Statistics(List<Integer> list) {
    scores = intsToDoubles(list);
    DescriptiveStatistics described = new DescriptiveStatistics(scores);

    // Five-number summary.
    summaryStatistics.put("min", described.getMin());
    summaryStatistics.put("q1", described.getPercentile(25));
    summaryStatistics.put("q2", described.getPercentile(50)); // median
    summaryStatistics.put("q3", described.getPercentile(75));
    summaryStatistics.put("max", described.getMax());

    // Central tendency and spread.
    summaryStatistics.put("mean", described.getMean());
    summaryStatistics.put("sd", described.getStandardDeviation());

    // Histogram: each bin is stored as ((lower edge, upper edge), count). The first bin starts
    // at the overall minimum; every later bin starts where the previous one ended.
    EmpiricalDistribution histogram = new EmpiricalDistribution(NUM_BINS);
    histogram.load(scores);
    List<SummaryStatistics> perBin = histogram.getBinStats();
    double[] binCeilings = histogram.getUpperBounds();

    Double lowerEdge = summaryStatistics.get("min");
    for (int idx = 0; idx < perBin.size(); idx++) {
        Double upperEdge = binCeilings[idx];
        Pair<Double, Double> range = new Pair<Double, Double>(lowerEdge, upperEdge);
        bins.add(new Pair<Pair<Double, Double>, Long>(range, perBin.get(idx).getN()));
        lowerEdge = upperEdge;
    }

    // Outlier removal is attempted only for more than five values with a positive spread.
    if (list.size() <= 5 || !(described.getStandardDeviation() > 0)) {
        return;
    }

    NormalDistribution fitted = new NormalDistribution(described.getMean(),
            described.getStandardDeviation());

    // Threshold of 0.5 / n, labelled as Chauvenet's criterion in the original code.
    // NOTE(review): only the lower tail is tested here, so unusually high values are never
    // flagged; confirm this is intended.
    double threshold = .50 / list.size();
    for (Iterator<Integer> cursor = list.iterator(); cursor.hasNext();) {
        int value = cursor.next();
        if (fitted.cumulativeProbability(value) < threshold) {
            outliers.add(value);
            cursor.remove();
        }
    }

    // Nothing removed: the statistics stored above are still valid.
    if (list.size() == described.getN()) {
        return;
    }

    // NOTE(review): the quartiles and the histogram bins are not recomputed after removal.
    DescriptiveStatistics trimmed = new DescriptiveStatistics(intsToDoubles(list));
    summaryStatistics.put("min", trimmed.getMin());
    summaryStatistics.put("max", trimmed.getMax());
    summaryStatistics.put("mean", trimmed.getMean());
    summaryStatistics.put("sd", trimmed.getStandardDeviation());
}