Usage examples for org.apache.commons.math3.random.EmpiricalDistribution.getUpperBounds()
public double[] getUpperBounds()
Returns a fresh copy of the array of upper bounds for the bins.
From source file:uk.ac.ebi.phenotype.service.StatisticalResultService.java
public StackedBarsData getUnidimensionalData(Parameter p, List<String> genes, ArrayList<String> strains, String biologicalSample, String[] center, String[] sex) throws SolrServerException { String urlParams = ""; SolrQuery query = new SolrQuery() .addFilterQuery(StatisticalResultDTO.PARAMETER_STABLE_ID + ":" + p.getStableId()); String q = "*:*"; query.addFilterQuery((strains.size() > 1) ? "(" + StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\"" + StringUtils.join(strains.toArray(), "\" OR " + StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\"") + "\")" : StatisticalResultDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\""); if (strains.size() > 0) { urlParams += "&strain=" + StringUtils.join(strains.toArray(), "&strain="); }//from w ww .ja va 2s . co m if (center != null && center.length > 0) { query.addFilterQuery( "(" + ((center.length > 1) ? StatisticalResultDTO.PHENOTYPING_CENTER + ":\"" + StringUtils.join(center, "\" OR " + StatisticalResultDTO.PHENOTYPING_CENTER + ":\"") + "\"" : StatisticalResultDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"") + ")"); urlParams += "&phenotyping_center=" + StringUtils.join(center, "&phenotyping_center="); } if (sex != null && sex.length == 1) { if (sex[0].equalsIgnoreCase("male")) { query.addFilterQuery(StatisticalResultDTO.MALE_CONTROL_COUNT + ":[4 TO 100000]"); query.addFilterQuery(StatisticalResultDTO.MALE_MUTANT_COUNT + ":[4 TO 100000]"); } else { query.addFilterQuery(StatisticalResultDTO.FEMALE_CONTROL_COUNT + ":[4 TO 100000]"); query.addFilterQuery(StatisticalResultDTO.FEMALE_MUTANT_COUNT + ":[4 TO 100000]"); } } query.setQuery(q); query.addFilterQuery("(" + StatisticalResultDTO.FEMALE_CONTROL_COUNT + ":[4 TO 100000] OR " + StatisticalResultDTO.MALE_CONTROL_COUNT + ":[4 TO 100000])"); query.setRows(10000000); query.setFields(StatisticalResultDTO.MARKER_ACCESSION_ID, StatisticalResultDTO.FEMALE_CONTROL_MEAN, StatisticalResultDTO.MARKER_SYMBOL, StatisticalResultDTO.FEMALE_MUTANT_MEAN, 
StatisticalResultDTO.MALE_CONTROL_MEAN, StatisticalResultDTO.MALE_MUTANT_MEAN, StatisticalResultDTO.FEMALE_CONTROL_COUNT, StatisticalResultDTO.FEMALE_MUTANT_COUNT, StatisticalResultDTO.MALE_CONTROL_COUNT, StatisticalResultDTO.MALE_MUTANT_COUNT); query.set("group", true); query.set("group.field", StatisticalResultDTO.COLONY_ID); query.set("group.limit", 1); List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues(); double[] meansArray = new double[groups.size()]; String[] genesArray = new String[groups.size()]; String[] geneSymbolArray = new String[groups.size()]; int size = 0; for (Group gr : groups) { SolrDocumentList resDocs = gr.getResult(); String sexToDisplay = null; OverviewRatio overviewRatio = new OverviewRatio(); for (SolrDocument doc : resDocs) { sexToDisplay = getSexToDisplay(sex, sexToDisplay, doc); overviewRatio.add(doc); } if (sexToDisplay != null) { Double ratio = overviewRatio.getPlotRatio(sexToDisplay); if (ratio != null) { genesArray[size] = (String) resDocs.get(0).get(StatisticalResultDTO.MARKER_ACCESSION_ID); geneSymbolArray[size] = (String) resDocs.get(0).get(StatisticalResultDTO.MARKER_SYMBOL); meansArray[size] = ratio; size++; } } } // we do the binning for all the data but fill the bins after that to // keep tract of phenotype associations int binCount = Math.min((int) Math.floor((double) groups.size() / 2), 20); ArrayList<String> mutantGenes = new ArrayList<String>(); ArrayList<String> controlGenes = new ArrayList<String>(); ArrayList<String> mutantGeneAcc = new ArrayList<String>(); ArrayList<String> controlGeneAcc = new ArrayList<String>(); ArrayList<Double> upperBounds = new ArrayList<Double>(); EmpiricalDistribution distribution = new EmpiricalDistribution(binCount); if (size > 0) { distribution.load(ArrayUtils.subarray(meansArray, 0, size - 1)); for (double bound : distribution.getUpperBounds()) { upperBounds.add(bound); } // we we need to distribute the control mutants and the // phenotype-mutants in the 
bins ArrayList<Double> controlM = new ArrayList<Double>(); ArrayList<Double> phenMutants = new ArrayList<Double>(); for (int j = 0; j < upperBounds.size(); j++) { controlM.add((double) 0); phenMutants.add((double) 0); controlGenes.add(""); mutantGenes.add(""); controlGeneAcc.add(""); mutantGeneAcc.add(""); } for (int j = 0; j < size; j++) { // find out the proper bin int binIndex = getBin(upperBounds, meansArray[j]); if (genes.contains(genesArray[j])) { phenMutants.set(binIndex, 1 + phenMutants.get(binIndex)); String genesString = mutantGenes.get(binIndex); if (!genesString.contains(geneSymbolArray[j])) { if (genesString.equals("")) { mutantGenes.set(binIndex, geneSymbolArray[j]); mutantGeneAcc.set(binIndex, "accession=" + genesArray[j]); } else { mutantGenes.set(binIndex, genesString + ", " + geneSymbolArray[j]); mutantGeneAcc.set(binIndex, mutantGeneAcc.get(binIndex) + "&accession=" + genesArray[j]); } } } else { // treat as control because they don't have this phenotype association String genesString = controlGenes.get(binIndex); if (!genesString.contains(geneSymbolArray[j])) { if (genesString.equalsIgnoreCase("")) { controlGenes.set(binIndex, geneSymbolArray[j]); controlGeneAcc.set(binIndex, "accession=" + genesArray[j]); } else { controlGenes.set(binIndex, genesString + ", " + geneSymbolArray[j]); controlGeneAcc.set(binIndex, controlGeneAcc.get(binIndex) + "&accession=" + genesArray[j]); } } controlM.set(binIndex, 1 + controlM.get(binIndex)); } } // System.out.println(" Mutants list " + phenMutants); // add the rest of parameters to the graph urls for (int t = 0; t < controlGeneAcc.size(); t++) { controlGeneAcc.set(t, controlGeneAcc.get(t) + urlParams); mutantGeneAcc.set(t, mutantGeneAcc.get(t) + urlParams); } StackedBarsData data = new StackedBarsData(); data.setUpperBounds(upperBounds); data.setControlGenes(controlGenes); data.setControlMutatns(controlM); data.setMutantGenes(mutantGenes); data.setPhenMutants(phenMutants); 
data.setControlGeneAccesionIds(controlGeneAcc); data.setMutantGeneAccesionIds(mutantGeneAcc); return data; } return null; }
From source file:util.Statistics.java
/**
 * Computes summary statistics, a histogram and outliers for a list of integer scores.
 *
 * <p>The constructor records min, quartiles, max, mean and standard deviation
 * in {@code summaryStatistics}, then loads the scores into an
 * {@link EmpiricalDistribution} with {@code NUM_BINS} bins and stores each
 * bin as a (lower bound, upper bound) pair with its count in {@code bins}.
 *
 * <p>When the list has more than five entries and a non-zero standard
 * deviation, Chauvenet's criterion is applied: a value is an outlier when the
 * two-sided normal probability of a deviation at least as large as its own is
 * below {@code 0.5 / N}. Outliers are added to {@code outliers} and
 * <strong>removed from the supplied list</strong>, so the list must support
 * {@code Iterator.remove()}. If any were removed, "min", "max", "mean" and
 * "sd" are recomputed from the remaining values; the quartiles and the bins
 * keep their values from the full data set.
 *
 * @param list the scores to analyse; modified in place when outliers are found
 */
public Statistics(List<Integer> list) {
    scores = intsToDoubles(list);
    DescriptiveStatistics dStats = new DescriptiveStatistics(scores);

    summaryStatistics.put("min", dStats.getMin()); // Minimum
    summaryStatistics.put("q1", dStats.getPercentile(25)); // Lower Quartile (Q1)
    summaryStatistics.put("q2", dStats.getPercentile(50)); // Middle Quartile (Median - Q2)
    summaryStatistics.put("q3", dStats.getPercentile(75)); // High Quartile (Q3)
    summaryStatistics.put("max", dStats.getMax()); // Maximum
    summaryStatistics.put("mean", dStats.getMean()); // Mean
    summaryStatistics.put("sd", dStats.getStandardDeviation()); // Standard Deviation

    EmpiricalDistribution distribution = new EmpiricalDistribution(NUM_BINS);
    distribution.load(scores);
    List<SummaryStatistics> binStats = distribution.getBinStats();
    double[] upperBounds = distribution.getUpperBounds();

    // The first bin starts at the overall minimum; each later bin starts
    // at the previous bin's upper bound.
    Double lastUpperBound = upperBounds[0];
    bins.add(new Pair<Pair<Double, Double>, Long>(
            new Pair<Double, Double>(summaryStatistics.get("min"), lastUpperBound),
            binStats.get(0).getN()));
    for (int i = 1; i < binStats.size(); i++) {
        bins.add(new Pair<Pair<Double, Double>, Long>(
                new Pair<Double, Double>(lastUpperBound, upperBounds[i]),
                binStats.get(i).getN()));
        lastUpperBound = upperBounds[i];
    }

    // Only remove outliers if relatively normal.
    if (list.size() > 5 && dStats.getStandardDeviation() > 0) {
        double mean = dStats.getMean();
        double stDev = dStats.getStandardDeviation();
        NormalDistribution normalDistribution = new NormalDistribution(mean, stDev);
        Iterator<Integer> listIterator = list.iterator();

        // Chauvenet's criterion for outliers: reject when N * P(deviation) < 0.5.
        double significanceLevel = .50 / list.size();
        while (listIterator.hasNext()) {
            int num = listIterator.next();
            // Two-sided tail probability. Mirroring the value below the mean
            // lets the lower-tail CDF cover both high and low outliers
            // without the precision loss of computing 1 - CDF.
            double deviation = Math.abs(num - mean);
            double pValue = 2 * normalDistribution.cumulativeProbability(mean - deviation);
            if (pValue < significanceLevel) {
                outliers.add(num);
                listIterator.remove();
            }
        }

        // If and only if outliers have been removed.
        if (list.size() != dStats.getN()) {
            double[] significantData = intsToDoubles(list);
            dStats = new DescriptiveStatistics(significantData);
            summaryStatistics.put("min", dStats.getMin());
            summaryStatistics.put("max", dStats.getMax());
            summaryStatistics.put("mean", dStats.getMean());
            summaryStatistics.put("sd", dStats.getStandardDeviation());
        }
    }
}