Usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics
public DescriptiveStatistics()
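Before the full examples below, here is a minimal, self-contained sketch of the no-argument constructor and the methods those examples rely on (addValue, getMean, getStandardDeviation, getPercentile). The sample values are made up for illustration only.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsBasics {
    public static void main(String[] args) {
        // The no-argument constructor creates an instance with an infinite window
        // (it keeps every value added to it).
        DescriptiveStatistics stats = new DescriptiveStatistics();

        // Illustrative sample values.
        double[] values = { 12.5, 11.0, 14.2, 13.3, 9.8 };
        for (double v : values) {
            stats.addValue(v);
        }

        System.out.println("mean   = " + stats.getMean());
        System.out.println("stddev = " + stats.getStandardDeviation());
        System.out.println("median = " + stats.getPercentile(50));
    }
}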
From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.ChiSquareTestCategorical.java
@Override
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) {
    // Checks if the example set is pure; if it is, exits the method.
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // Selects the attributes to be evaluated for removal (by calculating
    // the chi-square probability for each attribute).
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNominal()) {
            // Calculate the chi-square probability of the attribute.
            double probability = 0;
            try {
                long[][] matrixForAttribute = getContigencyTable(exampleSet, attr);
                ChiSquareTestImpl chiTest = new ChiSquareTestImpl();
                probability = chiTest.chiSquareTest(matrixForAttribute);
            } catch (MathException me) {
                // System.out.println("Error in calculating math formula (chiTest)");
            }
            // Add the attribute and its probability to the candidate lists.
            attributesToRemove.add(attr);
            attributeProbabilities.add(new Double(probability));
        }

    // Calculates the percentile for the required percentage. The percentile
    // variable represents the percentage of attributes to be kept (not removed).
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // Evaluates attributes and chooses the ones for removal (actually keeps
    // the ones not marked for removal).
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // Removes the remaining attributes.
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}
From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.FTestNumerical.java
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) {
    // Checks if the example set is pure; if it is, exits the method.
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // Selects the attributes to be evaluated for removal (by calculating
    // the F-test probability for each attribute).
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNumerical()) {
            // Calculate the F-test probability of the attribute.
            double probability = 0;
            try {
                OneWayAnova fTest = new OneWayAnovaImpl();
                List<double[]> paramForFTest = getArraysByLabel(exampleSet, attr);
                // Tests whether any array for the F-test has fewer than 2 elements.
                boolean fTestImpossible = false;
                for (double[] i : paramForFTest)
                    if (i.length < 2)
                        fTestImpossible = true;
                // Calculates the F-test probability.
                if (!fTestImpossible)
                    probability = fTest.anovaPValue(paramForFTest);
            } catch (Exception e) {
                // System.out.println("Error in calculating math formula (FTest)");
            }
            // Add the attribute and its probability to the candidate lists.
            attributesToRemove.add(attr);
            attributeProbabilities.add(new Double(probability));
        }

    if (attributesToRemove.size() == 0)
        return attributesForSplitting;

    // Calculates the percentile for the required percentage. The percentile
    // variable represents the percentage of attributes to be kept (not removed).
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // Evaluates attributes and chooses the ones for removal (actually keeps
    // the ones not marked for removal).
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // Removes the remaining attributes.
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}
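Both removal components above use DescriptiveStatistics in the same way: each attribute's test p-value is added to a statistics object, getPercentile((1 - Percentage_Remove) * 100) yields a cut-off, attributes whose p-value is at or below the cut-off (or below Alpha_Value in the alpha mode) are kept, and the rest are removed as insignificant. Below is a condensed sketch of just that idiom; the p-values and the removal fraction are illustrative placeholders standing in for the component's inputs.

import java.util.Arrays;
import java.util.List;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PercentileThresholdSketch {
    public static void main(String[] args) {
        // Illustrative p-values, one per candidate attribute.
        List<Double> pValues = Arrays.asList(0.001, 0.04, 0.20, 0.35, 0.80);
        double removeFraction = 0.4; // stands in for Percentage_Remove

        DescriptiveStatistics stat = new DescriptiveStatistics();
        for (Double p : pValues) {
            stat.addValue(p);
        }

        // Cut-off such that roughly (1 - removeFraction) of the attributes
        // fall at or below it.
        double threshold = stat.getPercentile((1 - removeFraction) * 100);

        for (Double p : pValues) {
            if (p <= threshold) {
                System.out.println("keep attribute with p = " + p);
            } else {
                System.out.println("remove attribute with p = " + p);
            }
        }
    }
}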
From source file:uk.ac.ebi.phenotype.service.ObservationService.java
public Map<String, List<DiscreteTimePoint>> getTimeSeriesMutantData(String parameter, List<String> genes,
        ArrayList<String> strains, String[] center, String[] sex) throws SolrServerException {

    // <allele_accession, timeSeriesData>
    Map<String, List<DiscreteTimePoint>> finalRes = new HashMap<String, List<DiscreteTimePoint>>();

    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":experimental")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);

    String q = (strains.size() > 1)
            ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + StringUtils.join(strains.toArray(),
                    "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (genes != null && genes.size() > 0) {
        q += " AND (";
        q += (genes.size() > 1)
                ? ObservationDTO.GENE_ACCESSION_ID + ":\"" + StringUtils.join(genes.toArray(),
                        "\" OR " + ObservationDTO.GENE_ACCESSION_ID + ":\"") + "\""
                : ObservationDTO.GENE_ACCESSION_ID + ":\"" + genes.get(0) + "\"";
        q += ")";
    }

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\"" + StringUtils.join(center,
                        "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.GENE_SYMBOL);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned per group
    query.set("group.sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);
    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);

    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();
    // For mutants it doesn't seem we need binning.
    // Groups are the alleles.
    for (Group gr : groups) {
        SolrDocumentList resDocs = gr.getResult();
        DescriptiveStatistics stats = new DescriptiveStatistics();
        float discreteTime = (float) resDocs.get(0).getFieldValue(ObservationDTO.DISCRETE_POINT);
        ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
        for (int i = 0; i < resDocs.getNumFound(); i++) {
            SolrDocument doc = resDocs.get(i);
            stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            if (discreteTime != (float) doc.getFieldValue(ObservationDTO.DISCRETE_POINT)
                    || i == resDocs.getNumFound() - 1) { // we are at the end of the document list
                // add to list
                float discreteDataPoint = (float) stats.getMean();
                DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                        new Float(stats.getStandardDeviation()));
                List<Float> errorPair = new ArrayList<>();
                Float lower = new Float(discreteDataPoint);
                Float higher = new Float(discreteDataPoint);
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                // update discrete point
                discreteTime = Float.valueOf(doc.getFieldValue(ObservationDTO.DISCRETE_POINT).toString());
                // update stats
                stats = new DescriptiveStatistics();
            }
        }
        // add list
        finalRes.put(gr.getGroupValue(), res);
    }
    return finalRes;
}
From source file:uk.ac.ebi.phenotype.service.ObservationService.java
public List<DiscreteTimePoint> getTimeSeriesControlData(String parameter, ArrayList<String> strains,
        String[] center, String[] sex) throws SolrServerException {

    ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":control")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);

    String q = (strains.size() > 1)
            ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + StringUtils.join(strains.toArray(),
                    "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\"" + StringUtils.join(center,
                        "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.DISCRETE_POINT);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned per group
    query.set("sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);
    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);

    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();

    // Decide whether binning is needed, i.e. whether the increment points are
    // too scattered, as for calorimetry.
    boolean rounding = false;
    if (groups.size() > 30) { // arbitrary value, picked because it seems reasonable for the size of our graphs
        if (Float.valueOf(groups.get(groups.size() - 1).getGroupValue())
                - Float.valueOf(groups.get(0).getGroupValue()) <= 30) {
            // then rounding will be enough
            rounding = true;
        }
    }

    if (rounding) {
        int bin = Math.round(Float.valueOf(groups.get(0).getGroupValue()));
        for (Group gr : groups) {
            int discreteTime = Math.round(Float.valueOf(gr.getGroupValue()));
            // For calorimetry, ignore what's before -5 and after 16.
            if (parameter.startsWith("IMPC_CAL") || parameter.startsWith("ESLIM_003_001")
                    || parameter.startsWith("M-G-P_003_001")) {
                if (discreteTime < -5) {
                    continue;
                } else if (discreteTime > 16) {
                    break;
                }
            }
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            if (bin < discreteTime || groups.indexOf(gr) == groups.size() - 1) {
                // finished the groups or filled the bin
                float discreteDataPoint = sum / resDocs.getNumFound();
                DiscreteTimePoint dp = new DiscreteTimePoint((float) discreteTime, discreteDataPoint,
                        new Float(stats.getStandardDeviation()));
                List<Float> errorPair = new ArrayList<>();
                double std = stats.getStandardDeviation();
                Float lower = new Float(discreteDataPoint - std);
                Float higher = new Float(discreteDataPoint + std);
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                bin = discreteTime;
            }
        }
    } else {
        for (Group gr : groups) {
            Float discreteTime = Float.valueOf(gr.getGroupValue());
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            float discreteDataPoint = sum / resDocs.getNumFound();
            DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                    new Float(stats.getStandardDeviation()));
            List<Float> errorPair = new ArrayList<>();
            double std = stats.getStandardDeviation();
            Float lower = new Float(discreteDataPoint - std);
            Float higher = new Float(discreteDataPoint + std);
            errorPair.add(lower);
            errorPair.add(higher);
            dp.setErrorPair(errorPair);
            res.add(dp);
        }
    }
    return res;
}
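The two ObservationService methods above follow the same DescriptiveStatistics pattern: accumulate the data points belonging to one discrete time point, read off the mean and standard deviation, build a mean-minus-SD / mean-plus-SD error pair, then start a fresh DescriptiveStatistics for the next time point. The following stripped-down sketch shows that pattern outside of Solr; the time/value pairs are invented for illustration.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PerTimePointStatsSketch {
    public static void main(String[] args) {
        // Invented (discrete time -> observed values) data, grouped up front for simplicity.
        Map<Float, double[]> byTime = new LinkedHashMap<>();
        byTime.put(0.0f, new double[] { 5.1, 4.9, 5.3 });
        byTime.put(1.0f, new double[] { 6.0, 6.4, 5.8 });
        byTime.put(2.0f, new double[] { 7.2, 6.9, 7.5 });

        for (Map.Entry<Float, double[]> entry : byTime.entrySet()) {
            // One fresh DescriptiveStatistics per time point, as in the methods above.
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (double v : entry.getValue()) {
                stats.addValue(v);
            }
            double mean = stats.getMean();
            double std = stats.getStandardDeviation();

            // Error pair is the mean plus/minus one standard deviation.
            List<Double> errorPair = new ArrayList<>();
            errorPair.add(mean - std);
            errorPair.add(mean + std);

            System.out.println("t=" + entry.getKey() + " mean=" + mean + " band=" + errorPair);
        }
    }
}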