Example usage for org.apache.commons.math.stat.descriptive DescriptiveStatistics DescriptiveStatistics

Introduction

On this page you can find example usage for the org.apache.commons.math.stat.descriptive.DescriptiveStatistics no-argument constructor, DescriptiveStatistics().

Prototype

public DescriptiveStatistics() 

Document

Construct a DescriptiveStatistics instance with an infinite window (no values are discarded as new ones are added).
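
A minimal, self-contained sketch of this constructor in use (the class name and sample values below are illustrative, not taken from the examples that follow): with an infinite window, every value passed to addValue is retained and statistics are computed over all of them.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsDemo {
    public static void main(String[] args) {
        // infinite window: no values are ever evicted
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 }) {
            stats.addValue(v);
        }
        System.out.println(stats.getMean());              // 3.0
        System.out.println(stats.getStandardDeviation()); // ~1.58 (sample std)
        System.out.println(stats.getPercentile(50));      // 3.0 (median)
    }
}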

Usage

From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.ChiSquareTestCategorical.java

@Override
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet,
        LinkedList<Attribute> attributesForSplitting) {

    // checks if the example set is pure, and if it is, it exits the method
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // selects the attributes to be evaluated for removal (by calculating
    // chi-square probability for each attribute)
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNominal()) {
            // calculate chi-square probability of the attribute
            double probability = 0;
            try {
                long[][] matrixForAttribute = getContigencyTable(exampleSet, attr);
                ChiSquareTestImpl chiTest = new ChiSquareTestImpl();
                probability = chiTest.chiSquareTest(matrixForAttribute);
            } catch (MathException me) {
                // System.out.println("Error in calculating math formula (chiTest)");
            }
            // add the attribute to the list
            attributesToRemove.add(attr);
            attributeProbabilities.add(Double.valueOf(probability));
        }

    // calculates the percentile cutoff; (1 - Percentage_Remove) is the
    // fraction of attributes to be kept (not removed)
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // evaluates the attributes and chooses the ones for removal (attributes
    // that pass the test are taken off the removal list)
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // removes the attributes
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}
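
The percentile cutoff in the example above can be isolated into a short sketch. The p-values and the 25% removal fraction below are hypothetical stand-ins for Percentage_Remove and the computed chi-square probabilities; the pattern is the same: feed the probabilities into an infinite-window DescriptiveStatistics and read the cutoff off with getPercentile.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PercentileCutoffSketch {
    public static void main(String[] args) {
        double[] pValues = { 0.01, 0.04, 0.20, 0.75 }; // hypothetical p-values
        double percentageRemove = 0.25;                // hypothetical removal fraction

        DescriptiveStatistics stat = new DescriptiveStatistics();
        for (double p : pValues) {
            stat.addValue(p);
        }
        // cutoff at the (1 - percentageRemove) * 100 percentile, as in the example
        double cutoff = stat.getPercentile((1 - percentageRemove) * 100);
        System.out.println("cutoff = " + cutoff);
        // attributes with p-value <= cutoff stay in the splitting list,
        // mirroring the iterator loop above
    }
}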

From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.FTestNumerical.java

public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet,
        LinkedList<Attribute> attributesForSplitting) {
    // checks if the example set is pure, and if it is, it exits the method
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // selects the attributes to be evaluated for removal (by calculating
    // F-test probability for each attribute)
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNumerical()) {
            // calculate F-test probability of the attribute
            double probability = 0;
            try {

                OneWayAnova fTest = new OneWayAnovaImpl();
                List<double[]> paramForFTest = getArraysByLabel(exampleSet, attr);

                // checks whether any array for the F-test has fewer than 2 elements
                boolean fTestImpossible = false;
                for (double[] i : paramForFTest)
                    if (i.length < 2)
                        fTestImpossible = true;

                // calculates the F-test probability
                if (!fTestImpossible)
                    probability = fTest.anovaPValue(paramForFTest);

            } catch (Exception e) {
                // System.out.println("Error in calculating math formula (FTest)");
            }
            // add the attribute to the list
            attributesToRemove.add(attr);
            attributeProbabilities.add(Double.valueOf(probability));
        }

    if (attributesToRemove.size() == 0)
        return attributesForSplitting;

    // calculates the percentile cutoff; (1 - Percentage_Remove) is the
    // fraction of attributes to be kept (not removed)
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // evaluates the attributes and chooses the ones for removal (attributes
    // that pass the test are taken off the removal list)
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // removes the attributes
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;

}

From source file:uk.ac.ebi.phenotype.service.ObservationService.java

public Map<String, List<DiscreteTimePoint>> getTimeSeriesMutantData(String parameter, List<String> genes,
        ArrayList<String> strains, String[] center, String[] sex) throws SolrServerException {

    // maps allele accession -> time series data
    Map<String, List<DiscreteTimePoint>> finalRes = new HashMap<String, List<DiscreteTimePoint>>();

    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":experimental")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);

    String q = (strains.size() > 1) ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\""
            + StringUtils.join(strains.toArray(), "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (genes != null && genes.size() > 0) {
        q += " AND (";
        q += (genes.size() > 1) ? ObservationDTO.GENE_ACCESSION_ID + ":\""
                + StringUtils.join(genes.toArray(), "\" OR " + ObservationDTO.GENE_ACCESSION_ID + ":\"") + "\""
                : ObservationDTO.GENE_ACCESSION_ID + ":\"" + genes.get(0) + "\"";
        q += ")";
    }

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\""
                        + StringUtils.join(center, "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.GENE_SYMBOL);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned
    // per group
    query.set("group.sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);

    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);
    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();
    // for mutants, binning does not seem necessary
    // groups are the alleles
    for (Group gr : groups) {
        SolrDocumentList resDocs = gr.getResult();
        DescriptiveStatistics stats = new DescriptiveStatistics();
        float discreteTime = (float) resDocs.get(0).getFieldValue(ObservationDTO.DISCRETE_POINT);
        ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
        for (int i = 0; i < resDocs.getNumFound(); i++) {
            SolrDocument doc = resDocs.get(i);
            stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            if (discreteTime != (float) doc.getFieldValue(ObservationDTO.DISCRETE_POINT)
                    || i == resDocs.getNumFound() - 1) { // we are at the end of the document list
                // add to list
                float discreteDataPoint = (float) stats.getMean();
                DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                        (float) stats.getStandardDeviation());
                List<Float> errorPair = new ArrayList<>();
                Float lower = discreteDataPoint;
                Float higher = discreteDataPoint;
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                // update discrete point
                discreteTime = Float.valueOf(doc.getFieldValue(ObservationDTO.DISCRETE_POINT).toString());
                // update stats
                stats = new DescriptiveStatistics();
            }
        }
        // add list
        finalRes.put(gr.getGroupValue(), res);
    }
    return finalRes;
}
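
The loop above follows a reset pattern: accumulate values for one discrete time point into a DescriptiveStatistics, emit the mean and standard deviation, then start a fresh instance for the next time point. A minimal sketch of that pattern with hypothetical data:

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PerTimePointStatsSketch {
    public static void main(String[] args) {
        // hypothetical data points, grouped by discrete time point
        double[][] timePoints = { { 1.0, 1.2, 0.9 }, { 2.1, 2.0, 2.3 } };

        for (double[] values : timePoints) {
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (double v : values) {
                stats.addValue(v);
            }
            // one mean/SD pair per time point; a new instance starts the next one
            System.out.printf("mean=%.2f sd=%.2f%n", stats.getMean(),
                    stats.getStandardDeviation());
        }
    }
}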

From source file:uk.ac.ebi.phenotype.service.ObservationService.java

public List<DiscreteTimePoint> getTimeSeriesControlData(String parameter, ArrayList<String> strains,
        String[] center, String[] sex) throws SolrServerException {

    ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":control")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);
    String q = (strains.size() > 1) ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\""
            + StringUtils.join(strains.toArray(), "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\""
                        + StringUtils.join(center, "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.DISCRETE_POINT);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned
    // per group
    query.set("sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);

    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);
    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();
    boolean rounding = false;
    // decide whether binning is needed, i.e. whether the increment points
    // are too scattered, as for calorimetry
    if (groups.size() > 30) { // arbitrary value, picked because it seems
        // reasonable for the size of our graphs
        if (Float.valueOf(groups.get(groups.size() - 1).getGroupValue())
                - Float.valueOf(groups.get(0).getGroupValue()) <= 30) {
            // then rounding will be enough
            rounding = true;
        }
    }
    if (rounding) {
        int bin = Math.round(Float.valueOf(groups.get(0).getGroupValue()));
        for (Group gr : groups) {
            int discreteTime = Math.round(Float.valueOf(gr.getGroupValue()));
            // for calorimetry, ignore values before -5 and after 16
            if (parameter.startsWith("IMPC_CAL") || parameter.startsWith("ESLIM_003_001")
                    || parameter.startsWith("M-G-P_003_001")) {
                if (discreteTime < -5) {
                    continue;
                } else if (discreteTime > 16) {
                    break;
                }
            }
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            if (bin < discreteTime || groups.indexOf(gr) == groups.size() - 1) {
                // finished the groups or filled the bin
                float discreteDataPoint = sum / resDocs.getNumFound();
                DiscreteTimePoint dp = new DiscreteTimePoint((float) discreteTime, discreteDataPoint,
                        (float) stats.getStandardDeviation());
                List<Float> errorPair = new ArrayList<>();
                double std = stats.getStandardDeviation();
                Float lower = (float) (discreteDataPoint - std);
                Float higher = (float) (discreteDataPoint + std);
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                bin = discreteTime;
            }
        }
    } else {
        for (Group gr : groups) {
            Float discreteTime = Float.valueOf(gr.getGroupValue());
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            float discreteDataPoint = sum / resDocs.getNumFound();
            DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                    (float) stats.getStandardDeviation());
            List<Float> errorPair = new ArrayList<>();
            double std = stats.getStandardDeviation();
            Float lower = (float) (discreteDataPoint - std);
            Float higher = (float) (discreteDataPoint + std);
            errorPair.add(lower);
            errorPair.add(higher);
            dp.setErrorPair(errorPair);
            res.add(dp);
        }
    }
    return res;
}
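
For contrast with the infinite-window constructor used in all of the examples above, commons-math also provides DescriptiveStatistics(int window), which retains only the most recent window values (rolling statistics). A short sketch with illustrative values:

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class WindowedStatsSketch {
    public static void main(String[] args) {
        // fixed-size window: only the 3 most recent values are retained
        DescriptiveStatistics rolling = new DescriptiveStatistics(3);
        for (double v : new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 }) {
            rolling.addValue(v);
        }
        // the window now holds 3.0, 4.0 and 5.0
        System.out.println(rolling.getMean()); // 4.0
    }
}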