Usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics
public DescriptiveStatistics()
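Before the full examples below, here is a minimal, self-contained sketch of the no-argument constructor and the methods those examples rely on (addValue, getMean, getStandardDeviation, getPercentile). The sample values are made up for illustration only.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsBasics {
    public static void main(String[] args) {
        // The no-argument constructor creates an instance with an infinite window
        // (it keeps every value added to it).
        DescriptiveStatistics stats = new DescriptiveStatistics();

        // Illustrative sample values.
        double[] values = { 12.5, 11.0, 14.2, 13.3, 9.8 };
        for (double v : values) {
            stats.addValue(v);
        }

        System.out.println("mean   = " + stats.getMean());
        System.out.println("stddev = " + stats.getStandardDeviation());
        System.out.println("median = " + stats.getPercentile(50));
    }
}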
From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.ChiSquareTestCategorical.java
@Override
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) {
    // Checks if the example set is pure; if it is, exits the method.
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // Selects the attributes to be evaluated for removal (by calculating
    // the chi-square probability for each attribute).
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNominal()) {
            // Calculate the chi-square probability of the attribute.
            double probability = 0;
            try {
                long[][] matrixForAttribute = getContigencyTable(exampleSet, attr);
                ChiSquareTestImpl chiTest = new ChiSquareTestImpl();
                probability = chiTest.chiSquareTest(matrixForAttribute);
            } catch (MathException me) {
                // System.out.println("Error in calculating math formula (chiTest)");
            }
            // Add the attribute and its probability to the candidate lists.
            attributesToRemove.add(attr);
            attributeProbabilities.add(new Double(probability));
        }

    // Calculates the percentile for the required percentage. The percentile
    // variable represents the percentage of attributes to be kept (not removed).
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // Evaluates attributes and chooses the ones for removal (actually keeps
    // the ones not marked for removal).
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // Removes the remaining attributes.
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}
From source file:rs.fon.whibo.GDT.component.removeInsignificantAttributes.FTestNumerical.java
public LinkedList<Attribute> removeAttributes(ExampleSet exampleSet, LinkedList<Attribute> attributesForSplitting) {
    // Checks if the example set is pure; if it is, exits the method.
    Attribute label = exampleSet.getAttributes().getLabel();
    if (Tools.getAllCategories(exampleSet, label).size() < 2)
        return attributesForSplitting;

    // Selects the attributes to be evaluated for removal (by calculating
    // the F-test probability for each attribute).
    ArrayList<Attribute> attributesToRemove = new ArrayList<Attribute>();
    ArrayList<Double> attributeProbabilities = new ArrayList<Double>();
    for (Attribute attr : attributesForSplitting)
        if (attr.isNumerical()) {
            // Calculate the F-test probability of the attribute.
            double probability = 0;
            try {
                OneWayAnova fTest = new OneWayAnovaImpl();
                List<double[]> paramForFTest = getArraysByLabel(exampleSet, attr);
                // Tests whether any array for the F-test has fewer than 2 elements.
                boolean fTestImpossible = false;
                for (double[] i : paramForFTest)
                    if (i.length < 2)
                        fTestImpossible = true;
                // Calculates the F-test probability.
                if (!fTestImpossible)
                    probability = fTest.anovaPValue(paramForFTest);
            } catch (Exception e) {
                // System.out.println("Error in calculating math formula (FTest)");
            }
            // Add the attribute and its probability to the candidate lists.
            attributesToRemove.add(attr);
            attributeProbabilities.add(new Double(probability));
        }

    if (attributesToRemove.size() == 0)
        return attributesForSplitting;

    // Calculates the percentile for the required percentage. The percentile
    // variable represents the percentage of attributes to be kept (not removed).
    double percentile;
    DescriptiveStatistics stat = new DescriptiveStatistics();
    for (Double d : attributeProbabilities)
        stat.addValue(d.doubleValue());
    percentile = stat.getPercentile((1 - Percentage_Remove) * 100);

    // Evaluates attributes and chooses the ones for removal (actually keeps
    // the ones not marked for removal).
    Iterator<Attribute> iattr = attributesToRemove.iterator();
    Iterator<Double> iprob = attributeProbabilities.iterator();
    while (iattr.hasNext()) {
        iattr.next();
        Double prob = iprob.next();
        if (Use_Percentage_Instead == 0) {
            if (prob <= Alpha_Value) {
                iattr.remove();
                iprob.remove();
            }
        } else {
            if (prob <= percentile) {
                iattr.remove();
                iprob.remove();
            }
        }
    }

    // Removes the remaining attributes.
    for (Attribute attr : attributesToRemove)
        attributesForSplitting.remove(attr);
    return attributesForSplitting;
}
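Both removal components above use DescriptiveStatistics in the same way: each attribute's test p-value is added to a statistics object, getPercentile((1 - Percentage_Remove) * 100) yields a cut-off, attributes whose p-value is at or below the cut-off (or below Alpha_Value in the alpha mode) are kept, and the rest are removed as insignificant. Below is a condensed sketch of just that idiom; the p-values and the removal fraction are illustrative placeholders standing in for the component's inputs.

import java.util.Arrays;
import java.util.List;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PercentileThresholdSketch {
    public static void main(String[] args) {
        // Illustrative p-values, one per candidate attribute.
        List<Double> pValues = Arrays.asList(0.001, 0.04, 0.20, 0.35, 0.80);
        double removeFraction = 0.4; // stands in for Percentage_Remove

        DescriptiveStatistics stat = new DescriptiveStatistics();
        for (Double p : pValues) {
            stat.addValue(p);
        }

        // Cut-off such that roughly (1 - removeFraction) of the attributes
        // fall at or below it.
        double threshold = stat.getPercentile((1 - removeFraction) * 100);

        for (Double p : pValues) {
            if (p <= threshold) {
                System.out.println("keep attribute with p = " + p);
            } else {
                System.out.println("remove attribute with p = " + p);
            }
        }
    }
}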
From source file:uk.ac.ebi.phenotype.service.ObservationService.java
public Map<String, List<DiscreteTimePoint>> getTimeSeriesMutantData(String parameter, List<String> genes,
        ArrayList<String> strains, String[] center, String[] sex) throws SolrServerException {

    // <allele_accession, timeSeriesData>
    Map<String, List<DiscreteTimePoint>> finalRes = new HashMap<String, List<DiscreteTimePoint>>();

    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":experimental")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);

    String q = (strains.size() > 1)
            ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + StringUtils.join(strains.toArray(),
                    "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (genes != null && genes.size() > 0) {
        q += " AND (";
        q += (genes.size() > 1)
                ? ObservationDTO.GENE_ACCESSION_ID + ":\"" + StringUtils.join(genes.toArray(),
                        "\" OR " + ObservationDTO.GENE_ACCESSION_ID + ":\"") + "\""
                : ObservationDTO.GENE_ACCESSION_ID + ":\"" + genes.get(0) + "\"";
        q += ")";
    }

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\"" + StringUtils.join(center,
                        "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.GENE_SYMBOL);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned per group
    query.set("group.sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);
    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);

    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();
    // For mutants it doesn't seem we need binning.
    // Groups are the alleles.
    for (Group gr : groups) {
        SolrDocumentList resDocs = gr.getResult();
        DescriptiveStatistics stats = new DescriptiveStatistics();
        float discreteTime = (float) resDocs.get(0).getFieldValue(ObservationDTO.DISCRETE_POINT);
        ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
        for (int i = 0; i < resDocs.getNumFound(); i++) {
            SolrDocument doc = resDocs.get(i);
            stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            if (discreteTime != (float) doc.getFieldValue(ObservationDTO.DISCRETE_POINT)
                    || i == resDocs.getNumFound() - 1) { // we are at the end of the document list
                // add to list
                float discreteDataPoint = (float) stats.getMean();
                DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                        new Float(stats.getStandardDeviation()));
                List<Float> errorPair = new ArrayList<>();
                Float lower = new Float(discreteDataPoint);
                Float higher = new Float(discreteDataPoint);
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                // update discrete point
                discreteTime = Float.valueOf(doc.getFieldValue(ObservationDTO.DISCRETE_POINT).toString());
                // update stats
                stats = new DescriptiveStatistics();
            }
        }
        // add list
        finalRes.put(gr.getGroupValue(), res);
    }
    return finalRes;
}
From source file:uk.ac.ebi.phenotype.service.ObservationService.java
public List<DiscreteTimePoint> getTimeSeriesControlData(String parameter, ArrayList<String> strains,
        String[] center, String[] sex) throws SolrServerException {

    ArrayList<DiscreteTimePoint> res = new ArrayList<DiscreteTimePoint>();
    SolrQuery query = new SolrQuery().addFilterQuery(ObservationDTO.BIOLOGICAL_SAMPLE_GROUP + ":control")
            .addFilterQuery(ObservationDTO.PARAMETER_STABLE_ID + ":" + parameter);

    String q = (strains.size() > 1)
            ? "(" + ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + StringUtils.join(strains.toArray(),
                    "\" OR " + ObservationDTO.STRAIN_ACCESSION_ID + ":\"") + "\")"
            : ObservationDTO.STRAIN_ACCESSION_ID + ":\"" + strains.get(0) + "\"";

    if (center != null && center.length > 0) {
        q += " AND (";
        q += (center.length > 1)
                ? ObservationDTO.PHENOTYPING_CENTER + ":\"" + StringUtils.join(center,
                        "\" OR " + ObservationDTO.PHENOTYPING_CENTER + ":\"") + "\""
                : ObservationDTO.PHENOTYPING_CENTER + ":\"" + center[0] + "\"";
        q += ")";
    }

    if (sex != null && sex.length == 1) {
        q += " AND " + ObservationDTO.SEX + ":\"" + sex[0] + "\"";
    }

    query.setQuery(q);
    query.set("group.field", ObservationDTO.DISCRETE_POINT);
    query.set("group", true);
    query.set("fl", ObservationDTO.DATA_POINT + "," + ObservationDTO.DISCRETE_POINT);
    query.set("group.limit", 100000); // number of documents to be returned per group
    query.set("sort", ObservationDTO.DISCRETE_POINT + " asc");
    query.setRows(10000);
    // System.out.println("+_+_+ " + solr.getBaseURL() + "/select?" + query);

    List<Group> groups = solr.query(query).getGroupResponse().getValues().get(0).getValues();

    // Decide whether binning is needed, i.e. whether the increment points are
    // too scattered, as for calorimetry.
    boolean rounding = false;
    if (groups.size() > 30) { // arbitrary value, picked because it seems reasonable for the size of our graphs
        if (Float.valueOf(groups.get(groups.size() - 1).getGroupValue())
                - Float.valueOf(groups.get(0).getGroupValue()) <= 30) {
            // then rounding will be enough
            rounding = true;
        }
    }

    if (rounding) {
        int bin = Math.round(Float.valueOf(groups.get(0).getGroupValue()));
        for (Group gr : groups) {
            int discreteTime = Math.round(Float.valueOf(gr.getGroupValue()));
            // For calorimetry, ignore what's before -5 and after 16.
            if (parameter.startsWith("IMPC_CAL") || parameter.startsWith("ESLIM_003_001")
                    || parameter.startsWith("M-G-P_003_001")) {
                if (discreteTime < -5) {
                    continue;
                } else if (discreteTime > 16) {
                    break;
                }
            }
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            if (bin < discreteTime || groups.indexOf(gr) == groups.size() - 1) {
                // finished the groups or filled the bin
                float discreteDataPoint = sum / resDocs.getNumFound();
                DiscreteTimePoint dp = new DiscreteTimePoint((float) discreteTime, discreteDataPoint,
                        new Float(stats.getStandardDeviation()));
                List<Float> errorPair = new ArrayList<>();
                double std = stats.getStandardDeviation();
                Float lower = new Float(discreteDataPoint - std);
                Float higher = new Float(discreteDataPoint + std);
                errorPair.add(lower);
                errorPair.add(higher);
                dp.setErrorPair(errorPair);
                res.add(dp);
                bin = discreteTime;
            }
        }
    } else {
        for (Group gr : groups) {
            Float discreteTime = Float.valueOf(gr.getGroupValue());
            float sum = 0;
            SolrDocumentList resDocs = gr.getResult();
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (SolrDocument doc : resDocs) {
                sum += (float) doc.getFieldValue(ObservationDTO.DATA_POINT);
                stats.addValue((float) doc.getFieldValue(ObservationDTO.DATA_POINT));
            }
            float discreteDataPoint = sum / resDocs.getNumFound();
            DiscreteTimePoint dp = new DiscreteTimePoint(discreteTime, discreteDataPoint,
                    new Float(stats.getStandardDeviation()));
            List<Float> errorPair = new ArrayList<>();
            double std = stats.getStandardDeviation();
            Float lower = new Float(discreteDataPoint - std);
            Float higher = new Float(discreteDataPoint + std);
            errorPair.add(lower);
            errorPair.add(higher);
            dp.setErrorPair(errorPair);
            res.add(dp);
        }
    }
    return res;
}
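The two ObservationService methods above follow the same DescriptiveStatistics pattern: accumulate the data points belonging to one discrete time point, read off the mean and standard deviation, build a mean-minus-SD / mean-plus-SD error pair, then start a fresh DescriptiveStatistics for the next time point. The following stripped-down sketch shows that pattern outside of Solr; the time/value pairs are invented for illustration.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PerTimePointStatsSketch {
    public static void main(String[] args) {
        // Invented (discrete time -> observed values) data, grouped up front for simplicity.
        Map<Float, double[]> byTime = new LinkedHashMap<>();
        byTime.put(0.0f, new double[] { 5.1, 4.9, 5.3 });
        byTime.put(1.0f, new double[] { 6.0, 6.4, 5.8 });
        byTime.put(2.0f, new double[] { 7.2, 6.9, 7.5 });

        for (Map.Entry<Float, double[]> entry : byTime.entrySet()) {
            // One fresh DescriptiveStatistics per time point, as in the methods above.
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (double v : entry.getValue()) {
                stats.addValue(v);
            }
            double mean = stats.getMean();
            double std = stats.getStandardDeviation();

            // Error pair is the mean plus/minus one standard deviation.
            List<Double> errorPair = new ArrayList<>();
            errorPair.add(mean - std);
            errorPair.add(mean + std);

            System.out.println("t=" + entry.getKey() + " mean=" + mean + " band=" + errorPair);
        }
    }
}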