Example usage for org.apache.commons.math.stat.descriptive StatisticalSummary getMean

List of usage examples for org.apache.commons.math.stat.descriptive StatisticalSummary getMean

Introduction

On this page you can find example usages of org.apache.commons.math.stat.descriptive.StatisticalSummary.getMean().

Prototype

double getMean();

Source Link

Document

Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">arithmetic mean</a> of the available values.

Usage

From source file:org.datacleaner.beans.NumberAnalyzer.java

/**
 * Builds the analyzer result as a crosstab with one category per analyzed
 * column on one dimension and one category per measure on the other.
 * <p>
 * Row count and null count are written for every column. The remaining
 * measures are only written for columns whose statistics report at least
 * one value; median, percentiles, skewness and kurtosis additionally
 * require the {@code descriptiveStatistics} flag to be set.
 *
 * @return a new {@link NumberAnalyzerResult} wrapping the analyzed columns
 *         and the populated crosstab
 */
@Override
public NumberAnalyzerResult getResult() {
    // Measure axis: the basic measures are always registered, in this order.
    final CrosstabDimension measures = new CrosstabDimension(DIMENSION_MEASURE);
    measures.addCategory(MEASURE_ROW_COUNT);
    measures.addCategory(MEASURE_NULL_COUNT);
    measures.addCategory(MEASURE_HIGHEST_VALUE);
    measures.addCategory(MEASURE_LOWEST_VALUE);
    measures.addCategory(MEASURE_SUM);
    measures.addCategory(MEASURE_MEAN);
    measures.addCategory(MEASURE_GEOMETRIC_MEAN);
    measures.addCategory(MEASURE_STANDARD_DEVIATION);
    measures.addCategory(MEASURE_VARIANCE);
    measures.addCategory(MEASURE_SECOND_MOMENT);
    measures.addCategory(MEASURE_SUM_OF_SQUARES);

    // Extended measures are only registered when descriptive statistics are enabled.
    if (descriptiveStatistics) {
        measures.addCategory(MEASURE_MEDIAN);
        measures.addCategory(MEASURE_PERCENTILE25);
        measures.addCategory(MEASURE_PERCENTILE75);
        measures.addCategory(MEASURE_SKEWNESS);
        measures.addCategory(MEASURE_KURTOSIS);
    }

    // Column axis: one category per analyzed input column.
    final CrosstabDimension columns = new CrosstabDimension(DIMENSION_COLUMN);
    for (InputColumn<? extends Number> inputColumn : _columns) {
        columns.addCategory(inputColumn.getName());
    }

    final Crosstab<Number> crosstab = new Crosstab<Number>(Number.class, columns, measures);
    for (InputColumn<? extends Number> inputColumn : _columns) {
        final CrosstabNavigator<Number> nav = crosstab.navigate().where(columns, inputColumn.getName());
        final NumberAnalyzerColumnDelegate delegate = _columnDelegates.get(inputColumn);

        final StatisticalSummary s = delegate.getStatistics();
        final int nulls = delegate.getNullCount();

        // NOTE(review): addAttachment(...) is always invoked right after the
        // navigator has been positioned on a measure, so it presumably attaches
        // to the navigator's current cell. Keep the where/put/addAttachment
        // order intact when editing this method.
        nav.where(measures, MEASURE_NULL_COUNT).put(nulls);
        if (nulls > 0) {
            addAttachment(nav, delegate.getNullAnnotation(), inputColumn);
        }

        final int rows = delegate.getNumRows();
        nav.where(measures, MEASURE_ROW_COUNT).put(rows);

        // Nothing more to report for a column without any non-null values.
        if (s.getN() <= 0) {
            continue;
        }

        final double max = s.getMax();
        final double min = s.getMin();
        final double total = s.getSum();
        final double average = s.getMean();
        final double stdDev = s.getStandardDeviation();
        final double varianceValue = s.getVariance();

        // Geometric mean, second moment and sum of squares are not read through
        // the StatisticalSummary reference here; the summary is cast to the
        // concrete type that matches the configured mode.
        final double geoMean;
        final double moment2;
        final double squaresSum;
        final DescriptiveStatistics detailed;
        if (descriptiveStatistics) {
            detailed = (DescriptiveStatistics) s;
            geoMean = detailed.getGeometricMean();
            squaresSum = detailed.getSumsq();
            // The second moment is evaluated over the stored values.
            moment2 = new SecondMoment().evaluate(detailed.getValues());
        } else {
            detailed = null;
            final SummaryStatistics streaming = (SummaryStatistics) s;
            geoMean = streaming.getGeometricMean();
            moment2 = streaming.getSecondMoment();
            squaresSum = streaming.getSumsq();
        }

        nav.where(measures, MEASURE_HIGHEST_VALUE).put(max);
        addAttachment(nav, delegate.getMaxAnnotation(), inputColumn);

        nav.where(measures, MEASURE_LOWEST_VALUE).put(min);
        addAttachment(nav, delegate.getMinAnnotation(), inputColumn);

        nav.where(measures, MEASURE_SUM).put(total);
        nav.where(measures, MEASURE_MEAN).put(average);
        nav.where(measures, MEASURE_GEOMETRIC_MEAN).put(geoMean);
        nav.where(measures, MEASURE_STANDARD_DEVIATION).put(stdDev);
        nav.where(measures, MEASURE_VARIANCE).put(varianceValue);
        nav.where(measures, MEASURE_SUM_OF_SQUARES).put(squaresSum);
        nav.where(measures, MEASURE_SECOND_MOMENT).put(moment2);

        if (descriptiveStatistics) {
            final double kurtosisValue = detailed.getKurtosis();
            final double skewnessValue = detailed.getSkewness();
            final double p50 = detailed.getPercentile(50.0);
            final double p25 = detailed.getPercentile(25.0);
            final double p75 = detailed.getPercentile(75.0);

            nav.where(measures, MEASURE_MEDIAN).put(p50);
            nav.where(measures, MEASURE_PERCENTILE25).put(p25);
            nav.where(measures, MEASURE_PERCENTILE75).put(p75);
            nav.where(measures, MEASURE_SKEWNESS).put(skewnessValue);
            nav.where(measures, MEASURE_KURTOSIS).put(kurtosisValue);
        }
    }
    return new NumberAnalyzerResult(_columns, crosstab);
}

From source file:org.datacleaner.beans.NumberAnalyzerResultReducer.java

/**
 * Reduces the values reported by several partial results for one
 * column/measure cell into a single value.
 * <p>
 * Measures listed in {@code SUM_MEASURES} are summed, the highest and
 * lowest value measures are reduced with {@code maximum} and
 * {@code minimum}, and mean, standard deviation and variance are read
 * from the summary returned by {@code getSummary(column, results)}.
 *
 * @param slaveValues the individual values to reduce
 * @param column the column the values belong to
 * @param measure the measure being reduced
 * @param results the partial results, used to obtain the statistical summary
 * @param valueClass not used by this implementation
 * @return the reduced value, or {@code null} (after logging a warning)
 *         when the measure is not one of the handled measures
 */
@Override
protected Serializable reduceValues(List<Object> slaveValues, String column, String measure,
        Collection<? extends NumberAnalyzerResult> results, Class<?> valueClass) {
    if (SUM_MEASURES.contains(measure)) {
        return sum(slaveValues);
    }
    if (NumberAnalyzer.MEASURE_HIGHEST_VALUE.equals(measure)) {
        return maximum(slaveValues);
    }
    if (NumberAnalyzer.MEASURE_LOWEST_VALUE.equals(measure)) {
        return minimum(slaveValues);
    }

    // The remaining handled measures are read from a statistical summary
    // rather than folded from the individual slave values.
    if (NumberAnalyzer.MEASURE_MEAN.equals(measure)) {
        return getSummary(column, results).getMean();
    }
    if (NumberAnalyzer.MEASURE_STANDARD_DEVIATION.equals(measure)) {
        return getSummary(column, results).getStandardDeviation();
    }
    if (NumberAnalyzer.MEASURE_VARIANCE.equals(measure)) {
        return getSummary(column, results).getVariance();
    }

    logger.warn("Encountered non-reduceable measure '{}'. Slave values are: {}", measure, slaveValues);
    return null;
}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic </a> to use in comparing the mean of the dataset described by
 * <code>sampleStats</code> to <code>mu</code>.
 * <p>/*from   ww  w.j  a  v  a2s .co  m*/
 * This statistic can be used to perform a one sample t-test for the mean.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li><code>observed.getN() &ge; 2</code>.
 * </li></ul></p>
 *
 * @param mu comparison constant
 * @param sampleStats DescriptiveStatistics holding sample summary statitstics
 * @return t statistic
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats);
    return t(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a 2-sample t statistic </a>, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, without the
 * assumption of equal subpopulation variances.  Use
 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
 * compute a t-statistic under the equal variances assumption.
 * <p>/*from w ww.  j  ava  2  s.  c o  m*/
 * This statistic can be used to perform a two-sample t-test to compare
 * sample means.</p>
 * <p>
  * The returned  t-statistic is</p>
 * <p>
 * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of the first sample;
 * <strong><code> n2</code></strong> is the size of the second sample;
 * <strong><code> m1</code></strong> is the mean of the first sample;
 * <strong><code> m2</code></strong> is the mean of the second sample
 * <strong><code> var1</code></strong> is the variance of the first sample;
 * <strong><code> var2</code></strong> is the variance of the second sample
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return t(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a 2-sample t statistic, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, under the
 * assumption of equal subpopulation variances.  To compute a t-statistic
 * without the equal variances assumption, use
 * {@link #t(StatisticalSummary, StatisticalSummary)}.
 * <p>/* w  ww  . ja va  2s.  c  om*/
 * This statistic can be used to perform a (homoscedastic) two-sample
 * t-test to compare sample means.</p>
 * <p>
 * The t-statistic returned is</p>
 * <p>
 * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of first sample;
 * <strong><code> n2</code></strong> is the size of second sample;
 * <strong><code> m1</code></strong> is the mean of first sample;
 * <strong><code> m2</code></strong> is the mean of second sample
 * and <strong><code>var</code></strong> is the pooled variance estimate:
 * </p><p>
 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
 * </p><p>
 * with <strong><code>var1<code></strong> the variance of the first sample and
 * <strong><code>var2</code></strong> the variance of the second sample.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double homoscedasticT(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i> (<i>p-value</i>) of a
 * one-sample, two-tailed t-test that compares the mean of the dataset
 * summarized by <code>sampleStats</code> against the constant
 * <code>mu</code>.
 * <p>
 * The result is the smallest significance level at which the null
 * hypothesis "the mean equals <code>mu</code>" can be rejected in favor
 * of the two-sided alternative "the mean differs from <code>mu</code>".
 * Divide the returned value by 2 for a one-sided test.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The test is only valid under the assumptions of the parametric t-test
 * procedure, which are discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a>.</p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The sample must contain at least 2 observations.
 * </li></ul></p>
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data
 * @return p-value
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double tTest(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {
    checkSampleData(sampleStats);

    // Read the statistics in the order they are passed on.
    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return tTest(sampleMean, mu, sampleVariance, sampleSize);
}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i> (<i>p-value</i>) of a
 * two-sample, two-tailed t-test that compares the means of the datasets
 * summarized by two StatisticalSummary instances.
 * <p>
 * The result is the smallest significance level at which the null
 * hypothesis "the two means are equal" can be rejected in favor of the
 * two-sided alternative "the two means differ". Divide the returned value
 * by 2 for a one-sided test.</p>
 * <p>
 * Equal population variances are not assumed; the p-value is computed
 * with approximated degrees of freedom derived from the sample data. For
 * the equal-variances variant use
 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The p-value is only valid under the assumptions of the parametric
 * t-test procedure, which are discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a>.</p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double tTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {
    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    // Read the statistics in the order they are passed on.
    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return tTest(mean1, mean2, variance1, variance2, size1, size2);
}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i> (<i>p-value</i>) of a
 * two-sample, two-tailed t-test that compares the means of the datasets
 * summarized by two StatisticalSummary instances, under the hypothesis of
 * equal subpopulation variances. For the variant without the equal
 * variances assumption use
 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
 * <p>
 * The result is the smallest significance level at which the null
 * hypothesis "the two means are equal" can be rejected in favor of the
 * two-sided alternative "the two means differ". Divide the returned value
 * by 2 for a one-sided test.</p>
 * <p>
 * See {@link #homoscedasticT(double[], double[])} for the formula used to
 * compute the t-statistic. The degrees of freedom are the sum of the
 * sample sizes minus 2.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The p-value is only valid under the assumptions of the parametric
 * t-test procedure, which are discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double homoscedasticTTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {
    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    // Read the statistics in the order they are passed on.
    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return homoscedasticTTest(mean1, mean2, variance1, variance2, size1, size2);
}