Example usage for org.apache.commons.math.stat.descriptive StatisticalSummary getMean

Introduction

In this page you can find the example usage for org.apache.commons.math.stat.descriptive StatisticalSummary getMean.

Prototype

double getMean();

Source Link

Document

Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> arithmetic mean </a> of the available values

Usage

From source file:org.datacleaner.beans.NumberAnalyzer.java

@Override
public NumberAnalyzerResult getResult() {
    CrosstabDimension measureDimension = new CrosstabDimension(DIMENSION_MEASURE);
    measureDimension.addCategory(MEASURE_ROW_COUNT);
    measureDimension.addCategory(MEASURE_NULL_COUNT);
    measureDimension.addCategory(MEASURE_HIGHEST_VALUE);
    measureDimension.addCategory(MEASURE_LOWEST_VALUE);
    measureDimension.addCategory(MEASURE_SUM);
    measureDimension.addCategory(MEASURE_MEAN);
    measureDimension.addCategory(MEASURE_GEOMETRIC_MEAN);
    measureDimension.addCategory(MEASURE_STANDARD_DEVIATION);
    measureDimension.addCategory(MEASURE_VARIANCE);
    measureDimension.addCategory(MEASURE_SECOND_MOMENT);
    measureDimension.addCategory(MEASURE_SUM_OF_SQUARES);

    if (descriptiveStatistics) {
        measureDimension.addCategory(MEASURE_MEDIAN);
        measureDimension.addCategory(MEASURE_PERCENTILE25);
        measureDimension.addCategory(MEASURE_PERCENTILE75);
        measureDimension.addCategory(MEASURE_SKEWNESS);
        measureDimension.addCategory(MEASURE_KURTOSIS);
    }// w w  w  .  j  a  v a 2  s . co m

    CrosstabDimension columnDimension = new CrosstabDimension(DIMENSION_COLUMN);
    for (InputColumn<? extends Number> column : _columns) {
        columnDimension.addCategory(column.getName());
    }

    Crosstab<Number> crosstab = new Crosstab<Number>(Number.class, columnDimension, measureDimension);
    for (InputColumn<? extends Number> column : _columns) {
        CrosstabNavigator<Number> nav = crosstab.navigate().where(columnDimension, column.getName());
        NumberAnalyzerColumnDelegate delegate = _columnDelegates.get(column);

        StatisticalSummary s = delegate.getStatistics();
        int nullCount = delegate.getNullCount();

        nav.where(measureDimension, MEASURE_NULL_COUNT).put(nullCount);

        if (nullCount > 0) {
            addAttachment(nav, delegate.getNullAnnotation(), column);
        }

        int numRows = delegate.getNumRows();
        nav.where(measureDimension, MEASURE_ROW_COUNT).put(numRows);

        long nonNullCount = s.getN();

        if (nonNullCount > 0) {
            final double highestValue = s.getMax();
            final double lowestValue = s.getMin();
            final double sum = s.getSum();
            final double mean = s.getMean();
            final double standardDeviation = s.getStandardDeviation();
            final double variance = s.getVariance();

            final double geometricMean;
            final double secondMoment;
            final double sumOfSquares;
            if (descriptiveStatistics) {
                final DescriptiveStatistics descriptiveStats = (DescriptiveStatistics) s;
                geometricMean = descriptiveStats.getGeometricMean();
                sumOfSquares = descriptiveStats.getSumsq();
                secondMoment = new SecondMoment().evaluate(descriptiveStats.getValues());
            } else {
                final SummaryStatistics summaryStats = (SummaryStatistics) s;
                geometricMean = summaryStats.getGeometricMean();
                secondMoment = summaryStats.getSecondMoment();
                sumOfSquares = summaryStats.getSumsq();
            }

            nav.where(measureDimension, MEASURE_HIGHEST_VALUE).put(highestValue);
            addAttachment(nav, delegate.getMaxAnnotation(), column);

            nav.where(measureDimension, MEASURE_LOWEST_VALUE).put(lowestValue);
            addAttachment(nav, delegate.getMinAnnotation(), column);

            nav.where(measureDimension, MEASURE_SUM).put(sum);
            nav.where(measureDimension, MEASURE_MEAN).put(mean);
            nav.where(measureDimension, MEASURE_GEOMETRIC_MEAN).put(geometricMean);
            nav.where(measureDimension, MEASURE_STANDARD_DEVIATION).put(standardDeviation);
            nav.where(measureDimension, MEASURE_VARIANCE).put(variance);
            nav.where(measureDimension, MEASURE_SUM_OF_SQUARES).put(sumOfSquares);
            nav.where(measureDimension, MEASURE_SECOND_MOMENT).put(secondMoment);

            if (descriptiveStatistics) {
                final DescriptiveStatistics descriptiveStatistics = (DescriptiveStatistics) s;
                final double kurtosis = descriptiveStatistics.getKurtosis();
                final double skewness = descriptiveStatistics.getSkewness();
                final double median = descriptiveStatistics.getPercentile(50.0);
                final double percentile25 = descriptiveStatistics.getPercentile(25.0);
                final double percentile75 = descriptiveStatistics.getPercentile(75.0);

                nav.where(measureDimension, MEASURE_MEDIAN).put(median);
                nav.where(measureDimension, MEASURE_PERCENTILE25).put(percentile25);
                nav.where(measureDimension, MEASURE_PERCENTILE75).put(percentile75);
                nav.where(measureDimension, MEASURE_SKEWNESS).put(skewness);
                nav.where(measureDimension, MEASURE_KURTOSIS).put(kurtosis);
            }
        }
    }
    return new NumberAnalyzerResult(_columns, crosstab);
}

From source file:org.datacleaner.beans.NumberAnalyzerResultReducer.java

@Override
protected Serializable reduceValues(List<Object> slaveValues, String column, String measure,
        Collection<? extends NumberAnalyzerResult> results, Class<?> valueClass) {

    if (SUM_MEASURES.contains(measure)) {
        return sum(slaveValues);
    } else if (NumberAnalyzer.MEASURE_HIGHEST_VALUE.equals(measure)) {
        return maximum(slaveValues);
    } else if (NumberAnalyzer.MEASURE_LOWEST_VALUE.equals(measure)) {
        return minimum(slaveValues);
    } else if (NumberAnalyzer.MEASURE_MEAN.equals(measure)) {
        StatisticalSummary summary = getSummary(column, results);
        return summary.getMean();
    } else if (NumberAnalyzer.MEASURE_STANDARD_DEVIATION.equals(measure)) {
        StatisticalSummary summary = getSummary(column, results);
        return summary.getStandardDeviation();
    } else if (NumberAnalyzer.MEASURE_VARIANCE.equals(measure)) {
        StatisticalSummary summary = getSummary(column, results);
        return summary.getVariance();
    }/*from   w  ww  .j  ava  2s.  c o  m*/

    logger.warn("Encountered non-reduceable measure '{}'. Slave values are: {}", measure, slaveValues);
    return null;
}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic </a> to use in comparing the mean of the dataset described by
 * <code>sampleStats</code> to <code>mu</code>.
 * <p>/*from   ww  w.j  a  v  a2s .co  m*/
 * This statistic can be used to perform a one sample t-test for the mean.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li><code>observed.getN() &ge; 2</code>.
 * </li></ul></p>
 *
 * @param mu comparison constant
 * @param sampleStats DescriptiveStatistics holding sample summary statitstics
 * @return t statistic
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats);
    return t(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a 2-sample t statistic </a>, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, without the
 * assumption of equal subpopulation variances.  Use
 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
 * compute a t-statistic under the equal variances assumption.
 * <p>/*from w ww.  j  ava  2  s.  c o  m*/
 * This statistic can be used to perform a two-sample t-test to compare
 * sample means.</p>
 * <p>
  * The returned  t-statistic is</p>
 * <p>
 * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of the first sample;
 * <strong><code> n2</code></strong> is the size of the second sample;
 * <strong><code> m1</code></strong> is the mean of the first sample;
 * <strong><code> m2</code></strong> is the mean of the second sample
 * <strong><code> var1</code></strong> is the variance of the first sample;
 * <strong><code> var2</code></strong> is the variance of the second sample
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return t(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Computes a 2-sample t statistic, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, under the
 * assumption of equal subpopulation variances.  To compute a t-statistic
 * without the equal variances assumption, use
 * {@link #t(StatisticalSummary, StatisticalSummary)}.
 * <p>/* w  ww  . ja va  2s.  c  om*/
 * This statistic can be used to perform a (homoscedastic) two-sample
 * t-test to compare sample means.</p>
 * <p>
 * The t-statistic returned is</p>
 * <p>
 * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of first sample;
 * <strong><code> n2</code></strong> is the size of second sample;
 * <strong><code> m1</code></strong> is the mean of first sample;
 * <strong><code> m2</code></strong> is the mean of second sample
 * and <strong><code>var</code></strong> is the pooled variance estimate:
 * </p><p>
 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
 * </p><p>
 * with <strong><code>var1<code></strong> the variance of the first sample and
 * <strong><code>var2</code></strong> the variance of the second sample.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double homoscedasticT(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
 * comparing the mean of the dataset described by <code>sampleStats</code>
 * with the constant <code>mu</code>.
 * <p>//from  w ww .  j  a  va2  s  .  com
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the mean equals
 * <code>mu</code> in favor of the two-sided alternative that the mean
 * is different from <code>mu</code>. For a one-sided test, divide the
 * returned value by 2.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the test depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The sample must contain at least 2 observations.
 * </li></ul></p>
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data
 * @return p-value
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double tTest(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats);
    return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances.// w w w .ja  v a  2s.  c o  m
 * <p>
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * The test does not assume that the underlying population variances are
 * equal  and it uses approximated degrees of freedom computed from the
 * sample data to compute the p-value.   To perform the test assuming
 * equal variances, use
 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double tTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:org.NooLab.math3.stat.inference.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances, under the hypothesis of equal subpopulation variances. To
 * perform a test without the equal variances assumption, use
 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
 * <p>// w w w  . j a  v  a  2 s. c om
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * See {@link #homoscedasticT(double[], double[])} for the formula used to
 * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
 * the degrees of freedom.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double homoscedasticTTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return homoscedasticTTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}