Example usage for org.apache.commons.math3.stat.descriptive StatisticalSummary getVariance

List of usage examples for org.apache.commons.math3.stat.descriptive StatisticalSummary getVariance

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.StatisticalSummary.getVariance().

Prototype

double getVariance();

Source Link

Document

Returns the variance of the available values.

Usage

From source file:joinery.impl.Aggregation.java

/**
 * Expands the {@link StatisticalSummary} cells of a data frame into individual statistics.
 * <p>
 * Every cell of {@code df} whose value is a {@code StatisticalSummary} contributes six
 * values to the result, stored under the cell's column: {@code count}, {@code mean},
 * {@code std}, {@code var}, {@code max} and {@code min}. The row label of each value is
 * produced by {@code name(df, row, stat)}. Cells holding anything else are skipped.
 * </p>
 *
 * @param <V> value type of the data frame
 * @param df the data frame whose cells are inspected
 * @return a new data frame containing the extracted statistics
 */
@SuppressWarnings("unchecked")
public static <V> DataFrame<V> describe(final DataFrame<V> df) {
    final DataFrame<V> desc = new DataFrame<>();
    for (final Object col : df.columns()) {
        for (final Object row : df.index()) {
            final V value = df.get(row, col);
            if (value instanceof StatisticalSummary) {
                if (!desc.columns().contains(col)) {
                    desc.add(col);
                    // Presumably true only while desc has no rows yet (verify against
                    // DataFrame.isEmpty): create one empty row per (df row, statistic) pair.
                    if (desc.isEmpty()) {
                        for (final Object r : df.index()) {
                            for (final Object stat : Arrays.asList("count", "mean", "std", "var", "max",
                                    "min")) {
                                final Object name = name(df, r, stat);
                                desc.append(name, Collections.<V>emptyList());
                            }
                        }
                    }
                }

                final StatisticalSummary summary = StatisticalSummary.class.cast(value);
                // Double.valueOf replaces the deprecated Double(double) constructor; the
                // unchecked cast to V is why the method carries @SuppressWarnings.
                desc.set(name(df, row, "count"), col, (V) Double.valueOf((double) summary.getN()));
                desc.set(name(df, row, "mean"), col, (V) Double.valueOf(summary.getMean()));
                desc.set(name(df, row, "std"), col, (V) Double.valueOf(summary.getStandardDeviation()));
                desc.set(name(df, row, "var"), col, (V) Double.valueOf(summary.getVariance()));
                desc.set(name(df, row, "max"), col, (V) Double.valueOf(summary.getMax()));
                desc.set(name(df, row, "min"), col, (V) Double.valueOf(summary.getMin()));
            }
        }
    }
    return desc;
}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes the one-sample
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">t statistic</a>
 * for comparing the mean of the dataset summarized by {@code sampleStats} with the
 * constant {@code mu}.
 * <p>
 * The statistic can be used to perform a one-sample t-test for the mean. The sample
 * mean, variance and size are read from {@code sampleStats} and handed to the
 * four-argument {@code t} overload.
 * </p>
 * <p>
 * <strong>Preconditions</strong>:
 * </p>
 * <ul>
 * <li>{@code sampleStats.getN() >= 2}.</li>
 * </ul>
 *
 * @param mu comparison constant
 * @param sampleStats summary statistics describing the sample
 * @return t statistic
 * @throws NullArgumentException if {@code sampleStats} is {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats);

    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
 * comparing the mean of the dataset described by <code>sampleStats</code>
 * with the constant <code>mu</code>.
 * <p>/*from w  w w.  ja v a  2 s .c o m*/
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the mean equals
 * <code>mu</code> in favor of the two-sided alternative that the mean
 * is different from <code>mu</code>. For a one-sided test, divide the
 * returned value by 2.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the test depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The sample must contain at least 2 observations.
 * </li></ul></p>
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data
 * @return p-value
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double tTest(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats);
    return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());

}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes a two-sample t statistic comparing the means of the datasets described by two
 * {@link StatisticalSummary} instances, without assuming equal subpopulation variances.
 * Use {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to compute a
 * t-statistic under the equal variances assumption.
 * <p>
 * The statistic can be used to perform a two-sample t-test to compare sample means.
 * </p>
 * <p>
 * The returned t-statistic is
 * </p>
 * <p>
 * &nbsp;&nbsp;<code>t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 * </p>
 * <p>
 * where <code>n1</code> and <code>n2</code> are the sizes, <code>m1</code> and
 * <code>m2</code> the means, and <code>var1</code> and <code>var2</code> the variances
 * of the first and second sample respectively.
 * </p>
 * <p>
 * <strong>Preconditions</strong>:
 * </p>
 * <ul>
 * <li>Each of the two described datasets must contain at least 2 observations.</li>
 * </ul>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double t(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return t(mean1, mean2, variance1, variance2, size1, size2);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes a two-sample t statistic comparing the means of the datasets described by two
 * {@link StatisticalSummary} instances, under the assumption of equal subpopulation
 * variances. To compute a t-statistic without the equal variances assumption, use
 * {@link #t(StatisticalSummary, StatisticalSummary)}.
 * <p>
 * The statistic can be used to perform a (homoscedastic) two-sample t-test to compare
 * sample means.
 * </p>
 * <p>
 * The returned t-statistic is
 * </p>
 * <p>
 * &nbsp;&nbsp;<code>t = (m1 - m2) / (sqrt(1/n1 + 1/n2) sqrt(var))</code>
 * </p>
 * <p>
 * where <code>n1</code> and <code>n2</code> are the sizes and <code>m1</code> and
 * <code>m2</code> the means of the first and second sample, and <code>var</code> is the
 * pooled variance estimate
 * </p>
 * <p>
 * &nbsp;&nbsp;<code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1 - 1) + (n2 - 1))</code>
 * </p>
 * <p>
 * with <code>var1</code> and <code>var2</code> the variances of the first and second
 * sample.
 * </p>
 * <p>
 * NOTE(review): the pooled variance is written here without an outer <code>sqrt</code>,
 * since the t formula above already takes <code>sqrt(var)</code>; the computation itself
 * lives in the six-argument overload - confirm against it.
 * </p>
 * <p>
 * <strong>Preconditions</strong>:
 * </p>
 * <ul>
 * <li>Each of the two described datasets must contain at least 2 observations.</li>
 * </ul>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double homoscedasticT(final StatisticalSummary sampleStats1,
        final StatisticalSummary sampleStats2) throws NullArgumentException, NumberIsTooSmallException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return homoscedasticT(mean1, mean2, variance1, variance2, size1, size2);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances./* w w w  .ja va2 s .  c  o  m*/
 * <p>
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * The test does not assume that the underlying population variances are
 * equal  and it uses approximated degrees of freedom computed from the
 * sample data to compute the p-value.   To perform the test assuming
 * equal variances, use
 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double tTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances, under the hypothesis of equal subpopulation variances. To
 * perform a test without the equal variances assumption, use
 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
 * <p>/*from   w  ww  .j av a2  s  . c  o  m*/
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * See {@link #homoscedasticT(double[], double[])} for the formula used to
 * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
 * the degrees of freedom.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double homoscedasticTTest(final StatisticalSummary sampleStats1,
        final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return homoscedasticTTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:be.ugent.maf.cellmissy.gui.view.table.model.SingleCellStatSummaryTableModel.java

/**
 * Initialize table/*from  w w w .  j a  v a2  s  .co m*/
 */
private void initTable() {
    // list of summaries from the analysis group: number of rows
    List<StatisticalSummary> statisticalSummaries = singleCellAnalysisGroup.getStatisticalSummaries();
    int size = statisticalSummaries.size();
    // columns: 1 + 6 for statistical numbers
    columnNames = new String[7];
    columnNames[0] = "";
    columnNames[1] = "Max";
    columnNames[2] = "Min";
    columnNames[3] = "Mean";
    columnNames[4] = "N";
    columnNames[5] = "SD";
    columnNames[6] = "Variance";
    singleCellAnalysisGroup.getConditionDataHolders();
    data = new Object[size][columnNames.length];
    // fill in data
    for (int rowIndex = 0; rowIndex < data.length; rowIndex++) {
        data[rowIndex][0] = "Cond "
                + (singleCellAnalysisGroup.getConditionDataHolders().get(rowIndex).getPlateCondition());
        // summary for a row
        StatisticalSummary statisticalSummary = statisticalSummaries.get(rowIndex);
        // distribute statistical objects per columns
        data[rowIndex][1] = statisticalSummary.getMax();
        data[rowIndex][2] = statisticalSummary.getMin();
        data[rowIndex][3] = statisticalSummary.getMean();
        data[rowIndex][4] = statisticalSummary.getN();
        data[rowIndex][5] = statisticalSummary.getStandardDeviation();
        data[rowIndex][6] = statisticalSummary.getVariance();

    }
}

From source file:ijfx.core.stats.DefaultImageStatisticsService.java

/**
 * Copies the values of a {@link StatisticalSummary} into a new map keyed by statistic label.
 * <p>
 * The map receives the mean, minimum, maximum, standard deviation and variance, plus the
 * number of values converted to {@code double}, under the corresponding {@code LBL_*} keys.
 * </p>
 *
 * @param summaryStats the summary to read from
 * @return a new {@link HashMap} holding the six labelled statistics
 */
@Override
public Map<String, Double> summaryStatisticsToMap(StatisticalSummary summaryStats) {
    final Map<String, Double> labelled = new HashMap<>();

    labelled.put(LBL_MEAN, Double.valueOf(summaryStats.getMean()));
    labelled.put(LBL_MIN, Double.valueOf(summaryStats.getMin()));
    labelled.put(LBL_MAX, Double.valueOf(summaryStats.getMax()));
    labelled.put(LBL_SD, Double.valueOf(summaryStats.getStandardDeviation()));
    labelled.put(LBL_VARIANCE, Double.valueOf(summaryStats.getVariance()));
    // the count is integral in the summary; the map stores it as a double
    labelled.put(LBL_PIXEL_COUNT, Double.valueOf((double) summaryStats.getN()));

    return labelled;
}

From source file:org.briljantframework.data.dataframe.DataFrames.java

/**
 * Builds a summary of the given data frame. The returned summary contains one row for
 * each column of {@code df}, described by six values: {@code mean}, {@code var},
 * {@code std}, {@code min}, {@code max} and {@code mode}. The first five are only filled
 * in for columns accepted by {@code Is.numeric}; {@code mode} is filled in for every
 * column.
 *
 * <p>
 * Illustrative usage:
 * </p>
 *
 * <pre>
 * {@code
 * > DataFrame df = MixedDataFrame.of(
 *    "a", Vector.of(1, 2, 3, 4, 5, 6),
 *    "b", Vector.of("a", "b", "b", "b", "e", "f"),
 *    "c", Vector.of(1.1, 1.2, 1.3, 1.4, 1.5, 1.6)
 *  );
 *
 * > DataFrames.summary(df)
 *    mean   var    std    min    max    mode
 * a  3.500  3.500  1.871  1.000  6.000  6
 * b  NA     NA     NA     NA     NA     f
 * c  1.350  0.035  0.187  1.100  1.600  1.1
 *
 * [3 rows x 6 columns]
 * }
 * </pre>
 *
 * @param df the data frame
 * @return a data frame summarizing {@code df}
 */
public static DataFrame summary(DataFrame df) {
    DataFrame.Builder summaryBuilder = new MixedDataFrame.Builder();
    // declare the six summary columns and their vector types
    summaryBuilder.set("mean", VectorType.DOUBLE)
            .set("var", VectorType.DOUBLE)
            .set("std", VectorType.DOUBLE)
            .set("min", VectorType.DOUBLE)
            .set("max", VectorType.DOUBLE)
            .set("mode", VectorType.OBJECT);

    for (Object key : df.getColumnIndex().keySet()) {
        Vector values = df.get(key);
        if (Is.numeric(values)) {
            // numeric statistics are only written for numeric columns
            StatisticalSummary stats = values.collect(Number.class, Collectors.statisticalSummary());
            summaryBuilder.set(key, "mean", stats.getMean())
                    .set(key, "var", stats.getVariance())
                    .set(key, "std", stats.getStandardDeviation())
                    .set(key, "min", stats.getMin())
                    .set(key, "max", stats.getMax());
        }
        // the mode is written for every column, numeric or not
        summaryBuilder.set(key, "mode", values.collect(Collectors.mode()));
    }
    return summaryBuilder.build();
}