Example usage for org.apache.commons.math3.stat.descriptive.StatisticalSummary.getVariance()

Introduction

On this page you can find example usages of the getVariance() method of org.apache.commons.math3.stat.descriptive.StatisticalSummary.

Prototype

double getVariance();

Source Link

Document

Returns the variance of the available values.

Usage

From source file:joinery.impl.Aggregation.java

/**
 * Builds a data frame of descriptive statistics from the
 * {@link StatisticalSummary} cells of {@code df}.
 *
 * <p>Every column of {@code df} that holds at least one
 * {@code StatisticalSummary} value is added as a column of the result. For
 * each such cell the statistics {@code count}, {@code mean}, {@code std},
 * {@code var}, {@code max} and {@code min} are stored under the row labels
 * produced by {@code name(df, row, stat)}. Cells that are not
 * {@code StatisticalSummary} instances are skipped.
 *
 * @param <V> the value type of the data frame
 * @param df the data frame whose cells are inspected
 * @return a new data frame holding the statistics as {@code Double} values;
 *         no column is added to it when {@code df} contains no
 *         {@code StatisticalSummary} cell
 */
@SuppressWarnings("unchecked")
public static <V> DataFrame<V> describe(final DataFrame<V> df) {
    final DataFrame<V> desc = new DataFrame<>();
    for (final Object col : df.columns()) {
        for (final Object row : df.index()) {
            final V value = df.get(row, col);
            if (!(value instanceof StatisticalSummary)) {
                continue;
            }

            if (!desc.columns().contains(col)) {
                desc.add(col);
                // NOTE(review): isEmpty() presumably reports "no rows yet", so the
                // statistic rows are appended only once - confirm against DataFrame.
                if (desc.isEmpty()) {
                    for (final Object r : df.index()) {
                        for (final Object stat : Arrays.asList("count", "mean", "std", "var", "max",
                                "min")) {
                            final Object name = name(df, r, stat);
                            desc.append(name, Collections.<V>emptyList());
                        }
                    }
                }
            }

            final StatisticalSummary summary = (StatisticalSummary) value;
            // Double.valueOf replaces the deprecated Double(double) constructor;
            // the unchecked cast to V is what @SuppressWarnings covers.
            desc.set(name(df, row, "count"), col, (V) Double.valueOf((double) summary.getN()));
            desc.set(name(df, row, "mean"), col, (V) Double.valueOf(summary.getMean()));
            desc.set(name(df, row, "std"), col, (V) Double.valueOf(summary.getStandardDeviation()));
            desc.set(name(df, row, "var"), col, (V) Double.valueOf(summary.getVariance()));
            desc.set(name(df, row, "max"), col, (V) Double.valueOf(summary.getMax()));
            desc.set(name(df, row, "min"), col, (V) Double.valueOf(summary.getMin()));
        }
    }
    return desc;
}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic</a> to use in comparing the mean of the dataset described by
 * <code>sampleStats</code> to <code>mu</code>.
 * <p>
 * This statistic can be used to perform a one sample t-test for the mean.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li><code>sampleStats.getN() &ge; 2</code>.
 * </li></ul></p>
 *
 * @param mu comparison constant
 * @param sampleStats StatisticalSummary holding sample summary statistics
 * @return t statistic
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {

    // Validate the summary before reading any statistic from it.
    checkSampleData(sampleStats);

    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
 * comparing the mean of the dataset described by <code>sampleStats</code>
 * with the constant <code>mu</code>.
 * <p>/*from w  w w.  ja v a  2 s .c o m*/
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the mean equals
 * <code>mu</code> in favor of the two-sided alternative that the mean
 * is different from <code>mu</code>. For a one-sided test, divide the
 * returned value by 2.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the test depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The sample must contain at least 2 observations.
 * </li></ul></p>
 *
 * @param mu constant value to compare sample mean against
 * @param sampleStats StatisticalSummary describing sample data
 * @return p-value
 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double tTest(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats);
    return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());

}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes a 2-sample t statistic, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, without the
 * assumption of equal subpopulation variances.  Use
 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
 * compute a t-statistic under the equal variances assumption.
 * <p>
 * This statistic can be used to perform a two-sample t-test to compare
 * sample means.</p>
 * <p>
 * The returned t-statistic is</p>
 * <p>
 * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of the first sample;
 * <strong><code> n2</code></strong> is the size of the second sample;
 * <strong><code> m1</code></strong> is the mean of the first sample;
 * <strong><code> m2</code></strong> is the mean of the second sample;
 * <strong><code> var1</code></strong> is the variance of the first sample;
 * <strong><code> var2</code></strong> is the variance of the second sample.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two StatisticalSummary instances must
 * each contain at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double t(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException {

    // Both summaries are validated before any statistic is read.
    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return t(mean1, mean2, variance1, variance2, size1, size2);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Computes a 2-sample t statistic, comparing the means of the datasets
 * described by two {@link StatisticalSummary} instances, under the
 * assumption of equal subpopulation variances.  To compute a t-statistic
 * without the equal variances assumption, use
 * {@link #t(StatisticalSummary, StatisticalSummary)}.
 * <p>
 * This statistic can be used to perform a (homoscedastic) two-sample
 * t-test to compare sample means.</p>
 * <p>
 * The t-statistic returned is</p>
 * <p>
 * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 + 1/n2) sqrt(var))</code>
 * </p><p>
 * where <strong><code>n1</code></strong> is the size of the first sample;
 * <strong><code> n2</code></strong> is the size of the second sample;
 * <strong><code> m1</code></strong> is the mean of the first sample;
 * <strong><code> m2</code></strong> is the mean of the second sample
 * and <strong><code>var</code></strong> is the pooled variance estimate:
 * </p><p>
 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1 - 1) + (n2 - 1))</code>
 * </p><p>
 * with <strong><code>var1</code></strong> the variance of the first sample and
 * <strong><code>var2</code></strong> the variance of the second sample.
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two StatisticalSummary instances must
 * each contain at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1 StatisticalSummary describing data from the first sample
 * @param sampleStats2 StatisticalSummary describing data from the second sample
 * @return t statistic
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public static double homoscedasticT(final StatisticalSummary sampleStats1,
        final StatisticalSummary sampleStats2) throws NullArgumentException, NumberIsTooSmallException {

    // Both summaries are validated before any statistic is read.
    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);

    final double mean1 = sampleStats1.getMean();
    final double mean2 = sampleStats2.getMean();
    final double variance1 = sampleStats1.getVariance();
    final double variance2 = sampleStats2.getVariance();
    final long size1 = sampleStats1.getN();
    final long size2 = sampleStats2.getN();
    return homoscedasticT(mean1, mean2, variance1, variance2, size1, size2);
}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances./* w w w  .ja va2 s .  c  o  m*/
 * <p>
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * The test does not assume that the underlying population variances are
 * equal  and it uses approximated degrees of freedom computed from the
 * sample data to compute the p-value.   To perform the test assuming
 * equal variances, use
 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 * here</a></p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double tTest(final StatisticalSummary sampleStats1, final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:embedded2.ESecure.TTest.java

/**
 * Returns the <i>observed significance level</i>, or
 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
 * comparing the means of the datasets described by two StatisticalSummary
 * instances, under the hypothesis of equal subpopulation variances. To
 * perform a test without the equal variances assumption, use
 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
 * <p>/*from   w  ww  .j av a2  s  . c  o  m*/
 * The number returned is the smallest significance level
 * at which one can reject the null hypothesis that the two means are
 * equal in favor of the two-sided alternative that they are different.
 * For a one-sided test, divide the returned value by 2.</p>
 * <p>
 * See {@link #homoscedasticT(double[], double[])} for the formula used to
 * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
 * the degrees of freedom.</p>
 * <p>
 * <strong>Usage Note:</strong><br>
 * The validity of the p-value depends on the assumptions of the parametric
 * t-test procedure, as discussed
 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
 * </p><p>
 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two Univariates must each contain
 * at least 2 observations.
 * </li></ul></p>
 *
 * @param sampleStats1  StatisticalSummary describing data from the first sample
 * @param sampleStats2  StatisticalSummary describing data from the second sample
 * @return p-value for t-test
 * @throws NullArgumentException if the sample statistics are <code>null</code>
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public static double homoscedasticTTest(final StatisticalSummary sampleStats1,
        final StatisticalSummary sampleStats2)
        throws NullArgumentException, NumberIsTooSmallException, MaxCountExceededException {

    checkSampleData(sampleStats1);
    checkSampleData(sampleStats2);
    return homoscedasticTTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
            sampleStats2.getVariance(), sampleStats1.getN(), sampleStats2.getN());

}

From source file:be.ugent.maf.cellmissy.gui.view.table.model.SingleCellStatSummaryTableModel.java

/**
 * Initialize table/*from  w w w .  j a  v a2  s  .co m*/
 */
private void initTable() {
    // list of summaries from the analysis group: number of rows
    List<StatisticalSummary> statisticalSummaries = singleCellAnalysisGroup.getStatisticalSummaries();
    int size = statisticalSummaries.size();
    // columns: 1 + 6 for statistical numbers
    columnNames = new String[7];
    columnNames[0] = "";
    columnNames[1] = "Max";
    columnNames[2] = "Min";
    columnNames[3] = "Mean";
    columnNames[4] = "N";
    columnNames[5] = "SD";
    columnNames[6] = "Variance";
    singleCellAnalysisGroup.getConditionDataHolders();
    data = new Object[size][columnNames.length];
    // fill in data
    for (int rowIndex = 0; rowIndex < data.length; rowIndex++) {
        data[rowIndex][0] = "Cond "
                + (singleCellAnalysisGroup.getConditionDataHolders().get(rowIndex).getPlateCondition());
        // summary for a row
        StatisticalSummary statisticalSummary = statisticalSummaries.get(rowIndex);
        // distribute statistical objects per columns
        data[rowIndex][1] = statisticalSummary.getMax();
        data[rowIndex][2] = statisticalSummary.getMin();
        data[rowIndex][3] = statisticalSummary.getMean();
        data[rowIndex][4] = statisticalSummary.getN();
        data[rowIndex][5] = statisticalSummary.getStandardDeviation();
        data[rowIndex][6] = statisticalSummary.getVariance();

    }
}

From source file:ijfx.core.stats.DefaultImageStatisticsService.java

/**
 * Copies the values of a statistical summary into a map keyed by the
 * statistic labels ({@code LBL_MEAN}, {@code LBL_MIN}, {@code LBL_MAX},
 * {@code LBL_SD}, {@code LBL_VARIANCE} and {@code LBL_PIXEL_COUNT}).
 *
 * @param summaryStats the summary to read the statistics from
 * @return a new map from label to value; the sample count is stored as a
 *         {@code double} under {@code LBL_PIXEL_COUNT}
 */
@Override
public Map<String, Double> summaryStatisticsToMap(StatisticalSummary summaryStats) {
    final Map<String, Double> result = new HashMap<>();

    final double mean = summaryStats.getMean();
    result.put(LBL_MEAN, mean);

    final double min = summaryStats.getMin();
    result.put(LBL_MIN, min);

    final double max = summaryStats.getMax();
    result.put(LBL_MAX, max);

    final double standardDeviation = summaryStats.getStandardDeviation();
    result.put(LBL_SD, standardDeviation);

    final double variance = summaryStats.getVariance();
    result.put(LBL_VARIANCE, variance);

    // getN() is not a double; widen it so it fits the map's value type
    final double pixelCount = (double) summaryStats.getN();
    result.put(LBL_PIXEL_COUNT, pixelCount);

    return result;
}

From source file:org.briljantframework.data.dataframe.DataFrames.java

/**
 * Presents a summary of the given data frame. For each column of {@code df} the returned summary
 * contains one row. Each row is described by six values: {@code mean}, {@code var}, {@code std},
 * {@code min}, {@code max} and {@code mode}. The first five are set only for numerical columns;
 * {@code mode} is set for every column.
 *
 * <pre>
 * {@code
 * > DataFrame df = MixedDataFrame.of(
 *    "a", Vector.of(1, 2, 3, 4, 5, 6),
 *    "b", Vector.of("a", "b", "b", "b", "e", "f"),
 *    "c", Vector.of(1.1, 1.2, 1.3, 1.4, 1.5, 1.6)
 *  );
 * 
 * > DataFrames.summary(df)
 *    mean   var    std    min    max    mode
 * a  3.500  3.500  1.871  1.000  6.000  6
 * b  NA     NA     NA     NA     NA     f
 * c  1.350  0.035  0.187  1.100  1.600  1.1
 * 
 * [3 rows x 6 columns]
 * }
 * </pre>
 *
 * @param df the data frame
 * @return a data frame summarizing {@code df}
 */
public static DataFrame summary(DataFrame df) {
    DataFrame.Builder summaryBuilder = new MixedDataFrame.Builder();
    // one entry per summary statistic, each paired with its vector type
    summaryBuilder
            .set("mean", VectorType.DOUBLE)
            .set("var", VectorType.DOUBLE)
            .set("std", VectorType.DOUBLE)
            .set("min", VectorType.DOUBLE)
            .set("max", VectorType.DOUBLE)
            .set("mode", VectorType.OBJECT);

    for (Object key : df.getColumnIndex().keySet()) {
        Vector values = df.get(key);
        if (Is.numeric(values)) {
            // numeric statistics are filled in for numeric columns only
            StatisticalSummary stats = values.collect(Number.class, Collectors.statisticalSummary());
            summaryBuilder
                    .set(key, "mean", stats.getMean())
                    .set(key, "var", stats.getVariance())
                    .set(key, "std", stats.getStandardDeviation())
                    .set(key, "min", stats.getMin())
                    .set(key, "max", stats.getMax());
        }
        // the mode is recorded for every column, numeric or not
        summaryBuilder.set(key, "mode", values.collect(Collectors.mode()));
    }
    return summaryBuilder.build();
}