Example usage for org.apache.commons.math3.stat.descriptive.moment GeometricMean getResult

List of usage examples for org.apache.commons.math3.stat.descriptive.moment GeometricMean getResult


On this page you can find example usages of org.apache.commons.math3.stat.descriptive.moment GeometricMean getResult.


public double getResult() 

Source Link


From source file: org.deidentifier.arx.aggregates.StatisticsBuilder.java

/**
 * Returns summary statistics for all attributes.
 *
 * @param listwiseDeletion A flag enabling list-wise deletion
 * @return A map from attribute name to the summary statistics of that attribute
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) {
    // Overview: builds one StatisticsSummary per column of `handle`, keyed by attribute name.
    //   Pass 1 determines each column's scale and allocates per-attribute accumulators.
    //   Pass 2 scans the rows and feeds values into the accumulators.
    //   Pass 3 converts the accumulators into StatisticsSummary objects.
    // NOTE(review): this excerpt appears truncated. Closing braces and some statements are
    // missing (for example, nothing visible here adds values to `statistics` or `ordinal`),
    // so the braces are unbalanced as shown. Confirm against the full source file.

    // Reset stop flag
    interrupt.value = false;

    // Per-attribute state, all keyed by attribute name
    Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>();
    Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>();
    Map<String, DataScale> scales = new HashMap<String, DataScale>();
    Map<String, GeometricMean> geomean = new HashMap<String, GeometricMean>();

    // Detect scales
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Meta
        String attribute = handle.getAttributeName(col);
        DataType<?> type = handle.getDataType(attribute);

        // Scale
        DataScale scale = type.getDescription().getScale();

        // Try to replace nominal scale with ordinal scale based on base data type.
        // This only happens when handle.getGeneralization(attribute) is non-zero, the base
        // data type is not an ARXString and getHierarchy(col, true) returns a non-null value.
        if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) {
            if (!(handle.getBaseDataType(attribute) instanceof ARXString) && getHierarchy(col, true) != null) {
                scale = DataScale.ORDINAL;

        // Store the (possibly upgraded) scale and fresh accumulators for this attribute
        scales.put(attribute, scale);
        statistics.put(attribute, new DescriptiveStatistics());
        geomean.put(attribute, new GeometricMean());
        ordinal.put(attribute, getSummaryStatisticsOrdinal(handle.getGeneralization(attribute),
                handle.getDataType(attribute), handle.getBaseDataType(attribute), getHierarchy(col, true)));

    // Compute summary statistics
    for (int row = 0; row < handle.getNumRows(); row++) {

        // Check, if we should include this row.
        // With list-wise deletion, the row is excluded when handle.isOutlier(row) is true
        // or when any of its cells is null according to DataType.isNull.
        // NOTE(review): handle.isOutlier(row) does not depend on `col`, yet it is evaluated
        // once per column inside the inner loop.
        boolean include = true;
        if (listwiseDeletion) {
            for (int col = 0; col < handle.getNumColumns(); col++) {
                if (handle.isOutlier(row) || DataType.isNull(handle.getValue(row, col))) {
                    include = false;

        // Check
        // NOTE(review): no statement follows this comment in the excerpt; the check itself
        // appears to be missing.

        // If yes, add
        if (include) {

            // For each column
            for (int col = 0; col < handle.getNumColumns(); col++) {

                // Meta
                String value = handle.getValue(row, col);
                String attribute = handle.getAttributeName(col);
                DataType<?> type = handle.getDataType(attribute);

                // Analyze: skip cells for which DataType.isAny or DataType.isNull is true
                if (!DataType.isAny(value) && !DataType.isNull(value)) {
                    // Only types implementing DataTypeWithRatioScale are converted to double
                    if (type instanceof DataTypeWithRatioScale) {
                        double doubleValue = ((DataTypeWithRatioScale) type).toDouble(type.parse(value));
                        // Values are shifted by +1 before entering the geometric mean; the shift
                        // is undone with "- 1d" where geo.getResult() is read further down.
                        // Presumably this keeps zero values from forcing the result to 0.
                        // TODO confirm.
                        geomean.get(attribute).increment(doubleValue + 1d);

    // Convert the accumulated state into one StatisticsSummary per attribute
    Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>();
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Check
        // NOTE(review): no statement follows this comment in the excerpt; the check itself
        // appears to be missing.

        // Depending on scale
        String attribute = handle.getAttributeName(col);
        DataScale scale = scales.get(attribute);
        DataType<T> type = (DataType<T>) handle.getDataType(attribute);
        if (scale == DataScale.NOMINAL) {
            // Nominal: number of measures and mode only
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute, new StatisticsSummary<T>(DataScale.NOMINAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode())));
        } else if (scale == DataScale.ORDINAL) {
            // Ordinal: adds median, min and max to the nominal figures.
            // NOTE(review): the statement below is incomplete in this excerpt. No enclosing
            // result.put(...) call, trailing arguments or closing parentheses are visible.
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
                    new StatisticsSummary<T>(DataScale.ORDINAL, stats.getNumberOfMeasures(), stats.getMode(),
                            type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()),
                            stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
        } else if (scale == DataScale.INTERVAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            // True when the type's wrapped class is Date; passed as the third argument of
            // toString(...) for variance, population variance, stddev, range and kurtosis.
            boolean isPeriod = type.getDescription().getWrappedClass() == Date.class;

            // TODO: Something is wrong with commons math's kurtosis
            // Workaround: a negative kurtosis is reported as NaN
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            // Each numeric statistic is passed three times: toString(...), toValue(...) and the
            // raw double.
            // NOTE(review): the raw range argument recomputes stats2.getMax() - stats2.getMin()
            // although `range` holds the same expression (the RATIO branch passes `range`).
            result.put(attribute, new StatisticsSummary<T>(DataScale.INTERVAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), isPeriod, true), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), isPeriod, true),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, isPeriod, false), toValue(type, stddev), stddev,
                    toString(type, range, isPeriod, false), toValue(type, range),
                    stats2.getMax() - stats2.getMin(), toString(type, kurtosis, isPeriod, false),
                    toValue(type, kurtosis), kurtosis));
        } else if (scale == DataScale.RATIO) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            GeometricMean geo = geomean.get(attribute);

            // TODO: Something is wrong with commons math's kurtosis
            // Workaround: a negative kurtosis is reported as NaN
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            // Same layout as the INTERVAL branch, followed by the geometric mean. The "- 1d"
            // undoes the +1 shift applied when the values were accumulated.
            // NOTE(review): the argument list is cut off after the geometric-mean entries in
            // this excerpt; the closing of this call is not visible.
            result.put(attribute, new StatisticsSummary<T>(DataScale.RATIO, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), false, false), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), false, false),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, false, false), toValue(type, stddev), stddev,
                    toString(type, range, false, false), toValue(type, range), range,
                    toString(type, kurtosis, false, false), toValue(type, kurtosis), kurtosis,
                    toString(type, geo.getResult() - 1d, false, false), toValue(type, geo.getResult() - 1d),

    return result;