Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getGeometricMean

List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getGeometricMean

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getGeometricMean().

Prototype

public double getGeometricMean() 

Source Link

Document

Returns the <a href="http://www.xycoon.com/geometric_mean.htm">geometric mean</a> of the available values.

Usage

From source file:com.facebook.presto.tests.AbstractTestQueries.java

/**
 * Checks that a rescaled 50% Poissonized sample reports a row count close to the exact count.
 *
 * The sampling query is executed 100 times. Each reported count is divided by the exact count of
 * {@code orders}; the geometric mean of those ratios must lie strictly between 0.90 and 1.1, and
 * the ratios must have a positive variance.
 */
@Test
public void testTableSamplePoissonizedRescaled() throws Exception {
    final String sampleQuery = "SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED APPROXIMATE AT 95 CONFIDENCE";

    DescriptiveStatistics ratios = new DescriptiveStatistics();

    // Exact row count used as the denominator of every ratio.
    Object exactCount = computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows().get(0).getField(0);
    long total = (long) exactCount;

    for (int run = 0; run < 100; run++) {
        Object reported = computeActual(sampleQuery).getMaterializedRows().get(0).getField(0);
        // The result is a string; only its first space-separated token is parsed as the count.
        String[] tokens = ((String) reported).split(" ");
        long estimate = Long.parseLong(tokens[0]);
        ratios.addValue(estimate * 1.0 / total);
    }

    double mean = ratios.getGeometricMean();
    boolean closeToOne = mean > 0.90 && mean < 1.1;
    assertTrue(closeToOne, format("Expected sample to be rescaled to ~1.0, but was %s", mean));

    boolean sizesVary = ratios.getVariance() > 0;
    assertTrue(sizesVary, "Samples all had the exact same size");
}

From source file:io.prestosql.tests.AbstractTestQueries.java

/**
 * Checks that {@code TABLESAMPLE BERNOULLI (50)} returns distinct rows at roughly a 50% rate.
 *
 * The sampling query is executed 100 times. Every run must contain no duplicate rows, and the
 * geometric mean of the per-run sampling rates must lie strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    DescriptiveStatistics rates = new DescriptiveStatistics();

    // Size of the unsampled result, used as the denominator of every rate.
    final int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows().size();

    for (int run = 0; run < 100; run++) {
        List<MaterializedRow> sampled = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();

        int sampledCount = sampled.size();
        int distinctCount = ImmutableSet.copyOf(sampled).size();
        assertEquals(sampledCount, distinctCount, "TABLESAMPLE produced duplicate rows");

        rates.addValue(sampledCount * 1.0 / total);
    }

    double mean = rates.getGeometricMean();
    boolean nearHalf = mean > 0.45 && mean < 0.55;
    assertTrue(nearHalf, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}

From source file:org.apache.solr.client.solrj.io.eval.DescribeEvaluator.java

/**
 * Computes descriptive statistics over a list of numbers and returns them as a tuple.
 *
 * <p>The tuple's map holds the keys {@code max}, {@code mean}, {@code min}, {@code stdev},
 * {@code sum}, {@code N}, {@code var}, {@code kurtosis}, {@code skewness}, {@code popVar},
 * {@code geometricMean} and {@code sumsq}.
 *
 * @param value the evaluated operand; must be a {@link List} whose elements are all {@link Number}s
 * @return a {@code Tuple} built from the statistic-name to value map
 * @throws IOException if {@code value} is {@code null} or not a list
 */
@Override
public Object doWork(Object value) throws IOException {

    if (!(value instanceof List<?>)) {
        // A null operand is reported through the same IOException instead of failing with a
        // NullPointerException while the message is being built.
        throw new IOException(
                String.format(Locale.ROOT, "Invalid expression %s - expecting a numeric list but found %s",
                        toExpression(constructingFactory),
                        value == null ? "null" : value.getClass().getSimpleName()));
    }

    // Accept any Number (BigDecimal, Double, Long, ...) rather than BigDecimal only, so a list of
    // plain doubles or longs does not fail with a ClassCastException.
    DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
    for (Object innerValue : (List<?>) value) {
        descriptiveStatistics.addValue(((Number) innerValue).doubleValue());
    }

    Map<String, Number> map = new HashMap<>();
    map.put("max", descriptiveStatistics.getMax());
    map.put("mean", descriptiveStatistics.getMean());
    map.put("min", descriptiveStatistics.getMin());
    map.put("stdev", descriptiveStatistics.getStandardDeviation());
    map.put("sum", descriptiveStatistics.getSum());
    map.put("N", descriptiveStatistics.getN());
    map.put("var", descriptiveStatistics.getVariance());
    map.put("kurtosis", descriptiveStatistics.getKurtosis());
    map.put("skewness", descriptiveStatistics.getSkewness());
    map.put("popVar", descriptiveStatistics.getPopulationVariance());
    map.put("geometricMean", descriptiveStatistics.getGeometricMean());
    map.put("sumsq", descriptiveStatistics.getSumsq());

    return new Tuple(map);
}

From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.fuzzydedupe.CrossDomainDupesReducer.java

/**
 * Aggregates the duplicate-content hits recorded for one host and, when the host crosses the
 * scoring thresholds below, emits a single JSON summary for it.
 *
 * <p>Every incoming value is parsed as a JSON array of hit objects; the fields read here are
 * {@code dh}, {@code length}, {@code url} and {@code ip}. Hits whose {@code dh} differs from the
 * key's root-domain hash are counted as cross-domain duplicates; the others are collected under
 * {@code thisHostDupes}.
 *
 * @param key      host whose root-domain fingerprint is derived via {@code URLUtils.getURLFPV2FromHost}
 * @param values   JSON-array strings holding the hits for this host
 * @param output   collector that receives at most one (key, JSON summary) record per call
 * @param reporter not used by this method
 * @throws IOException declared by the signature; presumably raised by {@code output.collect} - confirm
 */
@Override
public void reduce(TextBytes key, Iterator<TextBytes> values, OutputCollector<TextBytes, TextBytes> output,
        Reporter reporter) throws IOException {

    // The filter is an instance-level membership structure; reset it for this key.
    filter.clear();
    double crossDomainDupesCount = 0;
    double totalHitsCount = 0;
    double uniqueRootDomainsCount = 0;
    double uniqueIPs = 0;
    double validDupePatternMatches = 0;

    URLFPV2 rootFP = URLUtils.getURLFPV2FromHost(key.toString());
    // Scratch fingerprint, overwritten for every membership test against the filter.
    URLFPV2 fp = new URLFPV2();
    // Number of entries of the instance-level samples array filled during this call.
    int sampleCount = 0;
    ArrayList<Integer> ipAddresses = new ArrayList<Integer>();
    JsonArray thisHostsDupes = new JsonArray();
    DescriptiveStatistics lengthStats = new DescriptiveStatistics();

    while (values.hasNext()) {
        JsonArray jsonArray = parser.parse(values.next().toString()).getAsJsonArray();
        for (JsonElement elem : jsonArray) {
            totalHitsCount++;
            fp.setRootDomainHash(elem.getAsJsonObject().get("dh").getAsLong());
            if (fp.getRootDomainHash() != rootFP.getRootDomainHash()) {
                crossDomainDupesCount++;
                // All three hash fields are set to the root-domain hash so the filter entry
                // identifies the root domain only.
                fp.setDomainHash(fp.getRootDomainHash());
                fp.setUrlHash(fp.getRootDomainHash());
                // track length average ....
                lengthStats.addValue(elem.getAsJsonObject().get("length").getAsInt());

                if (!filter.isPresent(fp)) {
                    // First hit seen for this root domain.
                    uniqueRootDomainsCount++;
                    filter.add(fp);
                    // Keep one sample URL per newly seen root domain until the samples array is full.
                    if (sampleCount < samples.length) {
                        String url = elem.getAsJsonObject().get("url").getAsString();
                        GoogleURL urlObject = new GoogleURL(url);
                        if (knownValidDupesPatterns.matcher(urlObject.getCanonicalURL()).find()) {
                            validDupePatternMatches++;
                        }
                        samples[sampleCount++] = url;
                    }
                }
            } else {
                thisHostsDupes.add(elem);
            }

            int ipAddress = elem.getAsJsonObject().get("ip").getAsInt();

            // Reuse the scratch fingerprint, now keyed purely by IP, against the same filter
            // that also holds the root-domain entries.
            fp.setRootDomainHash(ipAddress);
            fp.setDomainHash(ipAddress);
            fp.setUrlHash(ipAddress);

            if (!filter.isPresent(fp)) {
                uniqueIPs++;
                filter.add(fp);
                ipAddresses.add(ipAddress);
            }
        }
    }

    // Only hosts with more than 15 hits and at least two cross-domain duplicates are scored.
    if (totalHitsCount > 15 && crossDomainDupesCount >= 2) {

        double otherDomainToLocalScore = otherDomainToLocalDomainScore(totalHitsCount, crossDomainDupesCount);
        double spamIPScore = spamHostScore(totalHitsCount, crossDomainDupesCount, uniqueIPs);

        // Emit when either score crosses 0.5 (inclusive for the first, exclusive for the second).
        if (otherDomainToLocalScore >= .50 || spamIPScore > .50) {
            JsonObject objectOut = new JsonObject();

            objectOut.addProperty("ratio", (crossDomainDupesCount / totalHitsCount));
            objectOut.addProperty("totalHits", totalHitsCount);
            objectOut.addProperty("crossDomainDupes", crossDomainDupesCount);
            objectOut.addProperty("uniqueRootDomains", uniqueRootDomainsCount);
            objectOut.addProperty("otherDomainToLocalScore", otherDomainToLocalScore);
            objectOut.addProperty("spamIPScore", spamIPScore);
            objectOut.addProperty("validDupeMatches", validDupePatternMatches);
            objectOut.addProperty("content-len-mean", lengthStats.getMean());
            objectOut.addProperty("content-len-geo-mean", lengthStats.getGeometricMean());

            for (int i = 0; i < sampleCount; ++i) {
                objectOut.addProperty("sample-" + i, samples[i]);
            }
            // compute path edit distance ...
            if (sampleCount > 1) {
                // Compare at most the first five samples; every ordered pair (j, k) with j != k
                // is visited, so each unordered pair contributes twice.
                int sampleEditDistanceSize = Math.min(sampleCount, 5);
                DescriptiveStatistics stats = new DescriptiveStatistics();
                for (int j = 0; j < sampleEditDistanceSize; ++j) {
                    for (int k = 0; k < sampleEditDistanceSize; ++k) {
                        if (k != j) {
                            GoogleURL urlObjectA = new GoogleURL(samples[j]);
                            GoogleURL urlObjectB = new GoogleURL(samples[k]);

                            // Pairs where either path has 100 or more characters are skipped.
                            if (urlObjectA.getPath().length() < 100 && urlObjectB.getPath().length() < 100) {
                                stats.addValue(StringUtils.getLevenshteinDistance(urlObjectA.getPath(),
                                        urlObjectB.getPath()));
                            }
                        }
                    }
                }
                // NOTE(review): if no pair qualified above, stats is empty. Commons Math documents
                // getMean() as NaN in that case, and NaN != 0.0 is true, so NaN values would be
                // written - confirm this is intended.
                if (stats.getMean() != 0.0) {
                    objectOut.addProperty("lev-distance-mean", stats.getMean());
                    objectOut.addProperty("lev-distance-geomean", stats.getGeometricMean());
                }
            }

            // The emitted IP list is capped at the first 1000 distinct addresses.
            JsonArray ipAddressArray = new JsonArray();
            for (int j = 0; j < Math.min(1000, ipAddresses.size()); ++j) {
                ipAddressArray.add(new JsonPrimitive(ipAddresses.get(j)));
            }
            if (ipAddresses.size() != 0) {
                objectOut.add("ipList", ipAddressArray);
            }
            objectOut.add("thisHostDupes", thisHostsDupes);

            output.collect(key, new TextBytes(objectOut.toString()));
        }
    }

}

From source file:org.deidentifier.arx.aggregates.StatisticsBuilder.java

/**
 * Returns summary statistics for all attributes.
 * /* w w  w .  j a  va  2s  .com*/
 * @param listwiseDeletion A flag enabling list-wise deletion
 * @return
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) {

    // Reset stop flag
    interrupt.value = false;

    Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>();
    Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>();
    Map<String, DataScale> scales = new HashMap<String, DataScale>();
    Map<String, GeometricMean> geomean = new HashMap<String, GeometricMean>();

    // Detect scales
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Meta
        String attribute = handle.getAttributeName(col);
        DataType<?> type = handle.getDataType(attribute);

        // Scale
        DataScale scale = type.getDescription().getScale();

        // Try to replace nominal scale with ordinal scale based on base data type
        if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) {
            if (!(handle.getBaseDataType(attribute) instanceof ARXString) && getHierarchy(col, true) != null) {
                scale = DataScale.ORDINAL;
            }
        }

        // Store
        scales.put(attribute, scale);
        statistics.put(attribute, new DescriptiveStatistics());
        geomean.put(attribute, new GeometricMean());
        ordinal.put(attribute, getSummaryStatisticsOrdinal(handle.getGeneralization(attribute),
                handle.getDataType(attribute), handle.getBaseDataType(attribute), getHierarchy(col, true)));
    }

    // Compute summary statistics
    for (int row = 0; row < handle.getNumRows(); row++) {

        // Check, if we should include this row
        boolean include = true;
        if (listwiseDeletion) {
            for (int col = 0; col < handle.getNumColumns(); col++) {
                if (handle.isOutlier(row) || DataType.isNull(handle.getValue(row, col))) {
                    include = false;
                    break;
                }
            }
        }

        // Check
        checkInterrupt();

        // If yes, add
        if (include) {

            // For each column
            for (int col = 0; col < handle.getNumColumns(); col++) {

                // Meta
                String value = handle.getValue(row, col);
                String attribute = handle.getAttributeName(col);
                DataType<?> type = handle.getDataType(attribute);

                // Analyze
                if (!DataType.isAny(value) && !DataType.isNull(value)) {
                    ordinal.get(attribute).addValue(value);
                    if (type instanceof DataTypeWithRatioScale) {
                        double doubleValue = ((DataTypeWithRatioScale) type).toDouble(type.parse(value));
                        statistics.get(attribute).addValue(doubleValue);
                        geomean.get(attribute).increment(doubleValue + 1d);
                    }
                }
            }
        }
    }

    // Convert
    Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>();
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Check
        checkInterrupt();

        // Depending on scale
        String attribute = handle.getAttributeName(col);
        DataScale scale = scales.get(attribute);
        DataType<T> type = (DataType<T>) handle.getDataType(attribute);
        ordinal.get(attribute).analyze();
        if (scale == DataScale.NOMINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute, new StatisticsSummary<T>(DataScale.NOMINAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode())));
        } else if (scale == DataScale.ORDINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute,
                    new StatisticsSummary<T>(DataScale.ORDINAL, stats.getNumberOfMeasures(), stats.getMode(),
                            type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()),
                            stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                            type.parse(stats.getMax())));
        } else if (scale == DataScale.INTERVAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            boolean isPeriod = type.getDescription().getWrappedClass() == Date.class;

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.INTERVAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), isPeriod, true), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), isPeriod, true),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, isPeriod, false), toValue(type, stddev), stddev,
                    toString(type, range, isPeriod, false), toValue(type, range),
                    stats2.getMax() - stats2.getMin(), toString(type, kurtosis, isPeriod, false),
                    toValue(type, kurtosis), kurtosis));
        } else if (scale == DataScale.RATIO) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            GeometricMean geo = geomean.get(attribute);

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.RATIO, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), false, false), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), false, false),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, false, false), toValue(type, stddev), stddev,
                    toString(type, range, false, false), toValue(type, range), range,
                    toString(type, kurtosis, false, false), toValue(type, kurtosis), kurtosis,
                    toString(type, geo.getResult() - 1d, false, false), toValue(type, geo.getResult() - 1d),
                    stats2.getGeometricMean()));
        }
    }

    return result;
}

From source file:org.lightjason.agentspeak.action.buildin.math.statistic.EStatisticValue.java

/**
 * returns a statistic value/* ww  w. ja va  2 s.  com*/
 *
 * @param p_statistic statistic object
 * @return statistic value
 */
public final double value(final DescriptiveStatistics p_statistic) {
    switch (this) {
    case GEOMETRICMEAN:
        return p_statistic.getGeometricMean();

    case MAX:
        return p_statistic.getMax();

    case MIN:
        return p_statistic.getMin();

    case COUNT:
        return p_statistic.getN();

    case POPULATIONVARIANCE:
        return p_statistic.getPopulationVariance();

    case QUADRATICMEAN:
        return p_statistic.getQuadraticMean();

    case STANDARDDEVIATION:
        return p_statistic.getStandardDeviation();

    case SUM:
        return p_statistic.getSum();

    case SUMSQUARE:
        return p_statistic.getSumsq();

    case VARIANCE:
        return p_statistic.getVariance();

    case MEAN:
        return p_statistic.getMean();

    case KURTIOSIS:
        return p_statistic.getKurtosis();

    default:
        throw new CIllegalStateException(
                org.lightjason.agentspeak.common.CCommon.languagestring(this, "unknown", this));
    }
}

From source file:org.lightjason.agentspeak.action.builtin.math.statistic.EStatisticValue.java

/**
 * returns a statistic value/*from  ww w .  j a va  2  s .co  m*/
 *
 * @param p_statistic statistic object
 * @return statistic value
 */
public final double value(@Nonnull final DescriptiveStatistics p_statistic) {
    switch (this) {
    case GEOMETRICMEAN:
        return p_statistic.getGeometricMean();

    case MAX:
        return p_statistic.getMax();

    case MIN:
        return p_statistic.getMin();

    case COUNT:
        return p_statistic.getN();

    case POPULATIONVARIANCE:
        return p_statistic.getPopulationVariance();

    case QUADRATICMEAN:
        return p_statistic.getQuadraticMean();

    case STANDARDDEVIATION:
        return p_statistic.getStandardDeviation();

    case SUM:
        return p_statistic.getSum();

    case SUMSQUARE:
        return p_statistic.getSumsq();

    case VARIANCE:
        return p_statistic.getVariance();

    case MEAN:
        return p_statistic.getMean();

    case KURTIOSIS:
        return p_statistic.getKurtosis();

    default:
        throw new CIllegalStateException(
                org.lightjason.agentspeak.common.CCommon.languagestring(this, "unknown", this));
    }
}

From source file:org.obiba.opal.web.magma.Dtos.java

/**
 * Converts a continuous variable summary into its DTO builder.
 *
 * <p>A statistic is copied into the DTO only when {@code isNumeric} accepts it; statistics that
 * are rejected are left unset. Each statistic is read from the {@code DescriptiveStatistics} once
 * and the local copy is used for both the check and the setter. {@code Math} here is the DTO
 * container class in scope, not {@code java.lang.Math}.
 *
 * @param summary the summary providing descriptive statistics, percentiles, interval frequencies
 *            and value frequencies
 * @return the continuous summary builder with the descriptive statistics attached
 */
@SuppressWarnings({ "OverlyLongMethod", "PMD.NcssMethodCount" })
public static Math.ContinuousSummaryDto.Builder asDto(ContinuousVariableSummary summary) {
    DescriptiveStatistics descriptiveStats = summary.getDescriptiveStats();

    Math.DescriptiveStatsDto.Builder descriptiveBuilder = Math.DescriptiveStatsDto.newBuilder()
            .setN(descriptiveStats.getN()).addAllPercentiles(summary.getPercentiles());

    double min = descriptiveStats.getMin();
    if (isNumeric(min)) {
        descriptiveBuilder.setMin(min);
    }
    double max = descriptiveStats.getMax();
    if (isNumeric(max)) {
        descriptiveBuilder.setMax(max);
    }
    double mean = descriptiveStats.getMean();
    if (isNumeric(mean)) {
        descriptiveBuilder.setMean(mean);
    }
    double sum = descriptiveStats.getSum();
    if (isNumeric(sum)) {
        descriptiveBuilder.setSum(sum);
    }
    double sumsq = descriptiveStats.getSumsq();
    if (isNumeric(sumsq)) {
        descriptiveBuilder.setSumsq(sumsq);
    }
    double stdDev = descriptiveStats.getStandardDeviation();
    if (isNumeric(stdDev)) {
        descriptiveBuilder.setStdDev(stdDev);
    }
    // The variance is copied once here; the original repeated this same step after the median.
    double variance = descriptiveStats.getVariance();
    if (isNumeric(variance)) {
        descriptiveBuilder.setVariance(variance);
    }
    double skewness = descriptiveStats.getSkewness();
    if (isNumeric(skewness)) {
        descriptiveBuilder.setSkewness(skewness);
    }
    double geometricMean = descriptiveStats.getGeometricMean();
    if (isNumeric(geometricMean)) {
        descriptiveBuilder.setGeometricMean(geometricMean);
    }
    double kurtosis = descriptiveStats.getKurtosis();
    if (isNumeric(kurtosis)) {
        descriptiveBuilder.setKurtosis(kurtosis);
    }
    double median = descriptiveStats.apply(new Median());
    if (isNumeric(median)) {
        descriptiveBuilder.setMedian(median);
    }

    Math.ContinuousSummaryDto.Builder continuousBuilder = Math.ContinuousSummaryDto.newBuilder()
            .addAllDistributionPercentiles(summary.getDistributionPercentiles());

    // One DTO entry per interval; the frequency is always set, the bounds and densities only
    // when isNumeric accepts them.
    for (IntervalFrequency.Interval interval : summary.getIntervalFrequencies()) {
        Math.IntervalFrequencyDto.Builder freqBuilder = Math.IntervalFrequencyDto.newBuilder()
                .setFreq(interval.getFreq());
        if (isNumeric(interval.getLower())) {
            freqBuilder.setLower(interval.getLower());
        }
        if (isNumeric(interval.getUpper())) {
            freqBuilder.setUpper(interval.getUpper());
        }
        if (isNumeric(interval.getDensity())) {
            freqBuilder.setDensity(interval.getDensity());
        }
        if (isNumeric(interval.getDensityPct())) {
            freqBuilder.setDensityPct(interval.getDensityPct());
        }
        continuousBuilder.addIntervalFrequency(freqBuilder);
    }

    // One DTO entry per value frequency; the percentage is set only when isNumeric accepts it.
    for (ContinuousVariableSummary.Frequency frequency : summary.getFrequencies()) {
        Math.FrequencyDto.Builder freqBuilder = Math.FrequencyDto.newBuilder() //
                .setValue(frequency.getValue()) //
                .setFreq(frequency.getFreq())//
                .setMissing(frequency.isMissing());
        if (isNumeric(frequency.getPct())) {
            freqBuilder.setPct(frequency.getPct());
        }
        continuousBuilder.addFrequencies(freqBuilder);
    }

    return continuousBuilder.setSummary(descriptiveBuilder);
}