List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getMax()
public double getMax()
From source file:org.apache.metron.statistics.outlier.MedianAbsoluteDeviationTest.java
@Test public void testLongTailed() { TDistribution generator = new TDistribution(new MersenneTwister(0L), 100); DescriptiveStatistics stats = new DescriptiveStatistics(); List<MedianAbsoluteDeviationFunctions.State> states = new ArrayList<>(); MedianAbsoluteDeviationFunctions.State currentState = null; //initialize the state currentState = (MedianAbsoluteDeviationFunctions.State) run("OUTLIER_MAD_STATE_MERGE(states, NULL)", ImmutableMap.of("states", states)); for (int i = 0, j = 0; i < 10000; ++i, ++j) { Double d = generator.sample(); stats.addValue(d);/*from ww w . j av a 2 s . c o m*/ run("OUTLIER_MAD_ADD(currentState, data)", ImmutableMap.of("currentState", currentState, "data", d)); if (j >= 1000) { j = 0; List<MedianAbsoluteDeviationFunctions.State> stateWindow = new ArrayList<>(); for (int stateIndex = Math.max(0, states.size() - 5); stateIndex < states.size(); ++stateIndex) { stateWindow.add(states.get(stateIndex)); } currentState = (MedianAbsoluteDeviationFunctions.State) run( "OUTLIER_MAD_STATE_MERGE(states, currentState)", ImmutableMap.of("states", stateWindow, "currentState", currentState)); } } { Double score = (Double) run("OUTLIER_MAD_SCORE(currentState, value)", ImmutableMap.of("currentState", currentState, "value", stats.getMin())); Assert.assertTrue("Score: " + score + " is not an outlier despite being a minimum.", score > 3.5); } { Double score = (Double) run("OUTLIER_MAD_SCORE(currentState, value)", ImmutableMap.of("currentState", currentState, "value", stats.getMax())); Assert.assertTrue("Score: " + score + " is not an outlier despite being a maximum", score > 3.5); } { Double score = (Double) run("OUTLIER_MAD_SCORE(currentState, value)", ImmutableMap.of("currentState", currentState, "value", stats.getMean() + 4 * stats.getStandardDeviation())); Assert.assertTrue( "Score: " + score + " is not an outlier despite being 4 std deviations away from the mean", score > 3.5); } { Double score = (Double) run("OUTLIER_MAD_SCORE(currentState, value)", 
ImmutableMap.of("currentState", currentState, "value", stats.getMean() - 4 * stats.getStandardDeviation())); Assert.assertTrue( "Score: " + score + " is not an outlier despite being 4 std deviations away from the mean", score > 3.5); } { Double score = (Double) run("OUTLIER_MAD_SCORE(currentState, value)", ImmutableMap.of("currentState", currentState, "value", stats.getMean())); Assert.assertFalse("Score: " + score + " is an outlier despite being the mean", score > 3.5); } }
From source file:org.apache.metron.statistics.StatisticalBinningPerformanceDriver.java
public static void main(String... argv) { DescriptiveStatistics perfStats = new DescriptiveStatistics(); OnlineStatisticsProvider statsProvider = new OnlineStatisticsProvider(); List<Double> values = new ArrayList<>(); GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(0L)); for (int i = 0; i < NUM_DATA_POINTS; ++i) { //get the data point out of the [0,1] range double d = 1000 * gaussian.nextNormalizedDouble(); values.add(d);//from w ww .jav a 2 s.c o m statsProvider.addValue(d); } for (int perfRun = 0; perfRun < NUM_RUNS; ++perfRun) { StellarStatisticsFunctions.StatsBin bin = new StellarStatisticsFunctions.StatsBin(); long start = System.currentTimeMillis(); Random r = new Random(0); for (int i = 0; i < TRIALS_PER_RUN; ++i) { //grab a random value and fuzz it a bit so we make sure there's no cheating via caching in t-digest. bin.apply(ImmutableList.of(statsProvider, values.get(r.nextInt(values.size())) - 3.5, PERCENTILES)); } perfStats.addValue(System.currentTimeMillis() - start); } System.out.println("Min/25th/50th/75th/Max Milliseconds: " + perfStats.getMin() + " / " + perfStats.getPercentile(25) + " / " + perfStats.getPercentile(50) + " / " + perfStats.getPercentile(75) + " / " + perfStats.getMax()); }
From source file:org.apache.solr.client.solrj.io.eval.DescribeEvaluator.java
@Override public Object doWork(Object value) throws IOException { if (!(value instanceof List<?>)) { throw new IOException( String.format(Locale.ROOT, "Invalid expression %s - expecting a numeric list but found %s", toExpression(constructingFactory), value.getClass().getSimpleName())); }/*from w w w . j a v a2 s. c om*/ // we know each value is a BigDecimal or a list of BigDecimals DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(); ((List<?>) value).stream().mapToDouble(innerValue -> ((BigDecimal) innerValue).doubleValue()) .forEach(innerValue -> descriptiveStatistics.addValue(innerValue)); Map<String, Number> map = new HashMap<>(); map.put("max", descriptiveStatistics.getMax()); map.put("mean", descriptiveStatistics.getMean()); map.put("min", descriptiveStatistics.getMin()); map.put("stdev", descriptiveStatistics.getStandardDeviation()); map.put("sum", descriptiveStatistics.getSum()); map.put("N", descriptiveStatistics.getN()); map.put("var", descriptiveStatistics.getVariance()); map.put("kurtosis", descriptiveStatistics.getKurtosis()); map.put("skewness", descriptiveStatistics.getSkewness()); map.put("popVar", descriptiveStatistics.getPopulationVariance()); map.put("geometricMean", descriptiveStatistics.getGeometricMean()); map.put("sumsq", descriptiveStatistics.getSumsq()); return new Tuple(map); }
From source file:org.deidentifier.arx.aggregates.StatisticsBuilder.java
/** * Returns summary statistics for all attributes. * //from w w w . j av a 2 s .c om * @param listwiseDeletion A flag enabling list-wise deletion * @return */ @SuppressWarnings({ "unchecked", "rawtypes" }) public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) { // Reset stop flag interrupt.value = false; Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>(); Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>(); Map<String, DataScale> scales = new HashMap<String, DataScale>(); Map<String, GeometricMean> geomean = new HashMap<String, GeometricMean>(); // Detect scales for (int col = 0; col < handle.getNumColumns(); col++) { // Meta String attribute = handle.getAttributeName(col); DataType<?> type = handle.getDataType(attribute); // Scale DataScale scale = type.getDescription().getScale(); // Try to replace nominal scale with ordinal scale based on base data type if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) { if (!(handle.getBaseDataType(attribute) instanceof ARXString) && getHierarchy(col, true) != null) { scale = DataScale.ORDINAL; } } // Store scales.put(attribute, scale); statistics.put(attribute, new DescriptiveStatistics()); geomean.put(attribute, new GeometricMean()); ordinal.put(attribute, getSummaryStatisticsOrdinal(handle.getGeneralization(attribute), handle.getDataType(attribute), handle.getBaseDataType(attribute), getHierarchy(col, true))); } // Compute summary statistics for (int row = 0; row < handle.getNumRows(); row++) { // Check, if we should include this row boolean include = true; if (listwiseDeletion) { for (int col = 0; col < handle.getNumColumns(); col++) { if (handle.isOutlier(row) || DataType.isNull(handle.getValue(row, col))) { include = false; break; } } } // Check checkInterrupt(); // If yes, add if (include) { // For each column for (int col = 0; col < handle.getNumColumns(); col++) { // 
Meta String value = handle.getValue(row, col); String attribute = handle.getAttributeName(col); DataType<?> type = handle.getDataType(attribute); // Analyze if (!DataType.isAny(value) && !DataType.isNull(value)) { ordinal.get(attribute).addValue(value); if (type instanceof DataTypeWithRatioScale) { double doubleValue = ((DataTypeWithRatioScale) type).toDouble(type.parse(value)); statistics.get(attribute).addValue(doubleValue); geomean.get(attribute).increment(doubleValue + 1d); } } } } } // Convert Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>(); for (int col = 0; col < handle.getNumColumns(); col++) { // Check checkInterrupt(); // Depending on scale String attribute = handle.getAttributeName(col); DataScale scale = scales.get(attribute); DataType<T> type = (DataType<T>) handle.getDataType(attribute); ordinal.get(attribute).analyze(); if (scale == DataScale.NOMINAL) { StatisticsSummaryOrdinal stats = ordinal.get(attribute); result.put(attribute, new StatisticsSummary<T>(DataScale.NOMINAL, stats.getNumberOfMeasures(), stats.getMode(), type.parse(stats.getMode()))); } else if (scale == DataScale.ORDINAL) { StatisticsSummaryOrdinal stats = ordinal.get(attribute); result.put(attribute, new StatisticsSummary<T>(DataScale.ORDINAL, stats.getNumberOfMeasures(), stats.getMode(), type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(), type.parse(stats.getMax()))); } else if (scale == DataScale.INTERVAL) { StatisticsSummaryOrdinal stats = ordinal.get(attribute); DescriptiveStatistics stats2 = statistics.get(attribute); boolean isPeriod = type.getDescription().getWrappedClass() == Date.class; // TODO: Something is wrong with commons math's kurtosis double kurtosis = stats2.getKurtosis(); kurtosis = kurtosis < 0d ? 
Double.NaN : kurtosis; double range = stats2.getMax() - stats2.getMin(); double stddev = Math.sqrt(stats2.getVariance()); result.put(attribute, new StatisticsSummary<T>(DataScale.INTERVAL, stats.getNumberOfMeasures(), stats.getMode(), type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(), type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false), toValue(type, stats2.getMean()), stats2.getMean(), toString(type, stats2.getVariance(), isPeriod, true), toValue(type, stats2.getVariance()), stats2.getVariance(), toString(type, stats2.getPopulationVariance(), isPeriod, true), toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(), toString(type, stddev, isPeriod, false), toValue(type, stddev), stddev, toString(type, range, isPeriod, false), toValue(type, range), stats2.getMax() - stats2.getMin(), toString(type, kurtosis, isPeriod, false), toValue(type, kurtosis), kurtosis)); } else if (scale == DataScale.RATIO) { StatisticsSummaryOrdinal stats = ordinal.get(attribute); DescriptiveStatistics stats2 = statistics.get(attribute); GeometricMean geo = geomean.get(attribute); // TODO: Something is wrong with commons math's kurtosis double kurtosis = stats2.getKurtosis(); kurtosis = kurtosis < 0d ? 
Double.NaN : kurtosis; double range = stats2.getMax() - stats2.getMin(); double stddev = Math.sqrt(stats2.getVariance()); result.put(attribute, new StatisticsSummary<T>(DataScale.RATIO, stats.getNumberOfMeasures(), stats.getMode(), type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(), type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false), toValue(type, stats2.getMean()), stats2.getMean(), toString(type, stats2.getVariance(), false, false), toValue(type, stats2.getVariance()), stats2.getVariance(), toString(type, stats2.getPopulationVariance(), false, false), toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(), toString(type, stddev, false, false), toValue(type, stddev), stddev, toString(type, range, false, false), toValue(type, range), range, toString(type, kurtosis, false, false), toValue(type, kurtosis), kurtosis, toString(type, geo.getResult() - 1d, false, false), toValue(type, geo.getResult() - 1d), stats2.getGeometricMean())); } } return result; }
From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGeneratorHTML.java
@Override protected void addRow(QueryData queryData) { sb.append("<tr>\n"); // column: ID sb.append("<td>" + queryData.id + "</td>\n"); // column: SPARQL query sb.append("<td><pre>" + queryData.query.toString().replace("<", "<").replace(">", ">") + "</pre></td>\n"); // column: SPARQL query type sb.append("<td>" + queryData.queryType + "</td>\n"); // query graph // QueryToGraphExporter.exportYedGraph(queryData.query, new File("")); // sb.append("<td><img src=\"" + graphFile.getPath() + "\" alt=\"query graph\"></td>\n"); // column: depth sb.append("<td class='number'>" + queryData.maxTreeDepth + "</td>\n"); // column: #instances sb.append("<td class='number'>" + queryData.nrOfInstances + "</td>\n"); // columns: optimal CBD sizes (min, max, avg) DescriptiveStatistics optimalCBDSizeStats = queryData.optimalCBDSizeStats; sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMin() + "</td>\n"); sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMax() + "</td>\n"); sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMean() + "</td>\n"); // columns: generic CBD sizes (min, max, avg) DescriptiveStatistics genericCBDSizeStats = queryData.defaultCBDSizesStats; sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMin() + "</td>\n"); sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMax() + "</td>\n"); sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMean() + "</td>\n"); sb.append("</tr>\n"); }
From source file:org.dllearner.algorithms.qtl.experiments.EvaluationDataset.java
public void analyze() { ConciseBoundedDescriptionGenerator cbdGen = new SymmetricConciseBoundedDescriptionGeneratorImpl( ks.getQueryExecutionFactory()); String separator = "\t"; String tsv = sparqlQueries.entrySet().stream().map(entry -> { StringBuilder sb = new StringBuilder(); // ID//from w w w.j a va2 s.c o m String id = entry.getKey(); sb.append(id).append(separator); // query Query q = entry.getValue(); sb.append(q.toString().replace("\n", " ")); try { // get query result List<String> result = SPARQLUtils.getResult(ks.getQueryExecutionFactory(), q); sb.append(separator).append(result.size()); // query type SPARQLUtils.QueryType queryType = SPARQLUtils.getQueryType(q); sb.append(separator).append(queryType.name()); // check CBD sizes and time Monitor mon = MonitorFactory.getTimeMonitor("CBD"); mon.reset(); DescriptiveStatistics sizeStats = new DescriptiveStatistics(); result.stream().map(r -> { System.out.println(r); mon.start(); Model cbd = cbdGen.getConciseBoundedDescription(r, 2); mon.stop(); return cbd; }).map(Model::size).forEach(sizeStats::addValue); // show min., max. and avg. size sb.append(separator).append(sizeStats.getMin()); sb.append(separator).append(sizeStats.getMax()); sb.append(separator).append(sizeStats.getMean()); // show min., max. and avg. CBD time sb.append(separator).append(mon.getTotal()); sb.append(separator).append(mon.getMin()); sb.append(separator).append(mon.getMax()); sb.append(separator).append(mon.getAvg()); } catch (Exception e) { e.printStackTrace(); } return sb; }).collect(Collectors.joining("\n")); System.out.println(tsv); }
From source file:org.hawkular.client.test.metrics.openshift.CollectionRateDetailTest.java
private void getData(String metricID, String testID, long start, long end, Duration timeBucket) { Reporter.log("Fetching large data set... may take a couple minutes", true); List<DataPoint<Double>> rawData = client().metrics().gauge() .findGaugeDataWithId(metricID, String.valueOf(start), String.valueOf(end), null, null, null) .getEntity();//from w w w . j av a 2s . c om Assert.assertNotNull(rawData, testID); Reporter.log("raw datapoints: " + rawData.size(), true); List<Long> zeroList = findZeroValues(rawData); Assert.assertTrue(zeroList == null || zeroList.size() == 0, testID); Map<Long, Integer> hist = OpenshiftBaseTest.makeHistogram(rawData, timeBucket); Double[] result = hist.entrySet().stream().map(x -> new Double(x.getValue())) .toArray(size -> new Double[size]); double[] d = ArrayUtils.toPrimitive(result); // drop the first and last as they are usually outliers double[] samples = Arrays.copyOfRange(d, 1, d.length - 1); DescriptiveStatistics stats = new DescriptiveStatistics(samples); Reporter.log(hist.toString(), true); Reporter.log("size: " + stats.getN(), true); Reporter.log("min/max: " + stats.getMin() + "/" + stats.getMax(), true); Reporter.log("mean: " + stats.getMean(), true); Reporter.log("variance: " + stats.getVariance(), true); Reporter.log("stddev: " + stats.getStandardDeviation(), true); }
From source file:org.jenetics.stat.DoubleMomentStatisticsTest.java
/**
 * Collects a sequential stream of doubles into {@code DoubleMomentStatistics} and
 * compares every figure with the one computed by {@code DescriptiveStatistics}.
 *
 * @param sampleCounts number of samples to generate
 * @param epsilon      tolerance for all figures except min and max, which must match exactly
 */
@Test(dataProvider = "sampleCounts")
public void summary(final Integer sampleCounts, final Double epsilon) {
    final List<Double> samples = numbers(sampleCounts);

    // Reference values computed by commons-math
    final DescriptiveStatistics reference = new DescriptiveStatistics();
    for (final Double sample : samples) {
        reference.addValue(sample);
    }

    final DoubleMomentStatistics actual = samples.stream()
            .collect(toDoubleMomentStatistics(Double::doubleValue));

    Assert.assertEquals(actual.getCount(), samples.size());
    assertEqualsDouble(min(actual.getMin()), reference.getMin(), 0.0);
    assertEqualsDouble(max(actual.getMax()), reference.getMax(), 0.0);
    assertEqualsDouble(actual.getSum(), reference.getSum(), epsilon);
    assertEqualsDouble(actual.getMean(), reference.getMean(), epsilon);
    assertEqualsDouble(actual.getVariance(), reference.getVariance(), epsilon);
    assertEqualsDouble(actual.getSkewness(), reference.getSkewness(), epsilon);
    assertEqualsDouble(actual.getKurtosis(), reference.getKurtosis(), epsilon);
}
From source file:org.jenetics.stat.DoubleMomentStatisticsTest.java
/**
 * Collects a parallel stream of doubles into {@code DoubleMomentStatistics} and
 * compares every figure with the one computed by {@code DescriptiveStatistics}.
 *
 * @param sampleCounts number of samples to generate
 * @param epsilon      tolerance for all figures except min and max, which must match exactly
 */
@Test(dataProvider = "parallelSampleCounts")
public void parallelSummary(final Integer sampleCounts, final Double epsilon) {
    final List<Double> samples = numbers(sampleCounts);

    // Reference values computed by commons-math
    final DescriptiveStatistics reference = new DescriptiveStatistics();
    for (final Double sample : samples) {
        reference.addValue(sample);
    }

    final DoubleMomentStatistics actual = samples.parallelStream()
            .collect(toDoubleMomentStatistics(Double::doubleValue));

    Assert.assertEquals(actual.getCount(), samples.size());
    assertEqualsDouble(min(actual.getMin()), reference.getMin(), 0.0);
    assertEqualsDouble(max(actual.getMax()), reference.getMax(), 0.0);
    assertEqualsDouble(actual.getSum(), reference.getSum(), epsilon);
    assertEqualsDouble(actual.getMean(), reference.getMean(), epsilon);
    assertEqualsDouble(actual.getVariance(), reference.getVariance(), epsilon);
    assertEqualsDouble(actual.getSkewness(), reference.getSkewness(), epsilon);
    assertEqualsDouble(actual.getKurtosis(), reference.getKurtosis(), epsilon);
}
From source file:org.jenetics.stat.IntMomentStatisticsTest.java
/**
 * Collects a sequential stream of integers into {@code IntMomentStatistics} and
 * compares every figure with the one computed by {@code DescriptiveStatistics}.
 *
 * @param sampleCounts number of samples to generate
 * @param epsilon      tolerance for all figures except min and max, which must match exactly
 */
@Test(dataProvider = "sampleCounts")
public void summary(final Integer sampleCounts, final Double epsilon) {
    final List<Integer> samples = numbers(sampleCounts);

    // Reference values computed by commons-math
    final DescriptiveStatistics reference = new DescriptiveStatistics();
    for (final Integer sample : samples) {
        reference.addValue(sample);
    }

    final IntMomentStatistics actual = samples.stream()
            .collect(toIntMomentStatistics(Integer::intValue));

    Assert.assertEquals(actual.getCount(), samples.size());
    assertEqualsDouble(min(actual.getMin()), reference.getMin(), 0.0);
    assertEqualsDouble(max(actual.getMax()), reference.getMax(), 0.0);
    assertEqualsDouble(actual.getSum(), reference.getSum(), epsilon);
    assertEqualsDouble(actual.getMean(), reference.getMean(), epsilon);
    assertEqualsDouble(actual.getVariance(), reference.getVariance(), epsilon);
    assertEqualsDouble(actual.getSkewness(), reference.getSkewness(), epsilon);
    assertEqualsDouble(actual.getKurtosis(), reference.getKurtosis(), epsilon);
}