List of usage examples for org.apache.commons.math.stat.descriptive.StatisticalSummary.getStandardDeviation()
/** Returns the standard deviation reported by this statistical summary, as a {@code double}. */
double getStandardDeviation();
From source file: org.datacleaner.beans.NumberAnalyzer.java
/**
 * Assembles the analyzer result as a crosstab of columns versus measures.
 *
 * <p>The measure dimension always contains the basic measures; the median,
 * percentile, skewness and kurtosis categories are only added when the
 * {@code descriptiveStatistics} flag is set. For each column the null count
 * and row count are always written; every other measure is written only
 * when the column's statistics report at least one value ({@code getN() > 0}).
 *
 * @return a {@link NumberAnalyzerResult} wrapping the analyzed columns and
 *         the populated crosstab
 */
@Override
public NumberAnalyzerResult getResult() {
    // Measure dimension: the basic categories first, in a fixed order.
    final CrosstabDimension measureDimension = new CrosstabDimension(DIMENSION_MEASURE);
    measureDimension.addCategory(MEASURE_ROW_COUNT);
    measureDimension.addCategory(MEASURE_NULL_COUNT);
    measureDimension.addCategory(MEASURE_HIGHEST_VALUE);
    measureDimension.addCategory(MEASURE_LOWEST_VALUE);
    measureDimension.addCategory(MEASURE_SUM);
    measureDimension.addCategory(MEASURE_MEAN);
    measureDimension.addCategory(MEASURE_GEOMETRIC_MEAN);
    measureDimension.addCategory(MEASURE_STANDARD_DEVIATION);
    measureDimension.addCategory(MEASURE_VARIANCE);
    measureDimension.addCategory(MEASURE_SECOND_MOMENT);
    measureDimension.addCategory(MEASURE_SUM_OF_SQUARES);
    if (descriptiveStatistics) {
        // Extra categories that are only filled from DescriptiveStatistics.
        measureDimension.addCategory(MEASURE_MEDIAN);
        measureDimension.addCategory(MEASURE_PERCENTILE25);
        measureDimension.addCategory(MEASURE_PERCENTILE75);
        measureDimension.addCategory(MEASURE_SKEWNESS);
        measureDimension.addCategory(MEASURE_KURTOSIS);
    }

    // Column dimension: one category per analyzed column, keyed by name.
    final CrosstabDimension columnDimension = new CrosstabDimension(DIMENSION_COLUMN);
    for (InputColumn<? extends Number> inputColumn : _columns) {
        columnDimension.addCategory(inputColumn.getName());
    }

    final Crosstab<Number> crosstab = new Crosstab<Number>(Number.class, columnDimension, measureDimension);

    for (InputColumn<? extends Number> inputColumn : _columns) {
        final CrosstabNavigator<Number> navigator = crosstab.navigate().where(columnDimension,
                inputColumn.getName());
        final NumberAnalyzerColumnDelegate columnDelegate = _columnDelegates.get(inputColumn);
        final StatisticalSummary stats = columnDelegate.getStatistics();

        // Null count and row count are reported regardless of whether any values were seen.
        final int nulls = columnDelegate.getNullCount();
        navigator.where(measureDimension, MEASURE_NULL_COUNT).put(nulls);
        if (nulls > 0) {
            addAttachment(navigator, columnDelegate.getNullAnnotation(), inputColumn);
        }

        final int rows = columnDelegate.getNumRows();
        navigator.where(measureDimension, MEASURE_ROW_COUNT).put(rows);

        final long valueCount = stats.getN();
        if (valueCount <= 0) {
            // No values recorded for this column: leave the remaining measures empty.
            continue;
        }

        final double max = stats.getMax();
        final double min = stats.getMin();
        final double total = stats.getSum();
        final double average = stats.getMean();
        final double stdDev = stats.getStandardDeviation();
        final double var = stats.getVariance();

        // These three are not part of StatisticalSummary, so the concrete type is needed.
        final double geoMean;
        final double m2;
        final double sumSq;
        if (descriptiveStatistics) {
            final DescriptiveStatistics fullStats = (DescriptiveStatistics) stats;
            geoMean = fullStats.getGeometricMean();
            sumSq = fullStats.getSumsq();
            m2 = new SecondMoment().evaluate(fullStats.getValues());
        } else {
            final SummaryStatistics runningStats = (SummaryStatistics) stats;
            geoMean = runningStats.getGeometricMean();
            m2 = runningStats.getSecondMoment();
            sumSq = runningStats.getSumsq();
        }

        // Each attachment call directly follows the where(...).put(...) it belongs to.
        navigator.where(measureDimension, MEASURE_HIGHEST_VALUE).put(max);
        addAttachment(navigator, columnDelegate.getMaxAnnotation(), inputColumn);

        navigator.where(measureDimension, MEASURE_LOWEST_VALUE).put(min);
        addAttachment(navigator, columnDelegate.getMinAnnotation(), inputColumn);

        navigator.where(measureDimension, MEASURE_SUM).put(total);
        navigator.where(measureDimension, MEASURE_MEAN).put(average);
        navigator.where(measureDimension, MEASURE_GEOMETRIC_MEAN).put(geoMean);
        navigator.where(measureDimension, MEASURE_STANDARD_DEVIATION).put(stdDev);
        navigator.where(measureDimension, MEASURE_VARIANCE).put(var);
        navigator.where(measureDimension, MEASURE_SUM_OF_SQUARES).put(sumSq);
        navigator.where(measureDimension, MEASURE_SECOND_MOMENT).put(m2);

        if (descriptiveStatistics) {
            final DescriptiveStatistics fullStats = (DescriptiveStatistics) stats;
            final double kurt = fullStats.getKurtosis();
            final double skew = fullStats.getSkewness();
            final double p50 = fullStats.getPercentile(50.0);
            final double p25 = fullStats.getPercentile(25.0);
            final double p75 = fullStats.getPercentile(75.0);

            navigator.where(measureDimension, MEASURE_MEDIAN).put(p50);
            navigator.where(measureDimension, MEASURE_PERCENTILE25).put(p25);
            navigator.where(measureDimension, MEASURE_PERCENTILE75).put(p75);
            navigator.where(measureDimension, MEASURE_SKEWNESS).put(skew);
            navigator.where(measureDimension, MEASURE_KURTOSIS).put(kurt);
        }
    }

    return new NumberAnalyzerResult(_columns, crosstab);
}
From source file: org.datacleaner.beans.NumberAnalyzerResultReducer.java
@Override protected Serializable reduceValues(List<Object> slaveValues, String column, String measure, Collection<? extends NumberAnalyzerResult> results, Class<?> valueClass) { if (SUM_MEASURES.contains(measure)) { return sum(slaveValues); } else if (NumberAnalyzer.MEASURE_HIGHEST_VALUE.equals(measure)) { return maximum(slaveValues); } else if (NumberAnalyzer.MEASURE_LOWEST_VALUE.equals(measure)) { return minimum(slaveValues); } else if (NumberAnalyzer.MEASURE_MEAN.equals(measure)) { StatisticalSummary summary = getSummary(column, results); return summary.getMean(); } else if (NumberAnalyzer.MEASURE_STANDARD_DEVIATION.equals(measure)) { StatisticalSummary summary = getSummary(column, results); return summary.getStandardDeviation(); } else if (NumberAnalyzer.MEASURE_VARIANCE.equals(measure)) { StatisticalSummary summary = getSummary(column, results); return summary.getVariance(); }// w ww.j av a 2 s .c om logger.warn("Encountered non-reduceable measure '{}'. Slave values are: {}", measure, slaveValues); return null; }