Usage examples for org.jfree.data.statistics.BoxAndWhiskerCalculator.calculateQ1
public static double calculateQ1(List values)
From source file: com.diversityarrays.kdxplore.stats.DateSimpleStatistics.java
public DateSimpleStatistics(String statsName, List<KdxSample> samples, Integer nStdDevForOutlier) { super(statsName, Date.class); nSampleMeasurements = samples.size(); Bag<String> bag = new HashBag<>(); List<Long> values = new ArrayList<>(nSampleMeasurements); for (KdxSample sm : samples) { switch (TraitValue.classify(sm.getTraitValue())) { case NA://from w w w . j av a2 s .c o m ++nNA; break; case SET: try { Date date = dateFormat.parse(sm.getTraitValue()); long millis = date.getTime(); values.add(millis); bag.add(String.valueOf(millis)); } catch (ParseException e) { ++nInvalid; } break; case MISSING: case UNSET: default: ++nMissing; break; } } nValidValues = values.size(); switch (nValidValues) { case 0: minValue = null; maxValue = null; mode = null; median = null; mean = null; variance = null; stddev = null; nOutliers = null; stderr = null; break; case 1: mean = new Date(values.get(0)); median = mean; minValue = mean; maxValue = mean; mode = dateFormat.format(mean); variance = null; stddev = null; nOutliers = null; stderr = null; break; default: Collections.sort(values); minValue = new Date(values.get(0)); maxValue = new Date(values.get(values.size() - 1)); mean = new Date((minValue.getTime() + maxValue.getTime()) / 2); long median_l = StatsUtil.computeLongMedian(values); median = new Date(median_l); List<String> modes = StatsUtil.computeMode(bag, null); StringBuilder sb = new StringBuilder(); String sep = ""; for (String s : modes) { sb.append(sep); long millis = Long.parseLong(s); Date d = new Date(millis); sb.append(dateFormat.format(d)); sep = " , "; } mode = sb.toString(); // - - - - // Now for variance, stddev, stderr, nOutliers Instant start = minValue.toInstant(); long meanDays = ChronoUnit.DAYS.between(start, mean.toInstant()); double s2 = 0; for (Long v : values) { s2 += (v - meanDays) * (v - meanDays); } variance = s2 / (nValidValues - 1); stddev = Math.sqrt(variance); stderr = stddev / Math.sqrt(nValidValues); double q1 = 
BoxAndWhiskerCalculator.calculateQ1(values); double q3 = BoxAndWhiskerCalculator.calculateQ3(values); int nout = 0; if (nStdDevForOutlier == null) { double interQuartileRange = q3 - q1; double lowerOutlierThreshold = q1 - (interQuartileRange * 1.5); double upperOutlierThreshold = q3 + (interQuartileRange * 1.5); for (Long value : values) { if (value < lowerOutlierThreshold) { ++nout; lowOutliers.add(new Date(value)); } else if (value > upperOutlierThreshold) { ++nout; highOutliers.add(new Date(value)); } if (lowerOutlierThreshold < value || value < upperOutlierThreshold) { ++nout; } } } else { double lowerOutlierThreshold = meanDays - (nStdDevForOutlier * stddev); double upperOutlierThreshold = meanDays + (nStdDevForOutlier * stddev); for (Long v : values) { Date d = new Date(v); long nDays = ChronoUnit.DAYS.between(start, d.toInstant()); if (nDays < lowerOutlierThreshold) { ++nout; lowOutliers.add(d); } else if (nDays > upperOutlierThreshold) { ++nout; highOutliers.add(d); } } } nOutliers = nout; break; } }
From source file: com.diversityarrays.kdxplore.stats.IntegerSimpleStatistics.java
/**
 * Computes summary statistics over the integer-valued trait values of
 * {@code sampleMeasurements}.
 *
 * <p>Every sample's trait value is classified. NA values are counted in {@code nNA},
 * missing/unset (and any unrecognised) values in {@code nMissing}. SET values are
 * validated through {@code tvp}: rejected values are counted in {@code nInvalid}, a
 * valid result carrying a {@code null} number is counted as missing, and everything else
 * contributes its {@code intValue()}.
 *
 * <p>With no valid values every statistic is {@code null}. With exactly one valid value
 * the mean, median, minimum and maximum are that value and the spread statistics are
 * {@code null}. Otherwise mean, variance, standard deviation, standard error, minimum,
 * maximum, median, mode, quartiles and outliers are computed.
 *
 * @param statsName          name handed to the superclass constructor
 * @param sampleMeasurements samples whose trait values are summarised
 * @param nStdDevForOutlier  when {@code null}, the outlier thresholds are the quartiles
 *                           widened by 1.5 x IQR; otherwise they are the mean plus or
 *                           minus this many standard deviations
 * @param tvp                validator used to convert trait values to numbers; it is
 *                           dereferenced for every SET value
 */
public IntegerSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        Integer nStdDevForOutlier, NumericTraitValidationProcessor tvp) {
    super(statsName, Integer.class);

    nSampleMeasurements = sampleMeasurements.size();

    long sum = 0;
    @SuppressWarnings("unused")
    double ssq = 0;

    // svalues counts occurrences of each distinct value for the mode computation.
    Bag<String> svalues = new HashBag<String>();
    List<Integer> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sm : sampleMeasurements) {
        String traitValue = sm.getTraitValue();

        switch (TraitValue.classify(traitValue)) {
            case NA:
                ++nNA;
                break;
            case SET:
                Either<TraitValueType, Number> either = tvp.isTraitValueValid(traitValue);
                if (either.isRight()) {
                    try {
                        Number number = either.right();
                        if (number == null) {
                            ++nMissing;
                        } else {
                            int i = number.intValue();
                            sum += i;
                            ssq += i * 1.0 * i;
                            values.add(i);
                            svalues.add(String.valueOf(i));
                        }
                    } catch (NumberFormatException e) {
                        ++nInvalid;
                    }
                } else {
                    ++nInvalid;
                }
                break;
            case MISSING:
            case UNSET:
            default:
                ++nMissing;
                break;
        }
    }

    nValidValues = values.size();
    switch (nValidValues) {
        case 0:
            mean = null;
            mode = null;
            median = null;
            minValue = null;
            maxValue = null;
            variance = null;
            stddev = null;
            nOutliers = null;
            stderr = null;
            break;

        case 1:
            mean = values.get(0).intValue();
            mode = mean.toString();
            median = mean;
            minValue = mean;
            maxValue = mean;
            // Spread statistics are undefined for a single observation.
            variance = null;
            stddev = null;
            nOutliers = null;
            stderr = null;
            break;

        default:
            // Divide in long arithmetic and narrow afterwards; casting the sum to int
            // first would wrap whenever the sum exceeds the int range.
            mean = (int) (sum / nValidValues);

            // Deviations are taken from the un-truncated mean so that the variance is not
            // inflated by the rounding of the reported integer mean.
            double exactMean = (double) sum / nValidValues;

            if (USE_TWO_PASS) {
                double s2 = 0;
                for (Number n : values) {
                    double delta = n.doubleValue() - exactMean;
                    s2 += delta * delta;
                }
                variance = s2 / (nValidValues - 1);
            } else {
                // Promote to double before squaring: long arithmetic here would truncate
                // the division and can overflow on sum * sum.
                variance = (ssq - ((double) sum * sum) / nValidValues) / (nValidValues - 1);
            }
            stddev = Math.sqrt(variance);
            stderr = stddev / Math.sqrt(nValidValues);

            Collections.sort(values);
            minValue = values.get(0).intValue();
            maxValue = values.get(values.size() - 1).intValue();

            median = StatsUtil.computeIntegerMedian(values);

            List<String> modes = StatsUtil.computeMode(svalues, tvp);

            String numberFormat = tvp == null ? null : tvp.getStringNumberFormat();

            // Join the modes with " , ", rendering each one as an integer where the
            // validator accepts it and falling back to the raw string otherwise.
            StringBuilder sb = new StringBuilder();
            String sep = "";
            for (String s : modes) {
                sb.append(sep);
                if (tvp == null) {
                    sb.append(s);
                } else {
                    Either<TraitValueType, Number> either = tvp.isTraitValueValid(s);
                    if (either.isRight()) {
                        Number number = either.right();
                        if (Integer.class.isAssignableFrom(tvp.getNumberClass())) {
                            sb.append(number.intValue());
                        } else if (numberFormat == null) {
                            sb.append(s);
                        } else {
                            sb.append(number.intValue());
                        }
                    } else {
                        sb.append(s);
                    }
                }
                sep = " , ";
            }
            mode = sb.toString();

            // Quartiles are taken over the sorted values and stored rounded to int.
            double q1 = BoxAndWhiskerCalculator.calculateQ1(values);
            double q3 = BoxAndWhiskerCalculator.calculateQ3(values);

            quartile1 = (int) Math.round(q1);
            quartile3 = (int) Math.round(q3);

            if (nStdDevForOutlier == null) {
                double interQuartileRange = q3 - q1;

                double lowerOutlierThreshold = q1 - (interQuartileRange * 1.5);
                double upperOutlierThreshold = q3 + (interQuartileRange * 1.5);

                collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
            } else {
                double lowerOutlierThreshold = mean - (nStdDevForOutlier * stddev);
                double upperOutlierThreshold = mean + (nStdDevForOutlier * stddev);

                collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
            }
            break;
    }
}
From source file: com.diversityarrays.kdxplore.stats.DoubleSimpleStatistics.java
/**
 * Computes summary statistics over the floating-point trait values of
 * {@code sampleMeasurements}.
 *
 * <p>Each trait value is classified. NA values are tallied in {@code nNA}, and
 * missing/unset (or otherwise unrecognised) values in {@code nMissing}. SET values go
 * through {@code tvp}: rejected values are tallied in {@code nInvalid}, an accepted
 * result with a {@code null} number is tallied as missing, and anything else contributes
 * its {@code doubleValue()}.
 *
 * <p>With no valid values every statistic is {@code null}. With a single valid value the
 * mean, median, minimum and maximum are that value and the spread statistics are
 * {@code null}. Otherwise the full set of statistics, the quartiles and the outliers are
 * derived.
 *
 * @param statsName          name handed to the superclass constructor
 * @param sampleMeasurements samples whose trait values are summarised
 * @param nStdDevForOutlier  when {@code null}, the outlier thresholds are the quartiles
 *                           widened by 1.5 x IQR; otherwise they are the mean plus or
 *                           minus this many standard deviations
 * @param tvp                validator used to convert trait values to numbers; must not
 *                           be {@code null} because it is dereferenced immediately
 * @throws IllegalStateException if the validation rule of {@code tvp} is an integral range
 */
public DoubleSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        Integer nStdDevForOutlier, NumericTraitValidationProcessor tvp) {
    super(statsName, Double.class);

    String stringFormat = tvp.getStringNumberFormat();

    if (tvp.validationRule.isIntegralRange()) {
        throw new IllegalStateException("ValidationRule is for an integralRange: " + tvp.validationRule);
    }

    decimalFormat = tvp.validationRule.getDecimalFormat();
    nSampleMeasurements = sampleMeasurements.size();

    double total = 0;
    @SuppressWarnings("unused")
    double sumOfSquares = 0;

    // Occurrence counts of the (optionally formatted) values, used for the mode.
    Bag<String> occurrences = new HashBag<String>();
    List<Double> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sample : sampleMeasurements) {
        String rawValue = sample.getTraitValue();

        switch (TraitValue.classify(rawValue)) {
            case NA:
                ++nNA;
                break;

            case SET: {
                Either<TraitValueType, Number> parsed = tvp.isTraitValueValid(rawValue);
                if (!parsed.isRight()) {
                    ++nInvalid;
                    break;
                }
                try {
                    Number num = parsed.right();
                    if (num == null) {
                        ++nMissing;
                        break;
                    }
                    double d = num.doubleValue();
                    total += d;
                    sumOfSquares += d * d;
                    values.add(d);
                    occurrences.add(stringFormat == null ? rawValue : String.format(stringFormat, d));
                } catch (NumberFormatException e) {
                    ++nInvalid;
                }
                break;
            }

            case MISSING:
            case UNSET:
            default:
                ++nMissing;
                break;
        }
    }

    nValidValues = values.size();

    if (nValidValues == 0) {
        // Nothing usable: every statistic is undefined.
        mean = null;
        median = null;
        mode = null;
        minValue = null;
        maxValue = null;
        variance = null;
        stddev = null;
        stderr = null;
        nOutliers = null;
    } else if (nValidValues == 1) {
        // A single observation fixes the location statistics but leaves spread undefined.
        mean = values.get(0);
        mode = mean.toString();
        median = mean;
        minValue = mean;
        maxValue = mean;
        variance = null;
        stddev = null;
        stderr = null;
        nOutliers = null;
    } else {
        mean = total / nValidValues;

        if (USE_TWO_PASS) {
            double squaredDeviations = 0;
            for (Double d : values) {
                double diff = d - mean;
                squaredDeviations += diff * diff;
            }
            variance = squaredDeviations / (nValidValues - 1);
        } else {
            variance = (sumOfSquares - (total * total) / nValidValues) / (nValidValues - 1);
        }
        stddev = Math.sqrt(variance);
        stderr = stddev / Math.sqrt(nValidValues);

        Collections.sort(values);
        minValue = values.get(0);
        maxValue = values.get(values.size() - 1);

        median = StatsUtil.computeDoubleMedian(values);

        List<String> modes = StatsUtil.computeMode(occurrences, tvp);
        String numberFormat = tvp == null ? null : tvp.getStringNumberFormat();

        // Render every mode, then join the pieces with " , ".
        List<String> rendered = new ArrayList<>();
        for (String candidate : modes) {
            String piece = candidate;
            if (tvp != null) {
                Either<TraitValueType, Number> checked = tvp.isTraitValueValid(candidate);
                if (checked.isRight()) {
                    Number num = checked.right();
                    if (Integer.class.isAssignableFrom(tvp.getNumberClass())) {
                        piece = String.valueOf(num.intValue());
                    } else if (numberFormat != null) {
                        piece = String.format(numberFormat, num.doubleValue());
                    }
                }
            }
            rendered.add(piece);
        }
        mode = String.join(" , ", rendered);

        // Quartiles are taken over the sorted values.
        double q1 = BoxAndWhiskerCalculator.calculateQ1(values);
        double q3 = BoxAndWhiskerCalculator.calculateQ3(values);
        quartile1 = q1;
        quartile3 = q3;

        double lowerOutlierThreshold;
        double upperOutlierThreshold;
        if (nStdDevForOutlier != null) {
            lowerOutlierThreshold = mean - (nStdDevForOutlier * stddev);
            upperOutlierThreshold = mean + (nStdDevForOutlier * stddev);
        } else {
            double interQuartileRange = q3 - q1;
            lowerOutlierThreshold = q1 - (interQuartileRange * 1.5);
            upperOutlierThreshold = q3 + (interQuartileRange * 1.5);
        }
        collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
    }
}