List of usage examples for org.apache.commons.collections15.bag HashBag HashBag
public HashBag()
HashBag
From source file:com.diversityarrays.kdxplore.stats.TextSimpleStatistics.java
/**
 * Builds summary statistics (counts, min, max, median, mode) for a text-valued trait.
 *
 * <p>Each sample's trait value is classified; values classified as SET are passed through
 * {@code tvp}. Validated values are collected for ordering, while the raw trait values are
 * counted in a bag that is later used to compute the mode.
 *
 * @param statsName          name passed to the superclass constructor
 * @param sampleMeasurements samples to summarise
 * @param tvp                validator applied to every SET trait value; must not be null
 */
public TextSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        TraitValidationProcessor<String> tvp) {
    super(statsName, String.class);

    nSampleMeasurements = sampleMeasurements.size();

    // Raw trait values, counted for the mode computation.
    Bag<String> svalues = new HashBag<>();
    // Validated values, used for min/max/median.
    List<String> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sm : sampleMeasurements) {
        String traitValue = sm.getTraitValue();
        switch (TraitValue.classify(traitValue)) {
        case NA:
            ++nNA;
            break;

        case SET:
            Either<TraitValueType, String> either = tvp.isTraitValueValid(traitValue);
            if (either.isRight()) {
                String tv = either.right();
                if (tv == null) {
                    ++nMissing;
                } else {
                    values.add(tv);
                    svalues.add(traitValue);
                }
            } else {
                ++nInvalid;
            }
            break;

        case MISSING:
        case UNSET:
        default:
            ++nMissing;
            break;
        }
    }

    nValidValues = values.size();
    switch (nValidValues) {
    case 0:
        minValue = null;
        maxValue = null;
        mode = null;
        median = null;
        break;

    case 1:
        mode = values.get(0);
        median = mode;
        minValue = mode;
        maxValue = mode;
        break;

    default:
        // Key the lookup by the values that are actually being sorted. The original keyed it by
        // the raw trait values, so any value altered by validation had no entry and could not be
        // ordered.
        Map<String, MixedString> mixedByValue = new HashMap<>();
        for (String v : values) {
            if (!mixedByValue.containsKey(v)) {
                mixedByValue.put(v, MixedString.createFloatingMixed(v,
                        MixedString.DIFFERENTIATE_ON_STRINGS_WHEN_NUMERICALLY_EQUAL));
            }
        }

        Comparator<String> comparator = new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                MixedString lft = mixedByValue.get(o1);
                MixedString ryt = mixedByValue.get(o2);
                // Defensive: entries without a MixedString sort first.
                if (lft == null) {
                    return (ryt == null) ? 0 : -1;
                }
                if (ryt == null) {
                    return 1;
                }
                return lft.compareTo(ryt);
            }
        };
        Collections.sort(values, comparator);

        minValue = values.get(0);
        maxValue = values.get(values.size() - 1);
        median = StatsUtil.computeStringMedian(values);

        List<String> modes = StatsUtil.computeMode(svalues, null);

        // tvp is necessarily non-null here: it was dereferenced for every valid value above.
        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (String s : modes) {
            sb.append(sep);
            // The bag holds raw values, so re-validate to obtain the validated form for display.
            Either<TraitValueType, String> modeEither = tvp.isTraitValueValid(s);
            if (modeEither.isRight()) {
                sb.append(modeEither.right());
            } else {
                sb.append(s);
            }
            sep = " , ";
        }
        mode = sb.toString();
        break;
    }
}
From source file:com.diversityarrays.kdxplore.stats.CategoricalSimpleStatistics.java
/**
 * Builds summary statistics (counts, min, max, median, mode) for a categorical trait.
 *
 * <p>Valid values are ordered by their position in {@code tvp.getChoices()}; values that are
 * not among the choices are given order 0 and therefore sort before all known choices.
 *
 * @param statsName          name passed to the superclass constructor
 * @param sampleMeasurements samples to summarise
 * @param tvp                validator applied to every SET trait value and source of the
 *                           choice ordering; must not be null
 */
public CategoricalSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        CategoricalTraitValidationProcessor tvp) {
    super(statsName, String.class);

    nSampleMeasurements = sampleMeasurements.size();

    // Raw trait values, counted for the mode computation.
    Bag<String> svalues = new HashBag<>();
    // Validated values, used for min/max/median.
    List<String> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sm : sampleMeasurements) {
        String traitValue = sm.getTraitValue();
        switch (TraitValue.classify(traitValue)) {
        case NA:
            ++nNA;
            break;

        case SET:
            Either<TraitValueType, String> either = tvp.isTraitValueValid(traitValue);
            if (either.isRight()) {
                String tv = either.right();
                if (tv == null) {
                    ++nMissing;
                } else {
                    values.add(tv);
                    // Previously the bag was never filled, so the mode computed from it was
                    // always empty whenever there were two or more valid values.
                    svalues.add(traitValue);
                }
            } else {
                ++nInvalid;
            }
            break;

        case MISSING:
        case UNSET:
        default:
            ++nMissing;
            break;
        }
    }

    nValidValues = values.size();
    switch (nValidValues) {
    case 0:
        minValue = null;
        maxValue = null;
        mode = null;
        median = null;
        break;

    case 1:
        mode = values.get(0);
        median = mode;
        minValue = mode;
        maxValue = mode;
        break;

    default:
        // 1-based position of each choice; 0 is reserved for values that are not a choice.
        Map<String, Integer> orderByChoice = new HashMap<>();
        int order = 0;
        for (String choice : tvp.getChoices()) {
            orderByChoice.put(choice, ++order);
        }

        Comparator<String> comparator = new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                Integer lft = orderByChoice.get(o1);
                if (lft == null) {
                    lft = 0;
                }
                Integer ryt = orderByChoice.get(o2);
                if (ryt == null) {
                    ryt = 0;
                }
                return lft.compareTo(ryt);
            }
        };
        Collections.sort(values, comparator);

        minValue = values.get(0);
        maxValue = values.get(values.size() - 1);
        median = StatsUtil.computeStringMedian(values);
        // TODO q1, q3

        List<String> modes = StatsUtil.computeMode(svalues, null);

        // tvp is necessarily non-null here: it was dereferenced above.
        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (String s : modes) {
            sb.append(sep);
            // The bag holds raw values, so re-validate to obtain the validated form for display.
            Either<TraitValueType, String> modeEither = tvp.isTraitValueValid(s);
            if (modeEither.isRight()) {
                sb.append(modeEither.right());
            } else {
                sb.append(s);
            }
            sep = " , ";
        }
        mode = sb.toString();
        break;
    }
}
From source file:com.diversityarrays.kdxplore.stats.DateSimpleStatistics.java
/**
 * Builds summary statistics for a date-valued trait.
 *
 * <p>Trait values classified as SET are parsed with {@code dateFormat}; values that fail to
 * parse are counted as invalid. Min, max, mean and median are reported as dates. Variance,
 * standard deviation and standard error are computed on whole-day offsets from the minimum
 * date.
 *
 * <p>Outliers are detected in one of two ways:
 * <ul>
 * <li>{@code nStdDevForOutlier == null}: the 1.5 x inter-quartile-range rule, applied to the
 *     epoch-millisecond values;</li>
 * <li>otherwise: values whose day offset is more than {@code nStdDevForOutlier} standard
 *     deviations from the mean day offset.</li>
 * </ul>
 *
 * @param statsName         name passed to the superclass constructor
 * @param samples           samples to summarise
 * @param nStdDevForOutlier number of standard deviations for the outlier test, or null to use
 *                          the inter-quartile-range rule
 */
public DateSimpleStatistics(String statsName, List<KdxSample> samples, Integer nStdDevForOutlier) {
    super(statsName, Date.class);

    nSampleMeasurements = samples.size();

    // Epoch-millis rendered as strings, counted for the mode computation.
    Bag<String> bag = new HashBag<>();
    List<Long> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sm : samples) {
        switch (TraitValue.classify(sm.getTraitValue())) {
        case NA:
            ++nNA;
            break;

        case SET:
            try {
                Date date = dateFormat.parse(sm.getTraitValue());
                long millis = date.getTime();
                values.add(millis);
                bag.add(String.valueOf(millis));
            } catch (ParseException e) {
                ++nInvalid;
            }
            break;

        case MISSING:
        case UNSET:
        default:
            ++nMissing;
            break;
        }
    }

    nValidValues = values.size();
    switch (nValidValues) {
    case 0:
        minValue = null;
        maxValue = null;
        mode = null;
        median = null;
        mean = null;
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
        break;

    case 1:
        mean = new Date(values.get(0));
        median = mean;
        minValue = mean;
        maxValue = mean;
        mode = dateFormat.format(mean);
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
        break;

    default:
        Collections.sort(values);
        long minMillis = values.get(0);
        minValue = new Date(minMillis);
        maxValue = new Date(values.get(values.size() - 1));

        // Arithmetic mean of all values. The original used (min + max) / 2, which is the
        // midrange, not the mean. Offsets from the minimum are summed to keep the total small.
        long offsetSum = 0;
        for (Long v : values) {
            offsetSum += v - minMillis;
        }
        mean = new Date(minMillis + offsetSum / nValidValues);

        long median_l = StatsUtil.computeLongMedian(values);
        median = new Date(median_l);

        List<String> modes = StatsUtil.computeMode(bag, null);

        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (String s : modes) {
            sb.append(sep);
            // Bag entries are epoch-millis strings; turn them back into formatted dates.
            sb.append(dateFormat.format(new Date(Long.parseLong(s))));
            sep = " , ";
        }
        mode = sb.toString();

        // - - - -
        // Now for variance, stddev, stderr, nOutliers
        Instant start = minValue.toInstant();
        long meanDays = ChronoUnit.DAYS.between(start, mean.toInstant());

        double s2 = 0;
        for (Long v : values) {
            // Work in days throughout: the original subtracted the mean in days from the raw
            // millisecond value, mixing units.
            long nDays = ChronoUnit.DAYS.between(start, Instant.ofEpochMilli(v));
            double diff = nDays - meanDays;
            s2 += diff * diff;
        }

        variance = s2 / (nValidValues - 1);
        stddev = Math.sqrt(variance);
        stderr = stddev / Math.sqrt(nValidValues);

        double q1 = BoxAndWhiskerCalculator.calculateQ1(values);
        double q3 = BoxAndWhiskerCalculator.calculateQ3(values);

        int nout = 0;
        if (nStdDevForOutlier == null) {
            // Quartiles are computed from the millisecond values, so compare in milliseconds.
            double interQuartileRange = q3 - q1;
            double lowerOutlierThreshold = q1 - (interQuartileRange * 1.5);
            double upperOutlierThreshold = q3 + (interQuartileRange * 1.5);

            for (Long value : values) {
                if (value < lowerOutlierThreshold) {
                    ++nout;
                    lowOutliers.add(new Date(value));
                } else if (value > upperOutlierThreshold) {
                    ++nout;
                    highOutliers.add(new Date(value));
                }
                // A further always-true check used to increment nout here for every value;
                // it has been removed so that nout counts only real outliers.
            }
        } else {
            // stddev is in days, so compare day offsets.
            double lowerOutlierThreshold = meanDays - (nStdDevForOutlier * stddev);
            double upperOutlierThreshold = meanDays + (nStdDevForOutlier * stddev);

            for (Long v : values) {
                Date d = new Date(v);
                long nDays = ChronoUnit.DAYS.between(start, d.toInstant());
                if (nDays < lowerOutlierThreshold) {
                    ++nout;
                    lowOutliers.add(d);
                } else if (nDays > upperOutlierThreshold) {
                    ++nout;
                    highOutliers.add(d);
                }
            }
        }
        nOutliers = nout;
        break;
    }
}
From source file:com.diversityarrays.kdxplore.stats.IntegerSimpleStatistics.java
/**
 * Builds summary statistics for an integer-valued trait.
 *
 * <p>Trait values classified as SET are validated with {@code tvp}; valid numbers are
 * truncated to {@code int}. The reported {@code mean} is an integer, but variance, standard
 * deviation, standard error and the standard-deviation outlier thresholds are computed from
 * the exact (fractional) mean.
 *
 * @param statsName          name passed to the superclass constructor
 * @param sampleMeasurements samples to summarise
 * @param nStdDevForOutlier  number of standard deviations for the outlier test, or null to
 *                           use the 1.5 x inter-quartile-range rule
 * @param tvp                validator applied to every SET trait value; must not be null
 */
public IntegerSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        Integer nStdDevForOutlier, NumericTraitValidationProcessor tvp) {
    super(statsName, Integer.class);

    nSampleMeasurements = sampleMeasurements.size();

    long sum = 0;
    @SuppressWarnings("unused")
    double ssq = 0;

    // String form of each value, counted for the mode computation.
    Bag<String> svalues = new HashBag<String>();
    List<Integer> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sm : sampleMeasurements) {
        String traitValue = sm.getTraitValue();
        switch (TraitValue.classify(traitValue)) {
        case NA:
            ++nNA;
            break;

        case SET:
            Either<TraitValueType, Number> either = tvp.isTraitValueValid(traitValue);
            if (either.isRight()) {
                try {
                    Number number = either.right();
                    if (number == null) {
                        ++nMissing;
                    } else {
                        int i = number.intValue();
                        sum += i;
                        ssq += i * 1.0 * i;
                        values.add(i);
                        svalues.add(String.valueOf(i));
                    }
                } catch (NumberFormatException e) {
                    ++nInvalid;
                }
            } else {
                ++nInvalid;
            }
            break;

        case MISSING:
        case UNSET:
        default:
            ++nMissing;
            break;
        }
    }

    nValidValues = values.size();
    switch (nValidValues) {
    case 0:
        mean = null;
        mode = null;
        median = null;
        minValue = null;
        maxValue = null;
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
        break;

    case 1:
        mean = values.get(0).intValue();
        mode = mean.toString();
        median = mean;
        minValue = mean;
        maxValue = mean;
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
        break;

    default:
        // Divide first, then narrow. The original "(int) sum / n" cast the long sum to int
        // before dividing, which silently overflows for large sums.
        mean = (int) (sum / nValidValues);
        // Dispersion is measured around the exact mean, not the truncated integer one.
        double exactMean = sum / (double) nValidValues;

        if (USE_TWO_PASS) {
            double s2 = 0;
            for (Number n : values) {
                double diff = n.doubleValue() - exactMean;
                s2 += diff * diff;
            }
            variance = s2 / (nValidValues - 1);
        } else {
            // Square and divide in floating point: long arithmetic here could overflow and
            // the integer division dropped the fractional part.
            variance = (ssq - ((double) sum * sum) / nValidValues) / (nValidValues - 1);
        }
        stddev = Math.sqrt(variance);
        stderr = stddev / Math.sqrt(nValidValues);

        Collections.sort(values);
        minValue = values.get(0).intValue();
        maxValue = values.get(values.size() - 1).intValue();

        median = StatsUtil.computeIntegerMedian(values);

        List<String> modes = StatsUtil.computeMode(svalues, tvp);

        // tvp is necessarily non-null here: it was dereferenced for every valid value above.
        String numberFormat = tvp.getStringNumberFormat();

        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (String s : modes) {
            sb.append(sep);
            Either<TraitValueType, Number> modeEither = tvp.isTraitValueValid(s);
            if (modeEither.isRight()) {
                Number number = modeEither.right();
                if (Integer.class.isAssignableFrom(tvp.getNumberClass())) {
                    sb.append(number.intValue());
                } else if (numberFormat == null) {
                    sb.append(s);
                } else {
                    sb.append(number.intValue());
                }
            } else {
                sb.append(s);
            }
            sep = " , ";
        }
        mode = sb.toString();

        double q1 = BoxAndWhiskerCalculator.calculateQ1(values);
        double q3 = BoxAndWhiskerCalculator.calculateQ3(values);

        quartile1 = (int) Math.round(q1);
        quartile3 = (int) Math.round(q3);

        if (nStdDevForOutlier == null) {
            double interQuartileRange = q3 - q1;
            double lowerOutlierThreshold = q1 - (interQuartileRange * 1.5);
            double upperOutlierThreshold = q3 + (interQuartileRange * 1.5);
            collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
        } else {
            double lowerOutlierThreshold = exactMean - (nStdDevForOutlier * stddev);
            double upperOutlierThreshold = exactMean + (nStdDevForOutlier * stddev);
            collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
        }
        break;
    }
}
From source file:com.diversityarrays.kdxplore.stats.DoubleSimpleStatistics.java
/**
 * Builds summary statistics for a decimal-valued trait.
 *
 * <p>Trait values classified as SET are validated with {@code tvp}. Each valid number
 * contributes its {@code double} value to the numeric statistics and a string form (formatted
 * with the processor's number format when one is available, otherwise the raw trait value)
 * to the bag from which the mode is computed.
 *
 * @param statsName          name passed to the superclass constructor
 * @param sampleMeasurements samples to summarise
 * @param nStdDevForOutlier  number of standard deviations for the outlier test, or null to
 *                           use the 1.5 x inter-quartile-range rule
 * @param tvp                validator applied to every SET trait value; must not be null
 * @throws IllegalStateException if the processor's validation rule is for an integral range
 */
public DoubleSimpleStatistics(String statsName, List<KdxSample> sampleMeasurements,
        Integer nStdDevForOutlier, NumericTraitValidationProcessor tvp) {
    super(statsName, Double.class);

    String stringFormat = tvp.getStringNumberFormat();

    if (tvp.validationRule.isIntegralRange()) {
        throw new IllegalStateException("ValidationRule is for an integralRange: " + tvp.validationRule);
    }

    decimalFormat = tvp.validationRule.getDecimalFormat();
    nSampleMeasurements = sampleMeasurements.size();

    double total = 0;
    @SuppressWarnings("unused")
    double sumOfSquares = 0;

    Bag<String> modeBag = new HashBag<String>();
    List<Double> values = new ArrayList<>(nSampleMeasurements);

    for (KdxSample sample : sampleMeasurements) {
        String raw = sample.getTraitValue();
        switch (TraitValue.classify(raw)) {
        case NA:
            ++nNA;
            break;

        case SET:
            Either<TraitValueType, Number> checked = tvp.isTraitValueValid(raw);
            if (!checked.isRight()) {
                ++nInvalid;
                break;
            }
            try {
                Number parsed = checked.right();
                if (parsed == null) {
                    ++nMissing;
                } else {
                    double d = parsed.doubleValue();
                    total += d;
                    sumOfSquares += d * d;
                    values.add(d);
                    // Count the formatted form when a format exists, else the raw text.
                    String counted = (stringFormat == null) ? raw : String.format(stringFormat, d);
                    modeBag.add(counted);
                }
            } catch (NumberFormatException e) {
                ++nInvalid;
            }
            break;

        case MISSING:
        case UNSET:
        default:
            ++nMissing;
            break;
        }
    }

    nValidValues = values.size();

    if (nValidValues == 0) {
        // Nothing valid: every statistic is undefined.
        mean = null;
        mode = null;
        median = null;
        minValue = null;
        maxValue = null;
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
    } else if (nValidValues == 1) {
        // A single value is its own mean/median/min/max; dispersion is undefined.
        mean = values.get(0);
        mode = mean.toString();
        median = mean;
        minValue = mean;
        maxValue = mean;
        variance = null;
        stddev = null;
        nOutliers = null;
        stderr = null;
    } else {
        mean = total / nValidValues;

        if (USE_TWO_PASS) {
            double squaredDeviations = 0;
            for (Double d : values) {
                double delta = d - mean;
                squaredDeviations += delta * delta;
            }
            variance = squaredDeviations / (nValidValues - 1);
        } else {
            variance = (sumOfSquares - (total * total) / nValidValues) / (nValidValues - 1);
        }
        stddev = Math.sqrt(variance);
        stderr = stddev / Math.sqrt(nValidValues);

        Collections.sort(values);
        minValue = values.get(0);
        maxValue = values.get(values.size() - 1);
        median = StatsUtil.computeDoubleMedian(values);

        List<String> modes = StatsUtil.computeMode(modeBag, tvp);

        // tvp has already been dereferenced above, so it cannot be null at this point.
        String numberFormat = tvp.getStringNumberFormat();

        List<String> rendered = new ArrayList<>();
        for (String candidate : modes) {
            Either<TraitValueType, Number> checked = tvp.isTraitValueValid(candidate);
            String text;
            if (!checked.isRight()) {
                text = candidate;
            } else {
                Number parsed = checked.right();
                if (Integer.class.isAssignableFrom(tvp.getNumberClass())) {
                    text = String.valueOf(parsed.intValue());
                } else if (numberFormat == null) {
                    text = candidate;
                } else {
                    text = String.format(numberFormat, parsed.doubleValue());
                }
            }
            rendered.add(text);
        }
        mode = String.join(" , ", rendered);

        double q1 = BoxAndWhiskerCalculator.calculateQ1(values);
        double q3 = BoxAndWhiskerCalculator.calculateQ3(values);
        quartile1 = q1;
        quartile3 = q3;

        double lowerOutlierThreshold;
        double upperOutlierThreshold;
        if (nStdDevForOutlier == null) {
            double interQuartileRange = q3 - q1;
            lowerOutlierThreshold = q1 - (interQuartileRange * 1.5);
            upperOutlierThreshold = q3 + (interQuartileRange * 1.5);
        } else {
            lowerOutlierThreshold = mean - (nStdDevForOutlier * stddev);
            upperOutlierThreshold = mean + (nStdDevForOutlier * stddev);
        }
        collectOutliers(values, lowerOutlierThreshold, upperOutlierThreshold);
    }
}
From source file:de.uniba.wiai.kinf.pw.projects.lillytab.reasoner.ABoxRoleTest.java
@Test public void dualRoleInheritanceTest() throws EInconsistencyException { final IRBox<String, String, String, String> rbox = _abox.getTBox().getRBox(); rbox.getAssertedRBox().addRole("parent", RoleType.OBJECT_PROPERTY); rbox.getAssertedRBox().addRole("r0", RoleType.OBJECT_PROPERTY); rbox.getAssertedRBox().addRole("r1", RoleType.OBJECT_PROPERTY); rbox.getAssertedRBox().addSubRole("parent", "r0"); rbox.getAssertedRBox().addSubRole("parent", "r1"); final IABoxNode<String, String, String, String> aNode = _abox.getOrAddIndividualNode("a"); final IABoxNode<String, String, String, String> bNode = _abox.getOrAddIndividualNode("b"); aNode.getRABox().getAssertedSuccessors().put("r0", bNode.getNodeID()); aNode.getRABox().getAssertedSuccessors().put("r1", bNode.getNodeID()); final Bag<String> outRoles = new HashBag<>(); for (String role : aNode.getRABox().getOutgoingRoles()) { outRoles.add(role);//ww w . j av a 2 s. c o m } assertEquals(1, outRoles.getCount("parent")); }
From source file:com.diversityarrays.kdxplore.curate.TypedSampleMeasurement.java
/**
 * Chooses the date format needed to tell apart KDSMART measurements that share a device name.
 *
 * <p>Only measurements whose device type is {@code DeviceType.KDSMART} are considered, and
 * measurements without a {@code sampleGroupDate} are ignored when comparing dates.
 *
 * @param coll        measurements to inspect
 * @param shortFormat preferred date format
 * @param longFormat  fallback date format, returned when {@code shortFormat} is ambiguous
 * @return {@code null} if every KDSMART device name occurs at most once; otherwise
 *         {@code shortFormat} if no two measurements of the same device produce the same
 *         short-formatted date, else {@code longFormat}
 */
static public DateFormat getDateFormatForUniqueIdent(Collection<TypedSampleMeasurement> coll,
        DateFormat shortFormat, DateFormat longFormat) {
    Map<String, Bag<String>> datesByDeviceName = new HashMap<>();

    boolean foundDuplicateDeviceName = false;
    for (TypedSampleMeasurement tsm : coll) {
        if (DeviceType.KDSMART == tsm.deviceIdentifier.getDeviceType()) {
            String deviceName = tsm.deviceIdentifier.getDeviceName();
            if (datesByDeviceName.containsKey(deviceName)) {
                foundDuplicateDeviceName = true;
            } else {
                datesByDeviceName.put(deviceName, new HashBag<>());
            }
        }
    }

    if (!foundDuplicateDeviceName) {
        return null;
    }

    for (TypedSampleMeasurement tsm : coll) {
        if (DeviceType.KDSMART == tsm.deviceIdentifier.getDeviceType()
                && tsm.sampleGroupDate != null) {
            // Every KDSMART device name was registered by the first pass, so the bag exists.
            Bag<String> bag = datesByDeviceName.get(tsm.deviceIdentifier.getDeviceName());
            // Test the SHORT format: the question is whether it alone is unambiguous. The
            // original formatted with longFormat here, which answered the wrong question.
            bag.add(shortFormat.format(tsm.sampleGroupDate));
        }
    }

    for (Bag<String> bag : datesByDeviceName.values()) {
        for (String key : bag.uniqueSet()) {
            if (bag.getCount(key) > 1) {
                // Two measurements of one device collide in the short format.
                return longFormat;
            }
        }
    }
    return shortFormat;
}
From source file:de.dhke.projects.cutil.collections.aspect.AspectMultiMapValueCollectionTest.java
/** * Test of iterator method, of class AspectMultiMapValueCollection. *///from ww w .j a va 2s . c om @Test public void testIterator() { _aspectMap.put("_", "a"); Bag<String> values = new HashBag<>(); Iterator<String> iter = _values.iterator(); while (iter.hasNext()) values.add(iter.next()); assertEquals(2, values.getCount("a")); assertEquals(1, values.getCount("A")); assertEquals(1, values.getCount("b")); assertEquals(1, values.getCount("B")); assertEquals(1, values.getCount("c")); assertEquals(1, values.getCount("C")); assertEquals(0, values.getCount("d")); assertEquals(0, values.getCount("D")); assertEquals(_values.size(), values.size()); }
From source file:com.diversityarrays.kdxplore.trials.SampleGroupViewer.java
private String getBriefSummary(SampleGroup sampleGroup) throws IOException { Bag<Integer> plotIdsWithSpecimens = new HashBag<>(); Set<Integer> plotIdsWithScores = new HashSet<>(); int[] results = new int[3]; java.util.function.Predicate<KdxSample> visitor = new java.util.function.Predicate<KdxSample>() { @Override//from w w w. jav a2 s. c om public boolean test(KdxSample s) { plotIdsWithScores.add(s.getPlotId()); int snum = s.getSpecimenNumber(); if (snum <= 0) { ++results[0]; // Plot level sample count } else { ++results[1]; // Individual level sample count plotIdsWithSpecimens.add(s.getPlotId()); results[2] = Math.max(results[2], snum); // maximum specimen number } return true; } }; boolean scored = true; kdxdb.visitKdxSamplesForSampleGroup(sampleGroup, KdxploreDatabase.SampleLevel.BOTH, scored, visitor); int nPlotSamples = results[0]; int nSpecimenSamples = results[1]; int totalScoredSamples = nPlotSamples + nSpecimenSamples; int maxSpecimenNumber = results[2]; int nPlotsWithSpecimens = plotIdsWithSpecimens.size(); int nPlotsWithScores = plotIdsWithScores.size(); int maxCount = 0; for (Integer plotId : plotIdsWithSpecimens.uniqueSet()) { int count = plotIdsWithSpecimens.getCount(plotId); maxCount = Math.max(maxCount, count); } StringBuilder sb = new StringBuilder("<HTML>"); sb.append("<br><B>Scored Samples:</b> ").append(totalScoredSamples); sb.append("<br><B>Plot Level Samples:</b> ").append(nPlotSamples); sb.append("<br><B>Individual Samples:</b> ").append(nSpecimenSamples); sb.append("<br>"); sb.append("<br><B>Max Individual number:</b> ").append(maxSpecimenNumber); sb.append("<br><B># Plots with Individuals:</b> ").append(nPlotsWithSpecimens); sb.append("<br>"); sb.append("<br><B># Plots with scored samples:</b> ").append(nPlotsWithScores); sb.append("<br><B>Max # individual scores per plot:</b> ").append(maxCount); return sb.toString(); }
From source file:com.diversityarrays.kdxplore.importdata.bms.BmsExcelImportCallable.java
public void report(PrintStream ps) { ps.println("Collected " + plots.size() + " plots"); Bag<String> plotTypeCounts = new HashBag<>(); for (Plot plot : plots) { plotTypeCounts.add(plot.getPlotType()); }//ww w . j a va 2 s .c om ps.println("Found PlotTypes:"); for (String plotType : plotTypeCounts.uniqueSet()) { ps.println("\t" + plotType + ": " + plotTypeCounts.getCount(plotType)); } ps.println("Created " + traitByName.size() + " Traits"); for (Trait trait : traitByName.values()) { ps.println("\t" + trait + " [id=" + trait.getTraitId() + "]"); } ps.println("Created " + plotAttributeByName.size() + " Plot Attributes"); for (PlotAttribute pa : plotAttributeByName.values()) { ps.println("\t" + pa); } ps.println("Created " + nPlotAttributeValues + " Plot Attribute Values"); ps.println("Created " + nSamples + " Samples"); if (!lineNumbersByMessage.isEmpty()) { ps.println("Warnings:"); for (String msg : lineNumbersByMessage.keySet()) { Set<Integer> range = lineNumbersByMessage.get(msg); ps.println(range.size() + " times: " + msg); } } }