List of usage examples for org.apache.commons.math3.stat Frequency getCount
public long getCount(Comparable<?> v)
public long getCount(int v)
public long getCount(long v)
public long getCount(char v)
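Each overload returns the number of times the given value has been added to the Frequency; a value that was never added returns 0. The examples on this page exercise the int/long, char, and Comparable variants. Below is a minimal sketch of the basic pattern (the class and variable names are purely illustrative and not taken from the examples that follow); note that a single Frequency instance only accepts mutually comparable values, so numbers and characters go into separate instances here.

import org.apache.commons.math3.stat.Frequency;

public class FrequencyGetCountDemo {
    public static void main(String[] args) {
        // Integer values are stored internally as Long, so getCount(int) and getCount(long) agree
        Frequency numbers = new Frequency();
        numbers.addValue(7);
        numbers.addValue(7);
        numbers.addValue(3);
        System.out.println(numbers.getCount(7));   // 2
        System.out.println(numbers.getCount(9));   // 0 -- a value that was never added

        // Character values use the char overloads
        Frequency letters = new Frequency();
        letters.addValue('x');
        letters.addValue('x');
        letters.addValue('y');
        System.out.println(letters.getCount('x'));                    // 2
        System.out.println(letters.getCount(Character.valueOf('y'))); // 1, via the Comparable overload
    }
}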
From source file:com.sop4j.SimpleStatistics.java
public static void main(String[] args) {
    final MersenneTwister rng = new MersenneTwister(); // used for RNG... READ THE DOCS!!!
    final int[] values = new int[NUM_VALUES];

    final DescriptiveStatistics descriptiveStats = new DescriptiveStatistics(); // stores values
    final SummaryStatistics summaryStats = new SummaryStatistics(); // doesn't store values
    final Frequency frequency = new Frequency();

    // add numbers into our stats
    for (int i = 0; i < NUM_VALUES; ++i) {
        values[i] = rng.nextInt(MAX_VALUE);
        descriptiveStats.addValue(values[i]);
        summaryStats.addValue(values[i]);
        frequency.addValue(values[i]);
    }

    // print out some standard stats
    System.out.println("MIN: " + summaryStats.getMin());
    System.out.println("AVG: " + String.format("%.3f", summaryStats.getMean()));
    System.out.println("MAX: " + summaryStats.getMax());

    // get some more complex stats only offered by DescriptiveStatistics
    System.out.println("90%: " + descriptiveStats.getPercentile(90));
    System.out.println("MEDIAN: " + descriptiveStats.getPercentile(50));
    System.out.println("SKEWNESS: " + String.format("%.4f", descriptiveStats.getSkewness()));
    System.out.println("KURTOSIS: " + String.format("%.4f", descriptiveStats.getKurtosis()));

    // quick and dirty stats (need a little help from Guava to convert from int[] to double[])
    System.out.println("MIN: " + StatUtils.min(Doubles.toArray(Ints.asList(values))));
    System.out.println("AVG: " + String.format("%.4f", StatUtils.mean(Doubles.toArray(Ints.asList(values)))));
    System.out.println("MAX: " + StatUtils.max(Doubles.toArray(Ints.asList(values))));

    // some stats based upon frequencies
    System.out.println("NUM OF 7s: " + frequency.getCount(7));
    System.out.println("CUMULATIVE FREQUENCY OF 7: " + frequency.getCumFreq(7));
    System.out.println("PERCENTAGE OF 7s: " + frequency.getPct(7));
}
From source file:com.github.rinde.rinsim.scenario.generator.PoissonProcessTest.java
/**
 * Checks whether the observations conform to a Poisson process with the
 * specified intensity. Uses a chi-square test with the specified confidence.
 * The null hypothesis is that the observations are the result of a Poisson
 * process.
 * @param observations the observed frequencies
 * @param intensity the intensity of the Poisson process
 * @param length the length of the observation period
 * @param confidence the confidence level for the chi-square test
 * @return <code>true</code> if the observations conform to a Poisson process
 */
static boolean isPoissonProcess(Frequency observations, double intensity, double length, double confidence) {
    final PoissonDistribution pd = new PoissonDistribution(length * intensity);
    final Iterator<?> it = observations.valuesIterator();
    final long[] observed = new long[observations.getUniqueCount()];
    final double[] expected = new double[observations.getUniqueCount()];
    int index = 0;
    while (it.hasNext()) {
        final Long l = (Long) it.next();
        observed[index] = observations.getCount(l);
        expected[index] = pd.probability(l.intValue()) * observations.getSumFreq();
        if (expected[index] == 0) {
            return false;
        }
        index++;
    }
    final double chi = TestUtils.chiSquareTest(expected, observed);
    return !(chi < confidence);
}
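The check above walks the Frequency with valuesIterator() and pulls each tally back out with getCount(Long). A rough sketch of that same pattern in isolation, assuming an arbitrary intensity, window length, and sample size (none of these numbers come from the test):

import java.util.Iterator;

import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.stat.Frequency;

public class PoissonFrequencySketch {
    public static void main(String[] args) {
        double intensity = 2.0; // arrivals per unit length (assumed)
        double length = 5.0;    // observation window (assumed)

        // Simulate arrival counts and tally them in a Frequency
        PoissonDistribution pd = new PoissonDistribution(length * intensity);
        Frequency observations = new Frequency();
        for (int i = 0; i < 1000; i++) {
            observations.addValue((long) pd.sample());
        }

        // Recover the tally for each distinct observed value, as isPoissonProcess() does
        Iterator<Comparable<?>> it = observations.valuesIterator();
        while (it.hasNext()) {
            Long value = (Long) it.next();
            System.out.println(value + " observed " + observations.getCount(value) + " times");
        }
    }
}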
From source file:com.itemanalysis.psychometrics.statistics.FrequencyTest.java
/**
 * Test of toString method, of class Frequency.
 */
@Test
public void testToString() {
    String[] data = { "a", "a", "a", "a", "a", "a", "b", "b", "b", "c", "c" };
    long[] expected = { 6, 3, 2 };
    Frequency f = new Frequency();
    for (int i = 0; i < data.length; i++) {
        f.addValue(data[i]);
    }

    Iterator<Comparable<?>> iter = f.valuesIterator();
    int index = 0;
    while (iter.hasNext()) {
        assertEquals("[row " + index + "]", expected[index], f.getCount(iter.next()));
        index++;
    }
}
From source file:ch.bfh.lca._15h.server.statisticServices.StatisticHandler.java
/**
 * Calculates the frequencies of genders within a dataSource. The result is given in a GenericResultRow.
 * @param name name of the age group
 * @param dataSource source with the data of the DPCs
 * @return GenericResultRow (contains in field 0 the description of the age group,
 *         in field 1 the number of males within that group,
 *         in field 2 the number of females within the group)
 */
private GenericResultRow genderFrequency(String name, DataSource dataSource) {
    Frequency freqGender = new Frequency();
    GenericResultRow grr = new DBResultRow();
    for (DoctorPatientContact dpc : dataSource) {
        freqGender.addValue(dpc.getPatSex().getValue());
    }
    String[] colName = { "AgeGroup", "Male", "Female" };
    Object[] values = { name, freqGender.getCount(0), freqGender.getCount(1) };
    grr.setValues(colName, values);
    return grr;
}
From source file:com.itemanalysis.psychometrics.statistics.TwoWayTable.java
public long getCount(Comparable<?> rowValue, Comparable<?> colValue) {
    if (rowValue instanceof Integer && colValue instanceof Integer) {
        return getCount(((Integer) rowValue).longValue(), ((Integer) colValue).longValue());
    }
    long result = 0;
    try {
        Frequency freq = tableRows.get(rowValue);
        if (freq != null) {
            result = freq.getCount(colValue);
        }
    } catch (ClassCastException ex) {
        // ignore and return 0 -- a ClassCastException is thrown if the value is not comparable
    }
    return result;
}
From source file:com.itemanalysis.psychometrics.irt.estimation.ItemResponseFileSummary.java
private ItemResponseVector[] readTapData() {
    byte[][] tap = new byte[35][18];
    try {
        File f = FileUtils.toFile(this.getClass().getResource("/testdata/tap-data.txt"));
        BufferedReader br = new BufferedReader(new FileReader(f));
        String line = "";
        String[] s = null;
        int row = 0;
        while ((line = br.readLine()) != null) {
            s = line.split(",");
            for (int j = 0; j < s.length; j++) {
                tap[row][j] = Byte.parseByte(s[j]);
            }
            row++;
        }
        br.close();
    } catch (IOException ex) {
        ex.printStackTrace();
    }

    Frequency freq = new Frequency();
    for (int i = 0; i < tap.length; i++) {
        freq.addValue(Arrays.toString(tap[i]));
    }

    ItemResponseVector[] responseData = new ItemResponseVector[freq.getUniqueCount()];
    ItemResponseVector irv = null;
    Iterator<Comparable<?>> iter = freq.valuesIterator();
    int index = 0;

    // create array of ItemResponseVector objects
    while (iter.hasNext()) {
        // get response string from frequency summary and convert to byte array
        Comparable<?> value = iter.next();
        String s = value.toString();
        s = s.substring(1, s.lastIndexOf("]"));
        String[] sa = s.split(",");
        byte[] rv = new byte[sa.length];
        for (int i = 0; i < sa.length; i++) {
            rv[i] = Byte.parseByte(sa[i].trim());
        }

        // create response vector objects
        irv = new ItemResponseVector(rv, Long.valueOf(freq.getCount(value)).doubleValue());
        responseData[index] = irv;
        index++;
    }

    //        // display results of summary
    //        for (int i = 0; i < responseData.length; i++) {
    //            System.out.println(responseData[i].toString() + ": " + responseData[i].getFrequency());
    //        }

    return responseData;
}
From source file:com.itemanalysis.jmetrik.stats.frequency.FrequencyAnalysis.java
public void publishTable(VariableAttributes v) {
    TextTableColumnFormat[] cformats = new TextTableColumnFormat[6];
    cformats[0] = new TextTableColumnFormat();
    cformats[0].setStringFormat(11, TextTableColumnFormat.OutputAlignment.LEFT);
    cformats[1] = new TextTableColumnFormat();
    cformats[1].setIntFormat(10, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[2] = new TextTableColumnFormat();
    cformats[2].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[3] = new TextTableColumnFormat();
    cformats[3].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[4] = new TextTableColumnFormat();
    cformats[4].setIntFormat(10, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[5] = new TextTableColumnFormat();
    cformats[5].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);

    Frequency temp = frequencyTables.get(v);
    TextTable table = new TextTable();
    table.addAllColumnFormats(cformats, 4);
    table.getRowAt(0).addHeader(0, 6, v.getName().toString(), TextTablePosition.CENTER);
    table.getRowAt(1).addHorizontalRule(0, 6, "=");
    table.getRowAt(2).addHeader(0, 1, "Value", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(1, 1, "Frequency", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(2, 1, "Percent", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(3, 1, "Valid Pct.", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(4, 1, "Cum. Freq.", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(5, 1, "Cum. Pct.", TextTablePosition.CENTER);
    table.getRowAt(3).addHorizontalRule(0, 6, "-");

    Iterator<Comparable<?>> iter = temp.valuesIterator();
    int index = 4;
    double pctSum = 0.0;
    long validSum = temp.getSumFreq();
    long miss = (long) (maxProgress - validSum);

    while (iter.hasNext()) {
        table.addRow(new TextTableRow(cformats.length), cformats);
        Comparable<?> value = iter.next();
        table.addStringAt(index, 0, value.toString());
        table.addLongAt(index, 1, temp.getCount(value));
        table.addDoubleAt(index, 2, ((double) temp.getCount(value) / maxProgress) * 100);
        table.addDoubleAt(index, 3, temp.getPct(value) * 100);
        table.addLongAt(index, 4, temp.getCumFreq(value));
        table.addDoubleAt(index, 5, temp.getCumPct(value) * 100);
        index++;
        pctSum += (double) temp.getCount(value) / (double) maxProgress;
    }

    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Valid Total");
    table.addLongAt(index, 1, validSum);
    table.addDoubleAt(index, 2, pctSum * 100);
    table.addDoubleAt(index, 3, (double) validSum / (double) (maxProgress - miss) * 100);
    index++;

    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Missing");
    table.addLongAt(index, 1, miss);
    table.addDoubleAt(index, 2, ((double) miss / maxProgress) * 100);
    index++;

    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Grand Total");
    table.addLongAt(index, 1, (long) maxProgress);
    table.addDoubleAt(index, 2, ((double) (miss + temp.getSumFreq()) / maxProgress) * 100);
    index++;

    table.addRow(new TextTableRow(cformats.length), cformats);
    table.getRowAt(index).addHorizontalRule(0, 6, "=");
    publish(table.toString() + "\n");
}
From source file:com.itemanalysis.psychometrics.irt.estimation.ItemResponseFileSummary.java
/**
 * Summarizes a comma-delimited file. It will extract the data beginning in the column indicated by start
 * and it will continue for nItems columns.
 *
 * @param f file to summarize
 * @param start the column index of the first item. It is zero based. If the data start in the first column, then start=0.
 * @param nItems number of items to read from the file. It will begin at the column indicated by start.
 * @param headerIncluded true if a header is included, false otherwise. The header will be omitted.
 * @return an array of item response vectors
 */
public ItemResponseVector[] getCondensedResponseVectors(File f, int start, int nItems, boolean headerIncluded) {
    Frequency freq = new Frequency();
    String responseString = "";

    try {
        BufferedReader br = new BufferedReader(new FileReader(f));
        String line = "";
        String[] s = null;
        if (headerIncluded)
            br.readLine(); // skip header
        while ((line = br.readLine()) != null) {
            s = line.split(",");
            line = "";
            for (int j = 0; j < nItems; j++) {
                line += s[j + start];
            }
            freq.addValue(line);
        }
        br.close();
    } catch (IOException ex) {
        ex.printStackTrace();
    }

    ItemResponseVector[] responseData = new ItemResponseVector[freq.getUniqueCount()];
    ItemResponseVector irv = null;
    Iterator<Comparable<?>> iter = freq.valuesIterator();
    int index = 0;
    byte[] rv = null;

    // create array of ItemResponseVector objects
    while (iter.hasNext()) {
        Comparable<?> value = iter.next();
        responseString = value.toString();
        int n = responseString.length();
        rv = new byte[n];
        String response = "";
        for (int i = 0; i < n; i++) {
            response = String.valueOf(responseString.charAt(i)).toString();
            rv[i] = Byte.parseByte(response);
        }

        // create response vector objects
        irv = new ItemResponseVector(rv, Long.valueOf(freq.getCount(value)).doubleValue());
        responseData[index] = irv;
        index++;
    }
    return responseData;
}
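Assuming the class exposes a public no-argument constructor (an assumption made only for illustration), a call against a plain comma-delimited response file might look like this; the path and column counts below are made up:

// Hypothetical usage: condense the first 20 item columns of a CSV that has a header row
ItemResponseFileSummary summary = new ItemResponseFileSummary(); // assumes a no-arg constructor
ItemResponseVector[] condensed = summary.getCondensedResponseVectors(
        new File("/path/to/responses.csv"), // file to summarize (made-up path)
        0,     // items start in the first column
        20,    // read 20 item columns
        true); // skip the header row
System.out.println(condensed.length + " unique response patterns");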
From source file:com.ggvaidya.scinames.summary.NameStabilityView.java
public void init() {
    Project project = projectView.getProject();

    // What do we actually need to do?
    boolean flag_calculateNameSimilarity = (toCalculate & NAME_SIMILARITY) == NAME_SIMILARITY;
    boolean flag_calculateClusterSimilarity = (toCalculate & CLUSTER_SIMILARITY) == CLUSTER_SIMILARITY;
    boolean flag_calculateCircumscriptionSimilarity = (toCalculate & CIRCUMSCRIPTIONAL_SIMILARITY) == CIRCUMSCRIPTIONAL_SIMILARITY;

    // Setup stage.
    stage.setTitle("Name stability between " + project.getDatasets().size() + " datasets");

    // Setup table.
    controller.getTableEditableProperty().set(false);
    //controller.setTableColumnResizeProperty(TableView.CONSTRAINED_RESIZE_POLICY);
    ObservableList<TableColumn> cols = controller.getTableColumnsProperty();
    cols.clear();

    // Precalculating.
    Table<Dataset, String, String> precalc = HashBasedTable.create();

    // Set up columns.
    cols.add(createTableColumnFromPrecalc(precalc, "dataset"));
    cols.add(createTableColumnFromPrecalc(precalc, "date"));
    cols.add(createTableColumnFromPrecalc(precalc, "year"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_binomial"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_monotypic_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "mean_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "median_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "mode_binomials_per_genera_list"));

    /* All them stability calculations */
    if (flag_calculateNameSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_last"));
    }

    if (flag_calculateClusterSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc,
"clusters_identical_to_prev")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_prev")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_next")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_first")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_last")); } if (flag_calculateCircumscriptionSimilarity) { cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_prev")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_next")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_first")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_last")); } Set<String> recognitionColumns = new HashSet<>(); // Calculate binomials per dataset. Map<Name, Set<Dataset>> datasetsPerName = new HashMap<>(); // Prepare to loop! List<Dataset> checklists = project.getChecklists(); // BIRD HACK! Include all datasets! // checklists = project.getDatasets(); // Set table items. We're only interested in checklists, because // there's no such thing as "name stability" between non-checklist datasets. 
controller.getTableItemsProperty().set(FXCollections.observableArrayList(checklists)); List<Dataset> prevChecklists = new LinkedList<>(); Dataset firstChecklist = checklists.get(0); Dataset lastChecklist = checklists.get(checklists.size() - 1); // TODO: This used to be prevDataset, but prevChecklist makes a lot more sense, since we // want to compare checklists with each other, ignoring datasets. Would be nice if someone // with copious free time could look over the calculations and make sure they don't assume // that the previous checklist is also the previous dataset? Dataset prevChecklist = null; int index = -1; for (Dataset ds : checklists) { index++; Dataset nextChecklist = (index < (checklists.size() - 1) ? checklists.get(index + 1) : null); precalc.put(ds, "dataset", ds.getName()); precalc.put(ds, "date", ds.getDate().asYYYYmmDD("-")); precalc.put(ds, "year", ds.getDate().getYearAsString()); Set<Name> recognizedBinomials = project.getRecognizedNames(ds).stream().flatMap(n -> n.asBinomial()) .collect(Collectors.toSet()); precalc.put(ds, "count_binomial", String.valueOf(recognizedBinomials.size())); Set<Name> recognizedGenera = recognizedBinomials.stream().flatMap(n -> n.asGenus()) .collect(Collectors.toSet()); precalc.put(ds, "count_genera", String.valueOf(recognizedGenera.size())); precalc.put(ds, "mean_binomials_per_genera", new BigDecimal(((double) recognizedBinomials.size()) / recognizedGenera.size()) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); Map<Name, List<Name>> countBinomialsPerGenus = recognizedBinomials.stream() // Eliminate names that have zero (or more than one?!) genus name. .filter(n -> (n.asGenus().count() == 1)) .collect(Collectors.groupingBy(n -> n.asGenus().findAny().get())); /* LOGGER.info("Debugging: list of " + recognizedGenera.size() + " genera: " + recognizedGenera.stream().map(n -> n.getFullName()).collect(Collectors.joining(", ")) ); */ precalc.put(ds, "count_monotypic_genera", String.valueOf(countBinomialsPerGenus.entrySet().stream() .filter(entry -> new HashSet<>(entry.getValue()).size() == 1).count())); /* LOGGER.info("Debugging: list of monotypic genera: " + countBinomialsPerGenus.entrySet().stream() .filter(entry -> new HashSet<>(entry.getValue()).size() == 1) .map(entry -> entry.getKey().getFullName()) .collect(Collectors.joining(", ")) ); */ // Species added and deleted Set<Name> namesAdded = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.ADDITION)) .flatMap(ch -> ch.getToStream()).collect(Collectors.toSet()); Set<Name> namesDeleted = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.DELETION)) .flatMap(ch -> ch.getFromStream()).collect(Collectors.toSet()); // TODO: This isn't so useful -- the more useful measure would be the number of all species added // and all species deleted, making sure there isn't a cluster-al overlap. precalc.put(ds, "names_added", String.valueOf(namesAdded.size())); //precalc.put(ds, "names_added_list", namesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); precalc.put(ds, "names_deleted", String.valueOf(namesDeleted.size())); //precalc.put(ds, "names_deleted_list", namesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); // Eliminate names that have been added, but were previously recognized at the species level. 
Set<Name> speciesAdded = namesAdded; if (prevChecklist != null) { Set<Name> prevRecognizedNames = project.getNameClusterManager() .getClusters(project.getRecognizedNames(prevChecklist)).stream() .flatMap(nc -> nc.getNames().stream()).collect(Collectors.toSet()); speciesAdded = namesAdded.stream().filter(n -> !prevRecognizedNames.contains(n)) .collect(Collectors.toSet()); } // Eliminate names that are still represented in the checklist by a species cluster. // (Note that this includes cases where a subspecies is removed, but another subspecies // or the nominal species is still recognized!) Set<Name> currentlyRecognizedBinomialNames = project.getNameClusterManager() .getClusters(project.getRecognizedNames(ds)).stream().flatMap(nc -> nc.getNames().stream()) .flatMap(n -> n.asBinomial()).collect(Collectors.toSet()); Set<Name> speciesDeleted = namesDeleted.stream() .filter(n -> !n.asBinomial().anyMatch(bn -> currentlyRecognizedBinomialNames.contains(bn))) .collect(Collectors.toSet()); precalc.put(ds, "species_added", String.valueOf(speciesAdded.size())); precalc.put(ds, "species_added_list", speciesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); precalc.put(ds, "species_deleted", String.valueOf(speciesDeleted.size())); precalc.put(ds, "species_deleted_list", speciesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); // Measures of species per genera java.util.Map<String, Set<Name>> binomialsPerGenera = recognizedBinomials.stream() .collect(Collectors.toMap(n -> n.getGenus(), n -> { Set<Name> set = new HashSet<Name>(); set.add(n); return set; }, (a, b) -> { a.addAll(b); return a; })); List<Integer> binomialsPerGeneraCounts = binomialsPerGenera.values().stream().map(set -> set.size()) .sorted().collect(Collectors.toList()); Frequency freq = new Frequency(); for (String genus : binomialsPerGenera.keySet()) { // Blech. 
for (Name binom : binomialsPerGenera.get(genus)) { freq.addValue(genus); } } List<Comparable<?>> modeGenera = freq.getMode(); precalc.put(ds, "mode_binomials_per_genera_list", modeGenera.stream().map(o -> o.toString() + ": " + freq.getCount(o) + " binomials") .collect(Collectors.joining("; "))); double[] binomialsPerGeneraCountsAsDouble = binomialsPerGeneraCounts.stream() .mapToDouble(Integer::doubleValue).toArray(); Median median = new Median(); precalc.put(ds, "median_binomials_per_genera", String.valueOf(median.evaluate(binomialsPerGeneraCountsAsDouble))); if (firstChecklist == null) { // precalc.put(ds, "names_identical_to_first", "NA"); // precalc.put(ds, "names_identical_to_first_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_first", String.valueOf(getBinomialNamesIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "names_identical_to_first_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_first_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / getBinomialNamesUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_first_pc_first", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / getBinomialNamesUnion(project, firstChecklist, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_first", String.valueOf(getBinomialClustersIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "clusters_identical_to_first_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_first_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_first_pc_first", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, firstChecklist, firstChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_first", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_first_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_first_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_first_pc_first", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, firstChecklist, firstChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } if (lastChecklist == null) { // precalc.put(ds, "names_identical_to_first", "NA"); // precalc.put(ds, "names_identical_to_first_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_last", String.valueOf(getBinomialNamesIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "names_identical_to_last_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_last_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / getBinomialNamesUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_last_pc_last", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / getBinomialNamesUnion(project, lastChecklist, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_last", String.valueOf(getBinomialClustersIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "clusters_identical_to_last_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_last_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_last_pc_last", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, lastChecklist, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_last", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_last_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_last_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_last_pc_last", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, lastChecklist, lastChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } if (prevChecklist == null) { // precalc.put(ds, "names_identical_to_prev", "NA"); // precalc.put(ds, "names_identical_to_prev_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_prev", String.valueOf(getBinomialNamesIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "names_identical_to_prev_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_prev_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / getBinomialNamesUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_prev_pc_prev", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / getBinomialNamesUnion(project, prevChecklist, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_prev", String.valueOf(getBinomialClustersIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "clusters_identical_to_prev_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_prev_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_prev_pc_prev", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, prevChecklist, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_prev", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_prev_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_prev_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_prev_pc_prev", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, prevChecklist, prevChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } // FYI, getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() should always be equal to the number of species in the dataset. 
} if (nextChecklist == null) { // precalc.put(ds, "names_identical_to_prev", "NA"); // precalc.put(ds, "names_identical_to_prev_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_next", String.valueOf(getBinomialNamesIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "names_identical_to_next_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_next_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / getBinomialNamesUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_next_pc_next", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / getBinomialNamesUnion(project, nextChecklist, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. What?"); } precalc.put(ds, "clusters_identical_to_next", String.valueOf(getBinomialClustersIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "clusters_identical_to_next_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_next_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_next_pc_next", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, nextChecklist, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_next", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_next_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_next_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_next_pc_next", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, nextChecklist, nextChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } /* // For the visualization thingie. 
int total = prevChecklists.size(); List<Integer> counts = new LinkedList<>(); for(Name name: recognizedBinomials) { int prevRecognized = 0; if(!datasetsPerName.containsKey(name)) { datasetsPerName.put(name, new HashSet<>()); } else { prevRecognized = datasetsPerName.get(name).size(); } datasetsPerName.get(name).add(ds); counts.add( (int)( ((double)prevRecognized)/total*100 ) ); } Map<Integer, List<Integer>> countsByPercentage = counts.stream().sorted().collect(Collectors.groupingBy(n -> (int)(n/10)*10)); for(int percentage: countsByPercentage.keySet()) { precalc.put(ds, "previously_recognized_" + percentage + "pc", String.valueOf(countsByPercentage.get(percentage).size())); recognitionColumns.add("previously_recognized_" + percentage + "pc"); } prevChecklists.add(ds); */ // Set up the previous checklist for the next loop. prevChecklist = ds; } /* LinkedList<String> recognitionColumnsList = new LinkedList<>(recognitionColumns); recognitionColumnsList.sort(null); for(String colName: recognitionColumnsList) { cols.add(createTableColumnFromPrecalc(precalc, colName)); }*/ }
From source file:org.apache.solr.client.solrj.io.eval.FrequencyTableEvaluator.java
@Override
public Object doWork(Object... values) throws IOException {
    if (Arrays.stream(values).anyMatch(item -> null == item)) {
        return null;
    }

    List<?> sourceValues;
    if (values.length == 1) {
        sourceValues = values[0] instanceof List<?> ? (List<?>) values[0] : Arrays.asList(values[0]);
    } else {
        throw new IOException(
                String.format(Locale.ROOT, "Invalid expression %s - expecting at least one value but found %d",
                        toExpression(constructingFactory), containedEvaluators.size()));
    }

    Frequency frequency = new Frequency();
    for (Object o : sourceValues) {
        Number number = (Number) o;
        frequency.addValue(number.longValue());
    }

    List<Tuple> histogramBins = new ArrayList<>();
    Iterator iterator = frequency.valuesIterator();
    while (iterator.hasNext()) {
        Long value = (Long) iterator.next();
        Map<String, Number> map = new HashMap<>();
        map.put("value", value.longValue());
        map.put("count", frequency.getCount(value));
        map.put("cumFreq", frequency.getCumFreq(value));
        map.put("cumPct", frequency.getCumPct(value));
        map.put("pct", frequency.getPct(value));
        histogramBins.add(new Tuple(map));
    }
    return histogramBins;
}