Example usage for org.apache.commons.math3.stat Frequency getCount

List of usage examples for org.apache.commons.math3.stat Frequency getCount

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat Frequency getCount.

Prototype

public long getCount(char v) 

Source Link

Document

Returns the number of values equal to v.

Usage

From source file:com.sop4j.SimpleStatistics.java

/**
 * Demonstrates basic statistics with Commons Math: streaming summaries
 * (SummaryStatistics), stored-value statistics (DescriptiveStatistics),
 * frequency counts (Frequency), and one-shot StatUtils helpers.
 */
public static void main(String[] args) {
    final MersenneTwister rng = new MersenneTwister(); // RNG from Commons Math; self-seeding
    final int[] values = new int[NUM_VALUES];

    final DescriptiveStatistics descriptiveStats = new DescriptiveStatistics(); // stores all values
    final SummaryStatistics summaryStats = new SummaryStatistics(); // streaming; doesn't store values
    final Frequency frequency = new Frequency();

    // Generate random values and feed each of the three accumulators.
    for (int i = 0; i < NUM_VALUES; ++i) {
        values[i] = rng.nextInt(MAX_VALUE);

        descriptiveStats.addValue(values[i]);
        summaryStats.addValue(values[i]);
        frequency.addValue(values[i]);
    }

    // print out some standard stats
    System.out.println("MIN: " + summaryStats.getMin());
    System.out.println("AVG: " + String.format("%.3f", summaryStats.getMean()));
    System.out.println("MAX: " + summaryStats.getMax());

    // get some more complex stats only offered by DescriptiveStatistics
    System.out.println("90%: " + descriptiveStats.getPercentile(90));
    System.out.println("MEDIAN: " + descriptiveStats.getPercentile(50));
    System.out.println("SKEWNESS: " + String.format("%.4f", descriptiveStats.getSkewness()));
    System.out.println("KURTOSIS: " + String.format("%.4f", descriptiveStats.getKurtosis()));

    // Quick and dirty stats via StatUtils. Convert int[] -> double[] once
    // (with Guava's help) instead of repeating the round-trip per call.
    final double[] valuesAsDoubles = Doubles.toArray(Ints.asList(values));
    System.out.println("MIN: " + StatUtils.min(valuesAsDoubles));
    System.out.println("AVG: " + String.format("%.4f", StatUtils.mean(valuesAsDoubles)));
    System.out.println("MAX: " + StatUtils.max(valuesAsDoubles));

    // some stats based upon frequencies
    System.out.println("NUM OF 7s: " + frequency.getCount(7));
    System.out.println("CUMULATIVE FREQUENCY OF 7: " + frequency.getCumFreq(7));
    System.out.println("PERCENTAGE OF 7s: " + frequency.getPct(7));
}

From source file:com.github.rinde.rinsim.scenario.generator.PoissonProcessTest.java

/**
 * Checks whether the observations conform to a Poisson process with the
 * specified intensity. Performs a chi-square goodness-of-fit test against
 * the Poisson distribution with mean {@code length * intensity}; the null
 * hypothesis is that the observations were produced by a Poisson process.
 * @param observations frequency table of observed event counts
 * @param intensity expected event rate of the process
 * @param length length of the observed interval
 * @param confidence significance threshold for rejecting the null hypothesis
 * @return <code>true</code> if the observations are consistent with a Poisson process
 */
static boolean isPoissonProcess(Frequency observations, double intensity, double length, double confidence) {
    final PoissonDistribution poisson = new PoissonDistribution(length * intensity);

    final int uniqueValues = observations.getUniqueCount();
    final long[] observedCounts = new long[uniqueValues];
    final double[] expectedCounts = new double[uniqueValues];

    final Iterator<?> valueIterator = observations.valuesIterator();
    int i = 0;
    while (valueIterator.hasNext()) {
        final Long value = (Long) valueIterator.next();
        observedCounts[i] = observations.getCount(value);
        expectedCounts[i] = poisson.probability(value.intValue()) * observations.getSumFreq();
        // A zero expected count would make the chi-square statistic undefined.
        if (expectedCounts[i] == 0) {
            return false;
        }
        i++;
    }
    final double chi = TestUtils.chiSquareTest(expectedCounts, observedCounts);
    return !(chi < confidence);
}

From source file:com.itemanalysis.psychometrics.statistics.FrequencyTest.java

/**
 * Verifies that Frequency iterates its distinct values with the
 * expected per-value counts.
 *///
@Test
public void testToString() {
    String[] data = { "a", "a", "a", "a", "a", "a", "b", "b", "b", "c", "c" };
    long[] expected = { 6, 3, 2 };

    Frequency f = new Frequency();
    for (String value : data) {
        f.addValue(value);
    }

    Iterator<Comparable<?>> iter = f.valuesIterator();
    int row = 0;
    while (iter.hasNext()) {
        assertEquals("[row " + row + "]", expected[row], f.getCount(iter.next()));
        row++;
    }
}

From source file:ch.bfh.lca._15h.server.statisticServices.StatisticHandler.java

/**
 * Calculates the frequencies of genders within an dataSource. The result is given in a GenericResultRow
 * @param name Name of Age (group)/*from ww w .  ja v  a 2s  . co m*/
 * @param dataSource Source with datas of DPCs
 * @return GenericResultRow (Contains in field 0 the description of the Age (group),
 *   in field 1 the amount of male within that group
 *   in field 2 the amount of females within the group.
 */
private GenericResultRow genderFrequency(String name, DataSource dataSource) {
    Frequency freqGender = new Frequency();
    GenericResultRow grr = new DBResultRow();

    for (DoctorPatientContact dpc : dataSource) {
        freqGender.addValue(dpc.getPatSex().getValue());
    }

    String[] colName = { "AgeGroup", "Male", "Fenale" };
    Object[] values = { name, freqGender.getCount(0), freqGender.getCount(1) };

    grr.setValues(colName, values);
    return grr;
}

From source file:com.itemanalysis.psychometrics.statistics.TwoWayTable.java

/**
 * Returns the count for the given (row, column) cell of the table.
 * Integer arguments are widened to long to match how values are stored.
 * @param rowValue the row key
 * @param colValue the column key
 * @return the number of occurrences of the (rowValue, colValue) pair,
 *   or 0 if the pair has not been observed or is not comparable
 */
public long getCount(Comparable<?> rowValue, Comparable<?> colValue) {
    if (rowValue instanceof Integer && colValue instanceof Integer) {
        // Bug fix: the second argument previously reused rowValue,
        // so colValue was ignored on the Integer fast path.
        return getCount(((Integer) rowValue).longValue(), ((Integer) colValue).longValue());
    }
    long result = 0;
    try {
        Frequency freq = tableRows.get(rowValue);
        if (freq != null) {
            result = freq.getCount(colValue);
        }
    } catch (ClassCastException ex) {
        // Ignore and return 0 — thrown when the value is not comparable.
    }
    return result;
}

From source file:com.itemanalysis.psychometrics.irt.estimation.ItemResponseFileSummary.java

/**
 * Reads the TAP example data set (35 examinees x 18 items) from
 * /testdata/tap-data.txt and condenses identical response patterns into
 * ItemResponseVector objects weighted by their observed frequency.
 * @return one ItemResponseVector per unique response pattern
 */
private ItemResponseVector[] readTapData() {
    byte[][] tap = new byte[35][18];
    File f = FileUtils.toFile(this.getClass().getResource("/testdata/tap-data.txt"));
    // try-with-resources ensures the reader is closed even if parsing fails
    // (the original leaked the reader on any exception before close()).
    try (BufferedReader br = new BufferedReader(new FileReader(f))) {
        String line;
        int row = 0;
        while ((line = br.readLine()) != null) {
            String[] tokens = line.split(",");
            for (int j = 0; j < tokens.length; j++) {
                tap[row][j] = Byte.parseByte(tokens[j]);
            }
            row++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }

    // Count identical response patterns, keyed by their Arrays.toString form.
    Frequency freq = new Frequency();
    for (byte[] pattern : tap) {
        freq.addValue(Arrays.toString(pattern));
    }

    ItemResponseVector[] responseData = new ItemResponseVector[freq.getUniqueCount()];
    Iterator<Comparable<?>> iter = freq.valuesIterator();
    int index = 0;

    // Create one ItemResponseVector per unique response pattern.
    while (iter.hasNext()) {
        // Convert the "[1, 0, 1, ...]" key back into a byte array.
        Comparable<?> value = iter.next();
        String s = value.toString();
        s = s.substring(1, s.lastIndexOf("]"));
        String[] sa = s.split(",");
        byte[] rv = new byte[sa.length];
        for (int i = 0; i < sa.length; i++) {
            rv[i] = Byte.parseByte(sa[i].trim());
        }

        // Weight each unique pattern by how often it occurred.
        responseData[index] = new ItemResponseVector(rv, (double) freq.getCount(value));
        index++;
    }

    return responseData;
}

From source file:com.itemanalysis.jmetrik.stats.frequency.FrequencyAnalysis.java

/**
 * Publishes a formatted text frequency table for a single variable: one row
 * per distinct value (frequency, percent, valid percent, cumulative frequency,
 * cumulative percent) followed by Valid Total, Missing, and Grand Total rows.
 * @param v the variable whose precomputed frequency table (from frequencyTables) is printed
 */
public void publishTable(VariableAttributes v) {
    // Six columns: value, frequency, percent, valid pct., cum. freq., cum. pct.
    TextTableColumnFormat[] cformats = new TextTableColumnFormat[6];
    cformats[0] = new TextTableColumnFormat();
    cformats[0].setStringFormat(11, TextTableColumnFormat.OutputAlignment.LEFT);
    cformats[1] = new TextTableColumnFormat();
    cformats[1].setIntFormat(10, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[2] = new TextTableColumnFormat();
    cformats[2].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[3] = new TextTableColumnFormat();
    cformats[3].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[4] = new TextTableColumnFormat();
    cformats[4].setIntFormat(10, TextTableColumnFormat.OutputAlignment.RIGHT);
    cformats[5] = new TextTableColumnFormat();
    cformats[5].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);

    // Frequency table previously computed for this variable.
    Frequency temp = frequencyTables.get(v);

    // Rows 0-3 are the header: title, rule, column labels, rule.
    TextTable table = new TextTable();
    table.addAllColumnFormats(cformats, 4);
    table.getRowAt(0).addHeader(0, 6, v.getName().toString(), TextTablePosition.CENTER);
    table.getRowAt(1).addHorizontalRule(0, 6, "=");
    table.getRowAt(2).addHeader(0, 1, "Value", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(1, 1, "Frequency", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(2, 1, "Percent", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(3, 1, "Valid Pct.", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(4, 1, "Cum. Freq.", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(5, 1, "Cum. Pct.", TextTablePosition.CENTER);
    table.getRowAt(3).addHorizontalRule(0, 6, "-");

    Iterator<Comparable<?>> iter = temp.valuesIterator();

    int index = 4;
    double pctSum = 0.0;
    long validSum = temp.getSumFreq();
    // NOTE(review): assumes maxProgress holds the total case count (valid + missing) — confirm.
    long miss = (long) (maxProgress - validSum);

    // One data row per distinct observed value.
    while (iter.hasNext()) {
        table.addRow(new TextTableRow(cformats.length), cformats);

        Comparable<?> value = iter.next();
        table.addStringAt(index, 0, value.toString());
        table.addLongAt(index, 1, temp.getCount(value));
        table.addDoubleAt(index, 2, ((double) temp.getCount(value) / maxProgress) * 100);
        table.addDoubleAt(index, 3, temp.getPct(value) * 100);
        table.addLongAt(index, 4, temp.getCumFreq(value));
        table.addDoubleAt(index, 5, temp.getCumPct(value) * 100);
        index++;
        // Accumulate the proportion of all cases covered by valid values.
        pctSum += (double) temp.getCount(value) / (double) maxProgress;
    }

    // Summary row: totals over valid (non-missing) values.
    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Valid Total");
    table.addLongAt(index, 1, validSum);
    table.addDoubleAt(index, 2, pctSum * 100);
    table.addDoubleAt(index, 3, (double) validSum / (double) (maxProgress - miss) * 100);
    index++;

    // Summary row: missing cases.
    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Missing");
    table.addLongAt(index, 1, miss);
    table.addDoubleAt(index, 2, ((double) miss / maxProgress) * 100);
    index++;

    // Summary row: grand total (valid + missing).
    table.addRow(new TextTableRow(cformats.length), cformats);
    table.addStringAt(index, 0, "Grand Total");
    table.addLongAt(index, 1, (long) maxProgress);
    table.addDoubleAt(index, 2, ((double) (miss + temp.getSumFreq()) / maxProgress) * 100);
    index++;

    // Closing rule, then publish the rendered table.
    table.addRow(new TextTableRow(cformats.length), cformats);
    table.getRowAt(index).addHorizontalRule(0, 6, "=");
    publish(table.toString() + "\n");
}

From source file:com.itemanalysis.psychometrics.irt.estimation.ItemResponseFileSummary.java

/**
 * Summarizes a comma delimited file. It extracts the data beginning in the
 * column indicated by start and continues for nItems columns, condensing
 * identical response patterns into frequency-weighted response vectors.
 *
 * @param f file to summarize
 * @param start the column index of the first item. It is zero based. If the data start in the first column, then start=0.
 * @param nItems number of items to read from the file. It will begin at the column indicated by start.
 * @param headerIncluded true if header is included. False otherwise. The header will be omitted.
 * @return an array of item response vectors
 */
public ItemResponseVector[] getCondensedResponseVectors(File f, int start, int nItems, boolean headerIncluded) {
    Frequency freq = new Frequency();

    // try-with-resources ensures the reader is closed even on exception
    // (the original leaked the reader on any failure before close()).
    try (BufferedReader br = new BufferedReader(new FileReader(f))) {
        if (headerIncluded) {
            br.readLine(); // skip header
        }
        String line;
        while ((line = br.readLine()) != null) {
            String[] s = line.split(",");

            // Concatenate the nItems responses into one pattern string.
            StringBuilder pattern = new StringBuilder(nItems);
            for (int j = 0; j < nItems; j++) {
                pattern.append(s[j + start]);
            }
            freq.addValue(pattern.toString());
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }

    ItemResponseVector[] responseData = new ItemResponseVector[freq.getUniqueCount()];
    Iterator<Comparable<?>> iter = freq.valuesIterator();
    int index = 0;

    // Create one ItemResponseVector per unique response pattern.
    while (iter.hasNext()) {
        Comparable<?> value = iter.next();
        String responseString = value.toString();

        // Each character of the pattern is one item response digit.
        int n = responseString.length();
        byte[] rv = new byte[n];
        for (int i = 0; i < n; i++) {
            rv[i] = Byte.parseByte(String.valueOf(responseString.charAt(i)));
        }

        // Weight each unique pattern by how often it occurred.
        responseData[index] = new ItemResponseVector(rv, (double) freq.getCount(value));
        index++;
    }
    return responseData;
}

From source file:com.ggvaidya.scinames.summary.NameStabilityView.java

public void init() {
    Project project = projectView.getProject();

    // What do we actually need to do?
    boolean flag_calculateNameSimilarity = (toCalculate & NAME_SIMILARITY) == NAME_SIMILARITY;
    boolean flag_calculateClusterSimilarity = (toCalculate & CLUSTER_SIMILARITY) == CLUSTER_SIMILARITY;
    boolean flag_calculateCircumscriptionSimilarity = (toCalculate
            & CIRCUMSCRIPTIONAL_SIMILARITY) == CIRCUMSCRIPTIONAL_SIMILARITY;

    // Setup stage.
    stage.setTitle("Name stability between " + project.getDatasets().size() + " datasets");

    // Setup table.
    controller.getTableEditableProperty().set(false);
    //controller.setTableColumnResizeProperty(TableView.CONSTRAINED_RESIZE_POLICY);
    ObservableList<TableColumn> cols = controller.getTableColumnsProperty();
    cols.clear();//from   w ww  .  j  a  v a  2  s.c om

    // Precalculating.
    Table<Dataset, String, String> precalc = HashBasedTable.create();

    // Set up columns.
    cols.add(createTableColumnFromPrecalc(precalc, "dataset"));
    cols.add(createTableColumnFromPrecalc(precalc, "date"));
    cols.add(createTableColumnFromPrecalc(precalc, "year"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_binomial"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "count_monotypic_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "names_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "names_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_added"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_added_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "species_deleted"));
    //cols.add(createTableColumnFromPrecalc(precalc, "species_deleted_list"));
    cols.add(createTableColumnFromPrecalc(precalc, "mean_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "median_binomials_per_genera"));
    cols.add(createTableColumnFromPrecalc(precalc, "mode_binomials_per_genera_list"));

    /* All them stability calculations */
    if (flag_calculateNameSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_prev"));

        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_next"));

        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_first"));

        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_last"));
    }

    if (flag_calculateClusterSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_prev"));

        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_next"));

        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_first"));

        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_last"));
    }

    if (flag_calculateCircumscriptionSimilarity) {
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_prev"));

        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_next"));

        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_first"));

        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_this"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_union"));
        cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_last"));
    }

    Set<String> recognitionColumns = new HashSet<>();

    // Calculate binomials per dataset.
    Map<Name, Set<Dataset>> datasetsPerName = new HashMap<>();

    // Prepare to loop!
    List<Dataset> checklists = project.getChecklists();

    // BIRD HACK! Include all datasets!
    // checklists = project.getDatasets();

    // Set table items. We're only interested in checklists, because
    // there's no such thing as "name stability" between non-checklist datasets.
    controller.getTableItemsProperty().set(FXCollections.observableArrayList(checklists));

    List<Dataset> prevChecklists = new LinkedList<>();
    Dataset firstChecklist = checklists.get(0);
    Dataset lastChecklist = checklists.get(checklists.size() - 1);

    // TODO: This used to be prevDataset, but prevChecklist makes a lot more sense, since we
    // want to compare checklists with each other, ignoring datasets. Would be nice if someone
    // with copious free time could look over the calculations and make sure they don't assume
    // that the previous checklist is also the previous dataset?
    Dataset prevChecklist = null;

    int index = -1;
    for (Dataset ds : checklists) {
        index++;

        Dataset nextChecklist = (index < (checklists.size() - 1) ? checklists.get(index + 1) : null);

        precalc.put(ds, "dataset", ds.getName());
        precalc.put(ds, "date", ds.getDate().asYYYYmmDD("-"));
        precalc.put(ds, "year", ds.getDate().getYearAsString());

        Set<Name> recognizedBinomials = project.getRecognizedNames(ds).stream().flatMap(n -> n.asBinomial())
                .collect(Collectors.toSet());
        precalc.put(ds, "count_binomial", String.valueOf(recognizedBinomials.size()));

        Set<Name> recognizedGenera = recognizedBinomials.stream().flatMap(n -> n.asGenus())
                .collect(Collectors.toSet());
        precalc.put(ds, "count_genera", String.valueOf(recognizedGenera.size()));
        precalc.put(ds, "mean_binomials_per_genera",
                new BigDecimal(((double) recognizedBinomials.size()) / recognizedGenera.size())
                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());

        Map<Name, List<Name>> countBinomialsPerGenus = recognizedBinomials.stream()
                // Eliminate names that have zero (or more than one?!) genus name.
                .filter(n -> (n.asGenus().count() == 1))
                .collect(Collectors.groupingBy(n -> n.asGenus().findAny().get()));

        /*
        LOGGER.info("Debugging: list of " + recognizedGenera.size() + " genera: " + 
           recognizedGenera.stream().map(n -> n.getFullName()).collect(Collectors.joining(", "))
        );
        */

        precalc.put(ds, "count_monotypic_genera", String.valueOf(countBinomialsPerGenus.entrySet().stream()
                .filter(entry -> new HashSet<>(entry.getValue()).size() == 1).count()));

        /*
        LOGGER.info("Debugging: list of monotypic genera: " + 
           countBinomialsPerGenus.entrySet().stream()
              .filter(entry -> new HashSet<>(entry.getValue()).size() == 1)
              .map(entry -> entry.getKey().getFullName())
              .collect(Collectors.joining(", "))
        );
        */

        // Species added and deleted
        Set<Name> namesAdded = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.ADDITION))
                .flatMap(ch -> ch.getToStream()).collect(Collectors.toSet());
        Set<Name> namesDeleted = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.DELETION))
                .flatMap(ch -> ch.getFromStream()).collect(Collectors.toSet());

        // TODO: This isn't so useful -- the more useful measure would be the number of all species added
        // and all species deleted, making sure there isn't a cluster-al overlap.
        precalc.put(ds, "names_added", String.valueOf(namesAdded.size()));
        //precalc.put(ds, "names_added_list", namesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));
        precalc.put(ds, "names_deleted", String.valueOf(namesDeleted.size()));
        //precalc.put(ds, "names_deleted_list", namesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));

        // Eliminate names that have been added, but were previously recognized at the species level.
        Set<Name> speciesAdded = namesAdded;
        if (prevChecklist != null) {
            Set<Name> prevRecognizedNames = project.getNameClusterManager()
                    .getClusters(project.getRecognizedNames(prevChecklist)).stream()
                    .flatMap(nc -> nc.getNames().stream()).collect(Collectors.toSet());
            speciesAdded = namesAdded.stream().filter(n -> !prevRecognizedNames.contains(n))
                    .collect(Collectors.toSet());
        }

        // Eliminate names that are still represented in the checklist by a species cluster.
        // (Note that this includes cases where a subspecies is removed, but another subspecies
        // or the nominal species is still recognized!)
        Set<Name> currentlyRecognizedBinomialNames = project.getNameClusterManager()
                .getClusters(project.getRecognizedNames(ds)).stream().flatMap(nc -> nc.getNames().stream())
                .flatMap(n -> n.asBinomial()).collect(Collectors.toSet());
        Set<Name> speciesDeleted = namesDeleted.stream()
                .filter(n -> !n.asBinomial().anyMatch(bn -> currentlyRecognizedBinomialNames.contains(bn)))
                .collect(Collectors.toSet());

        precalc.put(ds, "species_added", String.valueOf(speciesAdded.size()));
        precalc.put(ds, "species_added_list",
                speciesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));
        precalc.put(ds, "species_deleted", String.valueOf(speciesDeleted.size()));
        precalc.put(ds, "species_deleted_list",
                speciesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", ")));

        // Measures of species per genera
        java.util.Map<String, Set<Name>> binomialsPerGenera = recognizedBinomials.stream()
                .collect(Collectors.toMap(n -> n.getGenus(), n -> {
                    Set<Name> set = new HashSet<Name>();
                    set.add(n);
                    return set;
                }, (a, b) -> {
                    a.addAll(b);
                    return a;
                }));

        List<Integer> binomialsPerGeneraCounts = binomialsPerGenera.values().stream().map(set -> set.size())
                .sorted().collect(Collectors.toList());

        Frequency freq = new Frequency();
        for (String genus : binomialsPerGenera.keySet()) {
            // Blech.
            for (Name binom : binomialsPerGenera.get(genus)) {
                freq.addValue(genus);
            }
        }
        List<Comparable<?>> modeGenera = freq.getMode();
        precalc.put(ds, "mode_binomials_per_genera_list",
                modeGenera.stream().map(o -> o.toString() + ": " + freq.getCount(o) + " binomials")
                        .collect(Collectors.joining("; ")));

        double[] binomialsPerGeneraCountsAsDouble = binomialsPerGeneraCounts.stream()
                .mapToDouble(Integer::doubleValue).toArray();
        Median median = new Median();
        precalc.put(ds, "median_binomials_per_genera",
                String.valueOf(median.evaluate(binomialsPerGeneraCountsAsDouble)));

        if (firstChecklist == null) {
            //            precalc.put(ds, "names_identical_to_first", "NA");
            //            precalc.put(ds, "names_identical_to_first_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_first",
                        String.valueOf(getBinomialNamesIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "names_identical_to_first_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN)
                                        .toPlainString());
                precalc.put(ds, "names_identical_to_first_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / getBinomialNamesUnion(project, ds, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_first_pc_first",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size()
                                / getBinomialNamesUnion(project, firstChecklist, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials)
                        .size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException(
                            "We have " + clustersForDataset + " clusters for this dataset, but "
                                    + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_first",
                        String.valueOf(getBinomialClustersIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "clusters_identical_to_first_pc_this",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                        / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_first_pc_union",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                        / getBinomialClustersUnion(project, ds, firstChecklist).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_first_pc_first", new BigDecimal(
                        (double) getBinomialClustersIntersection(project, ds, firstChecklist).size()
                                / getBinomialClustersUnion(project, firstChecklist, firstChecklist).size()
                                * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_first", String
                        .valueOf(getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_first_pc_this", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_first_pc_union", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, firstChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_first_pc_first", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, firstChecklist, firstChecklist).size()
                                * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        if (lastChecklist == null) {
            //            precalc.put(ds, "names_identical_to_first", "NA");
            //            precalc.put(ds, "names_identical_to_first_pc", "NA");
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_last",
                        String.valueOf(getBinomialNamesIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "names_identical_to_last_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN)
                                        .toPlainString());
                precalc.put(ds, "names_identical_to_last_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / getBinomialNamesUnion(project, ds, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_last_pc_last",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size()
                                / getBinomialNamesUnion(project, lastChecklist, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials)
                        .size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException(
                            "We have " + clustersForDataset + " clusters for this dataset, but "
                                    + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_last",
                        String.valueOf(getBinomialClustersIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "clusters_identical_to_last_pc_this",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                        / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_last_pc_union",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                        / getBinomialClustersUnion(project, ds, lastChecklist).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_last_pc_last", new BigDecimal(
                        (double) getBinomialClustersIntersection(project, ds, lastChecklist).size()
                                / getBinomialClustersUnion(project, lastChecklist, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_last", String
                        .valueOf(getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_last_pc_this",
                        new BigDecimal(
                                (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                        / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_last_pc_union", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, lastChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_last_pc_last", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, lastChecklist, lastChecklist).size()
                                * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        if (prevChecklist == null) {
            //            precalc.put(ds, "names_identical_to_prev", "NA");
            //            precalc.put(ds, "names_identical_to_prev_pc", "NA");            
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_prev",
                        String.valueOf(getBinomialNamesIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "names_identical_to_prev_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN)
                                        .toPlainString());
                precalc.put(ds, "names_identical_to_prev_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / getBinomialNamesUnion(project, ds, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_prev_pc_prev",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size()
                                / getBinomialNamesUnion(project, prevChecklist, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials)
                        .size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException(
                            "We have " + clustersForDataset + " clusters for this dataset, but "
                                    + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_prev",
                        String.valueOf(getBinomialClustersIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "clusters_identical_to_prev_pc_this",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                        / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_prev_pc_union",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                        / getBinomialClustersUnion(project, ds, prevChecklist).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_prev_pc_prev", new BigDecimal(
                        (double) getBinomialClustersIntersection(project, ds, prevChecklist).size()
                                / getBinomialClustersUnion(project, prevChecklist, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_prev", String
                        .valueOf(getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_this",
                        new BigDecimal(
                                (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                        / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_union", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_prev_pc_prev", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, prevChecklist, prevChecklist).size()
                                * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            // FYI, getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() should always be equal to the number of species in the dataset.
        }

        if (nextChecklist == null) {
            //         precalc.put(ds, "names_identical_to_prev", "NA");
            //         precalc.put(ds, "names_identical_to_prev_pc", "NA");            
        } else {
            if (flag_calculateNameSimilarity) {
                precalc.put(ds, "names_identical_to_next",
                        String.valueOf(getBinomialNamesIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "names_identical_to_next_pc_this",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN)
                                        .toPlainString());
                precalc.put(ds, "names_identical_to_next_pc_union",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / getBinomialNamesUnion(project, ds, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "names_identical_to_next_pc_next",
                        new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size()
                                / getBinomialNamesUnion(project, nextChecklist, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateClusterSimilarity) {
                int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials)
                        .size();
                if (clustersForDataset != recognizedBinomials.size()) {
                    throw new RuntimeException(
                            "We have " + clustersForDataset + " clusters for this dataset, but "
                                    + recognizedBinomials.size() + " recognized binomials. What?");
                }
                precalc.put(ds, "clusters_identical_to_next",
                        String.valueOf(getBinomialClustersIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "clusters_identical_to_next_pc_this",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                        / getBinomialClustersUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_next_pc_union",
                        new BigDecimal(
                                (double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                        / getBinomialClustersUnion(project, ds, nextChecklist).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "clusters_identical_to_next_pc_next", new BigDecimal(
                        (double) getBinomialClustersIntersection(project, ds, nextChecklist).size()
                                / getBinomialClustersUnion(project, nextChecklist, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }

            if (flag_calculateCircumscriptionSimilarity) {
                precalc.put(ds, "circumscriptions_identical_to_next", String
                        .valueOf(getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()));
                precalc.put(ds, "circumscriptions_identical_to_next_pc_this",
                        new BigDecimal(
                                (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                        / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100)
                                                .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_next_pc_union", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, ds, nextChecklist).size() * 100)
                                        .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
                precalc.put(ds, "circumscriptions_identical_to_next_pc_next", new BigDecimal(
                        (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size()
                                / getBinomialTaxonConceptsUnion(project, nextChecklist, nextChecklist).size()
                                * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString());
            }
        }

        /*
        // For the visualization thingie.
        int total = prevChecklists.size();
        List<Integer> counts = new LinkedList<>();
        for(Name name: recognizedBinomials) {
           int prevRecognized = 0;
                   
           if(!datasetsPerName.containsKey(name)) {
              datasetsPerName.put(name, new HashSet<>());
           } else {
              prevRecognized = datasetsPerName.get(name).size();
           }
                   
           datasetsPerName.get(name).add(ds);
           counts.add(
              (int)(
          ((double)prevRecognized)/total*100
              )
           );
        }
                
        Map<Integer, List<Integer>> countsByPercentage = counts.stream().sorted().collect(Collectors.groupingBy(n -> (int)(n/10)*10));
        for(int percentage: countsByPercentage.keySet()) {
           precalc.put(ds, "previously_recognized_" + percentage + "pc", String.valueOf(countsByPercentage.get(percentage).size()));   
           recognitionColumns.add("previously_recognized_" + percentage + "pc");
        }
        prevChecklists.add(ds);
        */

        // Set up the previous checklist for the next loop.
        prevChecklist = ds;
    }

    /*
    LinkedList<String> recognitionColumnsList = new LinkedList<>(recognitionColumns);
    recognitionColumnsList.sort(null);      
    for(String colName: recognitionColumnsList) {
       cols.add(createTableColumnFromPrecalc(precalc, colName));
    }*/
}

From source file:org.apache.solr.client.solrj.io.eval.FrequencyTableEvaluator.java

/**
 * Builds a frequency table from a single numeric input.
 *
 * <p>Accepts exactly one argument: either a {@code List} of numbers or a single
 * number (treated as a one-element list). Each value is tallied as a {@code long}
 * (fractional parts are truncated by {@code Number.longValue()}). Returns one
 * {@code Tuple} per distinct value, carrying its count, cumulative frequency,
 * cumulative percentage, and percentage.
 *
 * @param values evaluated operand values; must contain exactly one non-null entry
 * @return a {@code List<Tuple>} of histogram bins, or {@code null} if any input is null
 * @throws IOException if the argument count is not exactly one
 */
@Override
public Object doWork(Object... values) throws IOException {
    // A null anywhere in the inputs short-circuits the whole evaluation.
    if (Arrays.stream(values).anyMatch(item -> null == item)) {
        return null;
    }

    List<?> sourceValues;

    if (values.length == 1) {
        // A bare scalar is promoted to a one-element list.
        sourceValues = values[0] instanceof List<?> ? (List<?>) values[0] : Arrays.asList(values[0]);
    } else {
        // NOTE(review): the message says "at least one" but the check above requires
        // exactly one, and %d reports containedEvaluators.size() rather than
        // values.length — confirm intended wording before changing user-facing text.
        throw new IOException(
                String.format(Locale.ROOT, "Invalid expression %s - expecting at least one value but found %d",
                        toExpression(constructingFactory), containedEvaluators.size()));
    }

    Frequency frequency = new Frequency();

    for (Object o : sourceValues) {
        Number number = (Number) o;
        frequency.addValue(number.longValue());
    }

    List<Tuple> histogramBins = new ArrayList<>();

    // valuesIterator() yields the distinct tallied values; use the parameterized
    // type it declares instead of a raw Iterator.
    Iterator<Comparable<?>> iterator = frequency.valuesIterator();

    while (iterator.hasNext()) {
        Long value = (Long) iterator.next();
        Map<String, Number> map = new HashMap<>();
        map.put("value", value); // already a Long; no need to unbox and rebox
        map.put("count", frequency.getCount(value));
        map.put("cumFreq", frequency.getCumFreq(value));
        map.put("cumPct", frequency.getCumPct(value));
        map.put("pct", frequency.getPct(value));
        histogramBins.add(new Tuple(map));
    }
    return histogramBins;
}