Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics addValue

List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue

Introduction

On this page you can find usage examples for the addValue method of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.

Prototype

public void addValue(double v) 

Source Link

Document

Adds the value to the dataset.

Usage

From source file:net.adamjak.thomas.graph.application.run.TestRunner.java

/**
 * Writes the per-graph timing summary of a test run to {@code outputFile} and, when requested,
 * the raw per-run measurements to a sibling file named {@code raw_<outputFile name>}.
 * <p>
 * The output format is chosen from the last dot-separated part of the output file name:
 * {@code ods} produces a spreadsheet (mean, standard deviation, minimum, maximum), anything else
 * produces CSV text that additionally contains confidence intervals and the same statistics with
 * extremes removed.
 * <p>
 * {@code results} must contain a {@code "testType"} entry and a {@code "resultsData"} entry. For
 * {@code ALL_ALGORITHMS} and {@code ONE_ALGORITHM_START_IN_EVERY_VERTEX} the data is indexed
 * {@code [run][graph][algorithm or vertex]}; for every other test type it is indexed
 * {@code [run][graph]}.
 *
 * @param results the collected results of the test run
 * @param rawData whether the raw measurements should be written as well
 */
private void save(Map<String, Object> results, boolean rawData) {
    SnarkTestTypes testType = (SnarkTestTypes) results.get("testType");
    boolean perAlgorithm = testType == SnarkTestTypes.ALL_ALGORITHMS;
    boolean perVertex = testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX;

    // Normalise to [run][graph][variant] so that one code path serves every test type.
    Object resultsData = results.get("resultsData");
    GraphTestResult[][][] measurements = (perAlgorithm || perVertex)
            ? (GraphTestResult[][][]) resultsData
            : wrapSingleVariant((GraphTestResult[][]) resultsData);

    String[] nameParts = this.outputFile.getName().split("\\.");
    boolean spreadsheet = "ods".equalsIgnoreCase(nameParts[nameParts.length - 1]);
    File rawFile = new File(this.outputFile.getParent(), "raw_" + this.outputFile.getName());

    String keyHeader;
    String rawHeader;
    if (perAlgorithm) {
        keyHeader = "Algorithm,Graph ID";
        rawHeader = "Class,Run,Graph,Time";
    } else if (perVertex) {
        keyHeader = "Graph ID,Start vertex";
        rawHeader = "Run,Graph,Vertex,Time";
    } else {
        keyHeader = "Graph ID";
        rawHeader = testType == SnarkTestTypes.ALGORITHM_COMPARATION ? "Run,Graph,Time,Class" : "Run,Graph,Time";
    }

    if (spreadsheet) {
        String[] columnNames = (keyHeader + ",Avarage time,Standard deviation,Minimum,Maximum").split(",");
        saveRowsAsSpreadsheet(buildSummaryRows(measurements, testType, false), columnNames, this.outputFile);

        if (rawData) {
            // Raw data goes to its own file; writing it to outputFile would replace the summary.
            saveRowsAsSpreadsheet(buildRawRows(measurements, testType), rawHeader.split(","), rawFile);
        }
    } else {
        // The group header leaves one empty cell per key column before "All data".
        String groupHeader = ((perAlgorithm || perVertex) ? ",," : ",")
                + "All data,,,,,Data without extremes,,,,,";
        String columnHeader = keyHeader
                + ",Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval"
                + ",Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval";

        this.saveStringIntoFile(this.outputFile, rowsToCsv(new String[] { groupHeader, columnHeader },
                buildSummaryRows(measurements, testType, true)));

        if (rawData) {
            this.saveStringIntoFile(rawFile,
                    rowsToCsv(new String[] { rawHeader }, buildRawRows(measurements, testType)));
        }
    }
}

/** Wraps {@code [run][graph]} results into {@code [run][graph][1]} so they can be handled like 3D data. */
private GraphTestResult[][][] wrapSingleVariant(GraphTestResult[][] perRunAndGraph) {
    GraphTestResult[][][] wrapped = new GraphTestResult[perRunAndGraph.length][][];
    for (int run = 0; run < perRunAndGraph.length; run++) {
        wrapped[run] = new GraphTestResult[perRunAndGraph[run].length][];
        for (int graph = 0; graph < perRunAndGraph[run].length; graph++) {
            wrapped[run][graph] = new GraphTestResult[] { perRunAndGraph[run][graph] };
        }
    }
    return wrapped;
}

/**
 * Builds one summary row per (variant, graph) pair: the key cells followed by the statistics of
 * {@code "timeInSeconds"} over all runs. Rows are ordered by variant first, then by graph.
 */
private Object[][] buildSummaryRows(GraphTestResult[][][] measurements, SnarkTestTypes testType,
        boolean extended) {
    boolean perAlgorithm = testType == SnarkTestTypes.ALL_ALGORITHMS;
    boolean perVertex = testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX;

    // Dimensions are taken from the first run / first graph; empty input yields no rows.
    int graphCount = measurements.length == 0 ? 0 : measurements[0].length;
    int variantCount = graphCount == 0 ? 0 : measurements[0][0].length;

    Object[][] rows = new Object[variantCount * graphCount][];
    int row = 0;
    for (int variant = 0; variant < variantCount; variant++) {
        for (int graph = 0; graph < graphCount; graph++) {
            Object[] keys;
            if (perAlgorithm) {
                Class<?> algorithm = (Class<?>) measurements[0][0][variant].getValue("algorithmClass");
                keys = new Object[] { algorithm.getSimpleName(), graph };
            } else if (perVertex) {
                keys = new Object[] { graph, variant };
            } else {
                keys = new Object[] { graph };
            }

            Object[] statistics = extended
                    ? extendedStatisticsCells(measurements, graph, variant)
                    : basicStatisticsCells(measurements, graph, variant);
            rows[row++] = concatCells(keys, statistics);
        }
    }
    return rows;
}

/** Returns mean, standard deviation, minimum and maximum of the run times of one (graph, variant). */
private Object[] basicStatisticsCells(GraphTestResult[][][] measurements, int graph, int variant) {
    SummaryStatistics summary = new SummaryStatistics();
    for (int run = 0; run < measurements.length; run++) {
        summary.addValue((double) measurements[run][graph][variant].getValue("timeInSeconds"));
    }
    return new Object[] { summary.getMean(), summary.getStandardDeviation(), summary.getMin(),
            summary.getMax() };
}

/**
 * Returns mean, standard deviation, minimum, maximum and confidence interval of the run times of
 * one (graph, variant), followed by the same five values for the data without extremes.
 */
private Object[] extendedStatisticsCells(GraphTestResult[][][] measurements, int graph, int variant) {
    DescriptiveStatistics all = new DescriptiveStatistics();
    for (int run = 0; run < measurements.length; run++) {
        all.addValue((double) measurements[run][graph][variant].getValue("timeInSeconds"));
    }
    DescriptiveStatistics withoutExtremes = StatisticsUtils.statisticsWithoutExtremes(all,
            StatisticsUtils.GrubbsLevel.L005);

    return new Object[] { all.getMean(), all.getStandardDeviation(), all.getMin(), all.getMax(),
            StatisticsUtils.getConfidenceInterval(all, StatisticsUtils.NormCritical.U0050),
            withoutExtremes.getMean(), withoutExtremes.getStandardDeviation(), withoutExtremes.getMin(),
            withoutExtremes.getMax(),
            StatisticsUtils.getConfidenceInterval(withoutExtremes, StatisticsUtils.NormCritical.U0050) };
}

/** Builds one raw row per measurement, ordered by run, then graph, then variant. */
private Object[][] buildRawRows(GraphTestResult[][][] measurements, SnarkTestTypes testType) {
    boolean perAlgorithm = testType == SnarkTestTypes.ALL_ALGORITHMS;
    boolean perVertex = testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX;
    boolean comparison = testType == SnarkTestTypes.ALGORITHM_COMPARATION;

    // Count the actual number of measurements so that ragged arrays neither overflow nor leave gaps.
    int total = 0;
    for (GraphTestResult[][] run : measurements) {
        for (GraphTestResult[] graph : run) {
            total += graph.length;
        }
    }

    Object[][] rows = new Object[total][];
    int row = 0;
    for (int i = 0; i < measurements.length; i++) {
        for (int j = 0; j < measurements[i].length; j++) {
            for (int k = 0; k < measurements[i][j].length; k++) {
                GraphTestResult measurement = measurements[i][j][k];
                if (perAlgorithm) {
                    rows[row] = new Object[] { measurement.getValue("algorithmClass"), i, j,
                            measurement.getValue("time") };
                } else if (perVertex) {
                    rows[row] = new Object[] { i, j, k, measurement.getValue("time") };
                } else if (comparison) {
                    rows[row] = new Object[] { i, j, measurement.getValue("time"),
                            ((Class<?>) measurement.getValue(GraphTestResult.SNARK_TESTER_CLASS_KEY))
                                    .getSimpleName() };
                } else {
                    rows[row] = new Object[] { i, j, measurement.getValue("time") };
                }
                row++;
            }
        }
    }
    return rows;
}

/** Joins two cell arrays into one row. */
private Object[] concatCells(Object[] first, Object[] second) {
    Object[] joined = new Object[first.length + second.length];
    System.arraycopy(first, 0, joined, 0, first.length);
    System.arraycopy(second, 0, joined, first.length, second.length);
    return joined;
}

/** Renders header lines and rows as comma-separated text, one line each, terminated by {@code \n}. */
private String rowsToCsv(String[] headerLines, Object[][] rows) {
    StringBuilder csv = new StringBuilder();
    for (String headerLine : headerLines) {
        csv.append(headerLine).append("\n");
    }
    for (Object[] cells : rows) {
        for (int column = 0; column < cells.length; column++) {
            if (column > 0) {
                csv.append(",");
            }
            csv.append(cells[column]);
        }
        csv.append("\n");
    }
    return csv.toString();
}

/** Saves the rows as a spreadsheet; an {@link IOException} is printed and not propagated. */
private void saveRowsAsSpreadsheet(Object[][] rows, String[] columnNames, File target) {
    try {
        SpreadSheet.createEmpty(new JTable(rows, columnNames).getModel()).saveAs(target);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:gobblin.salesforce.SalesforceSource.java

/**
 * Derives partition boundaries from a histogram so that each partition holds roughly
 * {@code totalRecordCount / maxPartitions} records.
 * <p>
 * Histogram groups are accumulated in order. A partition is closed once its record count reaches
 * the interval; a group that would push the running count to at least twice the interval starts
 * a partition of its own. The final boundary is always {@code expectedHighWatermark}.
 *
 * @param histogram the record-count histogram to partition
 * @param maxPartitions the divisor used to compute the target records per partition
 * @param expectedHighWatermark the value used as the last partition point
 * @return the partition points joined with commas
 */
String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
    final long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions,
            RoundingMode.CEILING);
    final int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    final List<String> partitionPoints = new ArrayList<>();
    final DescriptiveStatistics statistics = new DescriptiveStatistics();

    // Number of records collected for the partition currently being filled.
    int pending = 0;
    for (HistogramGroup bucket : histogram.getGroups()) {
        if (pending == 0) {
            // Nothing collected yet: this bucket opens a new partition.
            partitionPoints
                    .add(Utils.toDateTimeFormat(bucket.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        final boolean wouldDoubleInterval = pending != 0 && pending + bucket.count >= 2 * interval;
        if (wouldDoubleInterval) {
            // Close the current partition and let this bucket start the next one.
            statistics.addValue(pending);
            partitionPoints
                    .add(Utils.toDateTimeFormat(bucket.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            pending = bucket.count;
        } else {
            // The bucket still fits into the current partition.
            pending += bucket.count;
        }

        if (pending >= interval) {
            // The partition is full: record its size and start fresh with the next bucket.
            statistics.addValue(pending);
            pending = 0;
        }
    }

    final String highWatermark = Long.toString(expectedHighWatermark);
    if (pending == 0) {
        // The last bucket closed a partition: replace the last point with the global high watermark.
        partitionPoints.set(partitionPoints.size() - 1, highWatermark);
    } else {
        // A partially filled partition remains: record it and end it at the global high watermark.
        statistics.addValue(pending);
        partitionPoints.add(highWatermark);
    }

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    final String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}

From source file:knop.utils.stats.DataSet.java

/**
 * Gets the column statistics./*from  w  w w.  j  av  a2s.c  o  m*/
 *
 * @param column the column
 * @return the column statistics
 */
public DescriptiveStatistics getColumnStatistics(String column) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i != getColumnSize(column); i++) {

        Object value = getValue(column, i);
        double doubleValue;
        try {
            doubleValue = (Double) value;
            stats.addValue(doubleValue);
        } catch (Exception e) {
        }
    }
    return stats;
}

From source file:com.linuxbox.enkive.statistics.consolidation.EmbeddedConsolidator.java

/**
 * Consolidates the values found at {@code dataPath} in each service data map, once per
 * consolidation method of {@code keyDef}, and stores the result on {@code consolidatedData}.
 * Does nothing when {@code keyDef} defines no methods.
 */
@Override
protected void consolidateMaps(Map<String, Object> consolidatedData, List<Map<String, Object>> serviceData,
        ConsolidationKeyHandler keyDef, LinkedList<String> dataPath) {
    if (keyDef.getMethods() == null) {
        return;
    }

    Map<String, Object> statConsolidatedData = new HashMap<String, Object>();

    // Non-point keys are additionally consolidated with the sum method.
    Collection<String> methods = new LinkedList<String>(keyDef.getMethods());
    if (!keyDef.isPoint()) {
        methods.add(CONSOLIDATION_SUM);
    }

    for (String method : methods) {
        // Point keys are read from the entry named after the method, all others from the sum entry.
        LinkedList<String> valuePath = new LinkedList<String>(dataPath);
        valuePath.add(keyDef.isPoint() ? method : CONSOLIDATION_SUM);

        DescriptiveStatistics statsMaker = new DescriptiveStatistics();
        for (Map<String, Object> dataMap : serviceData) {
            Object rawValue = getDataVal(dataMap, valuePath);
            if (rawValue == null) {
                continue;
            }
            double sample = statToDouble(rawValue);
            // Only values greater than -1 are added to the statistics.
            if (sample > -1) {
                statsMaker.addValue(sample);
            }
        }

        // store in map if method is valid
        methodMapBuilder(method, statsMaker, statConsolidatedData);
    }

    // store stat methods' data on main consolidated map
    putOnPath(dataPath, consolidatedData, statConsolidatedData);
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java

/**
 * Transitivity score is a collection of scores computed for each pair of nodes A and B with
 * multiple paths between them. A single score for such a pair is a ratio of the longest
 * path from A to B and the shortest path.
 * <p/>/*from w  w w.  ja v  a  2  s.com*/
 * It uses Bellman-Ford algorithm to compute the shortest and longest paths
 *
 * @param graph graph (must be DAG)
 * @return statistics
 */
private static DescriptiveStatistics computeTransitivityScores(Graph graph) {
    // find all out-degree > 1 nodes
    Set<Node> sourceNodes = new HashSet<>();
    Set<Node> targetNodes = new HashSet<>();
    for (Node n : graph) {
        if (n.getOutDegree() > 1) {
            sourceNodes.add(n);
        }

        if (n.getInDegree() > 1) {
            targetNodes.add(n);
        }
    }

    FileSourceDGS source = new FileSourceDGS();
    source.addSink(graph);

    DescriptiveStatistics result = new DescriptiveStatistics();

    for (Node sourceNode : sourceNodes) {
        // set positive weight first
        for (Edge e : graph.getEdgeSet()) {
            e.setAttribute(WEIGHT, 1.0);
        }

        BellmanFord bfShortest = new BellmanFord(WEIGHT, sourceNode.getId());
        bfShortest.init(graph);
        bfShortest.compute();

        // now negative weight for longest-path
        for (Edge e : graph.getEdgeSet()) {
            e.setAttribute(WEIGHT, -1.0);
        }

        BellmanFord bfLongest = new BellmanFord(WEIGHT, sourceNode.getId());
        bfLongest.init(graph);
        bfLongest.compute();

        for (Node targetNode : targetNodes) {
            Path shortestPath = bfShortest.getShortestPath(targetNode);
            Path longestPath = bfLongest.getShortestPath(targetNode);

            int shortestPathLength = shortestPath.getEdgeCount();
            int longestPathLength = longestPath.getEdgeCount();

            if (shortestPathLength == 1 && longestPathLength > 1) {
                // update statistics
                result.addValue((double) longestPathLength / (double) shortestPathLength);
            }
        }
    }

    return result;
}

From source file:com.itemanalysis.jmetrik.stats.descriptives.DescriptiveAnalysis.java

/**
 * Reads every selected variable from the database table and accumulates its non-null values in
 * a {@link DescriptiveStatistics} instance stored in {@code data}, then publishes one table per
 * variable.
 * <p>
 * The statement and result set are closed even when reading fails part-way through.
 *
 * @throws SQLException if the query cannot be executed, a row cannot be read, or a resource
 *         cannot be closed
 */
public void summarize() throws SQLException {
    Table sqlTable = new Table(tableName.getNameForDatabase());
    SelectQuery select = new SelectQuery();
    for (VariableAttributes v : variables) {
        select.addColumn(sqlTable, v.getName().nameForDatabase());
    }

    Statement stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
    try {
        ResultSet rs = stmt.executeQuery(select.toString());
        try {
            while (rs.next()) {
                for (VariableAttributes v : variables) {
                    DescriptiveStatistics temp = data.get(v);
                    if (temp == null) {
                        temp = new DescriptiveStatistics();
                        data.put(v, temp);
                    }

                    // getDouble() returns 0 for SQL NULL, so wasNull() decides whether the value counts.
                    double value = rs.getDouble(v.getName().nameForDatabase());
                    if (!rs.wasNull()) {
                        temp.addValue(value);
                    }
                }
                updateProgress();
            }
        } finally {
            rs.close();
        }
    } finally {
        stmt.close();
    }

    for (VariableAttributes v : data.keySet()) {
        publishTable(v);
    }

}

From source file:io.hops.experiments.controller.Master.java

/**
 * Runs the block-reporting benchmark: sends the benchmark request to all slaves, collects their
 * responses, aggregates them and prints the combined result.
 *
 * @throws IOException declared by the signature; which of the called helpers raises it is not visible here
 * @throws ClassNotFoundException declared by the signature; which of the called helpers raises it is not visible here
 * @throws IllegalStateException if a received object is not a block-reporting response
 */
private void startBlockReportingCommander() throws IOException, ClassNotFoundException {
    System.out.println("Starting BlockReporting Benchmark ...");
    prompt();

    BlockReportingBenchmarkCommand.Request request = new BlockReportingBenchmarkCommand.Request(
            config.getBlockReportBenchMarkDuration(), config.getBlockReportingMinTimeBeforeNextReport(),
            config.getBlockReportingMaxTimeBeforeNextReport());
    sendToAllSlaves(request, 0/*delay*/);

    Collection<Object> replies = receiveFromAllSlaves(Integer.MAX_VALUE);

    // One accumulator per reported metric; each slave contributes a single sample to each.
    DescriptiveStatistics succeeded = new DescriptiveStatistics();
    DescriptiveStatistics failed = new DescriptiveStatistics();
    DescriptiveStatistics throughput = new DescriptiveStatistics();
    DescriptiveStatistics reportTime = new DescriptiveStatistics();
    DescriptiveStatistics newNameNodeTime = new DescriptiveStatistics();
    DescriptiveStatistics nameNodeCount = new DescriptiveStatistics();

    for (Object reply : replies) {
        if (!(reply instanceof BlockReportingBenchmarkCommand.Response)) {
            throw new IllegalStateException("Wrong response received from the client");
        }

        BlockReportingBenchmarkCommand.Response slaveResult = (BlockReportingBenchmarkCommand.Response) reply;
        succeeded.addValue(slaveResult.getSuccessfulOps());
        failed.addValue(slaveResult.getFailedOps());
        throughput.addValue(slaveResult.getSpeed());
        reportTime.addValue(slaveResult.getAvgTimePerReport());
        newNameNodeTime.addValue(slaveResult.getAvgTimeTogetNewNameNode());
        nameNodeCount.addValue(slaveResult.getNnCount());
    }

    // Counts and speed are summed across slaves; times and name-node counts are averaged.
    BlockReportBMResults combined = new BlockReportBMResults(
            config.getNamenodeCount(),
            (int) Math.floor(nameNodeCount.getMean()),
            config.getNdbNodesCount(),
            throughput.getSum(),
            succeeded.getSum(),
            failed.getSum(),
            reportTime.getMean(),
            newNameNodeTime.getMean());

    printMasterResultMessages(combined);
}

From source file:com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMADIntegrationTest.java

/**
 * Runs the outlier detector over every labelled benchmark file and prints accuracy figures.
 *
 * <p>The labels are read from {@code combined_labels.json} under {@code benchmarkRoot}; each key
 * names a data file in the same directory and each value lists the expected outliers (converted
 * with {@code STR_TO_TS}). For every data file the method
 * <ol>
 *   <li>runs an {@link OutlierRunner} over it, writing one
 *       {@code "<timestamp> <value> outlier|normal"} line per data point to {@code <name>.dat},</li>
 *   <li>builds and prints a confusion matrix for that file, and</li>
 *   <li>merges it into an overall confusion matrix that is printed at the end.</li>
 * </ol>
 *
 * @throws IOException if the label file or a plot file cannot be opened or read
 */
@Test
public void runAccuracyBenchmark() throws IOException {
    final File benchmarkDir = new File(benchmarkRoot);

    Map<String, List<String>> benchmarks;
    // Close the label stream here instead of relying on load() to do it; closing an
    // already-closed stream has no effect, so this is safe either way.
    try (FileInputStream labels = new FileInputStream(new File(benchmarkDir, "combined_labels.json"))) {
        benchmarks = JSONUtil.INSTANCE.load(labels, new TypeReference<Map<String, List<String>>>() {
        });
    }
    Assert.assertTrue(benchmarks.size() > 0);

    Map<ConfusionMatrix.ConfusionEntry, Long> overallConfusionMatrix = new HashMap<>();
    DescriptiveStatistics globalExpectedScores = new DescriptiveStatistics();
    long total = 0;
    for (Map.Entry<String, List<String>> kv : benchmarks.entrySet()) {
        File dataFile = new File(benchmarkDir, kv.getKey());
        File plotFile = new File(benchmarkDir, kv.getKey() + ".dat");
        Assert.assertTrue(dataFile.exists());
        Set<Long> expectedOutliers = Sets.newHashSet(Iterables.transform(kv.getValue(), STR_TO_TS));
        OutlierRunner runner = new OutlierRunner(outlierConfig, extractorConfigStr);

        // Single-element arrays so the anonymous callback below can update these values.
        final long[] numObservations = { 0L };
        final long[] lastTimestamp = { Long.MIN_VALUE };
        final DescriptiveStatistics timeDiffStats = new DescriptiveStatistics();
        final Map<Long, Outlier> outlierMap = new HashMap<>();

        List<Outlier> outliers;
        // try-with-resources: the plot file is flushed and closed even if the run throws.
        try (PrintWriter pw = new PrintWriter(plotFile)) {
            outliers = runner.run(dataFile, 1, EnumSet.of(Severity.SEVERE_OUTLIER),
                    new Function<Map.Entry<DataPoint, Outlier>, Void>() {
                        @Nullable
                        @Override
                        public Void apply(@Nullable Map.Entry<DataPoint, Outlier> entry) {
                            DataPoint dataPoint = entry.getKey();
                            Outlier outlier = entry.getValue();
                            pw.println(dataPoint.getTimestamp() + " " + outlier.getDataPoint().getValue() + " "
                                    + ((outlier.getSeverity() == Severity.SEVERE_OUTLIER) ? "outlier" : "normal"));
                            outlierMap.put(dataPoint.getTimestamp(), outlier);
                            numObservations[0] += 1;
                            // Long.MIN_VALUE marks "no previous point yet", so the first
                            // observation contributes no time delta.
                            if (lastTimestamp[0] != Long.MIN_VALUE) {
                                timeDiffStats.addValue(dataPoint.getTimestamp() - lastTimestamp[0]);
                            }
                            lastTimestamp[0] = dataPoint.getTimestamp();
                            return null;
                        }
                    });
        }

        total += numObservations[0];
        Set<Long> calculatedOutliers = Sets
                .newHashSet(Iterables.transform(outliers, OutlierRunner.OUTLIER_TO_TS));
        double stdDevDiff = Math.sqrt(timeDiffStats.getVariance());
        System.out.println("Running data from " + kv.getKey() + " - E[time delta]: "
                + ConfusionMatrix.timeConversion((long) timeDiffStats.getMean()) + ", StdDev[time delta]: "
                + ConfusionMatrix.timeConversion((long) stdDevDiff) + " mean: " + runner.getMean());
        Map<ConfusionMatrix.ConfusionEntry, Long> confusionMatrix = ConfusionMatrix.getConfusionMatrix(
                expectedOutliers, calculatedOutliers, numObservations[0], (long) timeDiffStats.getMean(), 3 //stdDevDiff > 30000?0:3
                , outlierMap, globalExpectedScores);

        ConfusionMatrix.printConfusionMatrix(confusionMatrix);
        overallConfusionMatrix = ConfusionMatrix.merge(overallConfusionMatrix, confusionMatrix);
    }
    System.out.println("Really ran " + total);
    ConfusionMatrix.printConfusionMatrix(overallConfusionMatrix);
    ConfusionMatrix.printStats("Global Expected Outlier Scores", globalExpectedScores);
}

From source file:com.screenslicer.core.scrape.type.ComparableNode.java

/**
 * Builds the integer feature vector ({@code scores}) used to compare {@code node} with other nodes.
 *
 * <p>The constructor makes one pass over the node's non-empty direct children, collecting
 * category counts, per-child statistics and tag information, then counts the node's non-empty
 * siblings, and finally packs everything into {@code scores}.
 *
 * <p>The helpers {@code div}, {@code mod0}, {@code distance}, {@code evenlyDivisible},
 * {@code smallestMod} and {@code toInt} are defined outside this block; their exact semantics
 * are not visible here.
 *
 * <p>NOTE(review): {@code node.parent()} is dereferenced without a null check, so a node
 * without a parent would fail here - confirm callers never pass one.
 *
 * @param node the node to describe; stored in {@code this.node}
 */
public ComparableNode(final Node node) {
    this.node = node;
    List<Node> separated = node.childNodes();
    // Counters over the node's non-empty direct children.
    int children = 0;
    int childBlocks = 0;
    int childFormatting = 0;
    int childContent = 0;
    int childItems = 0;
    int childDecoration = 0;
    int anchorChildren = 0;
    int textChildren = 0;
    int anchorTextChildren = 0;
    // Same anchor/text counters, restricted to children for which Util.isItem is true.
    int anchorChildItems = 0;
    int textChildItems = 0;
    int anchorTextChildItems = 0;
    int itemChars = 0;
    int itemAnchorChars = 0;
    // Tag bookkeeping used for the consistency features computed after the loop.
    List<String> firstChildTags = null;
    List<List<String>> orderedTags = new ArrayList<List<String>>();
    List<String> allChildTags = new ArrayList<String>();
    ArrayList<List<String>> childTags = new ArrayList<List<String>>();
    boolean childrenConsistent = true;
    String childName = null;
    boolean childrenSame = true;
    // Accumulates the sum of child string lengths in the loop; divided into an average afterwards.
    double avgChildLengthDouble = 0d;
    int nodeStrLen = Util.trimmedLen(node.toString());
    // One sample per non-empty child (the two "Item" statistics only receive item children).
    DescriptiveStatistics statAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statChars = new DescriptiveStatistics();
    DescriptiveStatistics statDescendants = new DescriptiveStatistics();
    DescriptiveStatistics statFields = new DescriptiveStatistics();
    DescriptiveStatistics statLevels = new DescriptiveStatistics();
    DescriptiveStatistics statLongestField = new DescriptiveStatistics();
    DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statTextAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statStrLen = new DescriptiveStatistics();
    DescriptiveStatistics statItemChars = new DescriptiveStatistics();
    DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics();
    for (Node child : separated) {
        // Children that Util.isEmpty reports as empty are ignored entirely.
        if (!Util.isEmpty(child)) {
            children++;
            int childStrLen = Util.trimmedLen(child.toString());
            avgChildLengthDouble += childStrLen;
            // Per-child counter; a second, aggregate counter over all children is built after the loop.
            NodeCounter counter = new NodeCounter(child);
            if (Util.isItem(child.nodeName())) {
                ++childItems;
                anchorChildItems += counter.anchors() > 0 ? 1 : 0;
                textChildItems += counter.fields() > 0 ? 1 : 0;
                anchorTextChildItems += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;
                itemChars += counter.chars();
                itemAnchorChars += counter.anchorChars();
                statItemChars.addValue(counter.chars());
                statItemAnchorChars.addValue(counter.anchorChars());
            }
            // The category checks are independent ifs, so one child may count in several categories.
            if (Util.isBlock(child.nodeName())) {
                ++childBlocks;
            }
            if (Util.isDecoration(child.nodeName())) {
                ++childDecoration;
            }
            if (Util.isFormatting(child.nodeName())) {
                ++childFormatting;
            }
            if (Util.isContent(child)) {
                ++childContent;
            }

            anchorChildren += counter.anchors() > 0 ? 1 : 0;
            textChildren += counter.fields() > 0 ? 1 : 0;
            anchorTextChildren += counter.anchors() > 0 && counter.fields() > 0 ? 1 : 0;

            statAnchorChars.addValue(counter.anchorChars());
            statAnchors.addValue(counter.anchors());
            statChars.addValue(counter.chars());
            statDescendants.addValue(counter.descendants());
            statFields.addValue(counter.fields());
            statLevels.addValue(counter.levels());
            statLongestField.addValue(counter.longestField());
            statNonAnchorChars.addValue(counter.nonAnchorChars());
            statTextAnchors.addValue(counter.textAnchors());
            statStrLen.addValue(childStrLen);

            List<String> curChildTags = counter.tags();
            allChildTags = Util.join(allChildTags, curChildTags);
            childTags.add(curChildTags);
            // childrenConsistent stays true only while every child's tags match the first child's.
            if (firstChildTags == null) {
                firstChildTags = curChildTags;
            } else if (childrenConsistent && !Util.isSame(firstChildTags, curChildTags)) {
                childrenConsistent = false;
            }

            // childrenSame stays true only while every child has the first child's node name.
            if (childName == null) {
                childName = child.nodeName();
            } else if (childrenSame && !childName.equals(child.nodeName())) {
                childrenSame = false;
            }

            // Collect each ordered tag list once (membership decided by Util.contains).
            if (!Util.contains(counter.orderedTags(), orderedTags)) {
                orderedTags.add(counter.orderedTags());
            }
        }
    }
    // Turn the accumulated length sum into an average; 0 when there are no non-empty children.
    avgChildLengthDouble = children == 0 ? 0 : avgChildLengthDouble / (double) children;
    int avgChildLength = (int) avgChildLengthDouble;
    // For each child, how many fewer tags it has than the joined tag list: average and maximum.
    double avgChildDiff = 0;
    int maxChildDiff = 0;
    for (List<String> tagList : childTags) {
        avgChildDiff += allChildTags.size() - tagList.size();
        maxChildDiff = Math.max(maxChildDiff, allChildTags.size() - tagList.size());
    }
    avgChildDiff = childTags.size() == 0 ? 0 : avgChildDiff / (double) childTags.size();

    // Consistency additionally requires that a first child exists and has at least one tag.
    childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent;

    // Aggregate counter over the full child list (distinct from the per-child counters above).
    NodeCounter counter = new NodeCounter(separated);
    // Non-empty nodes under this node's parent (this count includes the node itself if non-empty).
    int siblings = 0;
    for (Node sibling : node.parent().childNodes()) {
        if (!Util.isEmpty(sibling)) {
            siblings++;
        }
    }
    // NOTE(review): consumers presumably read scores by index, so the order of the entries
    // below should be treated as fixed - confirm before reordering or inserting.
    // Aggregate category counts, then the same counts passed through div(..., children).
    this.scores = new int[] { counter.items(), counter.blocks(), counter.decoration(), counter.formatting(),
            counter.content(), div(counter.items(), children), div(counter.blocks(), children),
            div(counter.decoration(), children), div(counter.formatting(), children),
            div(counter.content(), children),

            // Direct-child category counts and the truncated average child string length.
            childItems, childBlocks, childDecoration, childFormatting, childContent, avgChildLength,

            // Field, image and anchor totals; characters per field (divisor floored at 1).
            counter.fields(), textChildItems, counter.images(), counter.anchors(), counter.textAnchors(),
            div(counter.chars(), Math.max(1, counter.fields())), div(itemChars, Math.max(1, textChildItems)),

            // Length and character totals, raw and divided by various child/anchor counts.
            counter.longestField(), nodeStrLen, div(nodeStrLen, children), counter.anchorLen(), counter.chars(),
            itemChars, div(counter.chars(), children), div(itemChars, childItems), counter.nonAnchorChars(),
            div(counter.nonAnchorChars(), children), div(counter.nonAnchorChars(), childItems),
            div(counter.nonAnchorChars(), childBlocks), div(counter.nonAnchorChars(), childContent),
            div(counter.nonAnchorChars(), counter.anchors()),
            div(counter.nonAnchorChars(), counter.textAnchors()), counter.anchorChars(), itemAnchorChars,
            div(itemAnchorChars, anchorChildItems), div(counter.anchorChars(), counter.anchors()),
            div(counter.anchorChars(), counter.textAnchors()), div(counter.anchorChars(), children),

            // Tree shape: descendants, levels, sibling count and child count.
            counter.descendants(), counter.levels(), div(counter.descendants(), children),
            div(children, counter.levels()), siblings, children,

            // Tag-consistency features across the children.
            maxChildDiff, toInt(avgChildDiff), toInt(childrenSame), toInt(childrenConsistent),
            orderedTags.size(),

            // mod0/distance of several counts against RESULT_GROUP_LARGE and RESULT_GROUP_SMALL.
            mod0(children, RESULT_GROUP_LARGE), mod0(children, RESULT_GROUP_SMALL),
            distance(children, RESULT_GROUP_LARGE), distance(children, RESULT_GROUP_SMALL),
            mod0(childItems, RESULT_GROUP_LARGE), mod0(childItems, RESULT_GROUP_SMALL),
            distance(childItems, RESULT_GROUP_LARGE), distance(childItems, RESULT_GROUP_SMALL),
            mod0(childBlocks, RESULT_GROUP_LARGE), mod0(childBlocks, RESULT_GROUP_SMALL),
            distance(childBlocks, RESULT_GROUP_LARGE), distance(childBlocks, RESULT_GROUP_SMALL),
            mod0(childContent, RESULT_GROUP_LARGE), mod0(childContent, RESULT_GROUP_SMALL),
            distance(childContent, RESULT_GROUP_LARGE), distance(childContent, RESULT_GROUP_SMALL),
            mod0(counter.anchors(), RESULT_GROUP_LARGE), mod0(counter.anchors(), RESULT_GROUP_SMALL),
            distance(counter.anchors(), RESULT_GROUP_LARGE), distance(counter.anchors(), RESULT_GROUP_SMALL),
            mod0(anchorChildItems, RESULT_GROUP_LARGE), mod0(anchorChildItems, RESULT_GROUP_SMALL),
            distance(anchorChildItems, RESULT_GROUP_LARGE), distance(anchorChildItems, RESULT_GROUP_SMALL),
            mod0(textChildItems, RESULT_GROUP_LARGE), mod0(textChildItems, RESULT_GROUP_SMALL),
            distance(textChildItems, RESULT_GROUP_LARGE), distance(textChildItems, RESULT_GROUP_SMALL),
            mod0(counter.textAnchors(), RESULT_GROUP_LARGE), mod0(counter.textAnchors(), RESULT_GROUP_SMALL),
            distance(counter.textAnchors(), RESULT_GROUP_LARGE),
            distance(counter.textAnchors(), RESULT_GROUP_SMALL),

            // How the child and item counts relate to the total anchor count.
            Math.abs(children - counter.anchors()), Math.abs(childItems - counter.anchors()),
            evenlyDivisible(children, counter.anchors()), evenlyDivisible(childItems, counter.anchors()),
            smallestMod(children, counter.anchors()), smallestMod(childItems, counter.anchors()),

            // The same three comparisons against text anchors and the anchor/text child counters.
            Math.abs(children - counter.textAnchors()), Math.abs(childItems - counter.textAnchors()),
            Math.abs(children - anchorChildren), Math.abs(childItems - anchorChildItems),
            Math.abs(children - textChildren), Math.abs(childItems - textChildItems),
            Math.abs(children - anchorTextChildren), Math.abs(childItems - anchorTextChildItems),
            evenlyDivisible(children, counter.textAnchors()),
            evenlyDivisible(childItems, counter.textAnchors()), evenlyDivisible(children, anchorChildren),
            evenlyDivisible(childItems, anchorChildItems), evenlyDivisible(children, textChildren),
            evenlyDivisible(childItems, textChildItems), evenlyDivisible(children, anchorTextChildren),
            evenlyDivisible(childItems, anchorTextChildItems), smallestMod(children, counter.textAnchors()),
            smallestMod(childItems, counter.textAnchors()), smallestMod(children, anchorChildren),
            smallestMod(childItems, anchorChildItems), smallestMod(children, textChildren),
            smallestMod(childItems, textChildItems), smallestMod(children, anchorTextChildren),
            smallestMod(childItems, anchorTextChildItems),

            // Gaps between the all-children counters and their item-only counterparts.
            Math.abs(anchorChildren - anchorChildItems), Math.abs(textChildren - textChildItems),
            Math.abs(anchorTextChildren - anchorTextChildItems),

            // Skewness, standard deviation and mean of every per-child statistic, via toInt.
            toInt(statAnchorChars.getSkewness()), toInt(statAnchorChars.getStandardDeviation()),
            toInt(statAnchorChars.getMean()), toInt(statAnchors.getSkewness()),
            toInt(statAnchors.getStandardDeviation()), toInt(statAnchors.getMean()),
            toInt(statChars.getSkewness()), toInt(statChars.getStandardDeviation()), toInt(statChars.getMean()),
            toInt(statDescendants.getSkewness()), toInt(statDescendants.getStandardDeviation()),
            toInt(statDescendants.getMean()), toInt(statFields.getSkewness()),
            toInt(statFields.getStandardDeviation()), toInt(statFields.getMean()),
            toInt(statLevels.getSkewness()), toInt(statLevels.getStandardDeviation()),
            toInt(statLevels.getMean()), toInt(statLongestField.getSkewness()),
            toInt(statLongestField.getStandardDeviation()), toInt(statLongestField.getMean()),
            toInt(statNonAnchorChars.getSkewness()), toInt(statNonAnchorChars.getStandardDeviation()),
            toInt(statNonAnchorChars.getMean()), toInt(statStrLen.getSkewness()),
            toInt(statStrLen.getStandardDeviation()), toInt(statStrLen.getMean()),
            toInt(statTextAnchors.getSkewness()), toInt(statTextAnchors.getStandardDeviation()),
            toInt(statTextAnchors.getMean()), toInt(statItemChars.getSkewness()),
            toInt(statItemChars.getStandardDeviation()), toInt(statItemChars.getMean()),
            toInt(statItemAnchorChars.getSkewness()), toInt(statItemAnchorChars.getStandardDeviation()),
            toInt(statItemAnchorChars.getMean()), };
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step1DebateFilter.java

/**
 * Filters the debates in {@code inputDir} and writes the ones that qualify to {@code outputDir}.
 *
 * <p>Only {@code *.xml} files directly inside {@code inputDir} are read, and only debates whose
 * URL is in {@code selectedDebates} are considered. From each such debate the method keeps the
 * first-level arguments (those without a parent) whose word count lies within
 * {@code MEDIAN +/- ARGUMENT_LENGTH_PLUS_MINUS_RANGE}. The filtered debate is written, under the
 * input file's name, when the first two stances (in sorted order) together and individually
 * reach the configured minimum numbers of kept arguments.
 *
 * <p>Words are counted as whitespace-separated tokens of the trimmed text, so leading
 * whitespace does not inflate the count and blank text counts as zero words.
 *
 * @param inputDir  directory containing all debates as XML files
 * @param outputDir output directory
 * @throws IOException if a debate cannot be read or the filtered debate cannot be written
 */
public static void processData(String inputDir, File outputDir) throws IOException {
    final int lowerBoundaries = MEDIAN - ARGUMENT_LENGTH_PLUS_MINUS_RANGE;
    final int upperBoundaries = MEDIAN + ARGUMENT_LENGTH_PLUS_MINUS_RANGE;

    // read all debates and filter them
    for (File file : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) {
        Debate debate = DebateSerializer.deserializeFromXML(FileUtils.readFileToString(file, "utf-8"));

        // only selected debates
        if (!selectedDebates.contains(debate.getDebateMetaData().getUrl())) {
            continue;
        }

        Debate debateCopy = new Debate();
        debateCopy.setDebateMetaData(debate.getDebateMetaData());

        // for counting first-level arguments (those without parents) per stance;
        // a TreeMap keeps the stances in a stable, sorted order
        Map<String, Integer> argumentStancesCounts = new TreeMap<>();

        for (Argument argument : debate.getArgumentList()) {
            // hack: clean the data -- update stance for "tv" vs. "TV"
            if ("tv".equalsIgnoreCase(argument.getStance())) {
                argument.setStance("TV");
            }

            // replies (arguments with a parent) are never kept
            if (argument.getParentId() != null) {
                continue;
            }

            // Trim first: String.split keeps an empty leading token when the text starts
            // with whitespace, which would overcount by one (and "" would count as 1).
            String text = argument.getText().trim();
            int wordCount = text.isEmpty() ? 0 : text.split("\\s+").length;
            if (wordCount < lowerBoundaries || wordCount > upperBoundaries) {
                continue;
            }

            // update counts
            String stance = argument.getStance();
            Integer seen = argumentStancesCounts.get(stance);
            argumentStancesCounts.put(stance, seen == null ? 1 : seen + 1);

            // copy to the result
            debateCopy.getArgumentList().add(argument);
        }

        // number of kept first-level arguments for the first two stances (0 if absent);
        // any further stances are not taken into account
        Iterator<Map.Entry<String, Integer>> tempIter = argumentStancesCounts.entrySet().iterator();
        int val1 = tempIter.hasNext() ? tempIter.next().getValue() : 0;
        int val2 = tempIter.hasNext() ? tempIter.next().getValue() : 0;

        if ((val1 + val2) >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE
                && val1 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE
                && val2 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE) {
            System.out.println(debate.getDebateMetaData().getUrl() + "\t"
                    + debate.getDebateMetaData().getTitle() + "\t" + argumentStancesCounts);

            // write the output
            String xml = DebateSerializer.serializeToXML(debateCopy);
            FileUtils.writeStringToFile(new File(outputDir, file.getName()), xml, "utf-8");
        }
    }
}