List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue
public void addValue(double v)
From source file:net.adamjak.thomas.graph.application.run.TestRunner.java
private void save(Map<String, Object> results, boolean rawData) { SnarkTestTypes testType = (SnarkTestTypes) results.get("testType"); if (this.outputFile.getName().split("\\.")[this.outputFile.getName().split("\\.").length - 1].toLowerCase() .equals("ods")) { String[] columnNames;//from w w w.jav a 2 s .com Object[][] data; if (testType == SnarkTestTypes.ALL_ALGORITHMS) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); columnNames = String.valueOf("Algorithm,Graph ID,Avarage time,Standard deviation,Minimum,Maximum") .split(","); data = new Object[graphTestResult[0].length][6]; for (int cls = 0; cls < graphTestResult[0][0].length; cls++) { Class<?> c = (Class<?>) graphTestResult[0][0][cls].getValue("algorithmClass"); for (int graph = 0; graph < graphTestResult[0].length; graph++) { SummaryStatistics summaryStatistics = new SummaryStatistics(); for (int run = 0; run < graphTestResult.length; run++) { summaryStatistics .addValue((double) graphTestResult[run][graph][cls].getValue("timeInSeconds")); } data[graph][0] = c.getSimpleName(); data[graph][1] = graph; data[graph][2] = summaryStatistics.getMean(); data[graph][3] = summaryStatistics.getStandardDeviation(); data[graph][4] = summaryStatistics.getMin(); data[graph][5] = summaryStatistics.getMax(); } } } else if (testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); columnNames = String .valueOf("Graph ID,Start vertex,Avarage time,Standard deviation,Minimum,Maximum") .split(","); data = new Object[graphTestResult[0].length][6]; for (int vid = 0; vid < graphTestResult[0][0].length; vid++) { for (int graph = 0; graph < graphTestResult[0].length; graph++) { SummaryStatistics summaryStatistics = new SummaryStatistics(); for (int run = 0; run < graphTestResult.length; run++) { summaryStatistics .addValue((double) graphTestResult[run][graph][vid].getValue("timeInSeconds")); } 
data[graph][0] = graph; data[graph][1] = vid; data[graph][2] = summaryStatistics.getMean(); data[graph][3] = summaryStatistics.getStandardDeviation(); data[graph][4] = summaryStatistics.getMin(); data[graph][5] = summaryStatistics.getMax(); } } } else { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); columnNames = String.valueOf("Graph ID,Avarage time,Standard deviation,Minimum,Maximum").split(","); data = new Object[graphTestResult[0].length][5]; for (int graph = 0; graph < graphTestResult[0].length; graph++) { SummaryStatistics summaryStatistics = new SummaryStatistics(); for (int run = 0; run < graphTestResult.length; run++) { summaryStatistics.addValue((double) graphTestResult[run][graph].getValue("timeInSeconds")); } data[graph][0] = graph; data[graph][1] = summaryStatistics.getMean(); data[graph][2] = summaryStatistics.getStandardDeviation(); data[graph][3] = summaryStatistics.getMin(); data[graph][4] = summaryStatistics.getMax(); } } try { SpreadSheet.createEmpty(new JTable(data, columnNames).getModel()).saveAs(outputFile); } catch (IOException e) { e.printStackTrace(); } if (rawData == true) { if (testType == SnarkTestTypes.ALL_ALGORITHMS) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); columnNames = String.valueOf("Class,Run,Graph,Time").split(","); data = new Object[graphTestResult.length * graphTestResult[0].length * graphTestResult[0][0].length][4]; int row = 0; for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { for (int k = 0; k < graphTestResult[i][j].length; k++) { data[row][0] = graphTestResult[i][j][k].getValue("algorithmClass"); data[row][1] = i; data[row][2] = j; data[row][3] = graphTestResult[i][j][k].getValue("time"); row++; } } } } else if (testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); 
columnNames = String.valueOf("Run,Graph,Vertex,Time").split(","); data = new Object[graphTestResult.length * graphTestResult[0].length * graphTestResult[0][0].length][4]; int row = 0; for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { for (int k = 0; k < graphTestResult[i][j].length; k++) { data[row][0] = i; data[row][1] = j; data[row][2] = k; data[row][3] = graphTestResult[i][j][k].getValue("time"); row++; } } } } else if (testType == SnarkTestTypes.ALGORITHM_COMPARATION) { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); columnNames = String.valueOf("Run,Graph,Time,Class").split(","); data = new Object[graphTestResult.length * graphTestResult[0].length][4]; int row = 0; for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { data[row][0] = i; data[row][1] = j; data[row][2] = graphTestResult[i][j].getValue("time"); data[row][3] = ((Class<?>) graphTestResult[i][j] .getValue(GraphTestResult.SNARK_TESTER_CLASS_KEY)).getSimpleName(); row++; } } } else { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); columnNames = String.valueOf("Run,Graph,Time").split(","); data = new Object[graphTestResult.length * graphTestResult[0].length][3]; int row = 0; for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { data[row][0] = i; data[row][1] = j; data[row][2] = graphTestResult[i][j].getValue("time"); row++; } } } try { SpreadSheet.createEmpty(new JTable(data, columnNames).getModel()).saveAs(outputFile); } catch (IOException e) { e.printStackTrace(); } } } else { StringBuilder sbData = new StringBuilder(); if (testType == SnarkTestTypes.ALL_ALGORITHMS) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); sbData.append(",,All data,,,,,Data without extremes,,,,,\n"); sbData.append( "Graph ID,Graph ID,Avarage 
time,Standard deviation,Minimum,Maximum,Confidence Interval,Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval\n"); for (int cls = 0; cls < graphTestResult[0][0].length; cls++) { Class<?> c = (Class<?>) graphTestResult[0][0][cls].getValue("algorithmClass"); for (int graph = 0; graph < graphTestResult[0].length; graph++) { DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(); for (int run = 0; run < graphTestResult.length; run++) { descriptiveStatistics .addValue((double) graphTestResult[run][graph][cls].getValue("timeInSeconds")); } DescriptiveStatistics descriptiveStatisticsWithoutExtremes = StatisticsUtils .statisticsWithoutExtremes(descriptiveStatistics, StatisticsUtils.GrubbsLevel.L005); sbData.append(c.getSimpleName()); sbData.append(","); sbData.append(graph); sbData.append(","); sbData.append(descriptiveStatistics.getMean()); sbData.append(","); sbData.append(descriptiveStatistics.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatistics.getMin()); sbData.append(","); sbData.append(descriptiveStatistics.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatistics, StatisticsUtils.NormCritical.U0050)); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMean()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMin()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatisticsWithoutExtremes, StatisticsUtils.NormCritical.U0050)); sbData.append("\n"); } } } else if (testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); sbData.append(",,All data,,,,,Data without extremes,,,,,\n"); sbData.append( "Graph 
ID,Start vertex,Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval,Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval\n"); for (int vid = 0; vid < graphTestResult[0][0].length; vid++) { for (int graph = 0; graph < graphTestResult[0].length; graph++) { DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(); for (int run = 0; run < graphTestResult.length; run++) { descriptiveStatistics .addValue((double) graphTestResult[run][graph][vid].getValue("timeInSeconds")); } DescriptiveStatistics descriptiveStatisticsWithoutExtremes = StatisticsUtils .statisticsWithoutExtremes(descriptiveStatistics, StatisticsUtils.GrubbsLevel.L005); sbData.append(graph); sbData.append(","); sbData.append(vid); sbData.append(","); sbData.append(descriptiveStatistics.getMean()); sbData.append(","); sbData.append(descriptiveStatistics.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatistics.getMin()); sbData.append(","); sbData.append(descriptiveStatistics.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatistics, StatisticsUtils.NormCritical.U0050)); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMean()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMin()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatisticsWithoutExtremes, StatisticsUtils.NormCritical.U0050)); sbData.append("\n"); } } } else { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); sbData.append(",All data,,,,,Data without extremes,,,,,\n"); sbData.append( "Graph ID,Avarage time,Standard deviation,Minimum,Maximum,Confidence Interval,Avarage time,Standard deviation,Minimum,Maximum,Confidence 
Interval\n"); for (int graph = 0; graph < graphTestResult[0].length; graph++) { DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(); for (int run = 0; run < graphTestResult.length; run++) { descriptiveStatistics .addValue((double) graphTestResult[run][graph].getValue("timeInSeconds")); } DescriptiveStatistics descriptiveStatisticsWithoutExtremes = StatisticsUtils .statisticsWithoutExtremes(descriptiveStatistics, StatisticsUtils.GrubbsLevel.L005); sbData.append(graph); sbData.append(","); sbData.append(descriptiveStatistics.getMean()); sbData.append(","); sbData.append(descriptiveStatistics.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatistics.getMin()); sbData.append(","); sbData.append(descriptiveStatistics.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatistics, StatisticsUtils.NormCritical.U0050)); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMean()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getStandardDeviation()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMin()); sbData.append(","); sbData.append(descriptiveStatisticsWithoutExtremes.getMax()); sbData.append(","); sbData.append(StatisticsUtils.getConfidenceInterval(descriptiveStatisticsWithoutExtremes, StatisticsUtils.NormCritical.U0050)); sbData.append("\n"); } } this.saveStringIntoFile(this.outputFile, sbData.toString()); if (rawData == true) { StringBuilder sbRawData = new StringBuilder(); if (testType == SnarkTestTypes.ALL_ALGORITHMS) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); sbRawData.append("Class,Run,Graph,Time\n"); for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { for (int k = 0; k < graphTestResult[i][j].length; k++) { sbRawData.append(graphTestResult[i][j][k].getValue("algorithmClass")); sbRawData.append(","); 
sbRawData.append(i); sbRawData.append(","); sbRawData.append(j); sbRawData.append(","); sbRawData.append(graphTestResult[i][j][k].getValue("time")); sbRawData.append("\n"); } } } } else if (testType == SnarkTestTypes.ONE_ALGORITHM_START_IN_EVERY_VERTEX) { GraphTestResult[][][] graphTestResult = (GraphTestResult[][][]) results.get("resultsData"); sbRawData.append("Run,Graph,Vertex,Time\n"); for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { for (int k = 0; k < graphTestResult[i][j].length; k++) { sbRawData.append(i); sbRawData.append(","); sbRawData.append(j); sbRawData.append(","); sbRawData.append(k); sbRawData.append(","); sbRawData.append(graphTestResult[i][j][k].getValue("time")); sbRawData.append("\n"); } } } } else if (testType == SnarkTestTypes.ALGORITHM_COMPARATION) { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); sbRawData.append("Run,Graph,Time,Class\n"); for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { sbRawData.append(i); sbRawData.append(","); sbRawData.append(j); sbRawData.append(","); sbRawData.append(graphTestResult[i][j].getValue("time")); sbRawData.append(","); sbRawData.append(((Class<?>) graphTestResult[i][j] .getValue(GraphTestResult.SNARK_TESTER_CLASS_KEY)).getSimpleName()); sbRawData.append("\n"); } } } else { GraphTestResult[][] graphTestResult = (GraphTestResult[][]) results.get("resultsData"); sbRawData.append("Run,Graph,Time\n"); for (int i = 0; i < graphTestResult.length; i++) { for (int j = 0; j < graphTestResult[i].length; j++) { sbRawData.append(i); sbRawData.append(","); sbRawData.append(j); sbRawData.append(","); sbRawData.append(graphTestResult[i][j].getValue("time")); sbRawData.append("\n"); } } } this.saveStringIntoFile(new File(this.outputFile.getParent(), "raw_" + this.outputFile.getName()), sbRawData.toString()); } } }
From source file:gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) { long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions, RoundingMode.CEILING); int totalGroups = histogram.getGroups().size(); log.info("Histogram total record count: " + histogram.totalRecordCount); log.info("Histogram total groups: " + totalGroups); log.info("maxPartitions: " + maxPartitions); log.info("interval: " + interval); List<HistogramGroup> groups = histogram.getGroups(); List<String> partitionPoints = new ArrayList<>(); DescriptiveStatistics statistics = new DescriptiveStatistics(); int count = 0; HistogramGroup group;/*w w w. j a v a 2s.com*/ Iterator<HistogramGroup> it = groups.iterator(); while (it.hasNext()) { group = it.next(); if (count == 0) { // Add a new partition point; partitionPoints .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT)); } // Move the candidate to a new bucket if the attempted total is 2x of interval if (count != 0 && count + group.count >= 2 * interval) { // Summarize current group statistics.addValue(count); // A step-in start partitionPoints .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT)); count = group.count; } else { // Add group into current partition count += group.count; } if (count >= interval) { // Summarize current group statistics.addValue(count); // A fresh start next time count = 0; } } // If the last group is used as the last partition point if (count == 0) { // Exchange the last partition point with global high watermark partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark)); } else { // Summarize last group statistics.addValue(count); // Add global high watermark as last point partitionPoints.add(Long.toString(expectedHighWatermark)); } log.info("Dynamic partitioning statistics: "); log.info("data: " + Arrays.toString(statistics.getValues())); 
log.info(statistics.toString()); String specifiedPartitions = Joiner.on(",").join(partitionPoints); log.info("Calculated specified partitions: " + specifiedPartitions); return specifiedPartitions; }
From source file:knop.utils.stats.DataSet.java
/** * Gets the column statistics./*from w w w. j av a2s.c o m*/ * * @param column the column * @return the column statistics */ public DescriptiveStatistics getColumnStatistics(String column) { DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i != getColumnSize(column); i++) { Object value = getValue(column, i); double doubleValue; try { doubleValue = (Double) value; stats.addValue(doubleValue); } catch (Exception e) { } } return stats; }
From source file:com.linuxbox.enkive.statistics.consolidation.EmbeddedConsolidator.java
@Override protected void consolidateMaps(Map<String, Object> consolidatedData, List<Map<String, Object>> serviceData, ConsolidationKeyHandler keyDef, LinkedList<String> dataPath) { Map<String, Object> statConsolidatedData = new HashMap<String, Object>(); if (keyDef.getMethods() != null) { // loop over stat consolidation methods Collection<String> methods = new LinkedList<String>(keyDef.getMethods()); if (!keyDef.isPoint()) { methods.add(CONSOLIDATION_SUM); }/*from w w w. j av a2s.co m*/ for (String method : methods) { DescriptiveStatistics statsMaker = new DescriptiveStatistics(); Object dataVal = null; dataVal = null; // loop over data for consolidation Method LinkedList<String> tempPath = new LinkedList<String>(dataPath); if (keyDef.isPoint()) { tempPath.add(method); } else { tempPath.add(CONSOLIDATION_SUM); } double input = -1; for (Map<String, Object> dataMap : serviceData) { // go to end of path & get variable input = -1; dataVal = getDataVal(dataMap, tempPath); if (dataVal != null) { // extract relevant data from end of path input = statToDouble(dataVal); if (input > -1) { // add to stat maker if relevant statsMaker.addValue(input); } } } // store in map if method is valid methodMapBuilder(method, statsMaker, statConsolidatedData); } // store stat methods' data on main consolidated map putOnPath(dataPath, consolidatedData, statConsolidatedData); } }
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java
/** * Transitivity score is a collection of scores computed for each pair of nodes A and B with * multiple paths between them. A single score for such a pair is a ratio of the longest * path from A to B and the shortest path. * <p/>/*from w w w. ja v a 2 s.com*/ * It uses Bellman-Ford algorithm to compute the shortest and longest paths * * @param graph graph (must be DAG) * @return statistics */ private static DescriptiveStatistics computeTransitivityScores(Graph graph) { // find all out-degree > 1 nodes Set<Node> sourceNodes = new HashSet<>(); Set<Node> targetNodes = new HashSet<>(); for (Node n : graph) { if (n.getOutDegree() > 1) { sourceNodes.add(n); } if (n.getInDegree() > 1) { targetNodes.add(n); } } FileSourceDGS source = new FileSourceDGS(); source.addSink(graph); DescriptiveStatistics result = new DescriptiveStatistics(); for (Node sourceNode : sourceNodes) { // set positive weight first for (Edge e : graph.getEdgeSet()) { e.setAttribute(WEIGHT, 1.0); } BellmanFord bfShortest = new BellmanFord(WEIGHT, sourceNode.getId()); bfShortest.init(graph); bfShortest.compute(); // now negative weight for longest-path for (Edge e : graph.getEdgeSet()) { e.setAttribute(WEIGHT, -1.0); } BellmanFord bfLongest = new BellmanFord(WEIGHT, sourceNode.getId()); bfLongest.init(graph); bfLongest.compute(); for (Node targetNode : targetNodes) { Path shortestPath = bfShortest.getShortestPath(targetNode); Path longestPath = bfLongest.getShortestPath(targetNode); int shortestPathLength = shortestPath.getEdgeCount(); int longestPathLength = longestPath.getEdgeCount(); if (shortestPathLength == 1 && longestPathLength > 1) { // update statistics result.addValue((double) longestPathLength / (double) shortestPathLength); } } } return result; }
From source file:com.itemanalysis.jmetrik.stats.descriptives.DescriptiveAnalysis.java
public void summarize() throws SQLException { Statement stmt = null;/*from w w w. j a va 2s. c o m*/ ResultSet rs = null; DescriptiveStatistics temp = null; Table sqlTable = new Table(tableName.getNameForDatabase()); SelectQuery select = new SelectQuery(); for (VariableAttributes v : variables) { select.addColumn(sqlTable, v.getName().nameForDatabase()); } stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); rs = stmt.executeQuery(select.toString()); double value = Double.NaN; while (rs.next()) { for (VariableAttributes v : variables) { temp = data.get(v); if (temp == null) { temp = new DescriptiveStatistics(); data.put(v, temp); } //only increment for non null doubles value = rs.getDouble(v.getName().nameForDatabase()); if (!rs.wasNull()) { temp.addValue(value); } } updateProgress(); } rs.close(); stmt.close(); for (VariableAttributes v : data.keySet()) { publishTable(v); } }
From source file:io.hops.experiments.controller.Master.java
/**
 * Runs the block-reporting benchmark: sends the benchmark request to all slaves, waits for
 * their responses, aggregates them and prints the combined result.
 *
 * <p>Speed, successful and failed operations are summed over the slaves; the average times and
 * the name-node count are averaged (the name-node count is rounded down).
 *
 * @throws IllegalStateException if a slave answers with anything other than a
 *         {@code BlockReportingBenchmarkCommand.Response}
 */
private void startBlockReportingCommander() throws IOException, ClassNotFoundException {
    System.out.println("Starting BlockReporting Benchmark ...");
    prompt();

    BlockReportingBenchmarkCommand.Request benchmarkRequest = new BlockReportingBenchmarkCommand.Request(
            config.getBlockReportBenchMarkDuration(),
            config.getBlockReportingMinTimeBeforeNextReport(),
            config.getBlockReportingMaxTimeBeforeNextReport());
    sendToAllSlaves(benchmarkRequest, 0 /* delay */);
    Collection<Object> slaveResponses = receiveFromAllSlaves(Integer.MAX_VALUE);

    DescriptiveStatistics succeeded = new DescriptiveStatistics();
    DescriptiveStatistics failed = new DescriptiveStatistics();
    DescriptiveStatistics throughput = new DescriptiveStatistics();
    DescriptiveStatistics reportTime = new DescriptiveStatistics();
    DescriptiveStatistics nameNodeLookupTime = new DescriptiveStatistics();
    DescriptiveStatistics nameNodeCount = new DescriptiveStatistics();

    for (Object raw : slaveResponses) {
        if (!(raw instanceof BlockReportingBenchmarkCommand.Response)) {
            throw new IllegalStateException("Wrong response received from the client");
        }
        BlockReportingBenchmarkCommand.Response slaveResult = (BlockReportingBenchmarkCommand.Response) raw;
        succeeded.addValue(slaveResult.getSuccessfulOps());
        failed.addValue(slaveResult.getFailedOps());
        throughput.addValue(slaveResult.getSpeed());
        reportTime.addValue(slaveResult.getAvgTimePerReport());
        nameNodeLookupTime.addValue(slaveResult.getAvgTimeTogetNewNameNode());
        nameNodeCount.addValue(slaveResult.getNnCount());
    }

    int averageNameNodes = (int) Math.floor(nameNodeCount.getMean());
    BlockReportBMResults combined = new BlockReportBMResults(config.getNamenodeCount(), averageNameNodes,
            config.getNdbNodesCount(), throughput.getSum(), succeeded.getSum(), failed.getSum(),
            reportTime.getMean(), nameNodeLookupTime.getMean());
    printMasterResultMessages(combined);
}
From source file:com.caseystella.analytics.outlier.streaming.mad.SketchyMovingMADIntegrationTest.java
@Test public void runAccuracyBenchmark() throws IOException { Map<String, List<String>> benchmarks = JSONUtil.INSTANCE.load( new FileInputStream(new File(new File(benchmarkRoot), "combined_labels.json")), new TypeReference<Map<String, List<String>>>() { });// ww w . j a va 2 s . co m Assert.assertTrue(benchmarks.size() > 0); Map<ConfusionMatrix.ConfusionEntry, Long> overallConfusionMatrix = new HashMap<>(); DescriptiveStatistics globalExpectedScores = new DescriptiveStatistics(); long total = 0; for (Map.Entry<String, List<String>> kv : benchmarks.entrySet()) { File dataFile = new File(new File(benchmarkRoot), kv.getKey()); File plotFile = new File(new File(benchmarkRoot), kv.getKey() + ".dat"); Assert.assertTrue(dataFile.exists()); Set<Long> expectedOutliers = Sets.newHashSet(Iterables.transform(kv.getValue(), STR_TO_TS)); OutlierRunner runner = new OutlierRunner(outlierConfig, extractorConfigStr); final long[] numObservations = { 0L }; final long[] lastTimestamp = { Long.MIN_VALUE }; final DescriptiveStatistics timeDiffStats = new DescriptiveStatistics(); final Map<Long, Outlier> outlierMap = new HashMap<>(); final PrintWriter pw = new PrintWriter(plotFile); List<Outlier> outliers = runner.run(dataFile, 1, EnumSet.of(Severity.SEVERE_OUTLIER), new Function<Map.Entry<DataPoint, Outlier>, Void>() { @Nullable @Override public Void apply(@Nullable Map.Entry<DataPoint, Outlier> kv) { DataPoint dataPoint = kv.getKey(); Outlier outlier = kv.getValue(); pw.println(dataPoint.getTimestamp() + " " + outlier.getDataPoint().getValue() + " " + ((outlier.getSeverity() == Severity.SEVERE_OUTLIER) ? 
"outlier" : "normal")); outlierMap.put(dataPoint.getTimestamp(), outlier); numObservations[0] += 1; if (lastTimestamp[0] != Long.MIN_VALUE) { timeDiffStats.addValue(dataPoint.getTimestamp() - lastTimestamp[0]); } lastTimestamp[0] = dataPoint.getTimestamp(); return null; } }); pw.close(); total += numObservations[0]; Set<Long> calculatedOutliers = Sets .newHashSet(Iterables.transform(outliers, OutlierRunner.OUTLIER_TO_TS)); double stdDevDiff = Math.sqrt(timeDiffStats.getVariance()); System.out.println("Running data from " + kv.getKey() + " - E[time delta]: " + ConfusionMatrix.timeConversion((long) timeDiffStats.getMean()) + ", StdDev[time delta]: " + ConfusionMatrix.timeConversion((long) stdDevDiff) + " mean: " + runner.getMean()); Map<ConfusionMatrix.ConfusionEntry, Long> confusionMatrix = ConfusionMatrix.getConfusionMatrix( expectedOutliers, calculatedOutliers, numObservations[0], (long) timeDiffStats.getMean(), 3 //stdDevDiff > 30000?0:3 , outlierMap, globalExpectedScores); ConfusionMatrix.printConfusionMatrix(confusionMatrix); overallConfusionMatrix = ConfusionMatrix.merge(overallConfusionMatrix, confusionMatrix); } System.out.println("Really ran " + total); ConfusionMatrix.printConfusionMatrix(overallConfusionMatrix); ConfusionMatrix.printStats("Global Expected Outlier Scores", globalExpectedScores); }
From source file:com.screenslicer.core.scrape.type.ComparableNode.java
/**
 * Builds the integer feature vector ({@code scores}) for {@code node}.
 *
 * <p>The vector combines counts taken over the node's direct children (skipping every
 * child for which {@code Util.isEmpty} is true), totals reported by a {@code NodeCounter}
 * over all children, and skewness / standard deviation / mean of per-child measurements.
 * The position of each entry in {@code scores} is significant and must not be reordered.
 *
 * @param node the node to describe
 */
public ComparableNode(final Node node) {
    this.node = node;
    List<Node> separated = node.childNodes();

    // Per-category tallies over the non-empty direct children.
    int children = 0;
    int childBlocks = 0;
    int childFormatting = 0;
    int childContent = 0;
    int childItems = 0;
    int childDecoration = 0;
    int anchorChildren = 0;
    int textChildren = 0;
    int anchorTextChildren = 0;
    int anchorChildItems = 0;
    int textChildItems = 0;
    int anchorTextChildItems = 0;
    int itemChars = 0;
    int itemAnchorChars = 0;

    // Tag bookkeeping used to judge how uniform the children are.
    List<String> firstChildTags = null;
    List<List<String>> orderedTags = new ArrayList<List<String>>();
    List<String> allChildTags = new ArrayList<String>();
    List<List<String>> childTags = new ArrayList<List<String>>();
    boolean childrenConsistent = true;
    String childName = null;
    boolean childrenSame = true;

    double avgChildLengthDouble = 0d;
    int nodeStrLen = Util.trimmedLen(node.toString());

    // One sample per non-empty child (the two "Item" series: one sample per item child).
    DescriptiveStatistics statAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statChars = new DescriptiveStatistics();
    DescriptiveStatistics statDescendants = new DescriptiveStatistics();
    DescriptiveStatistics statFields = new DescriptiveStatistics();
    DescriptiveStatistics statLevels = new DescriptiveStatistics();
    DescriptiveStatistics statLongestField = new DescriptiveStatistics();
    DescriptiveStatistics statNonAnchorChars = new DescriptiveStatistics();
    DescriptiveStatistics statTextAnchors = new DescriptiveStatistics();
    DescriptiveStatistics statStrLen = new DescriptiveStatistics();
    DescriptiveStatistics statItemChars = new DescriptiveStatistics();
    DescriptiveStatistics statItemAnchorChars = new DescriptiveStatistics();

    for (Node kid : separated) {
        if (Util.isEmpty(kid)) {
            continue;
        }
        children++;
        int kidStrLen = Util.trimmedLen(kid.toString());
        avgChildLengthDouble += kidStrLen;
        NodeCounter kidCounter = new NodeCounter(kid);

        if (Util.isItem(kid.nodeName())) {
            ++childItems;
            if (kidCounter.anchors() > 0) {
                anchorChildItems++;
            }
            if (kidCounter.fields() > 0) {
                textChildItems++;
            }
            if (kidCounter.anchors() > 0 && kidCounter.fields() > 0) {
                anchorTextChildItems++;
            }
            itemChars += kidCounter.chars();
            itemAnchorChars += kidCounter.anchorChars();
            statItemChars.addValue(kidCounter.chars());
            statItemAnchorChars.addValue(kidCounter.anchorChars());
        }
        if (Util.isBlock(kid.nodeName())) {
            ++childBlocks;
        }
        if (Util.isDecoration(kid.nodeName())) {
            ++childDecoration;
        }
        if (Util.isFormatting(kid.nodeName())) {
            ++childFormatting;
        }
        if (Util.isContent(kid)) {
            ++childContent;
        }

        if (kidCounter.anchors() > 0) {
            anchorChildren++;
        }
        if (kidCounter.fields() > 0) {
            textChildren++;
        }
        if (kidCounter.anchors() > 0 && kidCounter.fields() > 0) {
            anchorTextChildren++;
        }

        statAnchorChars.addValue(kidCounter.anchorChars());
        statAnchors.addValue(kidCounter.anchors());
        statChars.addValue(kidCounter.chars());
        statDescendants.addValue(kidCounter.descendants());
        statFields.addValue(kidCounter.fields());
        statLevels.addValue(kidCounter.levels());
        statLongestField.addValue(kidCounter.longestField());
        statNonAnchorChars.addValue(kidCounter.nonAnchorChars());
        statTextAnchors.addValue(kidCounter.textAnchors());
        statStrLen.addValue(kidStrLen);

        List<String> kidTags = kidCounter.tags();
        allChildTags = Util.join(allChildTags, kidTags);
        childTags.add(kidTags);

        // The first non-empty child is the reference every later child is compared to.
        if (firstChildTags == null) {
            firstChildTags = kidTags;
        } else if (childrenConsistent) {
            childrenConsistent = Util.isSame(firstChildTags, kidTags);
        }
        if (childName == null) {
            childName = kid.nodeName();
        } else if (childrenSame) {
            childrenSame = childName.equals(kid.nodeName());
        }

        if (!Util.contains(kidCounter.orderedTags(), orderedTags)) {
            orderedTags.add(kidCounter.orderedTags());
        }
    }

    if (children == 0) {
        avgChildLengthDouble = 0;
    } else {
        avgChildLengthDouble = avgChildLengthDouble / (double) children;
    }
    int avgChildLength = (int) avgChildLengthDouble;

    // How far each child's tag list falls short of the joined tag list of all children.
    double avgChildDiff = 0;
    int maxChildDiff = 0;
    int joinedTagCount = allChildTags.size();
    for (List<String> tags : childTags) {
        int shortfall = joinedTagCount - tags.size();
        avgChildDiff += shortfall;
        maxChildDiff = Math.max(maxChildDiff, shortfall);
    }
    if (childTags.size() == 0) {
        avgChildDiff = 0;
    } else {
        avgChildDiff = avgChildDiff / (double) childTags.size();
    }

    // Consistency only counts when there was a first child and it had at least one tag.
    childrenConsistent = firstChildTags != null && !firstChildTags.isEmpty() && childrenConsistent;

    NodeCounter counter = new NodeCounter(separated);

    // NOTE(review): node.parent() is dereferenced without a null check; presumably callers
    // never pass a parentless node - confirm.
    int siblings = 0;
    for (Node peer : node.parent().childNodes()) {
        if (!Util.isEmpty(peer)) {
            siblings++;
        }
    }

    this.scores = new int[] {
            // totals over all children, raw and divided by the child count
            counter.items(), counter.blocks(), counter.decoration(), counter.formatting(), counter.content(),
            div(counter.items(), children), div(counter.blocks(), children),
            div(counter.decoration(), children), div(counter.formatting(), children),
            div(counter.content(), children),
            // direct-child tallies
            childItems, childBlocks, childDecoration, childFormatting, childContent, avgChildLength,
            // fields, images, anchors and text sizes
            counter.fields(), textChildItems, counter.images(), counter.anchors(), counter.textAnchors(),
            div(counter.chars(), Math.max(1, counter.fields())), div(itemChars, Math.max(1, textChildItems)),
            counter.longestField(), nodeStrLen, div(nodeStrLen, children), counter.anchorLen(),
            counter.chars(), itemChars, div(counter.chars(), children), div(itemChars, childItems),
            // non-anchor characters
            counter.nonAnchorChars(), div(counter.nonAnchorChars(), children),
            div(counter.nonAnchorChars(), childItems), div(counter.nonAnchorChars(), childBlocks),
            div(counter.nonAnchorChars(), childContent), div(counter.nonAnchorChars(), counter.anchors()),
            div(counter.nonAnchorChars(), counter.textAnchors()),
            // anchor characters
            counter.anchorChars(), itemAnchorChars, div(itemAnchorChars, anchorChildItems),
            div(counter.anchorChars(), counter.anchors()), div(counter.anchorChars(), counter.textAnchors()),
            div(counter.anchorChars(), children),
            // tree shape
            counter.descendants(), counter.levels(), div(counter.descendants(), children),
            div(children, counter.levels()), siblings, children, maxChildDiff, toInt(avgChildDiff),
            toInt(childrenSame), toInt(childrenConsistent), orderedTags.size(),
            // relation of each count to the two result-group sizes
            mod0(children, RESULT_GROUP_LARGE), mod0(children, RESULT_GROUP_SMALL),
            distance(children, RESULT_GROUP_LARGE), distance(children, RESULT_GROUP_SMALL),
            mod0(childItems, RESULT_GROUP_LARGE), mod0(childItems, RESULT_GROUP_SMALL),
            distance(childItems, RESULT_GROUP_LARGE), distance(childItems, RESULT_GROUP_SMALL),
            mod0(childBlocks, RESULT_GROUP_LARGE), mod0(childBlocks, RESULT_GROUP_SMALL),
            distance(childBlocks, RESULT_GROUP_LARGE), distance(childBlocks, RESULT_GROUP_SMALL),
            mod0(childContent, RESULT_GROUP_LARGE), mod0(childContent, RESULT_GROUP_SMALL),
            distance(childContent, RESULT_GROUP_LARGE), distance(childContent, RESULT_GROUP_SMALL),
            mod0(counter.anchors(), RESULT_GROUP_LARGE), mod0(counter.anchors(), RESULT_GROUP_SMALL),
            distance(counter.anchors(), RESULT_GROUP_LARGE), distance(counter.anchors(), RESULT_GROUP_SMALL),
            mod0(anchorChildItems, RESULT_GROUP_LARGE), mod0(anchorChildItems, RESULT_GROUP_SMALL),
            distance(anchorChildItems, RESULT_GROUP_LARGE), distance(anchorChildItems, RESULT_GROUP_SMALL),
            mod0(textChildItems, RESULT_GROUP_LARGE), mod0(textChildItems, RESULT_GROUP_SMALL),
            distance(textChildItems, RESULT_GROUP_LARGE), distance(textChildItems, RESULT_GROUP_SMALL),
            mod0(counter.textAnchors(), RESULT_GROUP_LARGE), mod0(counter.textAnchors(), RESULT_GROUP_SMALL),
            distance(counter.textAnchors(), RESULT_GROUP_LARGE),
            distance(counter.textAnchors(), RESULT_GROUP_SMALL),
            // child / item counts versus the total anchor count
            Math.abs(children - counter.anchors()), Math.abs(childItems - counter.anchors()),
            evenlyDivisible(children, counter.anchors()), evenlyDivisible(childItems, counter.anchors()),
            smallestMod(children, counter.anchors()), smallestMod(childItems, counter.anchors()),
            // absolute differences against the remaining anchor / text counts
            Math.abs(children - counter.textAnchors()), Math.abs(childItems - counter.textAnchors()),
            Math.abs(children - anchorChildren), Math.abs(childItems - anchorChildItems),
            Math.abs(children - textChildren), Math.abs(childItems - textChildItems),
            Math.abs(children - anchorTextChildren), Math.abs(childItems - anchorTextChildItems),
            // divisibility against the same counts
            evenlyDivisible(children, counter.textAnchors()), evenlyDivisible(childItems, counter.textAnchors()),
            evenlyDivisible(children, anchorChildren), evenlyDivisible(childItems, anchorChildItems),
            evenlyDivisible(children, textChildren), evenlyDivisible(childItems, textChildItems),
            evenlyDivisible(children, anchorTextChildren), evenlyDivisible(childItems, anchorTextChildItems),
            // smallest remainder against the same counts
            smallestMod(children, counter.textAnchors()), smallestMod(childItems, counter.textAnchors()),
            smallestMod(children, anchorChildren), smallestMod(childItems, anchorChildItems),
            smallestMod(children, textChildren), smallestMod(childItems, textChildItems),
            smallestMod(children, anchorTextChildren), smallestMod(childItems, anchorTextChildItems),
            // all children versus item children only
            Math.abs(anchorChildren - anchorChildItems), Math.abs(textChildren - textChildItems),
            Math.abs(anchorTextChildren - anchorTextChildItems),
            // skewness, standard deviation and mean of each per-child series
            toInt(statAnchorChars.getSkewness()), toInt(statAnchorChars.getStandardDeviation()),
            toInt(statAnchorChars.getMean()),
            toInt(statAnchors.getSkewness()), toInt(statAnchors.getStandardDeviation()),
            toInt(statAnchors.getMean()),
            toInt(statChars.getSkewness()), toInt(statChars.getStandardDeviation()),
            toInt(statChars.getMean()),
            toInt(statDescendants.getSkewness()), toInt(statDescendants.getStandardDeviation()),
            toInt(statDescendants.getMean()),
            toInt(statFields.getSkewness()), toInt(statFields.getStandardDeviation()),
            toInt(statFields.getMean()),
            toInt(statLevels.getSkewness()), toInt(statLevels.getStandardDeviation()),
            toInt(statLevels.getMean()),
            toInt(statLongestField.getSkewness()), toInt(statLongestField.getStandardDeviation()),
            toInt(statLongestField.getMean()),
            toInt(statNonAnchorChars.getSkewness()), toInt(statNonAnchorChars.getStandardDeviation()),
            toInt(statNonAnchorChars.getMean()),
            toInt(statStrLen.getSkewness()), toInt(statStrLen.getStandardDeviation()),
            toInt(statStrLen.getMean()),
            toInt(statTextAnchors.getSkewness()), toInt(statTextAnchors.getStandardDeviation()),
            toInt(statTextAnchors.getMean()),
            toInt(statItemChars.getSkewness()), toInt(statItemChars.getStandardDeviation()),
            toInt(statItemChars.getMean()),
            toInt(statItemAnchorChars.getSkewness()), toInt(statItemAnchorChars.getStandardDeviation()),
            toInt(statItemAnchorChars.getMean()), };
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step1DebateFilter.java
/** * Processes the debates and extract the required debates with arguments * * @param inputDir all debates//from w w w .j av a2 s .c o m * @param outputDir output * @throws IOException IO Exception */ public static void processData(String inputDir, File outputDir) throws IOException { // collect some lengths statistics DescriptiveStatistics filteredWordCountStatistics = new DescriptiveStatistics(); Frequency frequency = new Frequency(); final int lowerBoundaries = MEDIAN - ARGUMENT_LENGTH_PLUS_MINUS_RANGE; final int upperBoundaries = MEDIAN + ARGUMENT_LENGTH_PLUS_MINUS_RANGE; // read all debates and filter them for (File file : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { Debate debate = DebateSerializer.deserializeFromXML(FileUtils.readFileToString(file, "utf-8")); // only selected debates if (selectedDebates.contains(debate.getDebateMetaData().getUrl())) { Debate debateCopy = new Debate(); debateCopy.setDebateMetaData(debate.getDebateMetaData()); // for counting first level arguments (those without parents) for each of the two stances Map<String, Integer> argumentStancesCounts = new TreeMap<>(); for (Argument argument : debate.getArgumentList()) { boolean keepArgument = false; // hack: clean the data -- update stance for "tv" vs. 
"TV" if ("tv".equalsIgnoreCase(argument.getStance())) { argument.setStance("TV"); } // we have a first-level argument if (argument.getParentId() == null) { // now check the length int wordCount = argument.getText().split("\\s+").length; if (wordCount >= lowerBoundaries && wordCount <= upperBoundaries) { String stance = argument.getStance(); // update counts if (!argumentStancesCounts.containsKey(stance)) { argumentStancesCounts.put(stance, 0); } argumentStancesCounts.put(stance, argumentStancesCounts.get(stance) + 1); // keep it keepArgument = true; // update statistics; delete later filteredWordCountStatistics.addValue(wordCount); frequency.addValue((wordCount / 10) * 10); } } // copy to the result if (keepArgument) { debateCopy.getArgumentList().add(argument); } } // get number of first-level arguments for each side Iterator<Map.Entry<String, Integer>> tempIter = argumentStancesCounts.entrySet().iterator(); if (argumentStancesCounts.size() > 2) { // System.out.println("More stances: " + argumentStancesCounts); } Integer val1 = tempIter.hasNext() ? tempIter.next().getValue() : 0; Integer val2 = tempIter.hasNext() ? tempIter.next().getValue() : 0; if ((val1 + val2) >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE) { if (val1 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE && val2 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE) { System.out.println(debate.getDebateMetaData().getUrl() + "\t" + debate.getDebateMetaData().getTitle() + "\t" + argumentStancesCounts); // write the output String xml = DebateSerializer.serializeToXML(debateCopy); FileUtils.writeStringToFile(new File(outputDir, file.getName()), xml, "utf-8"); } } } } }