List of usage examples for the org.apache.commons.math3.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()
public DescriptiveStatistics()
From source file:io.prestosql.operator.aggregation.AbstractTestApproximateCountDistinct.java
/**
 * Runs 500 randomized trials of the group-by distinct-count estimator and
 * checks the distribution of its relative error.
 *
 * <p>Each trial draws a distinct-value count in {@code [1, getUniqueValuesCount()]},
 * builds a sample 1.5 times that size, and records
 * {@code (estimate - distinct) / distinct}. The mean and the standard
 * deviation of those errors are then passed to {@code assertLessThan}
 * together with {@code 1.0e-2} and {@code 1.0e-2 + maxStandardError}.
 *
 * @param maxStandardError standard error handed to the estimator, supplied
 *                         by the {@code provideStandardErrors} data provider
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    DescriptiveStatistics relativeErrors = new DescriptiveStatistics();

    int remainingTrials = 500;
    while (remainingTrials-- > 0) {
        int distinctCount = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (distinctCount * 1.5);
        List<Object> sample = createRandomSample(distinctCount, sampleSize);

        long estimate = estimateGroupByCount(sample, maxStandardError);
        // "* 1.0" forces floating-point division of the long difference.
        relativeErrors.addValue((estimate - distinctCount) * 1.0 / distinctCount);
    }

    assertLessThan(relativeErrors.getMean(), 1.0e-2);
    assertLessThan(relativeErrors.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
From source file:com.insightml.math.statistics.Correlation.java
/**
 * Builds two chart series from the paired data in {@code arrays}: the mean
 * and the median of {@code arrays[0]} grouped by the value found at the
 * same index in {@code arrays[1]}.
 *
 * @param label text appended to the series names {@code "Average "} and
 *              {@code "Median "}
 * @return a list holding the average series followed by the median series
 */
public final PairList<String, Map<Number, Number>> getChart(final CharSequence label) {
    // Bucket every arrays[0] value under its arrays[1] key.
    final Map<Number, DescriptiveStatistics> statsByKey = new HashMap<>();
    final int sampleCount = arrays[0].length;
    for (int idx = 0; idx < sampleCount; ++idx) {
        final double groupKey = arrays[1][idx];
        DescriptiveStatistics bucket = statsByKey.get(groupKey);
        if (bucket == null) {
            bucket = new DescriptiveStatistics();
            statsByKey.put(groupKey, bucket);
        }
        bucket.addValue(arrays[0][idx]);
    }

    // Reduce each bucket to its mean and its 50th percentile.
    final Map<Number, Number> means = new HashMap<>();
    final Map<Number, Number> medians = new HashMap<>();
    for (final Entry<Number, DescriptiveStatistics> group : statsByKey.entrySet()) {
        final Number key = group.getKey();
        final DescriptiveStatistics bucket = group.getValue();
        means.put(key, bucket.getMean());
        medians.put(key, bucket.getPercentile(50));
    }

    final PairList<String, Map<Number, Number>> chart = new PairList<>();
    chart.add("Average " + label, means);
    chart.add("Median " + label, medians);
    return chart;
}
From source file:ijfx.core.stats.DefaultImageStatisticsService.java
@Override public DescriptiveStatistics getDatasetDescriptiveStatistics(Dataset dataset) { DescriptiveStatistics summary = new DescriptiveStatistics(); Cursor<RealType<?>> cursor = dataset.cursor(); cursor.reset();//from w ww. jav a 2s . c om while (cursor.hasNext()) { cursor.fwd(); double value = cursor.get().getRealDouble(); summary.addValue(value); } return summary; }
From source file:com.caseystella.analytics.outlier.streaming.mad.ConfusionMatrix.java
public static Map<ConfusionEntry, Long> getConfusionMatrix(Set<Long> expectedOutliers, Set<Long> computedOutliers, long numObservations, long meanDiffBetweenTs, int timeBounds, Map<Long, Outlier> outlierMap, DescriptiveStatistics globalExpectedOutlierScoreStats) { Map<ConfusionEntry, Long> ret = new HashMap<>(); for (ResultType r : ResultType.values()) { for (ResultType s : ResultType.values()) { ret.put(new ConfusionEntry(r, s), 0L); }//www . jav a 2 s . c om } int unionSize = 0; DescriptiveStatistics expectedOutlierScoreStats = new DescriptiveStatistics(); for (Long expectedOutlier : expectedOutliers) { Outlier o = outlierMap.get(expectedOutlier); if (o.getScore() != null) { expectedOutlierScoreStats.addValue(o.getScore()); globalExpectedOutlierScoreStats.addValue(o.getScore()); } if (setContains(computedOutliers, expectedOutlier, meanDiffBetweenTs, timeBounds)) { ConfusionEntry entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.OUTLIER); ConfusionEntry.increment(entry, ret); unionSize++; } else { ConfusionEntry entry = new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.OUTLIER); long closest = closest(expectedOutlier, computedOutliers); long delta = Math.abs(expectedOutlier - closest); if (closest != Long.MAX_VALUE) { System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers (" + o + "), closest point is " + closest + " which is " + timeConversion(delta) + "away. - E[delta t] " + timeConversion(meanDiffBetweenTs) + ""); } else { System.out.println("Missed an outlier (" + expectedOutlier + ") wasn't in computed outliers (" + o + "), which is empty. 
- E[delta t] " + timeConversion(meanDiffBetweenTs) + ""); } ConfusionEntry.increment(entry, ret); unionSize++; } } printStats("Expected Outlier Score Stats", expectedOutlierScoreStats); DescriptiveStatistics computedOutlierScoreStats = new DescriptiveStatistics(); for (Long computedOutlier : computedOutliers) { if (!setContains(expectedOutliers, computedOutlier, meanDiffBetweenTs, timeBounds)) { Outlier o = outlierMap.get(computedOutlier); if (o.getScore() != null) { computedOutlierScoreStats.addValue(o.getScore()); } ConfusionEntry entry = new ConfusionEntry(ResultType.OUTLIER, ResultType.NON_OUTLIER); ConfusionEntry.increment(entry, ret); unionSize++; } } printStats("Computed Outlier Scores", computedOutlierScoreStats); ret.put(new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.NON_OUTLIER), numObservations - unionSize); Assert.assertEquals(numObservations, getTotalNum(ret)); return ret; }
From source file:com.nridge.connector.fs.con_fs.core.RunMetricReport.java
/** * When an object implementing interface <code>Runnable</code> is used * to create a thread, starting the thread causes the object's * <code>run</code> method to be called in that separately executing * thread.//w w w .ja v a2 s . c o m * * The general contract of the method <code>run</code> is that it may * take any action whatsoever. * * @see Thread#run() */ @Override public void run() { long msTime; String[] phaseTimes; double secondsTime, docsPerSecond; String docId, queueItem, phaseName; Logger appLogger = mAppMgr.getLogger(this, "run"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); long extractCount = 0; DescriptiveStatistics dsExtract = new DescriptiveStatistics(); long transformCount = 0; DescriptiveStatistics dsTransform = new DescriptiveStatistics(); long publishCount = 0; DescriptiveStatistics dsPublish = new DescriptiveStatistics(); BlockingQueue publishQueue = (BlockingQueue) mAppMgr.getProperty(Connector.QUEUE_PUBLISH_NAME); do { try { queueItem = (String) publishQueue.poll(Constants.QUEUE_POLL_TIMEOUT_DEFAULT, TimeUnit.SECONDS); if (mCrawlQueue.isQueueItemDocument(queueItem)) { StopWatch stopWatch = new StopWatch(); stopWatch.start(); docId = Connector.docIdFromQueueItem(queueItem); appLogger.debug(String.format("Publish Queue Item: %s", docId)); phaseTimes = Connector.phaseTimeFromQueueItem(queueItem); if (phaseTimes != null) { for (String phaseTime : phaseTimes) { phaseName = Connector.phaseFromPhaseTime(phaseTime); msTime = Connector.timeFromPhaseTime(phaseTime); if (StringUtils.equals(phaseName, Connector.PHASE_EXTRACT)) { extractCount++; secondsTime = msTime / MILLISECONDS_IN_A_SECOND; dsExtract.addValue(secondsTime); } else if (StringUtils.equals(phaseName, Connector.PHASE_TRANSFORM)) { transformCount++; secondsTime = msTime / MILLISECONDS_IN_A_SECOND; dsTransform.addValue(secondsTime); } else if (StringUtils.equals(phaseName, Connector.PHASE_PUBLISH)) { publishCount++; secondsTime = msTime / MILLISECONDS_IN_A_SECOND; 
dsPublish.addValue(secondsTime); } } } } } catch (InterruptedException e) { queueItem = StringUtils.EMPTY; } } while (!mCrawlQueue.isPhaseComplete(Connector.PHASE_PUBLISH, queueItem)); // Note: This is the end of the queue processing pipeline, so we will not pass on queue item markers. // Generate our metrics summary for the log file. writePhaseMetric(Connector.PHASE_EXTRACT, extractCount, dsExtract.getSum()); writePhaseMetric(Connector.PHASE_TRANSFORM, transformCount, dsTransform.getSum()); writePhaseMetric(Connector.PHASE_PUBLISH, publishCount, dsPublish.getSum()); double totalTime = dsExtract.getSum() + dsTransform.getSum() + dsPublish.getSum(); if ((publishCount > 0L) && (totalTime > 0.0)) docsPerSecond = publishCount / totalTime; else docsPerSecond = 0.0; String msgStr = String.format("Total metric summary: %d documents, %.2f seconds (%.2f docs/sec avg)", publishCount, totalTime, docsPerSecond); appLogger.info(msgStr); appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); }
From source file:io.hops.leaderElection.experiments.ExperimentDriver.java
private void calculateNumbers(int numProcesses, String outputFileName) throws FileNotFoundException, IOException { if (!new File(outputFileName).exists()) { LOG.error("File " + outputFileName + " does not exists"); return;//from w ww. j a v a2 s .c o m } String marker = "DataPoints: "; String line; DescriptiveStatistics failOverStats = new DescriptiveStatistics(); DescriptiveStatistics tpStats = new DescriptiveStatistics(); BufferedReader br = new BufferedReader(new FileReader(outputFileName)); while ((line = br.readLine()) != null) { if (!line.startsWith(marker)) { continue; } boolean tpStatRecorded = false; String numbers = line.substring(marker.length(), line.length()); StringTokenizer st = new StringTokenizer(numbers, ",[] "); while (st.hasMoreElements()) { double point = Double.parseDouble(st.nextToken()); if (!tpStatRecorded) { tpStats.addValue(point); tpStatRecorded = true; } else { failOverStats.addValue(point); } } } br.close(); writeMessageToFile(numProcesses, failOverStats, tpStats); }
From source file:com.joliciel.talismane.stats.FScoreCalculatorOneVsRest.java
private void calculate() { if (!this.calculated) { precisionStats = new DescriptiveStatistics(); recallStats = new DescriptiveStatistics(); fScoreStats = new DescriptiveStatistics(); precisionWeightedStats = new DescriptiveStatistics(); recallWeightedStats = new DescriptiveStatistics(); fScoreWeightedStats = new DescriptiveStatistics(); for (E outcome : fScoreCalculators.keySet()) { if (!outcomeCounts.containsKey(outcome)) outcomeCounts.put(outcome, 0); int count = outcomeCounts.get(outcome); FScoreCalculator<Boolean> fScoreCalculator = fScoreCalculators.get(outcome); if (count > 0) { precisionStats.addValue(fScoreCalculator.getPrecision(true)); recallStats.addValue(fScoreCalculator.getRecall(true)); fScoreStats.addValue(fScoreCalculator.getFScore(true)); }//from ww w. ja v a 2 s . c om for (int i = 0; i < count; i++) { precisionWeightedStats.addValue(fScoreCalculator.getPrecision(true)); recallWeightedStats.addValue(fScoreCalculator.getRecall(true)); fScoreWeightedStats.addValue(fScoreCalculator.getFScore(true)); } } this.calculated = true; } }
From source file:com.itemanalysis.psychometrics.measurement.TestSummary.java
/**
 * Creates a test summary and its statistics holders.
 *
 * @param numberOfItems      stored in the {@code numberOfItems} field
 * @param numberOfSubscales  stored in the {@code numberOfSubscales} field; a
 *                           part covariance matrix of this size is created
 *                           only when it is greater than 1
 * @param cutScores          stored in the {@code cutScores} field
 * @param variableAttributes stored and used to build the covariance matrix
 * @param unbiased           stored and passed to the {@code StandardDeviation}
 *                           constructor
 * @param deletedReliability stored in the {@code deletedReliability} field
 * @param showCsem           stored in the {@code showCsem} field
 */
public TestSummary(int numberOfItems, int numberOfSubscales, int[] cutScores,
        ArrayList<VariableAttributes> variableAttributes, boolean unbiased, boolean deletedReliability,
        boolean showCsem) {
    // Plain configuration values.
    this.numberOfItems = numberOfItems;
    this.numberOfSubscales = numberOfSubscales;
    this.cutScores = cutScores;
    this.variableAttributes = variableAttributes;
    this.unbiased = unbiased;
    this.deletedReliability = deletedReliability;
    this.showCsem = showCsem;

    // Statistics holders derived from the configuration.
    this.stats = new DescriptiveStatistics();
    this.stdDev = new StandardDeviation(unbiased);
    this.relMatrix = new CovarianceMatrix(variableAttributes);
    if (numberOfSubscales > 1) {
        this.partRelMatrix = new CovarianceMatrix(numberOfSubscales);
    }
}
From source file:io.yields.math.framework.data.DataProvidersTest.java
@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000) @Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers") public void testRandomDistribution(Explorer<Pair> explorer) { KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest(); DescriptiveStatistics xStats = new DescriptiveStatistics(); DescriptiveStatistics yStats = new DescriptiveStatistics(); explorer.all().forEach(result -> { Pair pair = result.getFunctionOutcome().orElse(new Pair()); xStats.addValue(pair.getX1());// w ww . j a v a 2 s .c o m yStats.addValue(pair.getX2()); }); DescriptiveStatistics cross = new DescriptiveStatistics(); for (int i = 0; i < xStats.getN(); i++) { cross.addValue((xStats.getValues()[i] - .5) * (yStats.getValues()[i] - .5)); } /** * x and y should be uniformly distributed */ assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xStats.getValues())) .isEqualTo(0, Delta.delta(.015)); assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), yStats.getValues())) .isEqualTo(0, Delta.delta(.015)); /** * and have zero correlation */ assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05)); }
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step1DebateFilter.java
/** * Processes the debates and extract the required debates with arguments * * @param inputDir all debates//from www . ja va2 s . c o m * @param outputDir output * @throws IOException IO Exception */ public static void processData(String inputDir, File outputDir) throws IOException { // collect some lengths statistics DescriptiveStatistics filteredWordCountStatistics = new DescriptiveStatistics(); Frequency frequency = new Frequency(); final int lowerBoundaries = MEDIAN - ARGUMENT_LENGTH_PLUS_MINUS_RANGE; final int upperBoundaries = MEDIAN + ARGUMENT_LENGTH_PLUS_MINUS_RANGE; // read all debates and filter them for (File file : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) { Debate debate = DebateSerializer.deserializeFromXML(FileUtils.readFileToString(file, "utf-8")); // only selected debates if (selectedDebates.contains(debate.getDebateMetaData().getUrl())) { Debate debateCopy = new Debate(); debateCopy.setDebateMetaData(debate.getDebateMetaData()); // for counting first level arguments (those without parents) for each of the two stances Map<String, Integer> argumentStancesCounts = new TreeMap<>(); for (Argument argument : debate.getArgumentList()) { boolean keepArgument = false; // hack: clean the data -- update stance for "tv" vs. 
"TV" if ("tv".equalsIgnoreCase(argument.getStance())) { argument.setStance("TV"); } // we have a first-level argument if (argument.getParentId() == null) { // now check the length int wordCount = argument.getText().split("\\s+").length; if (wordCount >= lowerBoundaries && wordCount <= upperBoundaries) { String stance = argument.getStance(); // update counts if (!argumentStancesCounts.containsKey(stance)) { argumentStancesCounts.put(stance, 0); } argumentStancesCounts.put(stance, argumentStancesCounts.get(stance) + 1); // keep it keepArgument = true; // update statistics; delete later filteredWordCountStatistics.addValue(wordCount); frequency.addValue((wordCount / 10) * 10); } } // copy to the result if (keepArgument) { debateCopy.getArgumentList().add(argument); } } // get number of first-level arguments for each side Iterator<Map.Entry<String, Integer>> tempIter = argumentStancesCounts.entrySet().iterator(); if (argumentStancesCounts.size() > 2) { // System.out.println("More stances: " + argumentStancesCounts); } Integer val1 = tempIter.hasNext() ? tempIter.next().getValue() : 0; Integer val2 = tempIter.hasNext() ? tempIter.next().getValue() : 0; if ((val1 + val2) >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE) { if (val1 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE && val2 >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE) { System.out.println(debate.getDebateMetaData().getUrl() + "\t" + debate.getDebateMetaData().getTitle() + "\t" + argumentStancesCounts); // write the output String xml = DebateSerializer.serializeToXML(debateCopy); FileUtils.writeStringToFile(new File(outputDir, file.getName()), xml, "utf-8"); } } } } }