Example usage for the org.apache.commons.math3.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()

Introduction

On this page you can find example usages of the no-argument constructor DescriptiveStatistics() of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.

Prototype

public DescriptiveStatistics()

Source Link

Document

Construct a DescriptiveStatistics instance with an infinite window, i.e. one that retains every value added to it.

Usage

From source file:io.prestosql.operator.aggregation.AbstractTestApproximateCountDistinct.java

/**
 * Runs 500 randomized trials of {@code estimateGroupByCount} and checks that the relative
 * error of the estimate is small on average and has a bounded spread.
 *
 * @param maxStandardError the standard error handed to the estimator; it also widens the
 *                         tolerated standard deviation of the observed relative errors
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    DescriptiveStatistics relativeErrors = new DescriptiveStatistics();

    int trial = 0;
    while (trial < 500) {
        // at least one distinct value, at most getUniqueValuesCount()
        int distinctCount = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (distinctCount * 1.5);
        List<Object> sample = createRandomSample(distinctCount, sampleSize);

        long estimate = estimateGroupByCount(sample, maxStandardError);
        relativeErrors.addValue((estimate - distinctCount) * 1.0 / distinctCount);

        trial++;
    }

    assertLessThan(relativeErrors.getMean(), 1.0e-2);
    assertLessThan(relativeErrors.getStandardDeviation(), 1.0e-2 + maxStandardError);
}

From source file:com.insightml.math.statistics.Correlation.java

/**
 * Builds two chart series from the paired arrays: the mean and the median of the
 * {@code arrays[0]} values, grouped by the value found at the same index in {@code arrays[1]}.
 *
 * @param label text appended to the {@code "Average "} and {@code "Median "} series names
 * @return a list to which the averages series and then the medians series have been added
 */
public final PairList<String, Map<Number, Number>> getChart(final CharSequence label) {
    // Bucket every arrays[0] value under its arrays[1] counterpart.
    final Map<Number, DescriptiveStatistics> statsByKey = new HashMap<>();
    final int length = arrays[0].length;
    for (int idx = 0; idx < length; ++idx) {
        final double groupKey = arrays[1][idx];
        DescriptiveStatistics bucket = statsByKey.get(groupKey);
        if (bucket == null) {
            bucket = new DescriptiveStatistics();
            statsByKey.put(groupKey, bucket);
        }
        bucket.addValue(arrays[0][idx]);
    }

    // Reduce each bucket to its mean and its 50th percentile.
    final Map<Number, Number> means = new HashMap<>();
    final Map<Number, Number> medians = new HashMap<>();
    for (final Entry<Number, DescriptiveStatistics> group : statsByKey.entrySet()) {
        final Number groupKey = group.getKey();
        final DescriptiveStatistics bucket = group.getValue();
        means.put(groupKey, bucket.getMean());
        medians.put(groupKey, bucket.getPercentile(50));
    }

    final PairList<String, Map<Number, Number>> series = new PairList<>();
    series.add("Average " + label, means);
    series.add("Median " + label, medians);
    return series;
}

From source file:ijfx.core.stats.DefaultImageStatisticsService.java

/**
 * Collects every value visited by the dataset's cursor, read as a {@code double}, into a
 * new {@link DescriptiveStatistics}.
 *
 * @param dataset the dataset whose values are summarized
 * @return statistics holding one entry per visited cursor position
 */
@Override
public DescriptiveStatistics getDatasetDescriptiveStatistics(Dataset dataset) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    final Cursor<RealType<?>> it = dataset.cursor();
    for (it.reset(); it.hasNext();) {
        it.fwd();
        stats.addValue(it.get().getRealDouble());
    }
    return stats;
}

From source file:com.caseystella.analytics.outlier.streaming.mad.ConfusionMatrix.java

/**
 * Builds a confusion matrix comparing the expected outliers with the computed outliers.
 * <p>
 * Every {@code ResultType} pair starts at zero. Each expected outlier increments either the
 * (OUTLIER, OUTLIER) cell when {@code setContains} finds it among the computed outliers, or
 * the (NON_OUTLIER, OUTLIER) cell otherwise, in which case a diagnostic line is printed.
 * Each computed outlier that {@code setContains} does not find among the expected ones
 * increments the (OUTLIER, NON_OUTLIER) cell. The (NON_OUTLIER, NON_OUTLIER) cell is then set
 * to {@code numObservations} minus the number of increments made, and the total is asserted
 * to equal {@code numObservations}.
 *
 * @param expectedOutliers                the outliers that should have been found
 * @param computedOutliers                the outliers that were actually reported
 * @param numObservations                 total number of observations
 * @param meanDiffBetweenTs               passed to {@code setContains} and shown in the diagnostics
 * @param timeBounds                      passed to {@code setContains}
 * @param outlierMap                      lookup from an outlier key to its {@code Outlier}
 * @param globalExpectedOutlierScoreStats receives the non-null score of every expected outlier
 * @return the populated confusion matrix
 */
public static Map<ConfusionEntry, Long> getConfusionMatrix(Set<Long> expectedOutliers,
        Set<Long> computedOutliers, long numObservations, long meanDiffBetweenTs, int timeBounds,
        Map<Long, Outlier> outlierMap, DescriptiveStatistics globalExpectedOutlierScoreStats) {
    // Seed every combination so that increments never hit a missing key.
    Map<ConfusionEntry, Long> matrix = new HashMap<>();
    for (ResultType first : ResultType.values()) {
        for (ResultType second : ResultType.values()) {
            matrix.put(new ConfusionEntry(first, second), 0L);
        }
    }

    int accountedFor = 0;

    // Pass 1: every expected outlier is either matched or missed.
    DescriptiveStatistics expectedScores = new DescriptiveStatistics();
    for (Long expected : expectedOutliers) {
        Outlier outlier = outlierMap.get(expected);
        if (outlier.getScore() != null) {
            expectedScores.addValue(outlier.getScore());
            globalExpectedOutlierScoreStats.addValue(outlier.getScore());
        }

        boolean matched = setContains(computedOutliers, expected, meanDiffBetweenTs, timeBounds);
        if (!matched) {
            long nearest = closest(expected, computedOutliers);
            if (nearest != Long.MAX_VALUE) {
                long delta = Math.abs(expected - nearest);
                System.out.println("Missed an outlier (" + expected + ") wasn't in computed outliers ("
                        + outlier + "), closest point is " + nearest + " which is " + timeConversion(delta)
                        + "away. - E[delta t] " + timeConversion(meanDiffBetweenTs) + "");
            } else {
                System.out.println("Missed an outlier (" + expected + ") wasn't in computed outliers ("
                        + outlier + "), which is empty. - E[delta t] " + timeConversion(meanDiffBetweenTs) + "");
            }
        }

        ResultType firstType = matched ? ResultType.OUTLIER : ResultType.NON_OUTLIER;
        ConfusionEntry.increment(new ConfusionEntry(firstType, ResultType.OUTLIER), matrix);
        accountedFor++;
    }
    printStats("Expected Outlier Score Stats", expectedScores);

    // Pass 2: computed outliers that were not expected.
    DescriptiveStatistics computedScores = new DescriptiveStatistics();
    for (Long computed : computedOutliers) {
        if (setContains(expectedOutliers, computed, meanDiffBetweenTs, timeBounds)) {
            continue;
        }
        Outlier outlier = outlierMap.get(computed);
        if (outlier.getScore() != null) {
            computedScores.addValue(outlier.getScore());
        }
        ConfusionEntry.increment(new ConfusionEntry(ResultType.OUTLIER, ResultType.NON_OUTLIER), matrix);
        accountedFor++;
    }
    printStats("Computed Outlier Scores", computedScores);

    // Everything not counted above lands in the (NON_OUTLIER, NON_OUTLIER) cell.
    matrix.put(new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.NON_OUTLIER), numObservations - accountedFor);
    Assert.assertEquals(numObservations, getTotalNum(matrix));
    return matrix;
}

From source file:com.nridge.connector.fs.con_fs.core.RunMetricReport.java

/**
 * Drains the publish queue and reports per-phase timing metrics.
 * <p>
 * Each queue item that represents a document carries a list of phase times. For every
 * extract, transform and publish phase time found, the matching counter is incremented and
 * the time (divided by {@code MILLISECONDS_IN_A_SECOND}) is added to that phase's
 * statistics. Polling continues until the crawl queue reports the publish phase complete
 * for the last polled item. A per-phase metric is then written and a total summary is
 * logged.
 * <p>
 * When an object implementing interface <code>Runnable</code> is used
 * to create a thread, starting the thread causes the object's
 * <code>run</code> method to be called in that separately executing
 * thread.
 *
 * @see Thread#run()
 */
@Override
public void run() {
    long msTime;
    String[] phaseTimes;
    double secondsTime, docsPerSecond;
    String docId, queueItem, phaseName;
    Logger appLogger = mAppMgr.getLogger(this, "run");

    appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER);

    // One occurrence counter and one statistics accumulator per pipeline phase.
    long extractCount = 0;
    DescriptiveStatistics dsExtract = new DescriptiveStatistics();
    long transformCount = 0;
    DescriptiveStatistics dsTransform = new DescriptiveStatistics();
    long publishCount = 0;
    DescriptiveStatistics dsPublish = new DescriptiveStatistics();

    // NOTE(review): raw BlockingQueue; the poll result is cast to String below.
    BlockingQueue publishQueue = (BlockingQueue) mAppMgr.getProperty(Connector.QUEUE_PUBLISH_NAME);

    do {
        try {
            // poll may return null on timeout; presumably isQueueItemDocument and
            // isPhaseComplete tolerate a null item - TODO confirm.
            queueItem = (String) publishQueue.poll(Constants.QUEUE_POLL_TIMEOUT_DEFAULT, TimeUnit.SECONDS);
            if (mCrawlQueue.isQueueItemDocument(queueItem)) {
                // NOTE(review): this stop watch is started but never read in this method.
                StopWatch stopWatch = new StopWatch();
                stopWatch.start();

                docId = Connector.docIdFromQueueItem(queueItem);

                appLogger.debug(String.format("Publish Queue Item: %s", docId));

                phaseTimes = Connector.phaseTimeFromQueueItem(queueItem);
                if (phaseTimes != null) {
                    for (String phaseTime : phaseTimes) {
                        phaseName = Connector.phaseFromPhaseTime(phaseTime);
                        msTime = Connector.timeFromPhaseTime(phaseTime);
                        // NOTE(review): if MILLISECONDS_IN_A_SECOND is an integral constant,
                        // the divisions below truncate sub-second times - confirm it is a double.
                        if (StringUtils.equals(phaseName, Connector.PHASE_EXTRACT)) {
                            extractCount++;
                            secondsTime = msTime / MILLISECONDS_IN_A_SECOND;
                            dsExtract.addValue(secondsTime);
                        } else if (StringUtils.equals(phaseName, Connector.PHASE_TRANSFORM)) {
                            transformCount++;
                            secondsTime = msTime / MILLISECONDS_IN_A_SECOND;
                            dsTransform.addValue(secondsTime);
                        } else if (StringUtils.equals(phaseName, Connector.PHASE_PUBLISH)) {
                            publishCount++;
                            secondsTime = msTime / MILLISECONDS_IN_A_SECOND;
                            dsPublish.addValue(secondsTime);
                        }
                    }
                }
            }
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is swallowed and the loop keeps polling with an
            // empty item; the thread's interrupt status is not restored - confirm intended.
            queueItem = StringUtils.EMPTY;
        }
    } while (!mCrawlQueue.isPhaseComplete(Connector.PHASE_PUBLISH, queueItem));

    // Note: This is the end of the queue processing pipeline, so we will not pass on queue item markers.

    // Generate our metrics summary for the log file.

    writePhaseMetric(Connector.PHASE_EXTRACT, extractCount, dsExtract.getSum());
    writePhaseMetric(Connector.PHASE_TRANSFORM, transformCount, dsTransform.getSum());
    writePhaseMetric(Connector.PHASE_PUBLISH, publishCount, dsPublish.getSum());

    // Average rate = published documents over the summed time of all three phases;
    // reported as 0.0 when nothing was published or no time was recorded.
    double totalTime = dsExtract.getSum() + dsTransform.getSum() + dsPublish.getSum();
    if ((publishCount > 0L) && (totalTime > 0.0))
        docsPerSecond = publishCount / totalTime;
    else
        docsPerSecond = 0.0;
    String msgStr = String.format("Total metric summary: %d documents, %.2f seconds (%.2f docs/sec avg)",
            publishCount, totalTime, docsPerSecond);
    appLogger.info(msgStr);

    appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART);
}

From source file:io.hops.leaderElection.experiments.ExperimentDriver.java

/**
 * Parses the {@code "DataPoints: "} lines of an experiment output file and hands the
 * aggregated numbers to {@code writeMessageToFile}.
 * <p>
 * On every marker line the first number goes into the {@code tpStats} statistics and every
 * remaining number into the {@code failOverStats} statistics. Lines that do not start with
 * the marker are ignored. If the file does not exist, an error is logged and nothing is
 * written.
 *
 * @param numProcesses   forwarded unchanged to {@code writeMessageToFile}
 * @param outputFileName path of the experiment output file to parse
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws IOException           if reading the file fails
 */
private void calculateNumbers(int numProcesses, String outputFileName)
        throws FileNotFoundException, IOException {
    if (!new File(outputFileName).exists()) {
        LOG.error("File " + outputFileName + " does not exists");
        return;
    }
    final String marker = "DataPoints: ";
    DescriptiveStatistics failOverStats = new DescriptiveStatistics();
    DescriptiveStatistics tpStats = new DescriptiveStatistics();

    // try-with-resources: the reader is closed even when readLine() or
    // Double.parseDouble() throws, which the previous explicit close() did not guarantee.
    try (BufferedReader br = new BufferedReader(new FileReader(outputFileName))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (!line.startsWith(marker)) {
                continue;
            }

            // Numbers are separated by any mix of commas, brackets and spaces.
            StringTokenizer st = new StringTokenizer(line.substring(marker.length()), ",[] ");
            boolean tpStatRecorded = false;
            while (st.hasMoreTokens()) {
                double point = Double.parseDouble(st.nextToken());
                if (!tpStatRecorded) {
                    // The first number on the line belongs to tpStats.
                    tpStats.addValue(point);
                    tpStatRecorded = true;
                } else {
                    failOverStats.addValue(point);
                }
            }
        }
    }
    writeMessageToFile(numProcesses, failOverStats, tpStats);
}

From source file:com.joliciel.talismane.stats.FScoreCalculatorOneVsRest.java

/**
 * Computes the precision, recall and F-score statistics once; later calls are no-ops.
 * <p>
 * For every outcome that has an F-score calculator, the unweighted statistics receive one
 * sample when the outcome's count is positive, and the weighted statistics receive one
 * sample per counted occurrence. Outcomes missing from {@code outcomeCounts} are registered
 * there with a count of zero.
 */
private void calculate() {
    if (this.calculated) {
        return;
    }

    precisionStats = new DescriptiveStatistics();
    recallStats = new DescriptiveStatistics();
    fScoreStats = new DescriptiveStatistics();

    precisionWeightedStats = new DescriptiveStatistics();
    recallWeightedStats = new DescriptiveStatistics();
    fScoreWeightedStats = new DescriptiveStatistics();

    for (E outcome : fScoreCalculators.keySet()) {
        if (!outcomeCounts.containsKey(outcome)) {
            outcomeCounts.put(outcome, 0);
        }

        final int occurrences = outcomeCounts.get(outcome);
        final FScoreCalculator<Boolean> calculator = fScoreCalculators.get(outcome);

        // Unweighted: each outcome that occurred contributes exactly once.
        if (occurrences > 0) {
            precisionStats.addValue(calculator.getPrecision(true));
            recallStats.addValue(calculator.getRecall(true));
            fScoreStats.addValue(calculator.getFScore(true));
        }

        // Weighted: each outcome contributes once per occurrence.
        for (int n = occurrences; n > 0; n--) {
            precisionWeightedStats.addValue(calculator.getPrecision(true));
            recallWeightedStats.addValue(calculator.getRecall(true));
            fScoreWeightedStats.addValue(calculator.getFScore(true));
        }
    }

    this.calculated = true;
}

From source file:com.itemanalysis.psychometrics.measurement.TestSummary.java

/**
 * Creates a test summary, storing the configuration and allocating the accumulators.
 *
 * @param numberOfItems      stored as given
 * @param numberOfSubscales  stored as given; a part covariance matrix of this size is
 *                           created only when it is greater than one
 * @param cutScores          stored as given (not copied)
 * @param variableAttributes stored as given and used to build the covariance matrix
 * @param unbiased           stored as given and passed to the {@code StandardDeviation}
 * @param deletedReliability stored as given
 * @param showCsem           stored as given
 */
public TestSummary(int numberOfItems, int numberOfSubscales, int[] cutScores,
        ArrayList<VariableAttributes> variableAttributes, boolean unbiased, boolean deletedReliability,
        boolean showCsem) {
    // Plain configuration.
    this.numberOfItems = numberOfItems;
    this.numberOfSubscales = numberOfSubscales;
    this.cutScores = cutScores;
    this.variableAttributes = variableAttributes;
    this.unbiased = unbiased;
    this.deletedReliability = deletedReliability;
    this.showCsem = showCsem;

    // Accumulators.
    this.stats = new DescriptiveStatistics();
    this.stdDev = new StandardDeviation(unbiased);
    this.relMatrix = new CovarianceMatrix(variableAttributes);
    if (numberOfSubscales > 1) {
        this.partRelMatrix = new CovarianceMatrix(numberOfSubscales);
    }
}

From source file:io.yields.math.framework.data.DataProvidersTest.java

/**
 * Checks that the explored pairs behave like independent uniform samples on [0, 1]: the
 * Kolmogorov-Smirnov statistic of each coordinate against a uniform distribution must be
 * close to zero, and so must the mean of the centered cross products.
 *
 * @param explorer supplies the explored results whose outcomes are the sampled pairs
 */
@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000)
@Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers")
public void testRandomDistribution(Explorer<Pair> explorer) {
    KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
    DescriptiveStatistics xStats = new DescriptiveStatistics();
    DescriptiveStatistics yStats = new DescriptiveStatistics();
    explorer.all().forEach(result -> {
        Pair pair = result.getFunctionOutcome().orElse(new Pair());
        xStats.addValue(pair.getX1());
        yStats.addValue(pair.getX2());
    });

    // getValues() returns a fresh copy of the whole data set on every call, so fetch each
    // array once instead of twice per loop iteration (which was quadratic in the sample count).
    final double[] xValues = xStats.getValues();
    final double[] yValues = yStats.getValues();

    DescriptiveStatistics cross = new DescriptiveStatistics();
    for (int i = 0; i < xValues.length; i++) {
        // Center both coordinates on the expected mean of a uniform [0, 1] sample.
        cross.addValue((xValues[i] - .5) * (yValues[i] - .5));
    }

    // x and y should be uniformly distributed
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xValues))
            .isEqualTo(0, Delta.delta(.015));
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), yValues))
            .isEqualTo(0, Delta.delta(.015));

    // and have zero correlation
    assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05));
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step1DebateFilter.java

/**
 * Filters the debate XML files found in {@code inputDir} and writes the qualifying ones to
 * {@code outputDir} under their original file names.
 * <p>
 * Only debates whose URL is in {@code selectedDebates} are considered. From each of them,
 * only first-level arguments (those without a parent) whose word count lies within
 * {@code MEDIAN} plus or minus {@code ARGUMENT_LENGTH_PLUS_MINUS_RANGE} are kept. A debate
 * is written only when the counts of the first two stances (in sorted stance order)
 * together reach the per-debate minimum and each reaches the per-side minimum.
 *
 * @param inputDir  directory containing all debates as XML files
 * @param outputDir directory the filtered debates are written to
 * @throws IOException if reading or writing a debate file fails
 */
public static void processData(String inputDir, File outputDir) throws IOException {
    // Length statistics are collected here but not reported by this method.
    DescriptiveStatistics filteredWordCountStatistics = new DescriptiveStatistics();
    Frequency frequency = new Frequency();

    final int minWords = MEDIAN - ARGUMENT_LENGTH_PLUS_MINUS_RANGE;
    final int maxWords = MEDIAN + ARGUMENT_LENGTH_PLUS_MINUS_RANGE;

    for (File xmlFile : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) {
        Debate debate = DebateSerializer.deserializeFromXML(FileUtils.readFileToString(xmlFile, "utf-8"));

        // Skip debates that were not selected.
        if (!selectedDebates.contains(debate.getDebateMetaData().getUrl())) {
            continue;
        }

        Debate filtered = new Debate();
        filtered.setDebateMetaData(debate.getDebateMetaData());

        // Number of kept first-level arguments per stance, in sorted stance order.
        Map<String, Integer> countsByStance = new TreeMap<>();

        for (Argument argument : debate.getArgumentList()) {
            // Data clean-up: normalize any casing of "tv" to "TV".
            if ("tv".equalsIgnoreCase(argument.getStance())) {
                argument.setStance("TV");
            }

            // Replies (arguments with a parent) are never kept.
            if (argument.getParentId() != null) {
                continue;
            }

            int wordCount = argument.getText().split("\\s+").length;
            if (wordCount < minWords || wordCount > maxWords) {
                continue;
            }

            String stance = argument.getStance();
            Integer seenSoFar = countsByStance.get(stance);
            countsByStance.put(stance, seenSoFar == null ? 1 : seenSoFar + 1);

            filteredWordCountStatistics.addValue(wordCount);
            // Word counts are bucketed down to the nearest multiple of ten.
            frequency.addValue((wordCount / 10) * 10);

            filtered.getArgumentList().add(argument);
        }

        // Only the first two stances are compared; a debate may contain more than two.
        Iterator<Map.Entry<String, Integer>> stanceCounts = countsByStance.entrySet().iterator();
        int firstSide = stanceCounts.hasNext() ? stanceCounts.next().getValue() : 0;
        int secondSide = stanceCounts.hasNext() ? stanceCounts.next().getValue() : 0;

        boolean enoughOverall = (firstSide + secondSide) >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE;
        boolean enoughPerSide = firstSide >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE
                && secondSide >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE;
        if (enoughOverall && enoughPerSide) {
            System.out.println(debate.getDebateMetaData().getUrl() + "\t"
                    + debate.getDebateMetaData().getTitle() + "\t" + countsByStance);

            String xml = DebateSerializer.serializeToXML(filtered);
            FileUtils.writeStringToFile(new File(outputDir, xmlFile.getName()), xml, "utf-8");
        }
    }
}