Example usage for the org.apache.commons.math3.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()

List of usage examples for the org.apache.commons.math3.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()

Introduction

On this page you can find example usages of the no-argument constructor DescriptiveStatistics() of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.

Prototype

public DescriptiveStatistics() 

Source Link

Document

Constructs a DescriptiveStatistics instance with an infinite window.

Usage

From source file:io.prestosql.operator.aggregation.AbstractTestApproximateCountDistinct.java

/**
 * Runs 500 randomized trials of the group-by distinct-count estimate and checks the
 * distribution of the signed relative error.
 *
 * Each trial draws a cardinality between 1 and {@code getUniqueValuesCount()}, builds a
 * sample 1.5 times that size, and records {@code (estimate - cardinality) / cardinality}.
 * The mean of those errors must be below 1.0e-2 and their standard deviation below
 * {@code 1.0e-2 + maxStandardError}.
 *
 * @param maxStandardError standard error passed to the estimator, supplied by the
 *                         "provideStandardErrors" data provider
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    final int trials = 500;
    DescriptiveStatistics relativeErrors = new DescriptiveStatistics();

    for (int trial = 0; trial < trials; trial++) {
        // nextInt(bound) is exclusive of the bound, so shift into [1, bound].
        int cardinality = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (cardinality * 1.5);
        List<Object> sample = createRandomSample(cardinality, sampleSize);

        long estimate = estimateGroupByCount(sample, maxStandardError);

        // Multiplying by 1.0 forces floating-point division.
        relativeErrors.addValue((estimate - cardinality) * 1.0 / cardinality);
    }

    assertLessThan(relativeErrors.getMean(), 1.0e-2);
    assertLessThan(relativeErrors.getStandardDeviation(), 1.0e-2 + maxStandardError);
}

From source file:com.insightml.math.statistics.Correlation.java

/**
 * Builds two chart series from the paired arrays: for every distinct value in
 * {@code arrays[1]}, the mean and the median (50th percentile) of the
 * {@code arrays[0]} values found at the same indices.
 *
 * @param label text appended to the "Average " and "Median " series names
 * @return a list holding the "Average" series followed by the "Median" series
 */
public final PairList<String, Map<Number, Number>> getChart(final CharSequence label) {
    // Group the arrays[0] values by their companion arrays[1] value.
    final Map<Number, DescriptiveStatistics> statsByKey = new HashMap<>();
    final int length = arrays[0].length;
    for (int index = 0; index < length; ++index) {
        final Double key = arrays[1][index];
        DescriptiveStatistics bucket = statsByKey.get(key);
        if (bucket == null) {
            bucket = new DescriptiveStatistics();
            statsByKey.put(key, bucket);
        }
        bucket.addValue(arrays[0][index]);
    }

    // Reduce every group to its mean and median.
    final Map<Number, Number> means = new HashMap<>();
    final Map<Number, Number> medians = new HashMap<>();
    for (final Entry<Number, DescriptiveStatistics> group : statsByKey.entrySet()) {
        final Number key = group.getKey();
        final DescriptiveStatistics bucket = group.getValue();
        means.put(key, bucket.getMean());
        medians.put(key, bucket.getPercentile(50));
    }

    final PairList<String, Map<Number, Number>> series = new PairList<>();
    series.add("Average " + label, means);
    series.add("Median " + label, medians);
    return series;
}

From source file:ijfx.core.stats.DefaultImageStatisticsService.java

/**
 * Feeds the real value of every element visited by the dataset's cursor into a new
 * {@link DescriptiveStatistics} instance.
 *
 * @param dataset dataset whose cursor is walked from its reset position to the end
 * @return statistics holding one value per visited element
 */
@Override
public DescriptiveStatistics getDatasetDescriptiveStatistics(Dataset dataset) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    final Cursor<RealType<?>> elements = dataset.cursor();

    // fwd() must be called before get(), so the advance cannot live in the loop header.
    for (elements.reset(); elements.hasNext();) {
        elements.fwd();
        stats.addValue(elements.get().getRealDouble());
    }
    return stats;
}

From source file:com.caseystella.analytics.outlier.streaming.mad.ConfusionMatrix.java

/**
 * Builds a confusion matrix comparing expected outlier timestamps with computed ones.
 *
 * Entries are keyed as {@code ConfusionEntry(computed classification, expected classification)}:
 * an expected outlier matched by {@code setContains} counts as (OUTLIER, OUTLIER), an
 * unmatched expected outlier as (NON_OUTLIER, OUTLIER), and a computed outlier with no
 * expected match as (OUTLIER, NON_OUTLIER). Everything else up to {@code numObservations}
 * is booked as (NON_OUTLIER, NON_OUTLIER).
 *
 * Side effects: prints a line to standard output for each missed expected outlier, prints
 * score statistics via {@code printStats}, adds expected-outlier scores to
 * {@code globalExpectedOutlierScoreStats}, and asserts that the matrix total equals
 * {@code numObservations}.
 *
 * NOTE(review): {@code outlierMap.get(...)} is dereferenced without a null check; this
 * presumably relies on every expected and computed timestamp being present in the map.
 *
 * @param expectedOutliers                timestamps that should have been flagged
 * @param computedOutliers                timestamps that were flagged
 * @param numObservations                 total number of observations
 * @param meanDiffBetweenTs               mean spacing between timestamps, passed to {@code setContains}
 * @param timeBounds                      matching tolerance, passed to {@code setContains}
 * @param outlierMap                      outlier details keyed by timestamp
 * @param globalExpectedOutlierScoreStats accumulator receiving every non-null expected-outlier score
 * @return the populated confusion matrix
 */
public static Map<ConfusionEntry, Long> getConfusionMatrix(Set<Long> expectedOutliers,
        Set<Long> computedOutliers, long numObservations, long meanDiffBetweenTs, int timeBounds,
        Map<Long, Outlier> outlierMap, DescriptiveStatistics globalExpectedOutlierScoreStats) {
    // Start with a zero count for every (computed, expected) combination.
    Map<ConfusionEntry, Long> matrix = new HashMap<>();
    for (ResultType computedType : ResultType.values()) {
        for (ResultType expectedType : ResultType.values()) {
            matrix.put(new ConfusionEntry(computedType, expectedType), 0L);
        }
    }

    int classified = 0;

    // Pass 1: every expected outlier is either matched or missed.
    DescriptiveStatistics expectedScores = new DescriptiveStatistics();
    for (Long expected : expectedOutliers) {
        Outlier outlier = outlierMap.get(expected);
        if (outlier.getScore() != null) {
            expectedScores.addValue(outlier.getScore());
            globalExpectedOutlierScoreStats.addValue(outlier.getScore());
        }

        ResultType computedAs;
        if (setContains(computedOutliers, expected, meanDiffBetweenTs, timeBounds)) {
            computedAs = ResultType.OUTLIER;
        } else {
            computedAs = ResultType.NON_OUTLIER;

            // Long.MAX_VALUE from closest(...) is treated as "no computed outliers at all".
            long nearest = closest(expected, computedOutliers);
            String prefix = "Missed an outlier (" + expected + ") wasn't in computed outliers (" + outlier + ")";
            String detail;
            if (nearest != Long.MAX_VALUE) {
                long delta = Math.abs(expected - nearest);
                detail = ", closest point is " + nearest + " which is " + timeConversion(delta) + "away.";
            } else {
                detail = ", which is empty.";
            }
            System.out.println(prefix + detail + " - E[delta t] " + timeConversion(meanDiffBetweenTs));
        }
        ConfusionEntry.increment(new ConfusionEntry(computedAs, ResultType.OUTLIER), matrix);
        classified++;
    }
    printStats("Expected Outlier Score Stats", expectedScores);

    // Pass 2: computed outliers that match no expected outlier.
    DescriptiveStatistics unexpectedScores = new DescriptiveStatistics();
    for (Long computed : computedOutliers) {
        if (setContains(expectedOutliers, computed, meanDiffBetweenTs, timeBounds)) {
            continue;
        }
        Outlier outlier = outlierMap.get(computed);
        if (outlier.getScore() != null) {
            unexpectedScores.addValue(outlier.getScore());
        }
        ConfusionEntry.increment(new ConfusionEntry(ResultType.OUTLIER, ResultType.NON_OUTLIER), matrix);
        classified++;
    }
    printStats("Computed Outlier Scores", unexpectedScores);

    // Whatever was not touched above is counted as a non-outlier on both sides.
    matrix.put(new ConfusionEntry(ResultType.NON_OUTLIER, ResultType.NON_OUTLIER), numObservations - classified);
    Assert.assertEquals(numObservations, getTotalNum(matrix));
    return matrix;
}

From source file:com.nridge.connector.fs.con_fs.core.RunMetricReport.java

/**
 * Consumes items from the publish queue, accumulating the per-phase timings
 * (extract, transform, publish) carried by each document item, until the crawl
 * queue reports the publish phase as complete. A per-phase metric line and an
 * overall summary are then written to the application log.
 *
 * This is the end of the queue processing pipeline, so queue item markers are
 * not passed on to another queue.
 *
 * @see Thread#run()
 */
@Override
public void run() {
    Logger appLogger = mAppMgr.getLogger(this, "run");

    appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER);

    long extractCount = 0;
    DescriptiveStatistics dsExtract = new DescriptiveStatistics();
    long transformCount = 0;
    DescriptiveStatistics dsTransform = new DescriptiveStatistics();
    long publishCount = 0;
    DescriptiveStatistics dsPublish = new DescriptiveStatistics();

    BlockingQueue<?> publishQueue = (BlockingQueue<?>) mAppMgr.getProperty(Connector.QUEUE_PUBLISH_NAME);

    String queueItem;
    do {
        try {
            // poll() returns null when the timeout elapses without an item.
            queueItem = (String) publishQueue.poll(Constants.QUEUE_POLL_TIMEOUT_DEFAULT, TimeUnit.SECONDS);
            if (mCrawlQueue.isQueueItemDocument(queueItem)) {
                String docId = Connector.docIdFromQueueItem(queueItem);

                appLogger.debug(String.format("Publish Queue Item: %s", docId));

                String[] phaseTimes = Connector.phaseTimeFromQueueItem(queueItem);
                if (phaseTimes != null) {
                    for (String phaseTime : phaseTimes) {
                        String phaseName = Connector.phaseFromPhaseTime(phaseTime);
                        long msTime = Connector.timeFromPhaseTime(phaseTime);

                        // Cast first so the division is floating-point even if the
                        // constant is an integral type; otherwise sub-second timings
                        // would truncate to whole seconds.
                        double secondsTime = ((double) msTime) / MILLISECONDS_IN_A_SECOND;

                        if (StringUtils.equals(phaseName, Connector.PHASE_EXTRACT)) {
                            extractCount++;
                            dsExtract.addValue(secondsTime);
                        } else if (StringUtils.equals(phaseName, Connector.PHASE_TRANSFORM)) {
                            transformCount++;
                            dsTransform.addValue(secondsTime);
                        } else if (StringUtils.equals(phaseName, Connector.PHASE_PUBLISH)) {
                            publishCount++;
                            dsPublish.addValue(secondsTime);
                        }
                    }
                }
            }
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is deliberately absorbed and the loop keeps
            // polling. Restoring the interrupt flag here would make the next poll()
            // throw immediately, so an exit path would have to be added with it.
            queueItem = StringUtils.EMPTY;
        }
    } while (!mCrawlQueue.isPhaseComplete(Connector.PHASE_PUBLISH, queueItem));

    // Generate our metrics summary for the log file.

    writePhaseMetric(Connector.PHASE_EXTRACT, extractCount, dsExtract.getSum());
    writePhaseMetric(Connector.PHASE_TRANSFORM, transformCount, dsTransform.getSum());
    writePhaseMetric(Connector.PHASE_PUBLISH, publishCount, dsPublish.getSum());

    double totalTime = dsExtract.getSum() + dsTransform.getSum() + dsPublish.getSum();

    // Avoid dividing by zero when nothing was published or no time was recorded.
    double docsPerSecond = 0.0;
    if ((publishCount > 0L) && (totalTime > 0.0)) {
        docsPerSecond = publishCount / totalTime;
    }
    String msgStr = String.format("Total metric summary: %d documents, %.2f seconds (%.2f docs/sec avg)",
            publishCount, totalTime, docsPerSecond);
    appLogger.info(msgStr);

    appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART);
}

From source file:io.hops.leaderElection.experiments.ExperimentDriver.java

/**
 * Parses the experiment output file and forwards the collected statistics to
 * {@code writeMessageToFile}.
 *
 * Only lines starting with "DataPoints: " are read. The remainder of such a line is
 * split on commas, square brackets and spaces; the first number goes into the
 * {@code tpStats} series and every following number into the fail-over series.
 *
 * If the file does not exist, an error is logged and nothing is written.
 *
 * @param numProcesses   number of processes, passed through to {@code writeMessageToFile}
 * @param outputFileName path of the experiment output file to parse
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException           if reading the file fails
 * @throws NumberFormatException if a token on a data line is not a valid double
 */
private void calculateNumbers(int numProcesses, String outputFileName)
        throws FileNotFoundException, IOException {
    if (!new File(outputFileName).exists()) {
        LOG.error("File " + outputFileName + " does not exists");
        return;
    }

    final String marker = "DataPoints: ";
    DescriptiveStatistics failOverStats = new DescriptiveStatistics();
    DescriptiveStatistics tpStats = new DescriptiveStatistics();

    // try-with-resources closes the reader even when readLine() or parseDouble() throws.
    try (BufferedReader br = new BufferedReader(new FileReader(outputFileName))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (!line.startsWith(marker)) {
                continue;
            }

            StringTokenizer st = new StringTokenizer(line.substring(marker.length()), ",[] ");
            boolean tpStatRecorded = false;
            while (st.hasMoreTokens()) {
                double point = Double.parseDouble(st.nextToken());
                if (!tpStatRecorded) {
                    // The first number on the line belongs to the tpStats series.
                    tpStats.addValue(point);
                    tpStatRecorded = true;
                } else {
                    failOverStats.addValue(point);
                }
            }
        }
    }
    writeMessageToFile(numProcesses, failOverStats, tpStats);
}

From source file:com.joliciel.talismane.stats.FScoreCalculatorOneVsRest.java

/**
 * Lazily computes the per-outcome precision, recall and f-score statistics.
 *
 * Two sets of statistics are filled: the plain ones receive one value per outcome whose
 * count is positive, and the weighted ones receive the same value once per occurrence
 * of the outcome. Outcomes missing from {@code outcomeCounts} are registered with a
 * count of zero. Subsequent calls are no-ops once {@code calculated} is set.
 */
private void calculate() {
    if (this.calculated) {
        return;
    }

    precisionStats = new DescriptiveStatistics();
    recallStats = new DescriptiveStatistics();
    fScoreStats = new DescriptiveStatistics();

    precisionWeightedStats = new DescriptiveStatistics();
    recallWeightedStats = new DescriptiveStatistics();
    fScoreWeightedStats = new DescriptiveStatistics();

    for (E outcome : fScoreCalculators.keySet()) {
        if (!outcomeCounts.containsKey(outcome)) {
            outcomeCounts.put(outcome, 0);
        }

        final int occurrences = outcomeCounts.get(outcome);
        final FScoreCalculator<Boolean> oneVsRest = fScoreCalculators.get(outcome);

        // Unweighted: each observed outcome contributes exactly once.
        if (occurrences > 0) {
            precisionStats.addValue(oneVsRest.getPrecision(true));
            recallStats.addValue(oneVsRest.getRecall(true));
            fScoreStats.addValue(oneVsRest.getFScore(true));
        }

        // Weighted: each outcome contributes once per occurrence.
        for (int remaining = occurrences; remaining > 0; remaining--) {
            precisionWeightedStats.addValue(oneVsRest.getPrecision(true));
            recallWeightedStats.addValue(oneVsRest.getRecall(true));
            fScoreWeightedStats.addValue(oneVsRest.getFScore(true));
        }
    }

    this.calculated = true;
}

From source file:com.itemanalysis.psychometrics.measurement.TestSummary.java

/**
 * Creates a test summary, storing the supplied configuration and allocating the
 * statistics objects.
 *
 * @param numberOfItems      number of items, stored as given
 * @param numberOfSubscales  number of subscales; a part covariance matrix is created only when this exceeds 1
 * @param cutScores          cut scores, stored as given (the array is not copied)
 * @param variableAttributes variable attributes, stored and used to build the covariance matrix
 * @param unbiased           stored, and passed to the {@code StandardDeviation} constructor
 * @param deletedReliability stored as given
 * @param showCsem           stored as given
 */
public TestSummary(int numberOfItems, int numberOfSubscales, int[] cutScores,
        ArrayList<VariableAttributes> variableAttributes, boolean unbiased, boolean deletedReliability,
        boolean showCsem) {
    // Configuration supplied by the caller.
    this.numberOfItems = numberOfItems;
    this.numberOfSubscales = numberOfSubscales;
    this.cutScores = cutScores;
    this.variableAttributes = variableAttributes;
    this.unbiased = unbiased;
    this.deletedReliability = deletedReliability;
    this.showCsem = showCsem;

    // Statistics objects.
    this.stats = new DescriptiveStatistics();
    this.stdDev = new StandardDeviation(unbiased);
    this.relMatrix = new CovarianceMatrix(variableAttributes);
    if (numberOfSubscales > 1) {
        this.partRelMatrix = new CovarianceMatrix(numberOfSubscales);
    }
}

From source file:io.yields.math.framework.data.DataProvidersTest.java

/**
 * Checks distributional properties of the 2D samples produced by the explorer:
 * both coordinates must be close to uniform on [0, 1] according to the
 * Kolmogorov-Smirnov statistic, and the mean of the centred cross product
 * {@code (x - 0.5) * (y - 0.5)} must be close to zero.
 *
 * @param explorer source of the explored function outcomes; a missing outcome is
 *                 replaced by {@code new Pair()}
 */
@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000)
@Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers")
public void testRandomDistribution(Explorer<Pair> explorer) {
    KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
    DescriptiveStatistics xStats = new DescriptiveStatistics();
    DescriptiveStatistics yStats = new DescriptiveStatistics();
    explorer.all().forEach(result -> {
        Pair pair = result.getFunctionOutcome().orElse(new Pair());
        xStats.addValue(pair.getX1());
        yStats.addValue(pair.getX2());
    });

    // getValues() returns a fresh copy on every call, so snapshot each series once
    // instead of copying both arrays on every loop iteration.
    double[] xs = xStats.getValues();
    double[] ys = yStats.getValues();

    DescriptiveStatistics cross = new DescriptiveStatistics();
    for (int i = 0; i < xs.length; i++) {
        cross.addValue((xs[i] - .5) * (ys[i] - .5));
    }

    // x and y should be uniformly distributed
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xs))
            .isEqualTo(0, Delta.delta(.015));
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), ys))
            .isEqualTo(0, Delta.delta(.015));

    // and have zero correlation
    assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05));
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step1DebateFilter.java

/**
 * Filters the selected debates down to first-level arguments of a suitable length and
 * writes every debate that retains enough of them to the output directory.
 *
 * An argument is kept when it has no parent and its whitespace-separated word count lies
 * within {@code MEDIAN +/- ARGUMENT_LENGTH_PLUS_MINUS_RANGE} (inclusive). A debate is
 * written (and a summary line printed to standard output) when the first two stances in
 * sorted order together reach {@code MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE}
 * and each reaches {@code MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE}.
 *
 * @param inputDir  directory containing all debates as XML files (not searched recursively)
 * @param outputDir directory receiving the filtered debates, one file per input file name
 * @throws IOException if reading an input file or writing an output file fails
 */
public static void processData(String inputDir, File outputDir) throws IOException {
    // Length statistics of the kept arguments; collected here but not reported.
    DescriptiveStatistics filteredWordCountStatistics = new DescriptiveStatistics();
    Frequency frequency = new Frequency();

    final int minWords = MEDIAN - ARGUMENT_LENGTH_PLUS_MINUS_RANGE;
    final int maxWords = MEDIAN + ARGUMENT_LENGTH_PLUS_MINUS_RANGE;

    for (File file : FileUtils.listFiles(new File(inputDir), new String[] { "xml" }, false)) {
        Debate debate = DebateSerializer.deserializeFromXML(FileUtils.readFileToString(file, "utf-8"));

        // Skip debates that were not selected.
        if (!selectedDebates.contains(debate.getDebateMetaData().getUrl())) {
            continue;
        }

        Debate filtered = new Debate();
        filtered.setDebateMetaData(debate.getDebateMetaData());

        // Number of kept first-level arguments per stance, sorted by stance name.
        Map<String, Integer> countsByStance = new TreeMap<>();

        for (Argument argument : debate.getArgumentList()) {
            // Data clean-up: normalise every casing of "tv" to "TV".
            if ("tv".equalsIgnoreCase(argument.getStance())) {
                argument.setStance("TV");
            }

            // Replies (arguments with a parent) are never kept.
            if (argument.getParentId() != null) {
                continue;
            }

            int wordCount = argument.getText().split("\\s+").length;
            if (wordCount < minWords || wordCount > maxWords) {
                continue;
            }

            String stance = argument.getStance();
            Integer soFar = countsByStance.get(stance);
            countsByStance.put(stance, soFar == null ? 1 : soFar + 1);

            filteredWordCountStatistics.addValue(wordCount);
            // Bucket word counts into steps of ten.
            frequency.addValue((wordCount / 10) * 10);

            filtered.getArgumentList().add(argument);
        }

        // Only the first two stances in sorted order are considered; any further
        // stances are ignored. A missing stance counts as zero arguments.
        Iterator<Map.Entry<String, Integer>> stances = countsByStance.entrySet().iterator();
        int firstSide = stances.hasNext() ? stances.next().getValue() : 0;
        int secondSide = stances.hasNext() ? stances.next().getValue() : 0;

        boolean enoughOverall = (firstSide + secondSide) >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_DEBATE;
        boolean enoughPerSide = firstSide >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE
                && secondSide >= MINIMUM_NUMBER_OF_FIRST_LEVEL_ARGUMENTS_PER_SIDE;

        if (enoughOverall && enoughPerSide) {
            System.out.println(debate.getDebateMetaData().getUrl() + "\t"
                    + debate.getDebateMetaData().getTitle() + "\t" + countsByStance);

            // Write the filtered debate under the same file name.
            String xml = DebateSerializer.serializeToXML(filtered);
            FileUtils.writeStringToFile(new File(outputDir, file.getName()), xml, "utf-8");
        }
    }
}