List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics
public DescriptiveStatistics()
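Before the project examples, a minimal self-contained sketch of this constructor (the class name, values, and printed statistics here are illustrative, not taken from any example below). By default the no-argument constructor creates an instance with an unbounded window, so every value passed to addValue() contributes to the statistics; setWindowSize() can bound it afterwards.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsDemo {
    public static void main(String[] args) {
        // No-arg constructor: unbounded window, all added values are retained.
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 4.0, 8.0 }) {
            stats.addValue(v);
        }
        System.out.println("mean   = " + stats.getMean());
        System.out.println("stddev = " + stats.getStandardDeviation());
        // getPercentile() takes a percentile in (0, 100], so 50 is the median.
        System.out.println("median = " + stats.getPercentile(50));

        // Optionally keep only the most recent n values.
        stats.setWindowSize(3);
    }
}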
From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java
/**
 * Run an experiment which launches tasks at {@code arrivalRate} for {@code durationMs}
 * milliseconds and waits for all tasks to finish. Populates the supplied
 * {@code runTimes} and {@code waitTimes} {@link DescriptiveStatistics} objects with
 * stats about the distribution of task run and wait times. Tasks are executed in a
 * thread pool which contains at least {@code corePoolSize} threads and grows up to
 * {@code maxPoolSize} threads (growing whenever a new task arrives and all existing
 * threads are in use).
 *
 * Setting {@code maxPoolSize} to a very large number enacts time sharing, while
 * setting it equal to {@code corePoolSize} creates a fixed-size task pool.
 *
 * The derivative of task finishes is tracked by bucketing tasks at the granularity
 * {@code bucketSize}. If it is detected that task finishes are increasing in an
 * unbounded fashion (i.e. infinite queuing is occurring) a {@link RuntimeException}
 * is thrown.
 */
public static void runExperiment(double arrivalRate, int corePoolSize, int maxPoolSize,
        long bucketSize, long durationMs, DescriptiveStatistics runTimes,
        DescriptiveStatistics waitTimes) {
    long startTime = System.currentTimeMillis();
    long keepAliveTime = 10;
    Random r = new Random();
    BlockingQueue<Runnable> runQueue = new LinkedBlockingQueue<Runnable>();
    ExecutorService threadPool = new ThreadPoolExecutor(corePoolSize, maxPoolSize,
            keepAliveTime, TimeUnit.MILLISECONDS, runQueue);
    if (maxPoolSize == Integer.MAX_VALUE) {
        threadPool = Executors.newCachedThreadPool();
    }

    // Run times indexed by bucketing interval.
    HashMap<Long, List<Long>> bucketedRunTimes = new HashMap<Long, List<Long>>();
    // Wait times indexed by bucketing interval.
    HashMap<Long, List<Long>> bucketedWaitTimes = new HashMap<Long, List<Long>>();

    /*
     * This is a little tricky.
     *
     * We want to generate inter-arrival delays according to the arrival rate specified.
     * The simplest option would be to generate an arrival delay and then sleep() for it
     * before launching each task. This has an issue, however: sleep() might wait
     * several ms longer than we ask it to. When task arrival rates get really fast,
     * i.e. one task every 10 ms, sleeping an additional few ms will mean we launch
     * tasks at a much lower rate than requested.
     *
     * Instead, we keep track of task launches in a way that does not depend on how long
     * sleep() actually takes. We still might have tasks launch slightly after their
     * scheduled launch time, but we will not systematically "fall behind" due to
     * compounding time lost during sleep()s.
     */
    long currTime = startTime;
    while (true) {
        long delay = (long) (generateInterarrivalDelay(r, arrivalRate) * 1000);

        // When the next task should launch, based on when the last task was scheduled
        // to launch.
        long nextTime = currTime + delay;

        // Diff gives how long we should wait for the next scheduled task. The difference
        // may be negative if our last sleep() lasted too long relative to the inter-arrival
        // delay based on the last scheduled launch, so we round up to 0 in that case.
        long diff = Math.max(0, nextTime - System.currentTimeMillis());
        currTime = nextTime;
        if (diff > 0) {
            try {
                Thread.sleep(diff);
            } catch (InterruptedException e) {
                System.err.println("Unexpected interruption!");
                System.exit(1);
            }
        }
        threadPool.submit(new BenchmarkRunnable(bucketedRunTimes, bucketedWaitTimes, bucketSize));
        if (System.currentTimeMillis() > startTime + durationMs) {
            break;
        }
    }
    threadPool.shutdown();
    try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e1) {
        System.err.println("Unexpected interruption!");
        System.exit(1);
    }
    List<Long> times = new ArrayList<Long>(bucketedRunTimes.keySet());
    Collections.sort(times);
    HashMap<Long, DescriptiveStatistics> bucketStats = new HashMap<Long, DescriptiveStatistics>();

    // Remove the first and last buckets, since they will not be completely full due to
    // discretization.
    times.remove(0);
    times.remove(times.size() - 1);

    for (Long time : times) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        List<Long> list = bucketedRunTimes.get(time);
        for (Long l : list) {
            stats.addValue(l);
            runTimes.addValue(l);
        }
        bucketStats.put(time, stats);

        List<Long> waitList = bucketedWaitTimes.get(time);
        for (Long l : waitList) {
            waitTimes.addValue(l);
        }
    }
    int size = bucketStats.size();
    if (size >= 2) {
        DescriptiveStatistics first = bucketStats.get(times.get(0));
        DescriptiveStatistics last = bucketStats.get(times.get(times.size() - 1));
        double increase = last.getPercentile(50) / first.getPercentile(50);
        // A simple heuristic: if the median runtime went up by a factor of five from
        // the first to the last complete bucket, we assume we are seeing unbounded growth.
        if (increase > 5.0) {
            throw new RuntimeException(
                    "Queue not in steady state: " + last.getMean() + " vs " + first.getMean());
        }
    }
}
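A hypothetical invocation of the method above, showing how the two DescriptiveStatistics out-parameters are consumed afterwards (the arrival rate, pool sizes, bucket size, and duration are illustrative guesses, not values from the Sparrow source):

DescriptiveStatistics runTimes = new DescriptiveStatistics();
DescriptiveStatistics waitTimes = new DescriptiveStatistics();
// Illustrative parameters: some arrival rate, a fixed pool of 4 threads,
// 1000 ms buckets, and a 10-second experiment.
runExperiment(100.0, 4, 4, 1000, 10 * 1000, runTimes, waitTimes);
System.out.println("median run time (ms):  " + runTimes.getPercentile(50));
System.out.println("median wait time (ms): " + waitTimes.getPercentile(50));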
From source file:com.joliciel.jochre.graphics.JochreImageImpl.java
public double getAverageRowHeight() {
    if (averageRowHeight == 0) {
        DescriptiveStatistics rowHeightStats = new DescriptiveStatistics();
        for (Paragraph paragraph : this.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                int height = row.getXHeight();
                rowHeightStats.addValue(height);
            }
        }
        // The "average" row height is actually the median (50th percentile),
        // which is more robust to outlier rows than the mean.
        averageRowHeight = rowHeightStats.getPercentile(50);
        LOG.debug("averageRowHeight: " + averageRowHeight);
    }
    return averageRowHeight;
}
From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java
private Double[] minmax(Double[] vect) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}
From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java
private Double[] minmax(Double[] vect1, Double[] vect2) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect1) {
        stats.addValue(value);
    }
    for (Double value : vect2) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}
From source file:com.linkedin.pinot.tools.PinotSegmentRebalancer.java
private void printSegmentAssignment(Map<String, Map<String, String>> mapping) throws Exception {
    StringWriter sw = new StringWriter();
    objectMapper.writerWithDefaultPrettyPrinter().writeValue(sw, mapping);
    LOGGER.info(sw.toString());

    // Invert the segment-to-server mapping so we can count segments per server.
    Map<String, List<String>> serverToSegmentMapping = new TreeMap<>();
    for (String segment : mapping.keySet()) {
        Map<String, String> serverToStateMap = mapping.get(segment);
        for (String server : serverToStateMap.keySet()) {
            if (!serverToSegmentMapping.containsKey(server)) {
                serverToSegmentMapping.put(server, new ArrayList<String>());
            }
            serverToSegmentMapping.get(server).add(segment);
        }
    }

    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String server : serverToSegmentMapping.keySet()) {
        List<String> list = serverToSegmentMapping.get(server);
        LOGGER.info("server " + server + " has " + list.size() + " segments");
        stats.addValue(list.size());
    }
    LOGGER.info("Segment Distribution stats");
    LOGGER.info(stats.toString());
}
From source file:info.raack.appliancelabeler.machinelearning.appliancedetection.algorithms.HighConfidenceFSMPowerSpikeDetectionAlgorithm.java
private Map<Integer, Integer[]> computeTrainingInstanceSpikeLimits(
        List<double[]> trainingInstancesWithClassLabels) {
    Map<Integer, List<Double>> trainingSpikes = new HashMap<Integer, List<Double>>();

    // Collect all spikes for each class.
    for (double[] instance : trainingInstancesWithClassLabels) {
        double clazz = instance[instance.length - 1];
        if (clazz != missingValue) {
            double trainingSpike = instance[0];
            if (!trainingSpikes.containsKey((int) clazz)) {
                trainingSpikes.put((int) clazz, new ArrayList<Double>());
            }
            trainingSpikes.get((int) clazz).add(trainingSpike);
        }
    }

    Map<Integer, Integer[]> trainingInstanceLimits = new HashMap<Integer, Integer[]>();

    // Calculate the interval one standard deviation away from the mean of the
    // labeled power spikes for each class.
    for (Integer clazz : trainingSpikes.keySet()) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (Double spikeValue : trainingSpikes.get(clazz)) {
            stats.addValue(spikeValue);
        }
        trainingInstanceLimits.put(clazz,
                new Integer[] { (int) (stats.getMean() - stats.getStandardDeviation()),
                        (int) (stats.getMean() + stats.getStandardDeviation()) });
    }
    return trainingInstanceLimits;
}
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step11GoldDataStatistics.java
/**
 * Relevant sentences per document (per query).
 */
public static void statistics6(File inputDir, File outputDir) throws IOException {
    PrintWriter pw = new PrintWriter(new FileWriter(new File(outputDir, "stats6.csv")));

    SortedMap<String, DescriptiveStatistics> result = new TreeMap<>();
    result.put("relevantSentencesDocumentPercent", new DescriptiveStatistics());

    // Print the header.
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%s\t%sStdDev\t", mapKey, mapKey);
    }
    pw.println();

    // Iterate over the query containers.
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        System.out.println("Processing " + f);

        for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
            if (rankedResult.plainText != null && !rankedResult.plainText.isEmpty()) {
                int relevantSentences = 0;
                int totalSentences = 0;

                if (rankedResult.goldEstimatedLabels != null) {
                    for (QueryResultContainer.SingleSentenceRelevanceVote sentenceRelevanceVote :
                            rankedResult.goldEstimatedLabels) {
                        totalSentences++;
                        if (Boolean.valueOf(sentenceRelevanceVote.relevant)) {
                            relevantSentences++;
                        }
                    }

                    // Percent relevant.
                    result.get("relevantSentencesDocumentPercent")
                            .addValue((double) relevantSentences / (double) totalSentences);
                }
            }
        }
    }

    // Print the results (mean and standard deviation for each key).
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%.3f\t%.3f\t", result.get(mapKey).getMean(),
                result.get(mapKey).getStandardDeviation());
    }
    pw.close();
}
From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java
public double median(final List<Integer> values) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for (final Integer value : values) {
        stats.addValue(value);
    }
    // getPercentile() takes a percentile in (0, 100], so the median is the 50th
    // percentile. The original code called getPercentile(0.5), which asks for the
    // 0.5th percentile and effectively returns the minimum for small samples.
    return stats.getPercentile(50);
}
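Since the percentile argument is easy to get wrong, a quick self-contained check of the semantics (class name and values are illustrative):

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PercentileCheck {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1, 2, 3, 4, 5 }) {
            stats.addValue(v);
        }
        System.out.println(stats.getPercentile(50));  // 3.0 -- the median
        System.out.println(stats.getPercentile(0.5)); // 1.0 -- the 0.5th percentile (the minimum here)
    }
}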
From source file:datafu.hourglass.jobs.StagedOutputJob.java
/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // Make the name as unique as possible in this case because this may be a
        // directory where other counter files will be dropped.
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by
    // setup/map/reduce/cleanup. Unfortunately the job client doesn't have an easier
    // way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file:guineu.modules.filter.report.qualityReport.ReportTask.java
private void writeDataset(List<sample> samples) {
    DecimalFormat formatter = new DecimalFormat("####.##");
    SimpleBasicDataset dataset = new SimpleQualityControlDataset("Summary Report");
    dataset.setType(DatasetType.QUALITYCONTROL);
    ((SimpleQualityControlDataset) dataset).setParameters(date, sampleSet, ionMode, injection, sampleType,
            comments);

    for (int i = 1; i <= 12; i++) {
        dataset.addColumnName(String.valueOf(i));
    }

    dataset.addRow(getRow("Date:", date));
    dataset.addRow(getRow("SampleSet:", sampleSet));
    dataset.addRow(getRow("Ion Mode:", ionMode));
    dataset.addRow(getRow("Injection volume:", injection));
    dataset.addRow(getRow("Sample type:", sampleType));
    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    dataset.addRow(this.getTitle());

    Stats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        Stats[i] = new DescriptiveStatistics();
    }

    for (sample s : samples) {
        PeakListRow row = s.getRow(Stats);
        dataset.addRow(row);
        ((SimpleQualityControlDataset) dataset).setRow(row);
    }

    SimplePeakListRowOther row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    // RSD (relative standard deviation): standard deviation as a percentage of the mean.
    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2),
                formatter.format((Stats[i].getStandardDeviation() * 100) / Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));

    SimplePeakListRowOther row8 = new SimplePeakListRowOther();
    row8.setPeak("1", "Additional parameters for seronorm control samples & batch standard:");
    dataset.addRow(row8);
    dataset.addRow(this.getTitle2());

    superStats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        superStats[i] = new DescriptiveStatistics();
    }

    for (sample s : samples) {
        PeakListRow row2 = s.getRow2(superStats);
        dataset.addRow(row2);
        ((SimpleQualityControlDataset) dataset).setAdditionalRow(row2);
    }

    row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(superStats[i].getMean()).toString());
    }
    dataset.addRow(row);

    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2),
                formatter.format((superStats[i].getStandardDeviation() * 100) / superStats[i].getMean())
                        .toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("Comments:", comments));

    // Creates an internal frame with the table.
    GUIUtils.showNewTable(dataset, true);
}