Example usage for org.apache.commons.math.stat.descriptive DescriptiveStatistics DescriptiveStatistics

Introduction

On this page you can find example usages of the DescriptiveStatistics constructor from org.apache.commons.math.stat.descriptive.

Prototype

public DescriptiveStatistics() 

Document

Construct a DescriptiveStatistics instance with an infinite window.
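
As a quick orientation, here is a minimal, self-contained sketch of the constructor in use; the values and variable names are illustrative and not taken from the examples below:

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

// With the no-argument constructor the window is infinite, so every value added
// is retained and contributes to all statistics.
DescriptiveStatistics stats = new DescriptiveStatistics();
for (double value : new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 }) {
    stats.addValue(value);
}
double mean = stats.getMean();                 // 3.0
double median = stats.getPercentile(50);       // 3.0
double stdDev = stats.getStandardDeviation();  // ~1.58 (sample standard deviation)
long n = stats.getN();                         // 5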

Usage

From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java

/**
 * Run an experiment which launches tasks at {@code arrivalRate} for {@code durationMs}
 * milliseconds and waits for all tasks to finish. Return a {@link DescriptiveStatistics}
 * object which contains stats about the distribution of task finish times. Tasks
 * are executed in a thread pool which contains at least {@code corePoolSize} threads
 * and grows up to {@code maxPoolSize} threads (growing whenever a new task arrives
 * and all existing threads are used).
 * 
 * Setting {@code maxPoolSize} to a very large number enacts time sharing, while
 * setting it equal to {@code corePoolSize} creates a fixed size task pool.
 * 
 * The derivative of task finishes is tracked by bucketing tasks at the granularity
 * {@code bucketSize}. If it is detected that task finishes are increasing in an 
 * unbounded fashion (i.e. infinite queuing is occurring), a {@link RuntimeException} 
 * is thrown.
 */
public static void runExperiment(double arrivalRate, int corePoolSize, int maxPoolSize, long bucketSize,
        long durationMs, DescriptiveStatistics runTimes, DescriptiveStatistics waitTimes) {
    long startTime = System.currentTimeMillis();
    long keepAliveTime = 10;
    Random r = new Random();
    BlockingQueue<Runnable> runQueue = new LinkedBlockingQueue<Runnable>();
    ExecutorService threadPool = new ThreadPoolExecutor(corePoolSize, maxPoolSize, keepAliveTime,
            TimeUnit.MILLISECONDS, runQueue);
    if (maxPoolSize == Integer.MAX_VALUE) {
        threadPool = Executors.newCachedThreadPool();
    }

    // run times indexed by bucketing interval
    HashMap<Long, List<Long>> bucketedRunTimes = new HashMap<Long, List<Long>>();
    // wait times indexed by bucketing interval
    HashMap<Long, List<Long>> bucketedWaitTimes = new HashMap<Long, List<Long>>();

    /*
     * This is a little tricky. 
     * 
     * We want to generate inter-arrival delays according to the arrival rate specified.
     * The simplest option would be to generate an arrival delay and then sleep() for it
     * before launching each task. This has an issue, however: sleep() might wait 
     * several ms longer than we ask it to. When task arrival rates get really fast, 
     * i.e. one task every 10 ms, sleeping an additional few ms will mean we launch 
     * tasks at a much lower rate than requested.
     * 
     * Instead, we keep track of task launches in a way that does not depend on how long
     * sleep() actually takes. We still might have tasks launch slightly after their
     * scheduled launch time, but we will not systematically "fall behind" due to
     * compounding time lost during sleep() calls.
     */
    long currTime = startTime;
    while (true) {
        long delay = (long) (generateInterarrivalDelay(r, arrivalRate) * 1000);

        // When should the next task launch, based on when the last task was scheduled
        // to launch.
        long nextTime = currTime + delay;

        // Diff gives how long we should wait for the next scheduled task. The difference 
        // may be negative if our last sleep() lasted too long relative to the inter-arrival
        // delay based on the last scheduled launch, so we round up to 0 in that case. 
        long diff = Math.max(0, nextTime - System.currentTimeMillis());
        currTime = nextTime;
        if (diff > 0) {
            try {
                Thread.sleep(diff);
            } catch (InterruptedException e) {
                System.err.println("Unexpected interruption!");
                System.exit(1);
            }
        }
        threadPool.submit((new BenchmarkRunnable(bucketedRunTimes, bucketedWaitTimes, bucketSize)));
        if (System.currentTimeMillis() > startTime + durationMs) {
            break;
        }
    }
    threadPool.shutdown();
    try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e1) {
        System.err.println("Unexpected interruption!");
        System.exit(1);
    }
    List<Long> times = new ArrayList<Long>(bucketedRunTimes.keySet());
    Collections.sort(times);
    HashMap<Long, DescriptiveStatistics> bucketStats = new HashMap<Long, DescriptiveStatistics>();

    // Remove the first and last buckets, since they will not be completely full
    // due to discretization.
    times.remove(0);
    times.remove(times.size() - 1);

    for (Long time : times) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        List<Long> list = bucketedRunTimes.get(time);
        for (Long l : list) {
            stats.addValue(l);
            runTimes.addValue(l);
        }
        bucketStats.put(time, stats);

        List<Long> waitList = bucketedWaitTimes.get(time);
        for (Long l : waitList) {
            waitTimes.addValue(l);
        }
    }
    int size = bucketStats.size();
    if (size >= 2) {
        DescriptiveStatistics first = bucketStats.get(times.get(0));
        DescriptiveStatistics last = bucketStats.get(times.get(times.size() - 1));
        double increase = last.getPercentile(50) / first.getPercentile(50);
        // A simple heuristic: if the median runtime grew by more than a factor of five
        // from the first to the last complete bucket, we assume we are seeing unbounded growth.
        if (increase > 5.0) {
            throw new RuntimeException(
                    "Queue not in steady state: " + last.getMean() + " vs " + first.getMean());
        }
    }
}
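
For context, a hedged sketch of how this driver might be invoked; the parameter values are illustrative assumptions (arrival rate per second, bucket size and duration in milliseconds), not settings from the original project:

DescriptiveStatistics runTimes = new DescriptiveStatistics();
DescriptiveStatistics waitTimes = new DescriptiveStatistics();

// Assumed parameters: ~50 task arrivals per second, a pool growing from 4 to 64
// threads, 1-second buckets, and a 60-second experiment.
BackendBenchmarkProfiler.runExperiment(50.0, 4, 64, 1000, 60 * 1000, runTimes, waitTimes);

System.out.println("median run time: " + runTimes.getPercentile(50));
System.out.println("99th percentile wait time: " + waitTimes.getPercentile(99));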

From source file:com.joliciel.jochre.graphics.JochreImageImpl.java

public double getAverageRowHeight() {
    if (averageRowHeight == 0) {
        DescriptiveStatistics rowHeightStats = new DescriptiveStatistics();
        for (Paragraph paragraph : this.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                int height = row.getXHeight();
                rowHeightStats.addValue(height);
            }
        }
        averageRowHeight = rowHeightStats.getPercentile(50);
        LOG.debug("averageRowHeight: " + averageRowHeight);
    }
    return averageRowHeight;
}

From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java

private Double[] minmax(Double[] vect) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}

From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java

private Double[] minmax(Double[] vect1, Double[] vect2) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect1) {
        stats.addValue(value);
    }
    for (Double value : vect2) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}

From source file:com.linkedin.pinot.tools.PinotSegmentRebalancer.java

private void printSegmentAssignment(Map<String, Map<String, String>> mapping) throws Exception {
    StringWriter sw = new StringWriter();
    objectMapper.writerWithDefaultPrettyPrinter().writeValue(sw, mapping);
    LOGGER.info(sw.toString());
    Map<String, List<String>> serverToSegmentMapping = new TreeMap<>();
    for (String segment : mapping.keySet()) {
        Map<String, String> serverToStateMap = mapping.get(segment);
        for (String server : serverToStateMap.keySet()) {
            if (!serverToSegmentMapping.containsKey(server)) {
                serverToSegmentMapping.put(server, new ArrayList<String>());
            }
            serverToSegmentMapping.get(server).add(segment);
        }
    }
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String server : serverToSegmentMapping.keySet()) {
        List<String> list = serverToSegmentMapping.get(server);
        LOGGER.info("server " + server + " has " + list.size() + " segments");
        stats.addValue(list.size());
    }
    LOGGER.info("Segment Distrbution stat");
    LOGGER.info(stats.toString());
}

From source file:info.raack.appliancelabeler.machinelearning.appliancedetection.algorithms.HighConfidenceFSMPowerSpikeDetectionAlgorithm.java

private Map<Integer, Integer[]> computeTrainingInstanceSpikeLimits(
        List<double[]> trainingInstancesWithClassLabels) {
    Map<Integer, List<Double>> trainingSpikes = new HashMap<Integer, List<Double>>();

    // collect all spikes for each class
    for (double[] instance : trainingInstancesWithClassLabels) {
        double clazz = instance[instance.length - 1];
        if (clazz != missingValue) {
            double trainingSpike = instance[0];

            if (!trainingSpikes.containsKey((int) clazz)) {
                trainingSpikes.put((int) clazz, new ArrayList<Double>());
            }
            trainingSpikes.get((int) clazz).add(trainingSpike);
        }
    }

    Map<Integer, Integer[]> trainingInstanceLimits = new HashMap<Integer, Integer[]>();

    // calculate interval one standard deviation away from mean of labeled power spikes for each class
    for (Integer clazz : trainingSpikes.keySet()) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (Double spikeValue : trainingSpikes.get(clazz)) {
            stats.addValue(spikeValue);
        }
        trainingInstanceLimits.put(clazz,
                new Integer[] { (int) (stats.getMean() - stats.getStandardDeviation()),
                        (int) (stats.getMean() + stats.getStandardDeviation()) });
    }

    return trainingInstanceLimits;
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step11GoldDataStatistics.java

/**
 * Relevant sentences per document (per query)
 */
public static void statistics6(File inputDir, File outputDir) throws IOException {
    PrintWriter pw = new PrintWriter(new FileWriter(new File(outputDir, "stats6.csv")));

    SortedMap<String, DescriptiveStatistics> result = new TreeMap<>();
    result.put("relevantSentencesDocumentPercent", new DescriptiveStatistics());

    // print header
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%s\t%sStdDev\t", mapKey, mapKey);
    }
    pw.println();

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        System.out.println("Processing " + f);

        for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {

            if (rankedResult.plainText != null && !rankedResult.plainText.isEmpty()) {

                int relevantSentences = 0;
                int totalSentences = 0;

                if (rankedResult.goldEstimatedLabels != null) {
                    for (QueryResultContainer.SingleSentenceRelevanceVote sentenceRelevanceVote : rankedResult.goldEstimatedLabels) {
                        totalSentences++;

                        if (Boolean.valueOf(sentenceRelevanceVote.relevant)) {
                            relevantSentences++;
                        }
                    }

                    // percent relevant

                    result.get("relevantSentencesDocumentPercent")
                            .addValue((double) relevantSentences / (double) totalSentences);
                }
            }
        }
    }

    // print results
    // print header
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%.3f\t%.3f\t", result.get(mapKey).getMean(),
                result.get(mapKey).getStandardDeviation());
    }

    pw.close();

}

From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java

public double median(final List<Integer> values) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for (final Integer value : values) {
        stats.addValue(value);
    }
    // getPercentile() expects a percentile in (0, 100], so the median is the 50th percentile.
    return stats.getPercentile(50);

}

From source file:datafu.hourglass.jobs.StagedOutputJob.java

/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException if the counters file cannot be written
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);
    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();

    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);

    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }

    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));

        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}

From source file:guineu.modules.filter.report.qualityReport.ReportTask.java

private void writeDataset(List<sample> samples) {
    DecimalFormat formatter = new DecimalFormat("####.##");

    SimpleBasicDataset dataset = new SimpleQualityControlDataset("Summary Report");
    dataset.setType(DatasetType.QUALITYCONTROL);
    ((SimpleQualityControlDataset) dataset).setParameters(date, sampleSet, ionMode, injection, sampleType,
            comments);

    for (int i = 1; i <= 12; i++) {
        dataset.addColumnName(String.valueOf(i));
    }

    dataset.addRow(getRow("Date:", date));

    dataset.addRow(getRow("SampleSet:", sampleSet));

    dataset.addRow(getRow("Ion Mode:", ionMode));

    dataset.addRow(getRow("Injection volume:", injection));

    dataset.addRow(getRow("Sample type:", sampleType));

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));

    dataset.addRow(this.getTitle());

    Stats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        Stats[i] = new DescriptiveStatistics();
    }
    for (sample s : samples) {
        PeakListRow row = s.getRow(Stats);
        dataset.addRow(row);
        ((SimpleQualityControlDataset) dataset).setRow(row);
    }

    SimplePeakListRowOther row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2),
                formatter.format((Stats[i].getStandardDeviation() * 100) / Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    // row8
    SimplePeakListRowOther row8 = new SimplePeakListRowOther();
    row8.setPeak("1", "Additional parameters for seronorm control samples & batch standard:");
    dataset.addRow(row8);

    dataset.addRow(this.getTitle2());

    superStats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        superStats[i] = new DescriptiveStatistics();
    }
    for (sample s : samples) {
        PeakListRow row2 = s.getRow2(superStats);
        dataset.addRow(row2);
        ((SimpleQualityControlDataset) dataset).setAdditionalRow(row2);
    }

    row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(superStats[i].getMean()).toString());
    }
    dataset.addRow(row);

    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter
                .format((superStats[i].getStandardDeviation() * 100) / superStats[i].getMean()).toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("Comments:", comments));
    //creates internal frame with the table
    GUIUtils.showNewTable(dataset, true);
}