List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics
public DescriptiveStatistics()
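Before the project examples, a minimal self-contained sketch of this constructor (the class name, values, and printed statistics here are illustrative, not taken from any example below). By default the no-argument constructor creates an instance with an unbounded window, so every value passed to addValue() contributes to the statistics; setWindowSize() can bound it afterwards.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsDemo {
    public static void main(String[] args) {
        // No-arg constructor: unbounded window, all added values are retained.
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 4.0, 8.0 }) {
            stats.addValue(v);
        }
        System.out.println("mean   = " + stats.getMean());
        System.out.println("stddev = " + stats.getStandardDeviation());
        // getPercentile() takes a percentile in (0, 100], so 50 is the median.
        System.out.println("median = " + stats.getPercentile(50));

        // Optionally keep only the most recent n values.
        stats.setWindowSize(3);
    }
}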
From source file:edu.berkeley.sparrow.examples.BackendBenchmarkProfiler.java
/**
 * Run an experiment which launches tasks at {@code arrivalRate} for {@code durationMs}
 * milliseconds and waits for all tasks to finish. Populates the supplied
 * {@code runTimes} and {@code waitTimes} {@link DescriptiveStatistics} objects with
 * stats about the distribution of task run and wait times. Tasks are executed in a
 * thread pool which contains at least {@code corePoolSize} threads and grows up to
 * {@code maxPoolSize} threads (growing whenever a new task arrives and all existing
 * threads are in use).
 *
 * Setting {@code maxPoolSize} to a very large number enacts time sharing, while
 * setting it equal to {@code corePoolSize} creates a fixed-size task pool.
 *
 * The derivative of task finishes is tracked by bucketing tasks at the granularity
 * {@code bucketSize}. If it is detected that task finishes are increasing in an
 * unbounded fashion (i.e. infinite queuing is occurring) a {@link RuntimeException}
 * is thrown.
 */
public static void runExperiment(double arrivalRate, int corePoolSize, int maxPoolSize,
        long bucketSize, long durationMs, DescriptiveStatistics runTimes,
        DescriptiveStatistics waitTimes) {
    long startTime = System.currentTimeMillis();
    long keepAliveTime = 10;
    Random r = new Random();
    BlockingQueue<Runnable> runQueue = new LinkedBlockingQueue<Runnable>();
    ExecutorService threadPool = new ThreadPoolExecutor(corePoolSize, maxPoolSize,
            keepAliveTime, TimeUnit.MILLISECONDS, runQueue);
    if (maxPoolSize == Integer.MAX_VALUE) {
        threadPool = Executors.newCachedThreadPool();
    }

    // Run times indexed by bucketing interval.
    HashMap<Long, List<Long>> bucketedRunTimes = new HashMap<Long, List<Long>>();
    // Wait times indexed by bucketing interval.
    HashMap<Long, List<Long>> bucketedWaitTimes = new HashMap<Long, List<Long>>();

    /*
     * This is a little tricky.
     *
     * We want to generate inter-arrival delays according to the arrival rate specified.
     * The simplest option would be to generate an arrival delay and then sleep() for it
     * before launching each task. This has an issue, however: sleep() might wait
     * several ms longer than we ask it to. When task arrival rates get really fast,
     * i.e. one task every 10 ms, sleeping an additional few ms will mean we launch
     * tasks at a much lower rate than requested.
     *
     * Instead, we keep track of task launches in a way that does not depend on how long
     * sleep() actually takes. We still might have tasks launch slightly after their
     * scheduled launch time, but we will not systematically "fall behind" due to
     * compounding time lost during sleep()s.
     */
    long currTime = startTime;
    while (true) {
        long delay = (long) (generateInterarrivalDelay(r, arrivalRate) * 1000);

        // When the next task should launch, based on when the last task was scheduled
        // to launch.
        long nextTime = currTime + delay;

        // Diff gives how long we should wait for the next scheduled task. The difference
        // may be negative if our last sleep() lasted too long relative to the inter-arrival
        // delay based on the last scheduled launch, so we round up to 0 in that case.
        long diff = Math.max(0, nextTime - System.currentTimeMillis());
        currTime = nextTime;
        if (diff > 0) {
            try {
                Thread.sleep(diff);
            } catch (InterruptedException e) {
                System.err.println("Unexpected interruption!");
                System.exit(1);
            }
        }
        threadPool.submit(new BenchmarkRunnable(bucketedRunTimes, bucketedWaitTimes, bucketSize));
        if (System.currentTimeMillis() > startTime + durationMs) {
            break;
        }
    }
    threadPool.shutdown();
    try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e1) {
        System.err.println("Unexpected interruption!");
        System.exit(1);
    }
    List<Long> times = new ArrayList<Long>(bucketedRunTimes.keySet());
    Collections.sort(times);
    HashMap<Long, DescriptiveStatistics> bucketStats = new HashMap<Long, DescriptiveStatistics>();

    // Remove the first and last buckets, since they will not be completely full due to
    // discretization.
    times.remove(0);
    times.remove(times.size() - 1);

    for (Long time : times) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        List<Long> list = bucketedRunTimes.get(time);
        for (Long l : list) {
            stats.addValue(l);
            runTimes.addValue(l);
        }
        bucketStats.put(time, stats);

        List<Long> waitList = bucketedWaitTimes.get(time);
        for (Long l : waitList) {
            waitTimes.addValue(l);
        }
    }
    int size = bucketStats.size();
    if (size >= 2) {
        DescriptiveStatistics first = bucketStats.get(times.get(0));
        DescriptiveStatistics last = bucketStats.get(times.get(times.size() - 1));
        double increase = last.getPercentile(50) / first.getPercentile(50);
        // A simple heuristic: if the median runtime went up by a factor of five from
        // the first to the last complete bucket, we assume we are seeing unbounded growth.
        if (increase > 5.0) {
            throw new RuntimeException(
                    "Queue not in steady state: " + last.getMean() + " vs " + first.getMean());
        }
    }
}
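A hypothetical invocation of the method above, showing how the two DescriptiveStatistics out-parameters are consumed afterwards (the arrival rate, pool sizes, bucket size, and duration are illustrative guesses, not values from the Sparrow source):

DescriptiveStatistics runTimes = new DescriptiveStatistics();
DescriptiveStatistics waitTimes = new DescriptiveStatistics();
// Illustrative parameters: some arrival rate, a fixed pool of 4 threads,
// 1000 ms buckets, and a 10-second experiment.
runExperiment(100.0, 4, 4, 1000, 10 * 1000, runTimes, waitTimes);
System.out.println("median run time (ms):  " + runTimes.getPercentile(50));
System.out.println("median wait time (ms): " + waitTimes.getPercentile(50));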
From source file:com.joliciel.jochre.graphics.JochreImageImpl.java
public double getAverageRowHeight() {
    if (averageRowHeight == 0) {
        DescriptiveStatistics rowHeightStats = new DescriptiveStatistics();
        for (Paragraph paragraph : this.getParagraphs()) {
            for (RowOfShapes row : paragraph.getRows()) {
                int height = row.getXHeight();
                rowHeightStats.addValue(height);
            }
        }
        // The "average" row height is actually the median (50th percentile),
        // which is more robust to outlier rows than the mean.
        averageRowHeight = rowHeightStats.getPercentile(50);
        LOG.debug("averageRowHeight: " + averageRowHeight);
    }
    return averageRowHeight;
}
From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java
private Double[] minmax(Double[] vect) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}
From source file:de.mpicbg.knime.hcs.base.utils.MutualInformation.java
private Double[] minmax(Double[] vect1, Double[] vect2) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Double value : vect1) {
        stats.addValue(value);
    }
    for (Double value : vect2) {
        stats.addValue(value);
    }
    return new Double[] { stats.getMin(), stats.getMax() };
}
From source file:com.linkedin.pinot.tools.PinotSegmentRebalancer.java
private void printSegmentAssignment(Map<String, Map<String, String>> mapping) throws Exception {
    StringWriter sw = new StringWriter();
    objectMapper.writerWithDefaultPrettyPrinter().writeValue(sw, mapping);
    LOGGER.info(sw.toString());

    // Invert the segment-to-server mapping so we can count segments per server.
    Map<String, List<String>> serverToSegmentMapping = new TreeMap<>();
    for (String segment : mapping.keySet()) {
        Map<String, String> serverToStateMap = mapping.get(segment);
        for (String server : serverToStateMap.keySet()) {
            if (!serverToSegmentMapping.containsKey(server)) {
                serverToSegmentMapping.put(server, new ArrayList<String>());
            }
            serverToSegmentMapping.get(server).add(segment);
        }
    }

    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String server : serverToSegmentMapping.keySet()) {
        List<String> list = serverToSegmentMapping.get(server);
        LOGGER.info("server " + server + " has " + list.size() + " segments");
        stats.addValue(list.size());
    }
    LOGGER.info("Segment Distribution stats");
    LOGGER.info(stats.toString());
}
From source file:info.raack.appliancelabeler.machinelearning.appliancedetection.algorithms.HighConfidenceFSMPowerSpikeDetectionAlgorithm.java
private Map<Integer, Integer[]> computeTrainingInstanceSpikeLimits(
        List<double[]> trainingInstancesWithClassLabels) {
    Map<Integer, List<Double>> trainingSpikes = new HashMap<Integer, List<Double>>();

    // Collect all spikes for each class.
    for (double[] instance : trainingInstancesWithClassLabels) {
        double clazz = instance[instance.length - 1];
        if (clazz != missingValue) {
            double trainingSpike = instance[0];
            if (!trainingSpikes.containsKey((int) clazz)) {
                trainingSpikes.put((int) clazz, new ArrayList<Double>());
            }
            trainingSpikes.get((int) clazz).add(trainingSpike);
        }
    }

    Map<Integer, Integer[]> trainingInstanceLimits = new HashMap<Integer, Integer[]>();

    // Calculate the interval one standard deviation away from the mean of the
    // labeled power spikes for each class.
    for (Integer clazz : trainingSpikes.keySet()) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (Double spikeValue : trainingSpikes.get(clazz)) {
            stats.addValue(spikeValue);
        }
        trainingInstanceLimits.put(clazz,
                new Integer[] { (int) (stats.getMean() - stats.getStandardDeviation()),
                        (int) (stats.getMean() + stats.getStandardDeviation()) });
    }
    return trainingInstanceLimits;
}
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step11GoldDataStatistics.java
/**
 * Relevant sentences per document (per query).
 */
public static void statistics6(File inputDir, File outputDir) throws IOException {
    PrintWriter pw = new PrintWriter(new FileWriter(new File(outputDir, "stats6.csv")));

    SortedMap<String, DescriptiveStatistics> result = new TreeMap<>();
    result.put("relevantSentencesDocumentPercent", new DescriptiveStatistics());

    // Print the header.
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%s\t%sStdDev\t", mapKey, mapKey);
    }
    pw.println();

    // Iterate over the query containers.
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        System.out.println("Processing " + f);

        for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
            if (rankedResult.plainText != null && !rankedResult.plainText.isEmpty()) {
                int relevantSentences = 0;
                int totalSentences = 0;

                if (rankedResult.goldEstimatedLabels != null) {
                    for (QueryResultContainer.SingleSentenceRelevanceVote sentenceRelevanceVote :
                            rankedResult.goldEstimatedLabels) {
                        totalSentences++;
                        if (Boolean.valueOf(sentenceRelevanceVote.relevant)) {
                            relevantSentences++;
                        }
                    }

                    // Percent relevant.
                    result.get("relevantSentencesDocumentPercent")
                            .addValue((double) relevantSentences / (double) totalSentences);
                }
            }
        }
    }

    // Print the results (mean and standard deviation for each key).
    for (String mapKey : result.keySet()) {
        pw.printf(Locale.ENGLISH, "%.3f\t%.3f\t", result.get(mapKey).getMean(),
                result.get(mapKey).getStandardDeviation());
    }
    pw.close();
}
From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java
public double median(final List<Integer> values) {
    final DescriptiveStatistics stats = new DescriptiveStatistics();
    for (final Integer value : values) {
        stats.addValue(value);
    }
    // getPercentile() takes a percentile in (0, 100], so the median is the 50th
    // percentile. The original code called getPercentile(0.5), which asks for the
    // 0.5th percentile and effectively returns the minimum for small samples.
    return stats.getPercentile(50);
}
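Since the percentile argument is easy to get wrong, a quick self-contained check of the semantics (class name and values are illustrative):

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class PercentileCheck {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1, 2, 3, 4, 5 }) {
            stats.addValue(v);
        }
        System.out.println(stats.getPercentile(50));  // 3.0 -- the median
        System.out.println(stats.getPercentile(0.5)); // 1.0 -- the 0.5th percentile (the minimum here)
    }
}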
From source file:datafu.hourglass.jobs.StagedOutputJob.java
/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // Make the name as unique as possible in this case because this may be a
        // directory where other counter files will be dropped.
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by
    // setup/map/reduce/cleanup. Unfortunately the job client doesn't have an easier
    // way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();

        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));

        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));

        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));

        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));

        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file:guineu.modules.filter.report.qualityReport.ReportTask.java
private void writeDataset(List<sample> samples) {
    DecimalFormat formatter = new DecimalFormat("####.##");
    SimpleBasicDataset dataset = new SimpleQualityControlDataset("Summary Report");
    dataset.setType(DatasetType.QUALITYCONTROL);
    ((SimpleQualityControlDataset) dataset).setParameters(date, sampleSet, ionMode, injection, sampleType,
            comments);

    for (int i = 1; i <= 12; i++) {
        dataset.addColumnName(String.valueOf(i));
    }

    dataset.addRow(getRow("Date:", date));
    dataset.addRow(getRow("SampleSet:", sampleSet));
    dataset.addRow(getRow("Ion Mode:", ionMode));
    dataset.addRow(getRow("Injection volume:", injection));
    dataset.addRow(getRow("Sample type:", sampleType));
    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    dataset.addRow(this.getTitle());

    Stats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        Stats[i] = new DescriptiveStatistics();
    }

    for (sample s : samples) {
        PeakListRow row = s.getRow(Stats);
        dataset.addRow(row);
        ((SimpleQualityControlDataset) dataset).setRow(row);
    }

    SimplePeakListRowOther row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    // RSD (relative standard deviation): standard deviation as a percentage of the mean.
    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2),
                formatter.format((Stats[i].getStandardDeviation() * 100) / Stats[i].getMean()).toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));

    SimplePeakListRowOther row8 = new SimplePeakListRowOther();
    row8.setPeak("1", "Additional parameters for seronorm control samples & batch standard:");
    dataset.addRow(row8);
    dataset.addRow(this.getTitle2());

    superStats = new DescriptiveStatistics[9];
    for (int i = 0; i < 9; i++) {
        superStats[i] = new DescriptiveStatistics();
    }

    for (sample s : samples) {
        PeakListRow row2 = s.getRow2(superStats);
        dataset.addRow(row2);
        ((SimpleQualityControlDataset) dataset).setAdditionalRow(row2);
    }

    row = new SimplePeakListRowOther();
    row.setPeak("1", "MEAN");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2), formatter.format(superStats[i].getMean()).toString());
    }
    dataset.addRow(row);

    row = new SimplePeakListRowOther();
    row.setPeak("1", "RSD");
    for (int i = 0; i < 9; i++) {
        row.setPeak(String.valueOf(i + 2),
                formatter.format((superStats[i].getStandardDeviation() * 100) / superStats[i].getMean())
                        .toString());
    }
    dataset.addRow(row);

    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("", ""));
    dataset.addRow(getRow("Comments:", comments));

    // Creates an internal frame with the table.
    GUIUtils.showNewTable(dataset, true);
}