List of usage examples for the org.apache.commons.math.stat.descriptive.DescriptiveStatistics constructor
public DescriptiveStatistics()
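Before the project-specific examples below, here is a minimal sketch of the usage pattern they all share: construct an empty DescriptiveStatistics, feed it values with addValue(), then read summary measures such as the mean, standard deviation, or a percentile. The class name and the sample values are invented for illustration; only the DescriptiveStatistics API calls come from Commons Math.

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class DescriptiveStatisticsSketch {
    public static void main(String[] args) {
        // Unbounded window: all added values are retained
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.5, 3.5, 4.0 }) {
            stats.addValue(v);
        }
        System.out.println("n       = " + stats.getN());
        System.out.println("mean    = " + stats.getMean());
        System.out.println("std dev = " + stats.getStandardDeviation());
        System.out.println("p95     = " + stats.getPercentile(95.0));
    }
}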
From source file:com.gtwm.pb.model.manageData.WordCloud.java
/**
 * @param textLowerCase
 *            Input text, must be lower case
 * @param minWeight
 *            Minimum tag weight, e.g. a font size
 * @param maxWeight
 *            Max. tag weight
 * @param maxTags
 *            Maximum number of tags to return, -1 for all tags
 * @param additionalStopWords
 *            Set of words to specifically exclude, in addition to the
 *            standard set [and, not, after, yes, no, ...]
 */
public WordCloud(String textLowerCase, int minWeight, int maxWeight, int maxTags,
        Set<String> additionalStopWords) {
    String[] wordArray = textLowerCase.split("\\W");
    Set<String> stopWords = new HashSet<String>(Arrays.asList(stopWordsArray));
    for (String additionalStopWord : additionalStopWords) {
        stopWords.add(additionalStopWord.toLowerCase().trim());
    }
    LancasterStemmer stemmer = new LancasterStemmer();
    String wordStem;
    Frequency frequencies = new Frequency();
    for (String wordString : wordArray) {
        if ((!stopWords.contains(wordString)) && (wordString.length() >= minWordLength)) {
            wordStem = stemmer.stripSuffixes(wordString);
            // Record the mapping of the stem to its origin so the most
            // common origin can be re-introduced when the cloud is
            // generated
            this.recordStemOrigin(wordString, wordStem);
            frequencies.addValue(wordStem);
        }
    }
    // Compute std. dev. of frequencies so we can remove outliers
    DescriptiveStatistics stats = new DescriptiveStatistics();
    Iterator freqIt = frequencies.valuesIterator();
    long stemFreq;
    while (freqIt.hasNext()) {
        stemFreq = frequencies.getCount(freqIt.next());
        stats.addValue(stemFreq);
    }
    double mean = stats.getMean();
    double stdDev = stats.getStandardDeviation();
    long minFreq = Long.MAX_VALUE;
    long maxFreq = 0;
    // Remove outliers
    freqIt = frequencies.valuesIterator();
    int upperLimit = (int) (mean + (stdDev * 10));
    int lowerLimit = (int) (mean - stdDev);
    if (lowerLimit < 2) {
        lowerLimit = 2;
    }
    int numWords = 0;
    int numRawWords = wordArray.length;
    boolean removeLowOutliers = (numRawWords > (maxTags * 10));
    while (freqIt.hasNext()) {
        wordStem = (String) freqIt.next();
        stemFreq = frequencies.getCount(wordStem);
        // For a large input set, remove high and low outliers.
        // For a smaller set, just high freq. outliers
        if ((stemFreq > upperLimit) || ((stemFreq < lowerLimit) && removeLowOutliers)) {
            freqIt.remove();
        } else {
            numWords++;
            if (stemFreq > maxFreq) {
                maxFreq = stemFreq;
            } else if (stemFreq < minFreq) {
                minFreq = stemFreq;
            }
        }
    }
    // Cut down to exact required number of tags by removing smallest
    if (lowerLimit < minFreq) {
        lowerLimit = (int) minFreq;
    }
    if (numWords > maxTags) {
        while (numWords > maxTags) {
            freqIt = frequencies.valuesIterator();
            SMALLREMOVAL: while (freqIt.hasNext()) {
                stemFreq = frequencies.getCount(freqIt.next());
                if (stemFreq < lowerLimit) {
                    freqIt.remove();
                    numWords--;
                    if (numWords == maxTags) {
                        break SMALLREMOVAL;
                    }
                }
            }
            int step = (int) ((mean - lowerLimit) / 3);
            if (step < 1) {
                step = 1;
            }
            lowerLimit += step;
        }
        // The new min. freq. may have changed
        minFreq = Long.MAX_VALUE;
        freqIt = frequencies.valuesIterator();
        while (freqIt.hasNext()) {
            stemFreq = frequencies.getCount(freqIt.next());
            if (stemFreq < minFreq) {
                minFreq = stemFreq;
            }
        }
    }
    // Scale and create tag objects
    double scaleFactor;
    if (maxFreq == minFreq) {
        scaleFactor = (double) (maxWeight - minWeight) / 4; // TODO: a realistic scale factor in this case
    } else {
        scaleFactor = (double) (maxWeight - minWeight) / (maxFreq - minFreq);
    }
    freqIt = frequencies.valuesIterator();
    int weight;
    while (freqIt.hasNext()) {
        wordStem = (String) freqIt.next();
        stemFreq = frequencies.getCount(wordStem);
        // Might still be some left less than the min. threshold
        if (stemFreq <= minFreq) {
            weight = minWeight;
        } else {
            weight = (int) (Math.ceil((double) (stemFreq - minFreq) * scaleFactor) + minWeight);
        }
        SortedSet<WordInfo> origins = this.stemOriginMap.get(wordStem);
        String mostCommonOrigin = origins.last().getName();
        Set<String> synonyms = new TreeSet<String>();
        for (WordInfo origin : origins) {
            synonyms.add(origin.getName());
        }
        WordInfo word = new Word(mostCommonOrigin, weight, synonyms);
        this.words.add(word);
    }
}
From source file:hdr_plugin.ImageCompare.java
private void button1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_button1ActionPerformed
    ImagePlus imp = WindowManager.getImage(chcStack.getSelectedItem());
    for (int i = 0; i < imp.getNSlices() - 1; i++) {
        DescriptiveStatistics des = new DescriptiveStatistics();
        Object pixels = imp.getImageStack().getPixels(i + 1);
        Object pixels2 = imp.getImageStack().getPixels(i + 2);
        for (int j = 0; j < imp.getWidth() * imp.getHeight(); j++) {
            try {
                // get pixel value at position j and the current channel
                int m = ImageJTools.getPixelValue(pixels, j, imp.getType(), 0);
                int n = ImageJTools.getPixelValue(pixels2, j, imp.getType(), 0);
                des.addValue((double) n / m);
            } catch (TypeNotSupportedException ex) {
                Logger.getLogger(ImageCompare.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        System.out.println(des.getMean());
    }
}
From source file:guineu.modules.dataanalysis.Ttest.TTestTask.java
public double[] Ttest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics stats1 = new DescriptiveStatistics();
    DescriptiveStatistics stats2 = new DescriptiveStatistics();
    double[] values = new double[3];
    String parameter1 = "";
    try {
        // Determine groups for selected raw data files
        List<String> availableParameterValues = dataset.getParameterAvailableValues(parameter);
        int numberOfGroups = availableParameterValues.size();
        if (numberOfGroups > 1) {
            parameter1 = availableParameterValues.get(0);
            String parameter2 = availableParameterValues.get(1);
            for (String sampleName : dataset.getAllColumnNames()) {
                if (dataset.getParametersValue(sampleName, parameter) != null
                        && dataset.getParametersValue(sampleName, parameter).equals(parameter1)) {
                    try {
                        stats1.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception e) {
                    }
                } else if (dataset.getParametersValue(sampleName, parameter) != null
                        && dataset.getParametersValue(sampleName, parameter).equals(parameter2)) {
                    try {
                        stats2.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                    } catch (Exception e) {
                    }
                }
            }
        } else {
            return null;
        }
    } catch (Exception e) {
    }
    TTestImpl ttest = new TTestImpl();
    values[0] = ttest.tTest((StatisticalSummary) stats1, (StatisticalSummary) stats2);
    values[1] = stats1.getMean();
    values[2] = stats2.getMean();
    return values;
}
From source file:com.griddynamics.jagger.util.statistics.percentiles.PercentilesProcessor.java
public void reset() {
    currentStrategy = initialStrategy;
    switch (currentStrategy) {
    case HEURISTIC:
        adaptiveHistogram = new AdaptiveHistogram();
        break;
    case AUTO:
    case EXACT:
        descriptiveStatistics = new DescriptiveStatistics();
    }
}
From source file:guineu.modules.dataanalysis.wilcoxontest.WilcoxonTestTask.java
public double[] Ttest(int mol) throws IllegalArgumentException {
    DescriptiveStatistics stats1 = new DescriptiveStatistics();
    DescriptiveStatistics stats2 = new DescriptiveStatistics();
    double[] values = new double[3];
    String parameter1 = "";
    if (parameter == null) {
        for (int i = 0; i < group1.length; i++) {
            try {
                stats1.addValue((Double) this.dataset.getRow(mol).getPeak(group1[i]));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        for (int i = 0; i < group2.length; i++) {
            try {
                stats2.addValue((Double) this.dataset.getRow(mol).getPeak(group2[i]));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    } else {
        try {
            // Determine groups for selected raw data files
            List<String> availableParameterValues = dataset.getParameterAvailableValues(parameter);
            int numberOfGroups = availableParameterValues.size();
            if (numberOfGroups > 1) {
                parameter1 = availableParameterValues.get(0);
                String parameter2 = availableParameterValues.get(1);
                for (String sampleName : dataset.getAllColumnNames()) {
                    if (dataset.getParametersValue(sampleName, parameter) != null
                            && dataset.getParametersValue(sampleName, parameter).equals(parameter1)) {
                        try {
                            stats1.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                        } catch (Exception e) {
                        }
                    } else if (dataset.getParametersValue(sampleName, parameter) != null
                            && dataset.getParametersValue(sampleName, parameter).equals(parameter2)) {
                        try {
                            stats2.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                        } catch (Exception e) {
                        }
                    }
                }
            } else {
                return null;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    try {
        final Rengine rEngine;
        try {
            rEngine = RUtilities.getREngine();
        } catch (Throwable t) {
            throw new IllegalStateException(
                    "Wilcoxon test requires R but it couldn't be loaded (" + t.getMessage() + ')');
        }
        synchronized (RUtilities.R_SEMAPHORE) {
            rEngine.eval("x <- 0");
            rEngine.eval("y <- 0");
            long group1 = rEngine.rniPutDoubleArray(stats1.getValues());
            rEngine.rniAssign("x", group1, 0);
            long group2 = rEngine.rniPutDoubleArray(stats2.getValues());
            rEngine.rniAssign("y", group2, 0);
            /* if (mol == 1) {
                rEngine.eval("write.csv(x, \"x.csv\")");
                rEngine.eval("write.csv(y, \"y.csv\")");
            } */
            rEngine.eval("result <- 0");
            rEngine.eval("result <- wilcox.test(as.numeric(t(x)),as.numeric(t(y)))");
            long e = rEngine.rniParse("result$p.value", 1);
            long r = rEngine.rniEval(e, 0);
            REXP x = new REXP(rEngine, r);
            values[0] = x.asDouble();
        }
        rEngine.end();
        setStatus(TaskStatus.FINISHED);
    } catch (Exception ex) {
        Logger.getLogger(WilcoxonTestTask.class.getName()).log(Level.SEVERE, null, ex);
        setStatus(TaskStatus.ERROR);
    }
    values[1] = stats1.getMean();
    values[2] = stats2.getMean();
    return values;
}
From source file:guineu.modules.dataanalysis.variationCoefficient.VariationCoefficientTask.java
private double getvariationCoefficient(Dataset dataset) {
    DescriptiveStatistics superStats = new DescriptiveStatistics();
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (PeakListRow row : dataset.getRows()) {
        stats.clear();
        for (String experimentName : dataset.getAllColumnNames()) {
            Object value = row.getPeak(experimentName);
            if (value != null && value instanceof Double) {
                stats.addValue((Double) value);
            } else {
                try {
                    stats.addValue(Double.valueOf((String) value));
                } catch (Exception e) {
                }
            }
        }
        if (stats.getMean() > 0) {
            double value = stats.getStandardDeviation() / stats.getMean();
            superStats.addValue(value);
        }
    }
    return superStats.getMean();
}
From source file:de.unidue.langtech.teaching.rp.uimatools.Stopwatch.java
@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
    super.collectionProcessComplete();
    if (isDownstreamTimer()) {
        getLogger().info("Results from Timer '" + timerName + "' after processing all documents.");
        DescriptiveStatistics statTimes = new DescriptiveStatistics();
        for (Long timeValue : times) {
            statTimes.addValue((double) timeValue / 1000);
        }
        double sum = statTimes.getSum();
        double mean = statTimes.getMean();
        double stddev = statTimes.getStandardDeviation();
        StringBuilder sb = new StringBuilder();
        sb.append("Estimate after processing " + times.size() + " documents.");
        sb.append("\n");
        Formatter formatter = new Formatter(sb, Locale.US);
        formatter.format("Aggregated time: %,.1fs\n", sum);
        formatter.format("Time / Document: %,.3fs (%,.3fs)\n", mean, stddev);
        formatter.close();
        getLogger().info(sb.toString());
        if (outputFile != null) {
            try {
                Properties props = new Properties();
                props.setProperty(KEY_SUM, "" + sum);
                props.setProperty(KEY_MEAN, "" + mean);
                props.setProperty(KEY_STDDEV, "" + stddev);
                OutputStream out = new FileOutputStream(outputFile);
                props.store(out, "timer " + timerName + " result file");
            } catch (FileNotFoundException e) {
                throw new AnalysisEngineProcessException(e);
            } catch (IOException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }
}
From source file:com.joliciel.jochre.graphics.SourceImageImpl.java
public SourceImageImpl(GraphicsServiceInternal graphicsService, String name, BufferedImage image) {
    super(image);
    this.name = name;
    this.setOriginalImage(image);
    this.setGraphicsService(graphicsService);
    this.setWidth(this.getPixelGrabber().getWidth());
    this.setHeight(this.getPixelGrabber().getHeight());
    // to normalise the image, we need to figure out where black and white are
    // we want to leave out anomalies (ink blots!)
    int[] pixelSpread = new int[256];
    // To save on memory
    for (int y = 0; y < this.getHeight(); y++)
        for (int x = 0; x < this.getWidth(); x++) {
            int pixel = this.getPixelGrabber().getPixelBrightness(x, y);
            pixelSpread[pixel]++;
        }
    if (LOG.isTraceEnabled()) {
        for (int i = 0; i < 256; i++)
            LOG.trace("Brightness " + i + ": " + pixelSpread[i]);
    }
    DescriptiveStatistics countStats = new DescriptiveStatistics();
    for (int i = 0; i < 256; i++) {
        countStats.addValue(pixelSpread[i]);
    }
    int startWhite = -1;
    int endWhite = -1;
    for (int i = 255; i >= 0; i--) {
        if (startWhite < 0 && pixelSpread[i] > countStats.getMean())
            startWhite = i;
        if (startWhite >= 0 && endWhite < 0 && pixelSpread[i] < countStats.getMean()) {
            endWhite = i;
            break;
        }
    }
    LOG.debug("Start white: " + startWhite);
    LOG.debug("End white: " + endWhite);
    DescriptiveStatistics blackCountStats = new DescriptiveStatistics();
    DescriptiveStatistics blackSpread = new DescriptiveStatistics();
    for (int i = 0; i <= endWhite; i++) {
        blackCountStats.addValue(pixelSpread[i]);
        for (int j = 0; j < pixelSpread[i]; j++) {
            blackSpread.addValue(i);
        }
    }
    LOG.debug("mean counts: " + countStats.getMean());
    LOG.debug("mean black counts: " + blackCountStats.getMean());
    LOG.debug("std dev black counts: " + blackCountStats.getStandardDeviation());
    int startBlack = -1;
    for (int i = 0; i < 256; i++) {
        if (pixelSpread[i] > blackCountStats.getMean()) {
            startBlack = i;
            break;
        }
    }
    LOG.debug("Start black: " + startBlack);
    this.setBlackLimit(startBlack);
    this.setWhiteLimit(startWhite);
    this.greyscaleMultiplier = (255.0 / (double) (whiteLimit - blackLimit));
    // use mean + 2 sigma to find the black threshold
    // we make the threshold high (darker) to put more pixels in the letter when analysing
    double blackthresholdCount = blackCountStats.getMean() + (2.0 * blackCountStats.getStandardDeviation());
    LOG.debug("blackthresholdCount: " + blackthresholdCount);
    int blackThresholdValue = endWhite;
    for (int i = endWhite; i >= startBlack; i--) {
        if (pixelSpread[i] < blackthresholdCount) {
            blackThresholdValue = i;
            break;
        }
    }
    LOG.debug("Black threshold value (old): " + blackThresholdValue);
    blackThreshold = (int) Math.round((blackThresholdValue - blackLimit) * greyscaleMultiplier);
    LOG.debug("Black threshold (old): " + blackThreshold);
    blackThresholdValue = (int) Math.round(blackSpread.getPercentile(60.0));
    LOG.debug("Black threshold value (new): " + blackThresholdValue);
    LOG.debug("Black spread 25 percentile: " + (int) Math.round(blackSpread.getPercentile(25.0)));
    LOG.debug("Black spread 50 percentile: " + (int) Math.round(blackSpread.getPercentile(50.0)));
    LOG.debug("Black spread 75 percentile: " + (int) Math.round(blackSpread.getPercentile(75.0)));
    blackThreshold = (int) Math.round((blackThresholdValue - blackLimit) * greyscaleMultiplier);
    LOG.debug("Black threshold (new): " + blackThreshold);
    // use mean + 1 sigma to find the separation threshold
    // we keep threshold low (1 sigma) to encourage letter breaks
    double separationthresholdCount = blackCountStats.getMean()
            + (1.0 * blackCountStats.getStandardDeviation());
    LOG.debug("Separation threshold value: " + separationthresholdCount);
    int separationThresholdValue = endWhite;
    for (int i = endWhite; i >= startBlack; i--) {
        if (pixelSpread[i] < separationthresholdCount) {
            separationThresholdValue = i;
            break;
        }
    }
    LOG.debug("Separation threshold value (old): " + separationThresholdValue);
    separationThresholdValue = (int) Math.round(blackSpread.getPercentile(75.0));
    LOG.debug("Separation threshold value (new): " + separationThresholdValue);
    LOG.debug("Black spread 25 percentile: " + (int) Math.round(blackSpread.getPercentile(25.0)));
    LOG.debug("Black spread 50 percentile: " + (int) Math.round(blackSpread.getPercentile(50.0)));
    LOG.debug("Black spread 75 percentile: " + (int) Math.round(blackSpread.getPercentile(75.0)));
    separationThreshold = (int) Math.round((separationThresholdValue - blackLimit) * greyscaleMultiplier);
    LOG.debug("Separation threshold: " + separationThreshold);
    if (drawPixelSpread)
        this.drawChart(pixelSpread, countStats, blackCountStats, blackSpread, startWhite, endWhite,
                startBlack, blackThresholdValue);
}
From source file:net.sourceforge.jags.model.ModelTest.java
@Test
public void testUnobservedStochasticNode() throws MathException {
    Node mu = model.addConstantNode(new int[] { 1 }, new double[] { 0 });
    Node tau = model.addConstantNode(new int[] { 1 }, new double[] { 1 });
    int N = 1000;
    Node n = model.addStochasticNode("dnorm", new Node[] { mu, tau }, null, null, null);
    model.initialize(true);
    model.stopAdapting();
    Monitor m = model.addTraceMonitor(n);
    model.update(N);
    assertEquals(N, model.getCurrentIteration());
    assertEquals(N, m.dim()[1]); // Iterations dimension
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (double v : m.value(0)) {
        stats.addValue(v);
    }
    TTest test = new TTestImpl();
    assertFalse(test.tTest(0, m.value(0), 0.05));
}
From source file:edu.usc.goffish.gopher.sample.N_Hop_Stat_Collector.java
@Override
public void compute(List<SubGraphMessage> subGraphMessages) {
    /*
     * We do this in the following steps.
     * Calculate stats for each subgraph.
     * Calculate aggregate stats for the partition.
     * In this case a single sub-graph will do the aggregation.
     * Aggregate partition level stats and combine at the smallest partition.
     */
    if (superStep == 0) {
        SubGraphMessage msg = subGraphMessages.get(0);
        String data = new String(msg.getData());
        String[] dataSplit = data.split("#");
        N = Integer.parseInt(dataSplit[0]);
        String[] vps = dataSplit[1].split(",");
        for (String vp : vps) {
            vantagePoints.add(vp.trim());
        }
        try {
            Iterable<? extends ISubgraphInstance> subgraphInstances = subgraph.getInstances(Long.MIN_VALUE,
                    Long.MAX_VALUE, PropertySet.EmptyPropertySet, subgraph.getEdgeProperties(), false);
            // sliceManager.readInstances(subgraph,
            //         Long.MIN_VALUE, Long.MAX_VALUE,
            //         PropertySet.EmptyPropertySet, subgraph.getEdgeProperties());
            for (ISubgraphInstance instance : subgraphInstances) {
                Map<String, DescriptiveStatistics> statsMap = new HashMap<String, DescriptiveStatistics>();
                for (TemplateEdge edge : subgraph.edges()) {
                    ISubgraphObjectProperties edgeProps = instance.getPropertiesForEdge(edge.getId());
                    Integer isExist = (Integer) edgeProps.getValue(IS_EXIST_PROP);
                    if (isExist == 1) {
                        String[] vantageIps = ((String) edgeProps.getValue(VANTAGE_IP_PROP)).split(",");
                        String[] latencies = ((String) edgeProps.getValue(LATENCY_PROP)).split(",");
                        String[] hops = ((String) edgeProps.getValue(HOP_PROP)).split(",");
                        Integer[] vantangeIdx = vantageIpIndex(vantageIps);
                        if (vantangeIdx == null) {
                            continue;
                        }
                        for (int i : vantangeIdx) {
                            String vantage = vantageIps[i];
                            String latency = latencies[i];
                            String hop = hops[i];
                            double latency_num = Double.parseDouble(latency);
                            int hop_num = Integer.parseInt(hop);
                            if (latency_num >= 0 && hop_num == N) {
                                if (statsMap.containsKey(vantage)) {
                                    statsMap.get(vantage).addValue(latency_num);
                                } else {
                                    DescriptiveStatistics statistics = new DescriptiveStatistics();
                                    statistics.addValue(latency_num);
                                    statsMap.put(vantage, statistics);
                                }
                            }
                        }
                    }
                }
                int c = 0;
                StringBuffer msgBuffer = new StringBuffer();
                for (String v : statsMap.keySet()) {
                    c++;
                    DescriptiveStatistics statistics = statsMap.get(v);
                    String m = createMessageString(v, instance.getTimestampStart(), instance.getTimestampEnd(),
                            statistics.getStandardDeviation(), statistics.getMean(), statistics.getN());
                    if (c == statsMap.keySet().size()) {
                        msgBuffer.append(m);
                    } else {
                        msgBuffer.append(m).append("|");
                    }
                }
                SubGraphMessage subMsg = new SubGraphMessage(msgBuffer.toString().getBytes());
                sentMessage(partition.getId(), subMsg);
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    } else if (superStep == 1) {
        // Ok here every sub-graph will receive messages from its own partition.
        // Each message belongs to a given time span.
        Map<String, List<String[]>> vantageGroup = new HashMap<String, List<String[]>>();
        for (SubGraphMessage subGraphMessage : subGraphMessages) {
            String msgData = new String(subGraphMessage.getData());
            String[] dataParts = msgData.split("|");
            for (String data : dataParts) {
                String[] vantageParts = data.split(",");
                // Group by vantage point and startTime
                if (vantageGroup.containsKey(vantageParts[0] + "|" + vantageParts[1])) {
                    vantageGroup.get(vantageParts[0] + "|" + vantageParts[1]).add(vantageParts);
                } else {
                    ArrayList<String[]> arrayList = new ArrayList<String[]>();
                    arrayList.add(vantageParts);
                    vantageGroup.put(vantageParts[0] + "|" + vantageParts[1], arrayList);
                }
            }
        }
        for (String key : vantageGroup.keySet()) {
            if (!acquireLock(key)) {
                continue;
            }
            List<String[]> data = vantageGroup.get(key);
            double totalN = 0;
            double totalAvgVal = 0;
            double totalVar = 0;
            for (String[] d : data) {
                // average
                double mean = Double.parseDouble(d[4]);
                long sN = Long.parseLong(d[5]);
                totalN += sN;
                totalAvgVal += mean * sN;
                double sd = Double.parseDouble(d[3]);
                totalVar += ((double) sd * sd) / ((double) sN);
            }
            double avg = totalAvgVal / totalN;
            double newSD = Math.sqrt(totalVar);
            // create message
            // sent to all the partitions except me.
            String msg = key + "," + newSD + "," + avg + "," + totalN;
            for (int pid : partitions) {
                sentMessage(pid, new SubGraphMessage(msg.getBytes()));
            }
        }
    } else if (superStep >= 2) {
        if (partition.getId() == Collections.min(partitions)) {
            Map<String, List<String[]>> group = new HashMap<String, List<String[]>>();
            for (SubGraphMessage msg : subGraphMessages) {
                String data = new String(msg.getData());
                String[] dataParts = data.split(",");
                if (group.containsKey(dataParts[0])) {
                    group.get(dataParts[0]).add(dataParts);
                } else {
                    List<String[]> list = new ArrayList<String[]>();
                    list.add(dataParts);
                    group.put(dataParts[0], list);
                }
            }
            if (!acquireLock("" + partition.getId())) {
                voteToHalt();
                return;
            }
            PrintWriter writer;
            try {
                writer = new PrintWriter(new FileWriter("TimeSeriesStats.csv"));
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
            for (String key : group.keySet()) {
                List<String[]> data = group.get(key);
                double totalN = 0;
                double totalAvgVal = 0;
                double totalVar = 0;
                for (String[] d : data) {
                    // average
                    // key + "," + newSD + "," + avg + "," + totalN;
                    double mean = Double.parseDouble(d[2]);
                    long sN = Long.parseLong(d[3]);
                    totalN += sN;
                    totalAvgVal += mean * sN;
                    double sd = Double.parseDouble(d[1]);
                    totalVar += ((double) sd * sd) / ((double) sN);
                }
                double avg = totalAvgVal / totalN;
                double newSD = Math.sqrt(totalVar);
                String vantage = key.split("|")[0];
                String timeStamp = key.split("|")[1];
                log(writer, vantage, timeStamp, avg, newSD);
            }
            writer.flush();
            voteToHalt();
        }
    }
}