List of usage examples for org.apache.commons.math.stat.descriptive SummaryStatistics getSum
public double getSum()
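Returns the sum of the values that have been added. A minimal, self-contained sketch of the call:

import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class GetSumExample {
    public static void main(String[] args) {
        SummaryStatistics stats = new SummaryStatistics();
        // Values are not stored; the sufficient statistics are updated incrementally.
        stats.addValue(1.0);
        stats.addValue(2.5);
        stats.addValue(4.0);
        System.out.println(stats.getSum()); // 7.5
    }
}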
From source file: gsn.tests.performance.Queries.java

private String printStats(SummaryStatistics stats, String unit) {
    return new StringBuilder().append("sum:").append(format(stats.getSum()))
            .append(", min:").append(format(stats.getMin()))
            .append(", max:").append(format(stats.getMax()))
            .append(", mean:").append(format(stats.getMean()))
            .append(", var:").append(format(stats.getVariance()))
            .append(" [").append(unit).append("]").toString();
}
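A sketch of how such a formatter might be fed, assuming it runs inside Queries.java where printStats and its format(...) helper are defined; the latency values are made up:

SummaryStatistics stats = new SummaryStatistics();
for (double latencyMs : new double[] { 12.0, 15.5, 11.2, 19.8 }) {
    stats.addValue(latencyMs);
}
System.out.println(printStats(stats, "ms"));
// prints something like: sum:58.5, min:11.2, max:19.8, mean:14.625, var:15.39 [ms]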
From source file: de.escidoc.core.om.performance.Statistics.java

/**
 * @return the statistics of all measured methods.
 */
@ManagedAttribute(description = "Get all currently available statistics")
public String getKeys() {
    final StringBuilder b = new StringBuilder();
    for (final String key : this.statisticsMap.keySet()) {
        final SummaryStatistics s = getStatistics(key);
        if (s != null) {
            b.append(key).append(", #:").append(s.getN())
                    .append(", min (ms):").append((long) s.getMin())
                    .append(", max (ms):").append((long) s.getMax())
                    .append(", mean (ms):").append((long) s.getMean())
                    .append(", stddev (ms):").append((long) s.getStandardDeviation())
                    .append(", total (ms):").append((long) s.getSum()).append('\n');
        }
    }
    System.gc();
    return b.toString();
}
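The per-key map pattern above reduces to a small sketch; the class and method names here are hypothetical, and SynchronizedSummaryStatistics (also in commons-math, a thread-safe subclass) stands in for whatever guarding the real class does around addValue:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.apache.commons.math.stat.descriptive.SynchronizedSummaryStatistics;

public class MethodTimings {
    private final Map<String, SummaryStatistics> statisticsMap = new ConcurrentHashMap<>();

    public void record(String method, long elapsedMs) {
        // One accumulator per method name, created on first use
        statisticsMap.computeIfAbsent(method, m -> new SynchronizedSummaryStatistics())
                .addValue(elapsedMs);
    }

    public long totalMs(String method) {
        SummaryStatistics s = statisticsMap.get(method);
        return s == null ? 0L : (long) s.getSum(); // the "total (ms)" column above
    }
}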
From source file: net.shipilev.fjptrace.tasks.PrintSummaryTask.java

private void summarizeEvents(PrintWriter pw, Events events) {
    SummaryStatistics completeTimes = new SummaryStatistics();
    SummaryStatistics execTimes = new SummaryStatistics();
    Map<Integer, Long> times = new HashMap<>();
    for (Event e : events) {
        switch (e.eventType) {
        case COMPLETING:
            times.put(e.tag, e.time);
            break;
        case COMPLETED: {
            Long startTime = times.get(e.tag);
            if (startTime != null) {
                completeTimes.addValue(e.time - startTime);
            }
            break;
        }
        case EXEC:
            times.put(e.tag, e.time);
            break;
        case EXECUTED:
            Long startTime = times.get(e.tag);
            if (startTime != null) {
                execTimes.addValue(e.time - startTime);
            }
            break;
        }
    }
    pw.println();
    pw.println("EXEC -> EXECUTED: "
            + TimeUnit.NANOSECONDS.toMillis((long) execTimes.getSum()) + "ms");
    pw.println("COMPLETING -> COMPLETED: "
            + TimeUnit.NANOSECONDS.toMillis((long) completeTimes.getSum()) + "ms");
}
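The start/stop pairing idiom above, stripped to its essentials; the class and method names are hypothetical:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;

class PhaseTimer {
    private final SummaryStatistics execTimes = new SummaryStatistics();
    private final Map<Integer, Long> startTimes = new HashMap<>();

    void onExec(int tag, long timeNs) {
        startTimes.put(tag, timeNs); // remember when this tag started
    }

    void onExecuted(int tag, long timeNs) {
        Long start = startTimes.remove(tag);
        if (start != null) {
            execTimes.addValue(timeNs - start); // one sample per paired event
        }
    }

    long totalMs() {
        // getSum() accumulates all paired durations, as in summarizeEvents above
        return TimeUnit.NANOSECONDS.toMillis((long) execTimes.getSum());
    }
}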
From source file: org.apache.hadoop.hive.ql.exec.tez.TestHostAffinitySplitLocationProvider.java

private double testHashDistribution(int locs, final int missCount, FileSplit[] splits,
        AtomicInteger errorCount) {
    // This relies heavily on what method determineSplits ... calls and doesn't.
    // We could do a wrapper with only size() and get() methods instead of List, to be sure.
    @SuppressWarnings("unchecked")
    List<String> partLocs = (List<String>) Mockito.mock(List.class);
    Mockito.when(partLocs.size()).thenReturn(locs);
    final AtomicInteger state = new AtomicInteger(0);
    Mockito.when(partLocs.get(Mockito.anyInt())).thenAnswer(new Answer<String>() {
        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            return (state.getAndIncrement() == missCount) ? "not-null" : null;
        }
    });
    int[] hitCounts = new int[locs];
    for (int splitIx = 0; splitIx < splits.length; ++splitIx) {
        state.set(0);
        int index = HostAffinitySplitLocationProvider.determineLocation(partLocs,
                splits[splitIx].getPath().toString(), splits[splitIx].getStart(), null);
        ++hitCounts[index];
    }
    SummaryStatistics ss = new SummaryStatistics();
    for (int hitCount : hitCounts) {
        ss.addValue(hitCount);
    }
    // All of this is completely bogus and mostly captures the following function:
    // f(output) = I-eyeballed-the(output) == they-look-ok.
    // It's pretty much a golden file...
    // The fact that stdev doesn't increase with increasing missCount is captured outside.
    double avg = ss.getSum() / ss.getN(), stdev = ss.getStandardDeviation(), cv = stdev / avg;
    double allowedMin = avg - 2.5 * stdev, allowedMax = avg + 2.5 * stdev;
    if (allowedMin > ss.getMin() || allowedMax < ss.getMax() || cv > 0.22) {
        LOG.info("The distribution for " + locs + " locations, " + missCount + " misses isn't to "
                + "our liking: avg " + avg + ", stdev " + stdev + ", cv " + cv
                + ", min " + ss.getMin() + ", max " + ss.getMax());
        errorCount.incrementAndGet();
    }
    return cv;
}
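Note that getSum() divided by getN() is simply the mean; SummaryStatistics.getMean() would return the same value. A small sketch of the derived quantities the test checks, with made-up hit counts:

import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class SpreadCheck {
    public static void main(String[] args) {
        SummaryStatistics ss = new SummaryStatistics();
        for (int hitCount : new int[] { 9, 11, 10, 10 }) {
            ss.addValue(hitCount);
        }
        double avg = ss.getSum() / ss.getN();        // equivalent to ss.getMean()
        double cv = ss.getStandardDeviation() / avg; // coefficient of variation
        System.out.println("avg " + avg + ", cv " + cv);
    }
}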
From source file: org.apache.mahout.freqtermsets.fpgrowth.FPGrowth.java

/**
 * Internal TopKFrequentPattern generation algorithm, which represents the
 * A's as integers and transforms features to use only integers.
 *
 * @param cTree
 *            transaction database
 * @param attributeFrequency
 *            array representing the frequency of the corresponding attribute id
 * @param minSupport
 *            minimum support of the pattern to be mined
 * @param k
 *            max size of the max-heap in which patterns are held
 * @param featureSetSize
 *            number of features
 * @param returnFeatures
 *            the ids of the features for which top-k patterns have to be mined
 * @param topKPatternsOutputCollector
 *            the output collector which transforms the given pattern in
 *            integer format to the corresponding A format
 */
private void generateTopKFrequentPatterns(TransactionTree cTree,
        OpenObjectIntHashMap<A> attributeIdMapping, long[] attributeFrequency, long minSupport,
        int k, int featureSetSize, Collection<Integer> returnFeatures,
        TopKPatternsOutputConverter<A> topKPatternsOutputCollector, StatusUpdater updater)
        throws IOException {
    // YA: Bonsai pruning state
    FPTree tree = null;
    boolean change = true;
    int pruneIters = 0;
    IntArrayList pruneByContingencyCount = new IntArrayList();
    IntArrayList pruneBySpreadCount = new IntArrayList();
    while (change) {
        pruneByContingencyCount.add(0);
        pruneBySpreadCount.add(0);
        change = false;
        tree = new FPTree(featureSetSize);
        OpenIntLongHashMap[] childJointFreq = null;
        long[] sumChildSupport = null;
        if (BONSAI_PRUNE) {
            childJointFreq = new OpenIntLongHashMap[featureSetSize];
            sumChildSupport = new long[featureSetSize];
        }
        double supportGrandTotal = 0;
        for (int i = 0; i < featureSetSize; i++) {
            tree.addHeaderCount(i, attributeFrequency[i]);
            if (attributeFrequency[i] < 0) {
                continue; // an attribute not satisfying the monotone constraint
            }
            if (BONSAI_PRUNE) {
                childJointFreq[i] = new OpenIntLongHashMap();
                supportGrandTotal += attributeFrequency[i];
            }
        }
        // Constructing the initial FPTree from the list of transactions.
        // YA Bonsai: to pass the tree itself, the iterator now works only with ints;
        // the A type argument is not checked in the constructor.
        // TODO: remove the type argument and force using ints only.
        Iterator<Pair<int[], Long>> transactions =
                new IntTransactionIterator(cTree.iterator(), attributeIdMapping);
        int nodecount = 0;
        int i = 0;
        while (transactions.hasNext()) {
            Pair<int[], Long> transaction = transactions.next();
            Arrays.sort(transaction.getFirst());
            int temp = FPTree.ROOTNODEID;
            boolean addCountMode = true;
            for (int attribute : transaction.getFirst()) {
                if (attributeFrequency[attribute] < 0) {
                    continue; // an attribute not satisfying the monotone constraint
                }
                if (attributeFrequency[attribute] < minSupport) {
                    break;
                }
                if (BONSAI_PRUNE && tree.attribute(temp) != -1) { // skip the root node
                    childJointFreq[tree.attribute(temp)].put(attribute,
                            childJointFreq[tree.attribute(temp)].get(attribute)
                                    + transaction.getSecond());
                    sumChildSupport[tree.attribute(temp)] += transaction.getSecond();
                }
                int child;
                if (addCountMode) {
                    child = tree.childWithAttribute(temp, attribute);
                    if (child == -1) {
                        addCountMode = false;
                    } else {
                        tree.addCount(child, transaction.getSecond());
                        temp = child;
                    }
                }
                if (!addCountMode) {
                    child = tree.createNode(temp, attribute, transaction.getSecond());
                    temp = child;
                    nodecount++;
                }
            }
            i++;
            if (i % 10000 == 0) {
                log.info("FPTree Building: Read {} Transactions", i);
            }
        }
        log.info("Number of Nodes in the FP Tree: {}", nodecount);
        if (BONSAI_PRUNE) {
            if (log.isTraceEnabled())
                log.info("Bonsai pruning tree: {}", tree.toString());
            for (int a = 0; a < tree.getHeaderTableCount(); ++a) {
                int attr = tree.getAttributeAtIndex(a);
                if (attributeFrequency[attr] < 0) {
                    continue; // an attribute not satisfying the monotone constraint
                }
                if (attributeFrequency[attr] < minSupport) {
                    break;
                }
                float numChildren = childJointFreq[attr].size();
                if (log.isTraceEnabled()) {
                    log.trace("Voting for noisiness of attribute {} with number of children: {}",
                            attr, numChildren);
                    log.trace("Attribute support: {} - Total Children support: {}",
                            attributeFrequency[attr], sumChildSupport[attr]);
                }
                // (Long commented-out experiments with EMD, log-odds, and KL-divergence
                // noise metrics elided here; the pairwise Yule's Q metric below is the
                // variant that remained live.)
                SummaryStatistics metricSummary = new SummaryStatistics();
                double goodnessOfFit = 0.0;
                double sumOfWeights = 0;
                IntArrayList childAttrArr = childJointFreq[attr].keys();
                for (int c = 0; c < childAttrArr.size(); ++c) {
                    int childAttr = childAttrArr.get(c);
                    double[][] contingencyTable = new double[2][2];
                    if (childAttr == -1) {
                        // this is meaningless, as yuleq will just be 1
                        contingencyTable[1][1] = childJointFreq[attr].get(childAttr);
                        contingencyTable[1][0] = sumChildSupport[attr];
                        contingencyTable[0][1] = 0;
                        contingencyTable[0][0] = supportGrandTotal - attributeFrequency[attr];
                    } else {
                        contingencyTable[1][1] = childJointFreq[attr].get(childAttr);
                        contingencyTable[1][0] = attributeFrequency[attr] - contingencyTable[1][1];
                        contingencyTable[0][1] = attributeFrequency[childAttr] - contingencyTable[1][1];
                        contingencyTable[0][0] = supportGrandTotal - attributeFrequency[attr]
                                - attributeFrequency[childAttr] + contingencyTable[1][1];
                    }
                    double ad = contingencyTable[0][0] * contingencyTable[1][1];
                    double bc = contingencyTable[0][1] * contingencyTable[1][0];
                    double yuleq = (ad - bc) / (ad + bc);
                    double weight = attributeFrequency[childAttr];
                    sumOfWeights += weight;
                    metricSummary.addValue(Math.abs(yuleq * weight));
                    goodnessOfFit += contingencyTable[1][1] * contingencyTable[1][1];
                }
                double weightedMean = metricSummary.getSum() / sumOfWeights;
                boolean noise = false;
                if (numChildren > 1) {
                    double n = sumChildSupport[attr];
                    goodnessOfFit /= (n / numChildren);
                    goodnessOfFit -= n;
                    ChiSquaredDistributionImpl chisqDist =
                            new ChiSquaredDistributionImpl(numChildren - 1);
                    double criticalPoint = -1;
                    try {
                        criticalPoint = chisqDist.inverseCumulativeProbability(1.0 - SIGNIFICANCE / 2.0);
                    } catch (MathException e) {
                        log.error(e.getMessage(), e);
                    }
                    if (goodnessOfFit < criticalPoint) {
                        pruneBySpreadCount.set(pruneIters, pruneBySpreadCount.get(pruneIters) + 1);
                        noise = true;
                    }
                }
                change |= noise;
                if (noise) {
                    if (log.isTraceEnabled())
                        log.info("Pruning attribute {} with child joint freq {}", attr,
                                childJointFreq[attr]);
                    returnFeatures.remove(attr);
                    attributeFrequency[attr] = -1;
                }
            }
        }
        ++pruneIters;
    }
    if (log.isTraceEnabled()) {
        log.info("Pruned tree: {}", tree.toString());
        log.info("Prune by contingency: {} - Prune by spread: {}",
                pruneByContingencyCount.toString(), pruneBySpreadCount.toString());
    }
    fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
}
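Buried in the pruning loop is the getSum() usage this page is about: a weighted mean, where each addValue() contribution is pre-multiplied by its weight and the running sum is divided by the total weight at the end. A sketch with hypothetical metric/weight pairs:

import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class WeightedMean {
    public static void main(String[] args) {
        SummaryStatistics metricSummary = new SummaryStatistics();
        double sumOfWeights = 0;
        double[][] metricAndWeight = { { 0.9, 4 }, { -0.7, 1 }, { 1.0, 5 } };
        for (double[] mw : metricAndWeight) {
            metricSummary.addValue(Math.abs(mw[0]) * mw[1]); // |metric| * weight
            sumOfWeights += mw[1];
        }
        double weightedMean = metricSummary.getSum() / sumOfWeights;
        System.out.println(weightedMean); // (0.9*4 + 0.7*1 + 1.0*5) / 10 = 0.93
    }
}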
From source file: rapture.stat.memory.ValueMemoryType.java

@Override
public boolean calculate() {
    if (System.currentTimeMillis() > nextRecord) {
        ValueStat v = new ValueStat();
        v.setKey(key);
        if (workingValues.isEmpty()) {
            v.setValue(0.0);
        } else {
            SummaryStatistics summary = new SummaryStatistics();
            synchronized (workingValues) {
                for (ValueStat vs : workingValues) {
                    summary.addValue(vs.getValue());
                }
            }
            switch (operation) {
            case AVERAGE:
                v.setValue(summary.getMean());
                break;
            case SUM:
                v.setValue(summary.getSum());
                break;
            }
        }
        history.add(v);
        nextRecord = nextExtractionTime();
        return true;
    }
    return false;
}
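A trimmed sketch of the AVERAGE/SUM switch above; the enum and sample values here are hypothetical:

import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class Rollup {
    enum Operation { AVERAGE, SUM }

    static double rollup(double[] values, Operation operation) {
        SummaryStatistics summary = new SummaryStatistics();
        for (double v : values) {
            summary.addValue(v);
        }
        // Same choice as in calculate() above: getMean() for AVERAGE, getSum() for SUM
        return operation == Operation.AVERAGE ? summary.getMean() : summary.getSum();
    }

    public static void main(String[] args) {
        double[] values = { 2.0, 3.0, 5.0 };
        System.out.println(rollup(values, Operation.SUM));     // 10.0
        System.out.println(rollup(values, Operation.AVERAGE)); // 3.333...
    }
}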
From source file: uk.ac.diamond.scisoft.analysis.dataset.AbstractDataset.java

/**
 * Calculate summary statistics for a dataset along an axis
 */
protected void calculateSummaryStats(final int axis) {
    int rank = getRank();
    int[] oshape = getShape();
    int alen = oshape[axis];
    oshape[axis] = 1;
    int[] nshape = new int[rank - 1];
    for (int i = 0; i < axis; i++) {
        nshape[i] = oshape[i];
    }
    for (int i = axis + 1; i < rank; i++) {
        nshape[i - 1] = oshape[i];
    }
    final int dtype = getDtype();
    IntegerDataset count = new IntegerDataset(nshape);
    AbstractDataset max = zeros(nshape, dtype);
    AbstractDataset min = zeros(nshape, dtype);
    IntegerDataset maxIndex = new IntegerDataset(nshape);
    IntegerDataset minIndex = new IntegerDataset(nshape);
    AbstractDataset sum = zeros(nshape, getLargestDType(dtype));
    DoubleDataset mean = new DoubleDataset(nshape);
    DoubleDataset var = new DoubleDataset(nshape);
    IndexIterator qiter = max.getIterator(true);
    int[] qpos = qiter.getPos();
    int[] spos = oshape.clone();
    while (qiter.hasNext()) {
        int i = 0;
        for (; i < axis; i++) {
            spos[i] = qpos[i];
        }
        spos[i++] = 0;
        for (; i < rank; i++) {
            spos[i] = qpos[i - 1];
        }
        final SummaryStatistics stats = new SummaryStatistics();
        for (int j = 0; j < alen; j++) {
            spos[axis] = j;
            final double val = getDouble(spos);
            if (Double.isInfinite(val) || Double.isNaN(val)) {
                continue;
            }
            stats.addValue(val);
        }
        count.setAbs(qiter.index, (int) stats.getN());
        final double amax = stats.getMax();
        max.setObjectAbs(qiter.index, amax);
        for (int j = 0; j < alen; j++) {
            spos[axis] = j;
            final double val = getDouble(spos);
            if (val == amax) {
                maxIndex.setAbs(qiter.index, j);
                break;
            }
        }
        final double amin = stats.getMin();
        min.setObjectAbs(qiter.index, amin);
        for (int j = 0; j < alen; j++) {
            spos[axis] = j;
            final double val = getDouble(spos);
            if (val == amin) {
                minIndex.setAbs(qiter.index, j);
                break;
            }
        }
        sum.setObjectAbs(qiter.index, stats.getSum());
        mean.setAbs(qiter.index, stats.getMean());
        var.setAbs(qiter.index, stats.getVariance());
    }
    setStoredValue("count-" + axis, count);
    storedValues.put("max-" + axis, max);
    storedValues.put("min-" + axis, min);
    storedValues.put("sum-" + axis, sum);
    storedValues.put("mean-" + axis, mean);
    storedValues.put("var-" + axis, var);
    storedValues.put("maxIndex-" + axis, maxIndex);
    storedValues.put("minIndex-" + axis, minIndex);
}
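SummaryStatistics has no built-in handling for non-finite values, which is why the inner loop above filters them before addValue(). A minimal illustration:

import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class FiniteOnly {
    public static void main(String[] args) {
        SummaryStatistics stats = new SummaryStatistics();
        double[] data = { 1.0, Double.NaN, 2.0, Double.POSITIVE_INFINITY, 3.0 };
        for (double val : data) {
            if (Double.isInfinite(val) || Double.isNaN(val)) {
                continue; // a single NaN or infinity would otherwise poison getSum()
            }
            stats.addValue(val);
        }
        System.out.println(stats.getN());   // 3
        System.out.println(stats.getSum()); // 6.0
    }
}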
From source file: uk.ac.diamond.scisoft.analysis.dataset.ComplexDoubleDataset.java

@Override
public Object sum() {
    if (storedValues == null) {
        calculateSummaryStats();
    }
    final SummaryStatistics rstats = (SummaryStatistics) storedValues.get("stats-0");
    final SummaryStatistics istats = (SummaryStatistics) storedValues.get("stats-1");
    return new Complex(rstats.getSum(), istats.getSum());
}
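The same two-accumulator idiom in isolation: real and imaginary parts are tracked by separate SummaryStatistics objects and recombined via getSum() into a commons-math Complex. The values here are made up:

import org.apache.commons.math.complex.Complex;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class ComplexSum {
    public static void main(String[] args) {
        SummaryStatistics rstats = new SummaryStatistics();
        SummaryStatistics istats = new SummaryStatistics();
        double[][] values = { { 1.0, 2.0 }, { 3.0, -1.0 } }; // {re, im} pairs
        for (double[] z : values) {
            rstats.addValue(z[0]); // real parts
            istats.addValue(z[1]); // imaginary parts
        }
        Complex sum = new Complex(rstats.getSum(), istats.getSum());
        System.out.println(sum.getReal() + " + " + sum.getImaginary() + "i"); // 4.0 + 1.0i
    }
}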