List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getPercentile
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException
From source file:com.mapd.bench.Benchmark.java
/**
 * Runs {@code sql} {@code iterations} times and collects timing statistics.
 *
 * <p>Three phases are timed per iteration: "execute" (server-internal time),
 * "jdbc" (driver/wire overhead), and "iterate" (result-set traversal).
 * The first iteration is recorded separately (warm-up) and excluded from the
 * DescriptiveStatistics accumulators.
 *
 * @param sql        query to benchmark
 * @param expected   expected row count; a mismatch aborts the loop
 * @param iterations number of times to run the query
 * @param queryNum   identifier used only in the formatted report line
 * @return a line formatted with {@code lineDescriptor} summarizing
 *         mean/min/max/percentiles for each timed phase
 */
String executeQuery(String sql, int expected, int iterations, int queryNum) {
    Connection conn = null;
    Statement stmt = null;
    // First-iteration (cold) timings, reported separately from the stats.
    Long firstExecute = 0l;
    Long firstJdbc = 0l;
    Long firstIterate = 0l;
    DescriptiveStatistics statsExecute = new DescriptiveStatistics();
    DescriptiveStatistics statsJdbc = new DescriptiveStatistics();
    DescriptiveStatistics statsIterate = new DescriptiveStatistics();
    DescriptiveStatistics statsTotal = new DescriptiveStatistics();
    long totalTime = 0;
    try {
        // Open a connection
        logger.debug("Connecting to database url :" + url);
        conn = DriverManager.getConnection(url, iUser, iPasswd);
        long startTime = System.currentTimeMillis();
        for (int loop = 0; loop < iterations; loop++) {
            // Execute a query
            stmt = conn.createStatement();
            long timer = System.currentTimeMillis();
            ResultSet rs = stmt.executeQuery(sql);
            long executeTime = 0;
            long jdbcTime = 0;
            // gather internal execute time for MapD as we are interested in that.
            // NOTE(review): getQueryTimeout() is repurposed here — the MapD driver
            // apparently reports internal execution time (ms) through it; the
            // original comment below calls this "fake". For other drivers the
            // internal time is unknowable, so it is recorded as 0.
            if (driver.equals(JDBC_DRIVER)) {
                executeTime = stmt.getQueryTimeout();
                jdbcTime = (System.currentTimeMillis() - timer) - executeTime;
            } else {
                jdbcTime = (System.currentTimeMillis() - timer);
                executeTime = 0;
            }
            // this is fake to get our internal execute time.
            logger.debug("Query Timeout/AKA internal Execution Time was " + stmt.getQueryTimeout()
                    + " ms Elapsed time in JVM space was " + (System.currentTimeMillis() - timer) + "ms");
            timer = System.currentTimeMillis();
            // Extract data from result set
            int resultCount = 0;
            while (rs.next()) {
                // Touch the first column so the fetch cannot be optimized away;
                // the equals(statsExecute) comparison can never be true.
                Object obj = rs.getObject(1);
                if (obj != null && obj.equals(statsExecute)) {
                    logger.info("Impossible");
                }
                resultCount++;
            }
            long iterateTime = (System.currentTimeMillis() - timer);
            if (resultCount != expected) {
                logger.error("Expect " + expected + " actual " + resultCount + " for query " + sql);
                // don't run anymore
                break;
            }
            if (loop == 0) {
                // Warm-up pass: keep separately, exclude from the stats.
                firstJdbc = jdbcTime;
                firstExecute = executeTime;
                firstIterate = iterateTime;
            } else {
                statsJdbc.addValue(jdbcTime);
                statsExecute.addValue(executeTime);
                statsIterate.addValue(iterateTime);
                statsTotal.addValue(jdbcTime + executeTime + iterateTime);
            }
            // Clean-up environment
            rs.close();
            stmt.close();
        }
        totalTime = System.currentTimeMillis() - startTime;
        conn.close();
    } catch (SQLException se) {
        // Handle errors for JDBC
        se.printStackTrace();
    } catch (Exception e) {
        // Handle errors for Class.forName
        e.printStackTrace();
    } finally {
        // finally block used to close resources
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException se2) {
        } // nothing we can do
        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException se) {
            se.printStackTrace();
        } // end finally try
    } // end try
    return String.format(lineDescriptor, queryNum, statsTotal.getMean(), statsTotal.getMin(),
            statsTotal.getMax(), statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(),
            statsExecute.getMax(), statsExecute.getPercentile(85), statsExecute.getPercentile(25),
            statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(),
            statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(),
            statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime,
            (long) statsTotal.getSum() + firstExecute + firstJdbc + firstIterate);
}
From source file:com.mapd.bench.BenchmarkCloud.java
/**
 * Runs {@code sql} {@code iterations} times, collects timing statistics,
 * queues an INSERT of the results (via {@code LResult}), and returns a
 * formatted report line.
 *
 * <p>Same timing scheme as the Benchmark variant: "execute" (server-internal),
 * "jdbc" (driver/wire), "iterate" (result-set traversal); the first iteration
 * is treated as warm-up and excluded from the statistics.
 *
 * @param conn1      unused here — a fresh connection is opened from
 *                   {@code url}/{@code iUser}/{@code iPasswd} instead
 *                   (TODO confirm whether conn1 was meant to be used)
 * @param qid        query identifier for logging and the results table
 * @param sql        query to benchmark
 * @param iterations number of times to run the query
 * @return a line formatted with {@code lineDescriptor}
 */
String executeQuery(Connection conn1, String qid, String sql, int iterations) {
    Statement stmt = null;
    Connection conn = getConnection(url, iUser, iPasswd);
    // First-iteration (cold) timings, reported separately from the stats.
    Long firstExecute = 0l;
    Long firstJdbc = 0l;
    Long firstIterate = 0l;
    DescriptiveStatistics statsExecute = new DescriptiveStatistics();
    DescriptiveStatistics statsJdbc = new DescriptiveStatistics();
    DescriptiveStatistics statsIterate = new DescriptiveStatistics();
    DescriptiveStatistics statsTotal = new DescriptiveStatistics();
    long totalTime = 0;
    int resultCount = 0;
    try {
        long startTime = System.currentTimeMillis();
        for (int loop = 0; loop < iterations; loop++) {
            // Execute a query
            stmt = conn.createStatement();
            long timer = System.currentTimeMillis();
            if (loop == 0) {
                System.out.println(String.format("Query Id is %s : query is '%s'", qid, sql));
            }
            ResultSet rs = stmt.executeQuery(sql);
            long executeTime = 0;
            long jdbcTime = 0;
            // gather internal execute time for MapD as we are interested in that.
            // NOTE(review): getQueryTimeout() is repurposed — the MapD driver
            // seems to report internal execution time (ms) through it; the
            // original comment below calls this "fake".
            if (driver.equals(JDBC_DRIVER)) {
                executeTime = stmt.getQueryTimeout();
                jdbcTime = (System.currentTimeMillis() - timer) - executeTime;
            } else {
                jdbcTime = (System.currentTimeMillis() - timer);
                executeTime = 0;
            }
            // this is fake to get our internal execute time.
            logger.debug("Query Timeout/AKA internal Execution Time was " + stmt.getQueryTimeout()
                    + " ms Elapsed time in JVM space was " + (System.currentTimeMillis() - timer) + "ms");
            timer = System.currentTimeMillis();
            // Extract data from result set
            resultCount = 0;
            while (rs.next()) {
                // Touch the first column so the fetch is not skipped; the
                // equals(statsExecute) comparison can never be true.
                Object obj = rs.getObject(1);
                if (obj != null && obj.equals(statsExecute)) {
                    logger.info("Impossible");
                }
                resultCount++;
            }
            long iterateTime = (System.currentTimeMillis() - timer);
            // (An expected-row-count check existed in the Benchmark variant;
            // it is intentionally absent here — row count is just recorded.)
            if (loop == 0) {
                // Warm-up pass: keep separately, exclude from the stats.
                firstJdbc = jdbcTime;
                firstExecute = executeTime;
                firstIterate = iterateTime;
            } else {
                statsJdbc.addValue(jdbcTime);
                statsExecute.addValue(executeTime);
                statsIterate.addValue(iterateTime);
                statsTotal.addValue(jdbcTime + executeTime + iterateTime);
            }
            // Clean-up environment
            rs.close();
            stmt.close();
        }
        totalTime = System.currentTimeMillis() - startTime;
        conn.close();
    } catch (SQLException se) {
        // Handle errors for JDBC; a benchmark run is useless after a SQL
        // failure, so the process exits with a distinct code.
        se.printStackTrace();
        System.exit(4);
    } catch (Exception e) {
        // Handle errors for Class.forName
        e.printStackTrace();
        System.exit(3);
    } finally {
        // finally block used to close resources
        try {
            if (stmt != null) {
                stmt.close();
            }
        } catch (SQLException se2) {
        } // nothing we can do
        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException se) {
            se.printStackTrace();
            System.exit(6);
        } // end finally try
    } // end try
    // write it to the db here as well
    String insertPart = String.format(insertDescriptor, this.rid, this.rTimestamp, url, this.driver, label,
            gpuCount, this.tableName, qid, resultCount, "", statsTotal.getMean(), statsTotal.getMin(),
            statsTotal.getMax(), statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(),
            statsExecute.getMax(), statsExecute.getPercentile(85), statsExecute.getPercentile(25),
            statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(),
            statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(),
            statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime,
            (long) statsTotal.getSum() + firstExecute + firstJdbc + firstIterate, targetDBVersion);
    LResult.add("Insert into results values " + insertPart);
    return String.format(lineDescriptor, qid, statsTotal.getMean(), statsTotal.getMin(), statsTotal.getMax(),
            statsTotal.getPercentile(85), statsExecute.getMean(), statsExecute.getMin(), statsExecute.getMax(),
            statsExecute.getPercentile(85), statsExecute.getPercentile(25),
            statsExecute.getStandardDeviation(), statsJdbc.getMean(), statsJdbc.getMin(), statsJdbc.getMax(),
            statsJdbc.getPercentile(85), statsIterate.getMean(), statsIterate.getMin(), statsIterate.getMax(),
            statsIterate.getPercentile(85), firstExecute, firstJdbc, firstIterate, iterations, totalTime,
            (long) statsTotal.getSum() + firstExecute + firstJdbc + firstIterate);
}
From source file:org.apache.metron.common.math.stats.OnlineStatisticsProviderTest.java
public static void validateStatisticsProvider(StatisticsProvider statsProvider, SummaryStatistics summaryStats, DescriptiveStatistics stats) { //N/*w w w . j a v a2s .c o m*/ Assert.assertEquals(statsProvider.getCount(), stats.getN()); //sum Assert.assertEquals(statsProvider.getSum(), stats.getSum(), 1e-3); //sum of squares Assert.assertEquals(statsProvider.getSumSquares(), stats.getSumsq(), 1e-3); //sum of squares Assert.assertEquals(statsProvider.getSumLogs(), summaryStats.getSumOfLogs(), 1e-3); //Mean Assert.assertEquals(statsProvider.getMean(), stats.getMean(), 1e-3); //Quadratic Mean Assert.assertEquals(statsProvider.getQuadraticMean(), summaryStats.getQuadraticMean(), 1e-3); //SD Assert.assertEquals(statsProvider.getStandardDeviation(), stats.getStandardDeviation(), 1e-3); //Variance Assert.assertEquals(statsProvider.getVariance(), stats.getVariance(), 1e-3); //Min Assert.assertEquals(statsProvider.getMin(), stats.getMin(), 1e-3); //Max Assert.assertEquals(statsProvider.getMax(), stats.getMax(), 1e-3); //Kurtosis Assert.assertEquals(stats.getKurtosis(), statsProvider.getKurtosis(), 1e-3); //Skewness Assert.assertEquals(stats.getSkewness(), statsProvider.getSkewness(), 1e-3); for (double d = 10.0; d < 100.0; d += 10) { //This is a sketch, so we're a bit more forgiving here in our choice of \epsilon. Assert.assertEquals("Percentile mismatch for " + d + "th %ile", statsProvider.getPercentile(d), stats.getPercentile(d), 1e-2); } }
From source file:org.apache.metron.common.stellar.benchmark.Microbenchmark.java
public static String describe(DescriptiveStatistics stats, Double[] percentiles) { StringBuilder sb = new StringBuilder(); sb.append(String.format("round: mean of %dms [+-%d], measured %d rounds;\n", (long) stats.getMean(), (long) stats.getStandardDeviation(), stats.getN())); sb.append("\tMin - " + (long) stats.getMin() + "\n"); for (double pctile : percentiles) { sb.append("\t" + pctile + " - " + stats.getPercentile(pctile) + "\n"); }//from w w w . jav a2s .c o m sb.append("\tMax - " + (long) stats.getMax()); return sb.toString(); }
From source file:org.apache.metron.statistics.StatisticalBinningPerformanceDriver.java
public static void main(String... argv) { DescriptiveStatistics perfStats = new DescriptiveStatistics(); OnlineStatisticsProvider statsProvider = new OnlineStatisticsProvider(); List<Double> values = new ArrayList<>(); GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(0L)); for (int i = 0; i < NUM_DATA_POINTS; ++i) { //get the data point out of the [0,1] range double d = 1000 * gaussian.nextNormalizedDouble(); values.add(d);//from www. j a va 2 s . c o m statsProvider.addValue(d); } for (int perfRun = 0; perfRun < NUM_RUNS; ++perfRun) { StellarStatisticsFunctions.StatsBin bin = new StellarStatisticsFunctions.StatsBin(); long start = System.currentTimeMillis(); Random r = new Random(0); for (int i = 0; i < TRIALS_PER_RUN; ++i) { //grab a random value and fuzz it a bit so we make sure there's no cheating via caching in t-digest. bin.apply(ImmutableList.of(statsProvider, values.get(r.nextInt(values.size())) - 3.5, PERCENTILES)); } perfStats.addValue(System.currentTimeMillis() - start); } System.out.println("Min/25th/50th/75th/Max Milliseconds: " + perfStats.getMin() + " / " + perfStats.getPercentile(25) + " / " + perfStats.getPercentile(50) + " / " + perfStats.getPercentile(75) + " / " + perfStats.getMax()); }
From source file:org.cirdles.calamari.algorithms.TukeyBiweight.java
/** * Calculates arithmetic median of array of doubles. * * @pre values has one element/*from w ww. ja va2s . co m*/ * @param values * @return */ public static double calculateMedian(double[] values) { double median; // enforce precondition if (values.length == 0) { median = 0.0; } else { DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the array for (int i = 0; i < values.length; i++) { stats.addValue(values[i]); } median = stats.getPercentile(50); } return median; }
From source file:org.cirdles.ludwig.isoplot3.Means.java
/**
 * Ludwig's WeightedAverage; assumes ConstExtErr = true since all possible
 * values are returned and the caller can decide which to use.
 *
 * <p>Computes the inverse-variance weighted mean of {@code inValues} with
 * MSWD/probability, optionally estimating a constant external variance (MLE
 * via secant method) when the fit is poor, and optionally rejecting outliers
 * iteratively (the do/while emulates the original VBA RECALC goto).
 * Rejected points are zeroed in-place in the local copies of values/errors.
 *
 * @param inValues  as double[] with length nPts
 * @param inErrors  as double[] with length nPts (1-sigma, same length)
 * @param canReject allow iterative outlier rejection (at most ~30% of points)
 * @param canTukeys compute Tukey's biweight / median diagnostics (unfinished)
 * @return double[2][]: row 0 is {mean, sigmaMean, err68, err95, MSWD,
 *         probability, externalFlag} where externalFlag = 1.0 when the
 *         external (over-dispersed) solution was used, 0.0 for internal;
 *         row 1 is empty. If preconditions fail (length mismatch or fewer
 *         than 3 points) row 0 is all zeroes.
 */
public static double[][] weightedAverage(double[] inValues, double[] inErrors, boolean canReject,
        boolean canTukeys) {
    // Work on copies: rejection zeroes entries in-place.
    double[] values = inValues.clone();
    double[] errors = inErrors.clone();
    double[][] retVal = new double[][] { { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, {} };
    // check precondition of same size values and errors and at least 3 points
    int nPts = values.length;
    int nN = nPts; // number of points still accepted (shrinks on rejection)
    int count = 0; // rejection-pass counter (widens the tolerance each pass)
    boolean hardRej = false; // never set true here; would widen tolerance by 1.25x
    if ((nPts == errors.length) && nPts > 2) {
        // proceed
        double[] inverseVar = new double[nPts];
        double[] wtdResid = new double[nPts];
        double[] yy;
        double[] iVarY;
        double[] tbX = new double[nPts];
        double[][] wRejected = new double[nPts][2]; // rejected (value, error) pairs by index
        for (int i = 0; i < nPts; i++) {
            inverseVar[i] = 1.0 / Math.pow(errors[i], 2);
        }
        double intMean = 0.0;
        double MSWD = 0.0;
        double intSigmaMean = 0.0;
        double probability = 0.0;
        double intMeanErr95 = 0.0;
        double intErr68 = 0.0;
        double extMean = 0.0;
        double extMeanErr95 = 0.0;
        double extMeanErr68 = 0.0;
        double extSigma = 0.0;
        double biWtMean = 0.0;
        double biWtSigma = 0.0;
        boolean reCalc;
        // entry point for RECALC goto - consider another private method?
        do {
            reCalc = false;
            extSigma = 0.0;
            double weight = 0.0;
            double sumWtdRatios = 0.0;
            double q = 0.0;
            count++;
            // Accumulate weights over accepted points only (rejected entries
            // were zeroed, so values[i] * errors[i] == 0 skips them).
            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    weight += inverseVar[i];
                    sumWtdRatios += inverseVar[i] * values[i];
                    q += inverseVar[i] * Math.pow(values[i], 2);
                }
            }
            int nU = nN - 1; // ' Deg. freedom
            TDistribution studentsT = new TDistribution(nU);
            // see https://stackoverflow.com/questions/21730285/calculating-t-inverse
            // for explanation of cutting the tail mass in two to get agreement with Excel two-tail
            double t68 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.6826) / 2.0));
            double t95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2));
            intMean = sumWtdRatios / weight; // ' "Internal" error of wtd average
            double sums = 0.0;
            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    double resid = values[i] - intMean; // ' Simple residual
                    wtdResid[i] = resid / errors[i]; // ' Wtd residual
                    double wtdR2 = Math.pow(wtdResid[i], 2); // ' Square of wtd residual
                    sums += wtdR2;
                }
            }
            sums = Math.max(sums, 0.0);
            MSWD = sums / nU; // ' Mean square of weighted deviates
            intSigmaMean = Math.sqrt(1.0 / weight);
            // http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/distribution/FDistribution.html
            // F(nU, ~inf) approximates the chi-square test of the original VBA.
            FDistribution fdist = new FDistribution(nU, 1E9);
            probability = 1.0 - fdist.cumulativeProbability(MSWD); // ChiSquare(.MSWD, (nU))
            // Good fit (p >= 0.3): use normal-theory multipliers; otherwise
            // scale Student's-t by sqrt(MSWD).
            intMeanErr95 = intSigmaMean * (double) (probability >= 0.3 ? 1.96 : t95 * Math.sqrt(MSWD));
            intErr68 = intSigmaMean * (double) (probability >= 0.3 ? 0.9998 : t68 * Math.sqrt(MSWD));
            extMean = 0.0;
            extMeanErr95 = 0.0;
            extMeanErr68 = 0.0;
            // need to find external uncertainty
            List<Double> yyList = new ArrayList<>();
            List<Double> iVarYList = new ArrayList<>();
            if ((probability < SQUID_MINIMUM_PROBABILITY) && (MSWD > 1.0)) {
                // Find the MLE constant external variance
                nN = 0;
                for (int i = 0; i < nPts; i++) {
                    if (values[i] != 0.0) {
                        yyList.add(values[i]);
                        iVarYList.add(errors[i] * errors[i]);
                        nN++;
                    }
                }
                // resize arrays
                yy = yyList.stream().mapToDouble(Double::doubleValue).toArray();
                iVarY = iVarYList.stream().mapToDouble(Double::doubleValue).toArray();
                // call secant method
                double[] wtdExtRtsec = wtdExtRTSEC(0, 10.0 * intSigmaMean * intSigmaMean, yy, iVarY);
                // check for failure (element [3] is the failure flag)
                if (wtdExtRtsec[3] == 0.0) {
                    extMean = wtdExtRtsec[1];
                    extSigma = Math.sqrt(wtdExtRtsec[0]);
                    studentsT = new TDistribution(2 * nN - 2);
                    extMeanErr95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2.0))
                            * wtdExtRtsec[2];
                } else if (MSWD > 4.0) {
                    // Failure of RTSEC algorithm because of extremely high MSWD:
                    // fall back to plain sample statistics of the accepted values.
                    DescriptiveStatistics stats = new DescriptiveStatistics(yy);
                    extSigma = stats.getStandardDeviation();
                    extMean = stats.getMean();
                    extMeanErr95 = t95 * extSigma / Math.sqrt(nN);
                } else {
                    extSigma = 0.0;
                    extMean = 0.0;
                    extMeanErr95 = 0.0;
                }
                extMeanErr68 = t68 / t95 * extMeanErr95;
            }
            if (canReject && (probability < SQUID_MINIMUM_PROBABILITY)) {
                // GOSUB REJECT
                double wtdAvg = 0.0;
                if (extSigma != 0.0) {
                    wtdAvg = extMean;
                } else {
                    wtdAvg = intMean;
                }
                // reject outliers
                int n0 = nN;
                for (int i = 0; i < nPts; i++) {
                    if ((values[i] != 0.0) && (nN > 0.85 * nPts)) {
                        // Reject no more than 30% of ratios
                        // Start rej. tolerance at 2-sigma, increase slightly each pass.
                        double pointError = 2.0 * Math.sqrt(errors[i] * errors[i] + extSigma * extSigma);
                        double totalError = Math
                                .sqrt(pointError * pointError + (4.0 * extMeanErr68 * extMeanErr68));
                        // 1st-pass tolerance is 2-sigma; 2nd is 2.25-sigma; 3rd is 2.5-sigma.
                        double tolerance = (1.0 + (double) (count - 1.0) / 4.0) * totalError;
                        if (hardRej) {
                            tolerance = tolerance * 1.25;
                        }
                        // 1st-pass tolerance is 2-sigma; 2nd is 2.5-sigma; 3rd is 3-sigma...
                        q = values[i] - wtdAvg;
                        if ((Math.abs(q) > tolerance) && nN > 2) {
                            nN--;
                            wRejected[i][0] = values[i];
                            values[i] = 0.0;
                            wRejected[i][1] = errors[i];
                            errors[i] = 0.0;
                        } // check tolerance
                    } // Reject no more than 30% of ratios
                } // nPts loop
                // Recompute everything if this pass rejected anything.
                reCalc = (nN < n0);
            } // canReject test
        } while (reCalc);
        if (canTukeys) {
            // March 2018 not finished as not sure where used.
            // NOTE(review): all results below are computed but never used or
            // returned — left as-is pending completion of this branch.
            System.arraycopy(values, 0, tbX, 0, nPts);
            double[] tukey = SquidMathUtils.tukeysBiweight(tbX, 6);
            biWtMean = tukey[0];
            biWtSigma = tukey[1];
            DescriptiveStatistics stats = new DescriptiveStatistics(tbX);
            double biWtErr95Median = stats.getPercentile(50);
            double median = median(tbX);
            double medianConf = medianConfLevel(nPts);
            double medianPlusErr = medianUpperLim(tbX) - median;
            double medianMinusErr = median - medianLowerLim(tbX);
        }
        // determine whether to return internal or external
        if (extMean != 0.0) {
            retVal = new double[][] {
                    { extMean, extSigma, extMeanErr68, extMeanErr95, MSWD, probability, 1.0 },
                    // contains zeroes for each reject values
            };
        } else {
            retVal = new double[][] {
                    { intMean, intSigmaMean, intErr68, intMeanErr95, MSWD, probability, 0.0 },
                    // contains zeroes for each reject values
            };
        }
    }
    return retVal;
}
From source file:org.cirdles.ludwig.isoplot3.Means.java
/** * Upper error on median of double[] values * * @param values// w w w .ja va2s . c om * @return double median upper error */ public static double medianUpperLim(double[] values) { double retVal; // Table from Rock et al, based on Sign test & table of binomial probs for a ranked data-set. double[] uR = new double[] { 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8 }; int n = values.length; double u = 0.0; if (n > 25) { u = 0.5 * (n + 1.0 - 1.96 * Math.sqrt(n)); } else if (n > 2) { u = uR[n - 3]; // because 0-based array } DescriptiveStatistics stats = new DescriptiveStatistics(values); retVal = stats.getPercentile((n - u + 1) / n); // vba = App.Large(v, u); return retVal; }
From source file:org.cirdles.ludwig.isoplot3.Means.java
/** * Lower error on median of double[] values * * @param values//from w ww . ja v a2 s .com * @return */ public static double medianLowerLim(double[] values) { double retVal; // Table from Rock et al, based on Sign test & table of binomial probs for a ranked data-set. double[] lR = new double[] { 3, 4, 5, 6, 7, 07, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 14, 15, 16, 16, 17, 18, 18 }; int n = values.length; double u = 0.0; double l = 0.0; if (n > 25) { u = 0.5 * (n + 1.0 - 1.96 * Math.sqrt(n)); l = n + 1 - u; } else if (n > 2) { l = lR[n - 3]; // because 0-based array } DescriptiveStatistics stats = new DescriptiveStatistics(values); retVal = stats.getPercentile((n - l + 1) / n); // vba = App.Large(v, l); return retVal; }
From source file:org.cirdles.ludwig.squid25.Utilities.java
/** * Calculates arithmetic median of array of doubles. * * @param values a double[] array of values * @return median as double/* w ww .ja v a 2 s .c om*/ * @pre values has at least one element */ public static double median(double[] values) { double median; // enforce precondition if (values.length == 0) { median = 0.0; } else { DescriptiveStatistics stats = new DescriptiveStatistics(values); median = stats.getPercentile(50); } return median; }