List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getValues
public double[] getValues()
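Returns a fresh copy of the current set of values that have been added to the statistics. Because the returned array is a copy, callers can sort or modify it without affecting the underlying data. A minimal standalone sketch (not taken from any of the source files below; the class name GetValuesExample is invented for illustration):

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

import java.util.Arrays;

public class GetValuesExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        stats.addValue(3.0);
        stats.addValue(1.0);
        stats.addValue(2.0);

        // getValues() returns a copy of the stored values
        double[] values = stats.getValues();
        Arrays.sort(values); // safe: does not touch the underlying data

        System.out.println(Arrays.toString(values)); // [1.0, 2.0, 3.0]
        System.out.println(stats.getMean());         // 2.0, unchanged
    }
}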
From source file:fr.ens.transcriptome.aozan.util.StatisticsUtils.java
/**
 * Print dataset.
 * @param stat the statistics holding the dataset
 * @return string of dataset
 */
private String printValues(final DescriptiveStatistics stat) {
    final StringBuilder s = new StringBuilder();
    for (final double d : stat.getValues()) {
        s.append(d);
        s.append("\n");
    }
    return s.toString();
}
From source file:com.insightml.models.meta.VoteModel.java
private double resolve(final DescriptiveStatistics stats) {
    switch (strategy) {
    case AVERAGE:
        return stats.getMean();
    case MEDIAN:
        return stats.getPercentile(50);
    case GEOMETRIC:
        return stats.getGeometricMean();
    case HARMONIC:
        // Harmonic mean: n divided by the sum of reciprocals
        double sum = 0;
        for (final double value : stats.getValues()) {
            sum += 1 / value;
        }
        return stats.getN() * 1.0 / sum;
    default:
        throw new IllegalStateException();
    }
}
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    long start, end;
    boolean fullScan = true;
    boolean batchRead = true;
    boolean singleRead = true;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader =
            new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs,
                    numBits, signed);

    if (fullScan) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        raf.getChannel().read(buffer);
        raf.close();
        int[] input = new int[numBits];
        int[] output = new int[32];
        int numBatches = (numDocs + 31) / 32;
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numBatches; i++) {
                for (int j = 0; j < numBits; j++) {
                    input[j] = buffer.getInt(i * numBits * 4 + j * 4);
                }
                BitPacking.fastunpack(input, 0, output, 0, numBits);
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println(" v2 full scan stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    if (singleRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int value = reader.getInt(i);
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println(" v2 sequential single read for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    if (batchRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        int batchSize = Math.min(5000, numDocs);
        int[] output = new int[batchSize];
        int[] rowIds = new int[batchSize];
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            int rowId = 0;
            while (rowId < numDocs) {
                int length = Math.min(batchSize, numDocs - rowId);
                for (int i = 0; i < length; i++) {
                    rowIds[i] = rowId + i;
                }
                reader.getIntBatch(rowIds, output, length);
                rowId = rowId + length;
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println("v2 sequential batch read stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    reader.close();
}
From source file:io.yields.math.framework.data.DataProvidersTest.java
@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000) @Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers") public void testRandomDistribution(Explorer<Pair> explorer) { KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest(); DescriptiveStatistics xStats = new DescriptiveStatistics(); DescriptiveStatistics yStats = new DescriptiveStatistics(); explorer.all().forEach(result -> { Pair pair = result.getFunctionOutcome().orElse(new Pair()); xStats.addValue(pair.getX1());/*from w w w .ja v a2 s . c o m*/ yStats.addValue(pair.getX2()); }); DescriptiveStatistics cross = new DescriptiveStatistics(); for (int i = 0; i < xStats.getN(); i++) { cross.addValue((xStats.getValues()[i] - .5) * (yStats.getValues()[i] - .5)); } /** * x and y should be uniformly distributed */ assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xStats.getValues())) .isEqualTo(0, Delta.delta(.015)); assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), yStats.getValues())) .isEqualTo(0, Delta.delta(.015)); /** * and have zero correlation */ assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05)); }
From source file:iac_soap.statsq.NormVerdService.java
@Override
public NormVerdResponse calculateNormVerd(List<Double> data) throws MyFault {
    // Service requirements
    if (data.isEmpty()) {
        throw new MyFault("No data is provided");
    } else if (data.size() < 2) {
        throw new MyFault("A minimum of two data elements is required.");
    }

    // Declare Apache Commons DescriptiveStatistics
    DescriptiveStatistics stats = new DescriptiveStatistics();

    // Fill the DescriptiveStatistics instance with the provided dataset
    for (int i = 0; i < data.size(); i++) {
        stats.addValue(data.get(i));
    }

    // Let DescriptiveStatistics calculate the mean and standard deviation
    double mean = stats.getMean();
    double std = stats.getStandardDeviation();

    // Run the Kolmogorov-Smirnov test and calculate kurtosis and skewness
    NormalDistribution x = new NormalDistribution(mean, std);
    double p_value = TestUtils.kolmogorovSmirnovTest(x, stats.getValues(), false);
    double kurtosis = stats.getKurtosis();
    double skewness = stats.getSkewness();
    boolean result = false;

    // The dataset is considered normally distributed if:
    // - the Kolmogorov-Smirnov p-value is >= 0.05
    // - both kurtosis and skewness are between -2.0 and 2.0
    if (kurtosis < 2.0 && kurtosis > -2.0 && skewness < 2.0 && skewness > -2.0 && p_value >= 0.05) {
        result = true;
    }

    // Response message
    NormVerdResponse nvr = new NormVerdResponse(result, p_value, kurtosis, skewness);
    return nvr;
}
From source file:gdsc.smlm.ij.plugins.PSFEstimator.java
private void getPairedP(DescriptiveStatistics sample1, DescriptiveStatistics sample2, int i, double[] p,
        boolean[] identical) throws IllegalArgumentException {
    if (sample1.getN() < 2)
        return;
    // The number returned is the smallest significance level at which one can reject the null
    // hypothesis that the mean of the paired differences is 0 in favor of the two-sided alternative
    // that the mean paired difference is not equal to 0. For a one-sided test, divide the returned value by 2.
    p[i] = TestUtils.pairedTTest(sample1.getValues(), sample2.getValues());
    identical[i] = (p[i] > settings.pValue);
}
From source file:gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
    long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions, RoundingMode.CEILING);
    int totalGroups = histogram.getGroups().size();
    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }
        // Move the candidate to a new bucket if the attempted total reaches 2x the interval
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add the group to the current partition
            count += group.count;
        }
        if (count >= interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (count == 0) {
        // The last group was used as the last partition point:
        // exchange it for the global high watermark
        partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark));
    } else {
        // Summarize the last partition
        statistics.addValue(count);
        // Add the global high watermark as the last point
        partitionPoints.add(Long.toString(expectedHighWatermark));
    }

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}
From source file:org.apache.gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
        long lowWatermark, long expectedHighWatermark) {
    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();
    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }
        /*
         * Greedy algorithm: keep adding groups until the running total would reach twice the interval size.
         * Proof sketch: if the nth group pushes the total to at least 2 x interval, then groups 0..(n-1) plus
         * the nth group together cover at least two full intervals, so all intervals (of the original size)
         * are saturated with no unused space in between. A 3x or 4x threshold would also work, but it is not
         * space efficient.
         */
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add the group to the current partition
            count += group.count;
        }
        if (count >= interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }
    if (count > 0) {
        // Summarize the last partition
        statistics.addValue(count);
    }
    // Add the global high watermark as the last point
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}
From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java
/**
 * Get new statistics that exclude values outside of mean +/- MARGIN * mean.
 *
 * @param data source data
 * @return new {@link DescriptiveStatistics} object with the sanitized data
 */
private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) {
    double meanValue = data.getMean();
    double delta = MARGIN * meanValue;
    double minVal = meanValue - delta;
    double maxVal = meanValue + delta;
    return new DescriptiveStatistics(
            Arrays.stream(data.getValues()).filter(x -> x > minVal && x < maxVal).toArray());
}
From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java
/**
 * Get the median value for the given statistics.
 *
 * @param data collected data points.
 * @return median value.
 */
private static double median(@NotNull DescriptiveStatistics data) {
    return new Median().evaluate(data.getValues());
}