List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getValues
public double[] getValues()
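Returns a fresh copy of the current set of values that have been added to the statistics. Because the returned array is a copy, callers can sort or modify it without affecting the underlying data. A minimal standalone sketch (not taken from any of the source files below; the class name GetValuesExample is invented for illustration):

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

import java.util.Arrays;

public class GetValuesExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        stats.addValue(3.0);
        stats.addValue(1.0);
        stats.addValue(2.0);

        // getValues() returns a copy of the stored values
        double[] values = stats.getValues();
        Arrays.sort(values); // safe: does not touch the underlying data

        System.out.println(Arrays.toString(values)); // [1.0, 2.0, 3.0]
        System.out.println(stats.getMean());         // 2.0, unchanged
    }
}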
From source file:fr.ens.transcriptome.aozan.util.StatisticsUtils.java
/**
 * Print dataset.
 * @param stat the statistics holding the dataset
 * @return string of dataset
 */
private String printValues(final DescriptiveStatistics stat) {
    final StringBuilder s = new StringBuilder();
    for (final double d : stat.getValues()) {
        s.append(d);
        s.append("\n");
    }
    return s.toString();
}
From source file:com.insightml.models.meta.VoteModel.java
private double resolve(final DescriptiveStatistics stats) {
    switch (strategy) {
    case AVERAGE:
        return stats.getMean();
    case MEDIAN:
        return stats.getPercentile(50);
    case GEOMETRIC:
        return stats.getGeometricMean();
    case HARMONIC:
        // Harmonic mean: n divided by the sum of reciprocals
        double sum = 0;
        for (final double value : stats.getValues()) {
            sum += 1 / value;
        }
        return stats.getN() * 1.0 / sum;
    default:
        throw new IllegalStateException();
    }
}
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    long start, end;
    boolean fullScan = true;
    boolean batchRead = true;
    boolean singleRead = true;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader =
            new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs,
                    numBits, signed);

    if (fullScan) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        raf.getChannel().read(buffer);
        raf.close();
        int[] input = new int[numBits];
        int[] output = new int[32];
        int numBatches = (numDocs + 31) / 32;
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numBatches; i++) {
                for (int j = 0; j < numBits; j++) {
                    input[j] = buffer.getInt(i * numBits * 4 + j * 4);
                }
                BitPacking.fastunpack(input, 0, output, 0, numBits);
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println(" v2 full scan stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    if (singleRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int value = reader.getInt(i);
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println(" v2 sequential single read for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    if (batchRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        int batchSize = Math.min(5000, numDocs);
        int[] output = new int[batchSize];
        int[] rowIds = new int[batchSize];
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            int rowId = 0;
            while (rowId < numDocs) {
                int length = Math.min(batchSize, numDocs - rowId);
                for (int i = 0; i < length; i++) {
                    rowIds[i] = rowId + i;
                }
                reader.getIntBatch(rowIds, output, length);
                rowId = rowId + length;
            }
            end = System.currentTimeMillis();
            stats.addValue(end - start);
        }
        System.out.println("v2 sequential batch read stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    reader.close();
}
From source file:io.yields.math.framework.data.DataProvidersTest.java
@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000) @Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers") public void testRandomDistribution(Explorer<Pair> explorer) { KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest(); DescriptiveStatistics xStats = new DescriptiveStatistics(); DescriptiveStatistics yStats = new DescriptiveStatistics(); explorer.all().forEach(result -> { Pair pair = result.getFunctionOutcome().orElse(new Pair()); xStats.addValue(pair.getX1());/*from w w w .ja v a2 s . c o m*/ yStats.addValue(pair.getX2()); }); DescriptiveStatistics cross = new DescriptiveStatistics(); for (int i = 0; i < xStats.getN(); i++) { cross.addValue((xStats.getValues()[i] - .5) * (yStats.getValues()[i] - .5)); } /** * x and y should be uniformly distributed */ assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xStats.getValues())) .isEqualTo(0, Delta.delta(.015)); assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), yStats.getValues())) .isEqualTo(0, Delta.delta(.015)); /** * and have zero correlation */ assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05)); }
From source file:iac_soap.statsq.NormVerdService.java
@Override
public NormVerdResponse calculateNormVerd(List<Double> data) throws MyFault {
    // Service requirements
    if (data.isEmpty()) {
        throw new MyFault("No data is provided");
    } else if (data.size() < 2) {
        throw new MyFault("A minimum of two data elements is required.");
    }

    // Declare Apache Commons DescriptiveStatistics
    DescriptiveStatistics stats = new DescriptiveStatistics();

    // Fill the DescriptiveStatistics instance with the provided dataset
    for (int i = 0; i < data.size(); i++) {
        stats.addValue(data.get(i));
    }

    // Let DescriptiveStatistics calculate the mean and standard deviation
    double mean = stats.getMean();
    double std = stats.getStandardDeviation();

    // Run the Kolmogorov-Smirnov test and calculate kurtosis and skewness
    NormalDistribution x = new NormalDistribution(mean, std);
    double p_value = TestUtils.kolmogorovSmirnovTest(x, stats.getValues(), false);
    double kurtosis = stats.getKurtosis();
    double skewness = stats.getSkewness();
    boolean result = false;

    // The dataset is considered normally distributed if:
    // - the Kolmogorov-Smirnov p-value is >= 0.05
    // - both kurtosis and skewness are between -2.0 and 2.0
    if (kurtosis < 2.0 && kurtosis > -2.0 && skewness < 2.0 && skewness > -2.0 && p_value >= 0.05) {
        result = true;
    }

    // Response message
    NormVerdResponse nvr = new NormVerdResponse(result, p_value, kurtosis, skewness);
    return nvr;
}
From source file:gdsc.smlm.ij.plugins.PSFEstimator.java
private void getPairedP(DescriptiveStatistics sample1, DescriptiveStatistics sample2, int i, double[] p,
        boolean[] identical) throws IllegalArgumentException {
    if (sample1.getN() < 2)
        return;
    // The number returned is the smallest significance level at which one can reject the null
    // hypothesis that the mean of the paired differences is 0 in favor of the two-sided alternative
    // that the mean paired difference is not equal to 0. For a one-sided test, divide the returned value by 2.
    p[i] = TestUtils.pairedTTest(sample1.getValues(), sample2.getValues());
    identical[i] = (p[i] > settings.pValue);
}
From source file:gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
    long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions, RoundingMode.CEILING);
    int totalGroups = histogram.getGroups().size();
    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }
        // Move the candidate to a new bucket if the attempted total reaches 2x the interval
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add the group to the current partition
            count += group.count;
        }
        if (count >= interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (count == 0) {
        // The last group was used as the last partition point:
        // exchange it for the global high watermark
        partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark));
    } else {
        // Summarize the last partition
        statistics.addValue(count);
        // Add the global high watermark as the last point
        partitionPoints.add(Long.toString(expectedHighWatermark));
    }

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}
From source file:org.apache.gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
        long lowWatermark, long expectedHighWatermark) {
    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();
    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }
        /*
         * Greedy algorithm: keep adding groups until the running total would reach twice the interval size.
         * Proof sketch: if the nth group pushes the total to at least 2 x interval, then groups 0..(n-1) plus
         * the nth group together cover at least two full intervals, so all intervals (of the original size)
         * are saturated with no unused space in between. A 3x or 4x threshold would also work, but it is not
         * space efficient.
         */
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add the group to the current partition
            count += group.count;
        }
        if (count >= interval) {
            // Summarize the current partition
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }
    if (count > 0) {
        // Summarize the last partition
        statistics.addValue(count);
    }
    // Add the global high watermark as the last point
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}
From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java
/**
 * Get new statistics that exclude values outside of mean +/- MARGIN * mean.
 *
 * @param data source data
 * @return new {@link DescriptiveStatistics} object with the sanitized data
 */
private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) {
    double meanValue = data.getMean();
    double delta = MARGIN * meanValue;
    double minVal = meanValue - delta;
    double maxVal = meanValue + delta;
    return new DescriptiveStatistics(
            Arrays.stream(data.getValues()).filter(x -> x > minVal && x < maxVal).toArray());
}
From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java
/**
 * Get the median value for the given statistics.
 *
 * @param data collected data points.
 * @return median value.
 */
private static double median(@NotNull DescriptiveStatistics data) {
    return new Median().evaluate(data.getValues());
}