Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getValues

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getValues().

Prototype

public double[] getValues() 

Document

Returns the current set of values in an array of double primitives.

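Quick example

Before the project examples below, here is a minimal self-contained sketch (the class name GetValuesDemo is ours, for illustration) showing that getValues() hands back a copy of the stored values, so mutating the returned array does not affect the statistics:

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class GetValuesDemo {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 3.0 }) {
            stats.addValue(v);
        }

        // getValues() returns a copy of the currently stored values
        double[] values = stats.getValues();
        values[0] = 99.0; // mutates the copy only

        System.out.println(stats.getValues()[0]); // still 1.0
        System.out.println(stats.getMean());      // 2.0
    }
}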
Usage

From source file:fr.ens.transcriptome.aozan.util.StatisticsUtils.java

/**
 * Formats the dataset values as a string, one value per line.
 * @param stat the statistics holding the values
 * @return the values as a newline-separated string
 */
private String printValues(final DescriptiveStatistics stat) {
    final StringBuilder s = new StringBuilder();

    for (final double d : stat.getValues()) {
        s.append(d);
        s.append("\n");
    }

    return s.toString();
}

From source file:com.insightml.models.meta.VoteModel.java

private double resolve(final DescriptiveStatistics stats) {
    switch (strategy) {
    case AVERAGE:
        return stats.getMean();
    case MEDIAN:
        return stats.getPercentile(50);
    case GEOMETRIC:
        return stats.getGeometricMean();
    case HARMONIC:
        // Harmonic mean: n / sum(1 / x_i); DescriptiveStatistics has no direct accessor for it
        double sum = 0;
        for (final double value : stats.getValues()) {
            sum += 1 / value;
        }
        return stats.getN() * 1.0 / sum;
    default:
        throw new IllegalStateException();
    }
}
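
A quick worked check of the HARMONIC branch above: for the values 1, 2 and 4 the reciprocal sum is 1 + 0.5 + 0.25 = 1.75, so the harmonic mean is 3 / 1.75 ≈ 1.714, which is exactly what the loop over getValues() computes. The manual loop is needed because DescriptiveStatistics exposes arithmetic, geometric and quadratic means but, to our knowledge, no harmonic-mean accessor.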

From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java

public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    long start, end;
    boolean fullScan = true;

    boolean batchRead = true;
    boolean singleRead = true;

    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(
            heapBuffer, numDocs, numBits, signed);

    if (fullScan) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        raf.getChannel().read(buffer);
        raf.close();
        int[] input = new int[numBits];
        int[] output = new int[32];
        int numBatches = (numDocs + 31) / 32;
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numBatches; i++) {
                for (int j = 0; j < numBits; j++) {
                    input[j] = buffer.getInt(i * numBits * 4 + j * 4);
                }
                BitPacking.fastunpack(input, 0, output, 0, numBits);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 full scan stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (singleRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int value = reader.getInt(i);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 sequential single read for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (batchRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        int batchSize = Math.min(5000, numDocs);
        int[] output = new int[batchSize];
        int[] rowIds = new int[batchSize];

        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            int rowId = 0;
            while (rowId < numDocs) {
                int length = Math.min(batchSize, numDocs - rowId);
                for (int i = 0; i < length; i++) {
                    rowIds[i] = rowId + i;
                }
                reader.getIntBatch(rowIds, output, length);
                rowId = rowId + length;
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v2 sequential batch read stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    reader.close();

}

From source file:io.yields.math.framework.data.DataProvidersTest.java

@Explore(name = "check distributional properties of random numbers", dataProvider = DataProviders.FixedMersenneTwisterDataProvider.class, nrOfRuns = 10000)
@Exploration(name = "2D uniform samples", context = FunctionExplorerWithoutProperties.class, group = "data providers")
public void testRandomDistribution(Explorer<Pair> explorer) {
    KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
    DescriptiveStatistics xStats = new DescriptiveStatistics();
    DescriptiveStatistics yStats = new DescriptiveStatistics();
    explorer.all().forEach(result -> {
        Pair pair = result.getFunctionOutcome().orElse(new Pair());
        xStats.addValue(pair.getX1());
        yStats.addValue(pair.getX2());
    });
    DescriptiveStatistics cross = new DescriptiveStatistics();
    for (int i = 0; i < xStats.getN(); i++) {
        cross.addValue((xStats.getValues()[i] - .5) * (yStats.getValues()[i] - .5));
    }
    /**
     * x and y should be uniformly distributed
     */
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), xStats.getValues()))
            .isEqualTo(0, Delta.delta(.015));
    assertThat(ksTest.kolmogorovSmirnovStatistic(new UniformRealDistribution(0, 1), yStats.getValues()))
            .isEqualTo(0, Delta.delta(.015));
    /**
     * and have zero correlation
     */
    assertThat(cross.getMean()).isEqualTo(0, Delta.delta(.05));
}
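
The cross-term check works because, for independent uniform samples on (0, 1), each centered factor (x - 0.5) has mean zero, so the expected product is exactly the covariance of x and y; the test therefore asserts that the sample mean of the products stays within 0.05 of zero.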

From source file:iac_soap.statsq.NormVerdService.java

@Override
public NormVerdResponse calculateNormVerd(List<Double> data) throws MyFault {

    //Service Requirements 
    if (data.isEmpty()) {
        throw new MyFault("No data is provided");
    } else if (data.size() < 2) {
        throw new MyFault("A minimum of two data elements is required.");
    }

    //Create the Apache Commons DescriptiveStatistics accumulator
    DescriptiveStatistics stats = new DescriptiveStatistics();

    //Populate it with the provided dataset
    for (int i = 0; i < data.size(); i++) {
        stats.addValue(data.get(i));
    }

    //Compute the sample mean and standard deviation
    double mean = stats.getMean();
    double std = stats.getStandardDeviation();

    //Run the Kolmogorov-Smirnov test and compute the kurtosis and skewness
    NormalDistribution x = new NormalDistribution(mean, std);
    double p_value = TestUtils.kolmogorovSmirnovTest(x, stats.getValues(), false);
    double kurtosis = stats.getKurtosis();
    double skewness = stats.getSkewness();
    boolean result = false;

    //Check if the dataset is a normal distribution:
    //KolmogorovSmirnov p_value should be >= 0.05
    //Both kurtosis and skewness should be between -2.0 and 2.0
    if (kurtosis < 2.0 && kurtosis > -2.0 && skewness < 2.0 && skewness > -2.0 && p_value >= 0.05) {
        result = true;
    }

    //Response message:
    NormVerdResponse nvr = new NormVerdResponse(result, p_value, kurtosis, skewness);

    return nvr;
}
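
As a quick sanity check of the same Kolmogorov-Smirnov call, here is a sketch with synthetic data (our own toy snippet, reusing the NormalDistribution and TestUtils classes from the service above). Because the sample is drawn from the fitted distribution itself, the p-value should usually land well above the 0.05 threshold:

NormalDistribution std = new NormalDistribution(0, 1);
double[] sample = std.sample(200);                               // synthetic data from the model itself
double p = TestUtils.kolmogorovSmirnovTest(std, sample, false);  // two-sided KS p-value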

From source file:gdsc.smlm.ij.plugins.PSFEstimator.java

private void getPairedP(DescriptiveStatistics sample1, DescriptiveStatistics sample2, int i, double[] p,
        boolean[] identical) throws IllegalArgumentException {
    if (sample1.getN() < 2)
        return;

    // The number returned is the smallest significance level at which one can reject the null 
    // hypothesis that the mean of the paired differences is 0 in favor of the two-sided alternative 
    // that the mean paired difference is not equal to 0. For a one-sided test, divide the returned value by 2
    p[i] = TestUtils.pairedTTest(sample1.getValues(), sample2.getValues());
    identical[i] = (p[i] > settings.pValue);
}
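
Per the comment above, TestUtils.pairedTTest returns the two-sided p-value, and a one-sided test at the same level is obtained by halving it. A minimal sketch on toy arrays (the numbers are invented for illustration):

double[] before = { 1.1, 2.0, 3.2, 4.1 };
double[] after  = { 1.0, 2.1, 3.0, 4.3 };
double p = TestUtils.pairedTTest(before, after); // two-sided p-value
double oneSided = p / 2.0;                       // one-sided alternative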

From source file:gobblin.salesforce.SalesforceSource.java

String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
    long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions,
            RoundingMode.CEILING);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();

    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point;
            partitionPoints
                    .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        // Move the candidate to a new bucket if the attempted total is 2x of interval
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize current group
            statistics.addValue(count);
            // A step-in start
            partitionPoints
                    .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add group into current partition
            count += group.count;
        }

        if (count >= interval) {
            // Summarize current group
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    // If the last group is used as the last partition point
    if (count == 0) {
        // Exchange the last partition point with global high watermark
        partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark));
    } else {
        // Summarize last group
        statistics.addValue(count);
        // Add global high watermark as last point
        partitionPoints.add(Long.toString(expectedHighWatermark));
    }

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}

From source file:org.apache.gobblin.salesforce.SalesforceSource.java

String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
        long lowWatermark, long expectedHighWatermark) {
    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();

    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();

    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point;
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        /**
         * Greedy algorithm: keep adding groups to the current partition until the running total
         * would reach twice the interval size. Proof sketch: if the nth group pushes the total to
         * 2 x interval or more, then groups 0 through (n-1) plus the nth group together cover at
         * least 2 x interval, so every emitted partition is saturated to at least the original
         * interval size with no unused space in between. Thresholds of 3x, 4x, ... would also
         * work but are less space efficient.
         */
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize current group
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add group into current partition
            count += group.count;
        }

        if (count >= interval) {
            // Summarize current group
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }

    if (count > 0) {
        // Summarize last group
        statistics.addValue(count);
    }

    // Add global high watermark as last point
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}
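
To see the greedy bucketing used by both SalesforceSource variants in isolation, here is a self-contained toy version (plain int counts stand in for Gobblin's HistogramGroup, and the numbers are invented for illustration):

int[] groupCounts = { 30, 180, 50, 60, 20 };
int interval = 100;
java.util.List<Integer> bucketSizes = new java.util.ArrayList<>();
int count = 0;
for (int c : groupCounts) {
    if (count != 0 && count + c >= 2 * interval) {
        bucketSizes.add(count); // flush early rather than let the bucket reach 2x the target
        count = c;
    } else {
        count += c;             // keep filling the current bucket
    }
    if (count >= interval) {
        bucketSizes.add(count); // bucket reached the target size
        count = 0;
    }
}
if (count > 0) {
    bucketSizes.add(count);     // remainder becomes the last, possibly undersized, bucket
}
System.out.println(bucketSizes); // prints [30, 180, 110, 20]

Note that a single group larger than 2 * interval still becomes one oversized bucket; the greedy rule only prevents oversized buckets from being assembled out of several smaller groups.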

From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java

/**
 * Get new statistics that exclude values outside mean +/- MARGIN * mean
 * (a relative margin around the mean, as computed below).
 *
 * @param data source data
 * @return new {@link DescriptiveStatistics} object with the sanitized data
 */
private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) {
    double meanValue = data.getMean();
    double delta = MARGIN * meanValue;
    double minVal = meanValue - delta;
    double maxVal = meanValue + delta;
    return new DescriptiveStatistics(
            Arrays.stream(data.getValues()).filter(x -> x > minVal && x < maxVal).toArray());
}

From source file:org.apache.hadoop.hive.metastore.tools.BenchmarkSuite.java

/**
 * Get the median value for the given statistics.
 * @param data collected data points.
 * @return median value.
 */
private static double median(@NotNull DescriptiveStatistics data) {
    return new Median().evaluate(data.getValues());
}
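
With the default percentile implementation this should be equivalent to data.getPercentile(50), since Median is simply Percentile fixed at the 50th quantile; the helper mainly buys readability:

double sameMedian = data.getPercentile(50); // equivalent under the default Percentile estimation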