Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics toString

Introduction

This page lists example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.toString(), collected from open-source projects.

Prototype

@Override
public String toString() 

Document

Generates a text report displaying univariate statistics from values that have been added.
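
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) of what the generated report looks like; the sample values are invented for illustration:

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class ToStringDemo {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // Add a few sample values; any doubles will do.
        for (double v : new double[] { 1.0, 2.0, 3.0, 4.0 }) {
            stats.addValue(v);
        }
        // toString() returns a multi-line report covering n, min, max,
        // mean, std dev, median, skewness and kurtosis.
        System.out.println(stats.toString());
    }
}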

Usage

From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java

public static void singleValuedReadBenchMarkV1(File file, int numDocs, int columnSizeInBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmark");
    BaseSingleColumnSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(
            heapBuffer, numDocs, columnSizeInBits, signed);
    // sequential read
    long start, end;
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
        start = System.currentTimeMillis();
        for (int i = 0; i < numDocs; i++) {
            int value = reader.getInt(i);
        }
        end = System.currentTimeMillis();
        stats.addValue(end - start);
    }
    System.out.println(" v1 sequential read stats for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    reader.close();
    heapBuffer.close();
}
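
The replaceAll("\n", ", ") idiom above collapses the multi-line report into a single log line; the same pattern recurs throughout the benchmarks below. A minimal sketch, with made-up values:

DescriptiveStatistics stats = new DescriptiveStatistics();
stats.addValue(12);
stats.addValue(15);
stats.addValue(11);
// Prints something like: "DescriptiveStatistics:, n: 3, min: 11.0, max: 15.0, ..."
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));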

From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java

public static void multiValuedReadBenchMarkV2(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc,
        int columnSizeInBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    boolean readOneEachTime = true;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitMultiValueReader(
            heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);

    int[] intArray = new int[maxEntriesPerDoc];
    long start, end;

    // read one entry at a time
    if (readOneEachTime) {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int length = reader.getIntArray(i, intArray);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v2 multi value sequential read one stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    reader.close();
    heapBuffer.close();
}

From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java

public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc,
        int columnSizeInBits) throws Exception {
    System.out.println("******************************************************************");
    System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:"
            + totalNumValues + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
    long start, end;
    boolean readFile = true;
    boolean randomRead = true;
    boolean contextualRead = true;
    boolean signed = false;
    boolean isMmap = false;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(
            heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);

    int[] intArray = new int[maxEntriesPerDoc];
    File outfile = new File("/tmp/" + file.getName() + ".raw");
    FileWriter fw = new FileWriter(outfile);
    for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray);
        StringBuilder sb = new StringBuilder();
        String delim = "";
        for (int j = 0; j < length; j++) {
            sb.append(delim);
            sb.append(intArray[j]);
            delim = ",";
        }
        fw.write(sb.toString());
        fw.write("\n");
    }
    fw.close();

    // sequential read
    if (readFile) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        RandomAccessFile raf = new RandomAccessFile(file, "rw");
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        raf.getChannel().read(buffer);
        for (int run = 0; run < MAX_RUNS; run++) {
            long length = file.length();
            start = System.currentTimeMillis();
            for (int i = 0; i < length; i++) {
                byte b = buffer.get(i);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value read bytes stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));

        raf.close();
    }
    if (randomRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int length = reader.getIntArray(i, intArray);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }

    if (contextualRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
            MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int length = reader.getIntArray(i, intArray, context);
            }
            end = System.currentTimeMillis();
            // System.out.println("RUN:" + run + "Time:" + (end-start));
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));

    }
    reader.close();
    heapBuffer.close();
    System.out.println("******************************************************************");

}

From source file:com.intuit.tank.persistence.databases.BucketDataItemTest.java

/**
 * Run the DescriptiveStatistics getStats() method test.
 *
 * @throws Exception
 * 
 * @generatedBy CodePro at 9/10/14 10:32 AM
 */
@Test
public void testGetStats_1() throws Exception {
    BucketDataItem fixture = new BucketDataItem(1, new Date(), new DescriptiveStatistics());

    DescriptiveStatistics result = fixture.getStats();

    assertNotNull(result);
    assertEquals(
            "DescriptiveStatistics:\nn: 0\nmin: NaN\nmax: NaN\nmean: NaN\nstd dev: NaN\nmedian: NaN\nskewness: NaN\nkurtosis: NaN\n",
            result.toString());
    assertEquals(Double.NaN, result.getMax(), 1.0);
    assertEquals(Double.NaN, result.getVariance(), 1.0);
    assertEquals(Double.NaN, result.getMean(), 1.0);
    assertEquals(-1, result.getWindowSize());
    assertEquals(0.0, result.getSumsq(), 1.0);
    assertEquals(Double.NaN, result.getKurtosis(), 1.0);
    assertEquals(0.0, result.getSum(), 1.0);
    assertEquals(Double.NaN, result.getSkewness(), 1.0);
    assertEquals(Double.NaN, result.getPopulationVariance(), 1.0);
    assertEquals(Double.NaN, result.getStandardDeviation(), 1.0);
    assertEquals(Double.NaN, result.getGeometricMean(), 1.0);
    assertEquals(0L, result.getN());
    assertEquals(Double.NaN, result.getMin(), 1.0);
}
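
For contrast with the empty-statistics report asserted above, a short sketch (not part of the test class) showing the same report once values have been added; the numbers are illustrative:

DescriptiveStatistics stats = new DescriptiveStatistics();
stats.addValue(2.0);
stats.addValue(4.0);
// The NaN fields now carry real numbers, e.g.
// "DescriptiveStatistics:\nn: 2\nmin: 2.0\nmax: 4.0\nmean: 3.0\nstd dev: 1.41...\n..."
System.out.println(stats.toString());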

From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java

public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
    boolean signed = false;
    boolean isMmap = false;
    long start, end;
    boolean fullScan = true;

    boolean batchRead = true;
    boolean singleRead = true;

    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
            "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(
            heapBuffer, numDocs, numBits, signed);

    if (fullScan) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        raf.getChannel().read(buffer);
        raf.close();
        int[] input = new int[numBits];
        int[] output = new int[32];
        int numBatches = (numDocs + 31) / 32;
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numBatches; i++) {
                for (int j = 0; j < numBits; j++) {
                    input[j] = buffer.getInt(i * numBits * 4 + j * 4);
                }
                BitPacking.fastunpack(input, 0, output, 0, numBits);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 full scan stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (singleRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int value = reader.getInt(i);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 sequential single read for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (batchRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        int batchSize = Math.min(5000, numDocs);
        int[] output = new int[batchSize];
        int[] rowIds = new int[batchSize];

        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            int rowId = 0;
            while (rowId < numDocs) {
                int length = Math.min(batchSize, numDocs - rowId);
                for (int i = 0; i < length; i++) {
                    rowIds[i] = rowId + i;
                }
                reader.getIntBatch(rowIds, output, length);
                rowId = rowId + length;
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v2 sequential batch read stats for " + file.getName());
        System.out.println(
                stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    reader.close();

}

From source file:cc.redberry.core.tensor.BulkTestsForParser.java

@Test
public void testAllExpressionsInTestDirectory() {
    File testDirectory = new File("src/test");
    Counter c = new Counter(), m = new Counter();
    DescriptiveStatistics statistics = new DescriptiveStatistics();
    testParseRecurrently(testDirectory, c, m, statistics);
    System.out.println("Total number of lines containing parse(..): " + c.counter);
    System.out.println("Total number of matched and parsed lines: " + m.counter);
    System.out.println("Strings statistics: \n\t" + statistics.toString().replace("\n", "\n\t"));
    Assert.assertTrue((c.counter - m.counter) < 2);
}

From source file:gobblin.salesforce.SalesforceSource.java

String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
    long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions,
            RoundingMode.CEILING);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();

    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();
    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point;
            partitionPoints
                    .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        // Start a new partition if adding this group would bring the running total to at least 2x the interval
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize current group
            statistics.addValue(count);
            // A step-in start
            partitionPoints
                    .add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add group into current partition
            count += group.count;
        }

        if (count >= interval) {
            // Summarize current group
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    // If the last group is used as the last partition point
    if (count == 0) {
        // Exchange the last partition point with global high watermark
        partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark));
    } else {
        // Summarize last group
        statistics.addValue(count);
        // Add global high watermark as last point
        partitionPoints.add(Long.toString(expectedHighWatermark));
    }

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}

From source file:org.apache.gobblin.salesforce.SalesforceSource.java

String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
        long lowWatermark, long expectedHighWatermark) {
    int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
    int totalGroups = histogram.getGroups().size();

    log.info("Histogram total record count: " + histogram.totalRecordCount);
    log.info("Histogram total groups: " + totalGroups);
    log.info("maxPartitions: " + maxPartitions);
    log.info("interval: " + interval);

    List<HistogramGroup> groups = histogram.getGroups();
    List<String> partitionPoints = new ArrayList<>();
    DescriptiveStatistics statistics = new DescriptiveStatistics();

    int count = 0;
    HistogramGroup group;
    Iterator<HistogramGroup> it = groups.iterator();

    while (it.hasNext()) {
        group = it.next();
        if (count == 0) {
            // Add a new partition point;
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
        }

        /**
         * Greedy algorithm: keep adding groups until the running total reaches twice the interval size.
         * Proof: if the nth group pushes the total to 2 x interval or beyond, then groups 0 through (n-1)
         * plus the nth group have a combined size of at least 2 x interval, so every interval is saturated
         * with no unused space in between. A 3x or 4x threshold would also work, but it is less space
         * efficient.
         */
        if (count != 0 && count + group.count >= 2 * interval) {
            // Summarize current group
            statistics.addValue(count);
            // A step-in start
            partitionPoints.add(
                    Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
            count = group.count;
        } else {
            // Add group into current partition
            count += group.count;
        }

        if (count >= interval) {
            // Summarize current group
            statistics.addValue(count);
            // A fresh start next time
            count = 0;
        }
    }

    if (partitionPoints.isEmpty()) {
        throw new RuntimeException("Unexpected empty partition list");
    }

    if (count > 0) {
        // Summarize last group
        statistics.addValue(count);
    }

    // Add global high watermark as last point
    partitionPoints.add(Long.toString(expectedHighWatermark));

    log.info("Dynamic partitioning statistics: ");
    log.info("data: " + Arrays.toString(statistics.getValues()));
    log.info(statistics.toString());
    String specifiedPartitions = Joiner.on(",").join(partitionPoints);
    log.info("Calculated specified partitions: " + specifiedPartitions);
    return specifiedPartitions;
}