List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#toString()
@Override
public String toString()
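Before the collected examples, here is a minimal, self-contained sketch (the class name is made up for illustration, not taken from the source files below) showing what toString() returns: a newline-separated summary of n, min, max, mean, std dev, median, skewness and kurtosis, which is why several of the benchmarks below flatten it with replaceAll("\n", ", ").

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

// Hypothetical example class, not taken from any of the source files listed below.
public class DescriptiveStatisticsToStringExample {
  public static void main(String[] args) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (double value : new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 }) {
      stats.addValue(value);
    }
    // Prints the multi-line summary (n, min, max, mean, std dev, median, skewness, kurtosis).
    System.out.println(stats.toString());
    // Many of the examples below collapse the summary onto a single line for logging:
    System.out.println(stats.toString().replaceAll("\n", ", "));
  }
}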
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void singleValuedReadBenchMarkV1(File file, int numDocs, int columnSizeInBits) throws Exception {
  boolean signed = false;
  boolean isMmap = false;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
      "benchmark");
  BaseSingleColumnSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(
      heapBuffer, numDocs, columnSizeInBits, signed);
  // sequential read
  long start, end;
  DescriptiveStatistics stats = new DescriptiveStatistics();
  for (int run = 0; run < MAX_RUNS; run++) {
    start = System.currentTimeMillis();
    for (int i = 0; i < numDocs; i++) {
      int value = reader.getInt(i);
    }
    end = System.currentTimeMillis();
    stats.addValue(end - start);
  }
  System.out.println(" v1 sequential read stats for " + file.getName());
  System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  reader.close();
  heapBuffer.close();
}
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void multiValuedReadBenchMarkV2(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc,
    int columnSizeInBits) throws Exception {
  boolean signed = false;
  boolean isMmap = false;
  boolean readOneEachTime = true;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
      "benchmarking");
  com.linkedin.pinot.core.io.reader.impl.v2.FixedBitMultiValueReader reader =
      new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitMultiValueReader(
          heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
  int[] intArray = new int[maxEntriesPerDoc];
  long start, end;

  // read one entry at a time
  if (readOneEachTime) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v2 multi value sequential read one stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  reader.close();
  heapBuffer.close();
}
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc,
    int columnSizeInBits) throws Exception {
  System.out.println("******************************************************************");
  System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:" + totalNumValues
      + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
  long start, end;
  boolean readFile = true;
  boolean randomRead = true;
  boolean contextualRead = true;
  boolean signed = false;
  boolean isMmap = false;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY,
      "benchmarking");
  BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(
      heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
  int[] intArray = new int[maxEntriesPerDoc];
  File outfile = new File("/tmp/" + file.getName() + ".raw");
  FileWriter fw = new FileWriter(outfile);
  for (int i = 0; i < numDocs; i++) {
    int length = reader.getIntArray(i, intArray);
    StringBuilder sb = new StringBuilder();
    String delim = "";
    for (int j = 0; j < length; j++) {
      sb.append(delim);
      sb.append(intArray[j]);
      delim = ",";
    }
    fw.write(sb.toString());
    fw.write("\n");
  }
  fw.close();

  // sequential read
  if (readFile) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    RandomAccessFile raf = new RandomAccessFile(file, "rw");
    ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
    raf.getChannel().read(buffer);
    for (int run = 0; run < MAX_RUNS; run++) {
      long length = file.length();
      start = System.currentTimeMillis();
      for (int i = 0; i < length; i++) {
        byte b = buffer.get(i);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v1 multi value read bytes stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    raf.close();
  }
  if (randomRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v1 multi value sequential read one stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (contextualRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
      MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray, context);
      }
      end = System.currentTimeMillis();
      // System.out.println("RUN:" + run + "Time:" + (end-start));
      stats.addValue((end - start));
    }
    System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  reader.close();
  heapBuffer.close();
  System.out.println("******************************************************************");
}
From source file:com.intuit.tank.persistence.databases.BucketDataItemTest.java
/**
 * Run the DescriptiveStatistics getStats() method test.
 *
 * @throws Exception
 *
 * @generatedBy CodePro at 9/10/14 10:32 AM
 */
@Test
public void testGetStats_1() throws Exception {
  BucketDataItem fixture = new BucketDataItem(1, new Date(), new DescriptiveStatistics());

  DescriptiveStatistics result = fixture.getStats();

  assertNotNull(result);
  assertEquals(
      "DescriptiveStatistics:\nn: 0\nmin: NaN\nmax: NaN\nmean: NaN\nstd dev: NaN\nmedian: NaN\nskewness: NaN\nkurtosis: NaN\n",
      result.toString());
  assertEquals(Double.NaN, result.getMax(), 1.0);
  assertEquals(Double.NaN, result.getVariance(), 1.0);
  assertEquals(Double.NaN, result.getMean(), 1.0);
  assertEquals(-1, result.getWindowSize());
  assertEquals(0.0, result.getSumsq(), 1.0);
  assertEquals(Double.NaN, result.getKurtosis(), 1.0);
  assertEquals(0.0, result.getSum(), 1.0);
  assertEquals(Double.NaN, result.getSkewness(), 1.0);
  assertEquals(Double.NaN, result.getPopulationVariance(), 1.0);
  assertEquals(Double.NaN, result.getStandardDeviation(), 1.0);
  assertEquals(Double.NaN, result.getGeometricMean(), 1.0);
  assertEquals(0L, result.getN());
  assertEquals(Double.NaN, result.getMin(), 1.0);
}
From source file:com.linkedin.pinot.perf.ForwardIndexReaderBenchmark.java
public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
  boolean signed = false;
  boolean isMmap = false;
  long start, end;
  boolean fullScan = true;
  boolean batchRead = true;
  boolean singleRead = true;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY,
      "benchmarking");
  com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader =
      new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs, numBits, signed);

  if (fullScan) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
    RandomAccessFile raf = new RandomAccessFile(file, "r");
    raf.getChannel().read(buffer);
    raf.close();
    int[] input = new int[numBits];
    int[] output = new int[32];
    int numBatches = (numDocs + 31) / 32;
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numBatches; i++) {
        for (int j = 0; j < numBits; j++) {
          input[j] = buffer.getInt(i * numBits * 4 + j * 4);
        }
        BitPacking.fastunpack(input, 0, output, 0, numBits);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println(" v2 full scan stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (singleRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // sequential read
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int value = reader.getInt(i);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println(" v2 sequential single read for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (batchRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    int batchSize = Math.min(5000, numDocs);
    int[] output = new int[batchSize];
    int[] rowIds = new int[batchSize];
    // sequential read
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      int rowId = 0;
      while (rowId < numDocs) {
        int length = Math.min(batchSize, numDocs - rowId);
        for (int i = 0; i < length; i++) {
          rowIds[i] = rowId + i;
        }
        reader.getIntBatch(rowIds, output, length);
        rowId = rowId + length;
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v2 sequential batch read stats for " + file.getName());
    System.out.println(
        stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  reader.close();
}
From source file:cc.redberry.core.tensor.BulkTestsForParser.java
@Test
public void testAllExpressionsInTestDirectory() {
  File testDirectory = new File("src/test");
  Counter c = new Counter(), m = new Counter();
  DescriptiveStatistics statistics = new DescriptiveStatistics();
  testParseRecurrently(testDirectory, c, m, statistics);
  System.out.println("Total number of lines containing parse(..): " + c.counter);
  System.out.println("Total number of matched and parsed lines: " + m.counter);
  System.out.println("Strings statistics: \n\t" + statistics.toString().replace("\n", "\n\t"));
  Assert.assertTrue((c.counter - m.counter) < 2);
}
From source file:gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int maxPartitions, long expectedHighWatermark) {
  long interval = DoubleMath.roundToLong((double) histogram.totalRecordCount / maxPartitions,
      RoundingMode.CEILING);
  int totalGroups = histogram.getGroups().size();
  log.info("Histogram total record count: " + histogram.totalRecordCount);
  log.info("Histogram total groups: " + totalGroups);
  log.info("maxPartitions: " + maxPartitions);
  log.info("interval: " + interval);

  List<HistogramGroup> groups = histogram.getGroups();
  List<String> partitionPoints = new ArrayList<>();
  DescriptiveStatistics statistics = new DescriptiveStatistics();

  int count = 0;
  HistogramGroup group;
  Iterator<HistogramGroup> it = groups.iterator();
  while (it.hasNext()) {
    group = it.next();
    if (count == 0) {
      // Add a new partition point;
      partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
    }

    // Move the candidate to a new bucket if the attempted total is 2x of interval
    if (count != 0 && count + group.count >= 2 * interval) {
      // Summarize current group
      statistics.addValue(count);
      // A step-in start
      partitionPoints.add(Utils.toDateTimeFormat(group.getKey(), DAY_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
      count = group.count;
    } else {
      // Add group into current partition
      count += group.count;
    }

    if (count >= interval) {
      // Summarize current group
      statistics.addValue(count);
      // A fresh start next time
      count = 0;
    }
  }

  // If the last group is used as the last partition point
  if (count == 0) {
    // Exchange the last partition point with global high watermark
    partitionPoints.set(partitionPoints.size() - 1, Long.toString(expectedHighWatermark));
  } else {
    // Summarize last group
    statistics.addValue(count);
    // Add global high watermark as last point
    partitionPoints.add(Long.toString(expectedHighWatermark));
  }

  log.info("Dynamic partitioning statistics: ");
  log.info("data: " + Arrays.toString(statistics.getValues()));
  log.info(statistics.toString());
  String specifiedPartitions = Joiner.on(",").join(partitionPoints);
  log.info("Calculated specified partitions: " + specifiedPartitions);
  return specifiedPartitions;
}
From source file:org.apache.gobblin.salesforce.SalesforceSource.java
String generateSpecifiedPartitions(Histogram histogram, int minTargetPartitionSize, int maxPartitions,
    long lowWatermark, long expectedHighWatermark) {
  int interval = computeTargetPartitionSize(histogram, minTargetPartitionSize, maxPartitions);
  int totalGroups = histogram.getGroups().size();
  log.info("Histogram total record count: " + histogram.totalRecordCount);
  log.info("Histogram total groups: " + totalGroups);
  log.info("maxPartitions: " + maxPartitions);
  log.info("interval: " + interval);

  List<HistogramGroup> groups = histogram.getGroups();
  List<String> partitionPoints = new ArrayList<>();
  DescriptiveStatistics statistics = new DescriptiveStatistics();

  int count = 0;
  HistogramGroup group;
  Iterator<HistogramGroup> it = groups.iterator();
  while (it.hasNext()) {
    group = it.next();
    if (count == 0) {
      // Add a new partition point;
      partitionPoints.add(
          Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
    }

    /**
     * Using greedy algorithm by keep adding group until it exceeds the interval size (x2)
     * Proof: Assuming nth group violates 2 x interval size, then all groups from 0th to (n-1)th, plus nth group,
     * will have total size larger or equal to interval x 2. Hence, we are saturating all intervals (with original size)
     * without leaving any unused space in between. We could choose x3,x4... but it is not space efficient.
     */
    if (count != 0 && count + group.count >= 2 * interval) {
      // Summarize current group
      statistics.addValue(count);
      // A step-in start
      partitionPoints.add(
          Utils.toDateTimeFormat(group.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
      count = group.count;
    } else {
      // Add group into current partition
      count += group.count;
    }

    if (count >= interval) {
      // Summarize current group
      statistics.addValue(count);
      // A fresh start next time
      count = 0;
    }
  }

  if (partitionPoints.isEmpty()) {
    throw new RuntimeException("Unexpected empty partition list");
  }

  if (count > 0) {
    // Summarize last group
    statistics.addValue(count);
  }

  // Add global high watermark as last point
  partitionPoints.add(Long.toString(expectedHighWatermark));

  log.info("Dynamic partitioning statistics: ");
  log.info("data: " + Arrays.toString(statistics.getValues()));
  log.info(statistics.toString());
  String specifiedPartitions = Joiner.on(",").join(partitionPoints);
  log.info("Calculated specified partitions: " + specifiedPartitions);
  return specifiedPartitions;
}