List of usage examples for org.apache.hadoop.io.BytesWritable.getBytes()
@Override public byte[] getBytes()
From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java
License:Apache License
/**
 * Passes two freshly constructed (empty) {@code BytesWritable} inputs to the
 * intersection UDF and checks that the deserialized result estimates 0.0.
 */
@Test
public void evaluateEmpty() {
  final IntersectSketchUDF udf = new IntersectSketchUDF();

  final BytesWritable serialized = udf.evaluate(new BytesWritable(), new BytesWritable());
  final Memory wrapped = new NativeMemory(serialized.getBytes());
  final Sketch intersection = Sketches.heapifySketch(wrapped);

  assertEquals(0.0, intersection.getEstimate());
}
From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java
License:Apache License
@Test public void evaluateValidSketch() { IntersectSketchUDF testObject = new IntersectSketchUDF(); UpdateSketch sketch1 = Sketches.updateSketchBuilder().build(1024); for (int i = 0; i < 128; i++) { sketch1.update(i);//w w w. ja va 2s .co m } UpdateSketch sketch2 = Sketches.updateSketchBuilder().build(1024); for (int i = 100; i < 128; i++) { sketch2.update(i); } BytesWritable input1 = new BytesWritable(sketch1.compact().toByteArray()); BytesWritable input2 = new BytesWritable(sketch2.compact().toByteArray()); BytesWritable output = testObject.evaluate(input1, input2); Sketch result = Sketches.heapifySketch(new NativeMemory(output.getBytes())); assertEquals(28.0, result.getEstimate()); }
From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java
License:Apache License
/**
 * Same scenario as the default-seed intersection test, but both input
 * sketches, the UDF call and the final heapify all use an explicit seed.
 * The inputs share the values 100..127, so the expected estimate is 28.0.
 */
@Test
public void evaluateValidSketchExpicitSeed() {
  final IntersectSketchUDF udf = new IntersectSketchUDF();
  final long seed = 1;

  final UpdateSketch wide = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
  for (int value = 0; value < 128; value++) {
    wide.update(value);
  }

  final UpdateSketch narrow = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
  for (int value = 100; value < 128; value++) {
    narrow.update(value);
  }

  final BytesWritable wideBytes = new BytesWritable(wide.compact().toByteArray());
  final BytesWritable narrowBytes = new BytesWritable(narrow.compact().toByteArray());

  final BytesWritable serialized = udf.evaluate(wideBytes, narrowBytes, seed);
  final Memory wrapped = new NativeMemory(serialized.getBytes());
  final Sketch intersection = Sketches.heapifySketch(wrapped, seed);

  assertEquals(28.0, intersection.getEstimate());
}
From source file:com.yahoo.sketches.hive.theta.SampleSketchUDF.java
License:Apache License
/** * Main logic called by hive, produces new sketch from original using * specified size and sampling probablility. * // w ww . j a v a2s.c om * @param binarySketch * sketch to be sampled passed in as bytes writable. * @param sketchSize * Size to use for the new sketch. * This must be a power of 2 and larger than 16. If zero, DEFAULT is used. * @param probability * The sampling probability to use for the new sketch. * Should be greater than zero and less than or equal to 1.0 * @return The sampled sketch encoded as a BytesWritable */ public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) { // Null checks if (binarySketch == null) { return null; } byte[] serializedSketch = binarySketch.getBytes(); if (serializedSketch.length <= 8) { return null; } // The builder will catch errors with improper sketchSize or probability Union union = SetOperation.builder().setP(probability).buildUnion(sketchSize); union.update(new NativeMemory(serializedSketch)); //Union can accept Memory object directly Sketch intermediateSketch = union.getResult(false, null); //to CompactSketch(unordered, on-heap) byte[] resultSketch = intermediateSketch.toByteArray(); BytesWritable result = new BytesWritable(); result.set(resultSketch, 0, resultSketch.length); return result; }
From source file:com.yahoo.sketches.hive.theta.UnionEvaluator.java
License:Apache License
@Override public void merge(final @SuppressWarnings("deprecation") AggregationBuffer agg, final Object partial) throws HiveException { if (partial == null) return;//from w w w . j ava2s . c om final UnionState state = (UnionState) agg; if (!state.isInitialized()) { initializeState(state, partial); } final BytesWritable serializedSketch = (BytesWritable) intermediateObjectInspector .getStructFieldData(partial, intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD)); state.update(new NativeMemory(serializedSketch.getBytes())); }
From source file:com.yahoo.sketches.hive.theta.UnionSketchUDF.java
License:Apache License
/** * Main logic called by hive if sketchSize is also passed in. Union two * sketches of same or different column. * /* w w w. j a v a2 s . c o m*/ * @param firstSketch * first sketch to be unioned. * @param secondSketch * second sketch to be unioned. * @param sketchSize * final output unioned sketch size. * This must be a power of 2 and larger than 16. * @param seed using the seed is not recommended unless you really know why you need it. * @return resulting sketch of union. */ public BytesWritable evaluate(final BytesWritable firstSketch, final BytesWritable secondSketch, final int sketchSize, final long seed) { final Union union = SetOperation.builder().setSeed(seed).buildUnion(sketchSize); if ((firstSketch != null) && (firstSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) { union.update(new NativeMemory(firstSketch.getBytes())); } if ((secondSketch != null) && (secondSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) { union.update(new NativeMemory(secondSketch.getBytes())); } return new BytesWritable(union.getResult().toByteArray()); }
From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java
License:Apache License
/**
 * Calls the union UDF with two {@code null} inputs and checks that the
 * deserialized result estimates 0.0.
 */
@Test
public void evaluateNull() {
  final UnionSketchUDF udf = new UnionSketchUDF();

  final BytesWritable serialized = udf.evaluate(null, null);
  final Memory wrapped = new NativeMemory(serialized.getBytes());
  final Sketch union = Sketches.heapifySketch(wrapped);

  Assert.assertEquals(union.getEstimate(), 0.0);
}
From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java
License:Apache License
/**
 * Calls the union UDF with two freshly constructed (empty)
 * {@code BytesWritable} inputs and checks that the deserialized result
 * estimates 0.0.
 */
@Test
public void testEvaluateEmpty() {
  final UnionSketchUDF udf = new UnionSketchUDF();

  final BytesWritable serialized = udf.evaluate(new BytesWritable(), new BytesWritable());
  final Memory wrapped = new NativeMemory(serialized.getBytes());
  final Sketch union = Sketches.heapifySketch(wrapped);

  Assert.assertEquals(union.getEstimate(), 0.0);
}
From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java
License:Apache License
@Test public void evaluateValidSketch() { UnionSketchUDF testObject = new UnionSketchUDF(); UpdateSketch sketch1 = Sketches.updateSketchBuilder().build(1024); for (int i = 0; i < 128; i++) { sketch1.update(i);//from w ww . j a v a2s . c o m } UpdateSketch sketch2 = Sketches.updateSketchBuilder().build(1024); for (int i = 100; i < 256; i++) { sketch2.update(i); } BytesWritable input1 = new BytesWritable(sketch1.compact().toByteArray()); BytesWritable input2 = new BytesWritable(sketch2.compact().toByteArray()); BytesWritable output = testObject.evaluate(input1, input2); Sketch result = Sketches.heapifySketch(new NativeMemory(output.getBytes())); Assert.assertEquals(256.0, result.getEstimate()); }
From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java
License:Apache License
@Test public void evaluateValidSketchExplicitSizeAndSeed() { UnionSketchUDF testObject = new UnionSketchUDF(); final long seed = 1; UpdateSketch sketch1 = Sketches.updateSketchBuilder().setSeed(seed).build(1024); for (int i = 0; i < 128; i++) { sketch1.update(i);//from ww w . j a v a 2s .c om } UpdateSketch sketch2 = Sketches.updateSketchBuilder().setSeed(seed).build(1024); for (int i = 100; i < 256; i++) { sketch2.update(i); } BytesWritable input1 = new BytesWritable(sketch1.compact().toByteArray()); BytesWritable input2 = new BytesWritable(sketch2.compact().toByteArray()); BytesWritable output = testObject.evaluate(input1, input2, 128, seed); Sketch result = Sketches.heapifySketch(new NativeMemory(output.getBytes()), seed); Assert.assertEquals(256.0, result.getEstimate(), 256 * 0.02); Assert.assertTrue(result.getRetainedEntries(true) <= 128.0); }