Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

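Get the data backing the BytesWritable. The returned array is the internal buffer and may be longer than the valid data; only the first getLength() bytes are meaningful.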

Usage

From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java

License:Apache License

/**
 * Intersecting two freshly constructed (empty) {@code BytesWritable} inputs
 * must produce a sketch whose estimate is zero.
 */
@Test
public void evaluateEmpty() {
    final IntersectSketchUDF udf = new IntersectSketchUDF();
    final BytesWritable serialized = udf.evaluate(new BytesWritable(), new BytesWritable());

    // Rebuild the sketch on-heap from the bytes returned by the UDF.
    final Sketch rebuilt = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()));

    assertEquals(0.0, rebuilt.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java

License:Apache License

/**
 * Intersects a sketch fed the values 0..127 with a sketch fed the values
 * 100..127 and checks that the estimate of the result is 28.
 */
@Test
public void evaluateValidSketch() {
    final IntersectSketchUDF udf = new IntersectSketchUDF();

    // First operand: values 0..127.
    final UpdateSketch wideSketch = Sketches.updateSketchBuilder().build(1024);
    int value = 0;
    while (value < 128) {
        wideSketch.update(value);
        value++;
    }

    // Second operand: values 100..127, all of which are also in the first operand.
    final UpdateSketch narrowSketch = Sketches.updateSketchBuilder().build(1024);
    value = 100;
    while (value < 128) {
        narrowSketch.update(value);
        value++;
    }

    final BytesWritable left = new BytesWritable(wideSketch.compact().toByteArray());
    final BytesWritable right = new BytesWritable(narrowSketch.compact().toByteArray());

    final BytesWritable intersected = udf.evaluate(left, right);
    final Memory wrapped = new NativeMemory(intersected.getBytes());
    final Sketch rebuilt = Sketches.heapifySketch(wrapped);

    assertEquals(28.0, rebuilt.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java

License:Apache License

/**
 * Same scenario as the default-seed intersection test, but both input sketches,
 * the UDF call and the final heapify all use the explicit seed {@code 1}.
 * The estimate of the intersection is expected to be 28.
 */
@Test
public void evaluateValidSketchExpicitSeed() {
    final IntersectSketchUDF udf = new IntersectSketchUDF();
    final long seed = 1;

    // First operand: values 0..127.
    final UpdateSketch wideSketch = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    int value = 0;
    while (value < 128) {
        wideSketch.update(value);
        value++;
    }

    // Second operand: values 100..127.
    final UpdateSketch narrowSketch = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    value = 100;
    while (value < 128) {
        narrowSketch.update(value);
        value++;
    }

    final BytesWritable left = new BytesWritable(wideSketch.compact().toByteArray());
    final BytesWritable right = new BytesWritable(narrowSketch.compact().toByteArray());

    final BytesWritable intersected = udf.evaluate(left, right, seed);
    final Memory wrapped = new NativeMemory(intersected.getBytes());
    final Sketch rebuilt = Sketches.heapifySketch(wrapped, seed);

    assertEquals(28.0, rebuilt.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.SampleSketchUDF.java

License:Apache License

/**
 * Main logic called by Hive: produces a new sketch from the original using the
 * specified size and sampling probability.
 *
 * @param binarySketch
 *          sketch to be sampled, passed in as a BytesWritable.
 * @param sketchSize
 *          Size to use for the new sketch.
 *          This must be a power of 2 and larger than 16. If zero, DEFAULT is used.
 * @param probability
 *          The sampling probability to use for the new sketch.
 *          Should be greater than zero and less than or equal to 1.0
 * @return The sampled sketch encoded as a BytesWritable, or {@code null} when
 *         {@code binarySketch} is {@code null} or holds 8 or fewer valid bytes.
 */
public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) {

    // Null checks
    if (binarySketch == null) {
        return null;
    }

    // getBytes() exposes the backing buffer, whose length is the buffer capacity and can
    // exceed the valid payload. getLength() is the number of bytes that actually belong to
    // the serialized sketch, so the size guard must use it.
    if (binarySketch.getLength() <= 8) {
        return null;
    }

    // NOTE(review): any bytes past getLength() in the backing buffer are still wrapped
    // below; this presumably is harmless because the sketch image is self-describing -
    // confirm against the sketches library.
    byte[] serializedSketch = binarySketch.getBytes();

    // The builder will catch errors with improper sketchSize or probability
    Union union = SetOperation.builder().setP(probability).buildUnion(sketchSize);

    union.update(new NativeMemory(serializedSketch)); // Union can accept Memory object directly

    Sketch intermediateSketch = union.getResult(false, null); // to CompactSketch(unordered, on-heap)
    byte[] resultSketch = intermediateSketch.toByteArray();

    BytesWritable result = new BytesWritable();
    result.set(resultSketch, 0, resultSketch.length);

    return result;
}

From source file:com.yahoo.sketches.hive.theta.UnionEvaluator.java

License:Apache License

/**
 * Merges a partial aggregation result into the given aggregation buffer.
 *
 * <p>A {@code null} partial is ignored. Otherwise the buffer is treated as a
 * {@code UnionState}, initialized from the partial if it has not been initialized yet,
 * and updated with the bytes of the partial's {@code SKETCH_FIELD} struct field.
 *
 * @param agg aggregation buffer; cast to {@code UnionState}
 * @param partial partial result struct, may be {@code null}
 * @throws HiveException declared by the overridden method
 */
@Override
public void merge(final @SuppressWarnings("deprecation") AggregationBuffer agg, final Object partial)
        throws HiveException {
    if (partial != null) {
        final UnionState unionState = (UnionState) agg;
        if (!unionState.isInitialized()) {
            initializeState(unionState, partial);
        }
        // Pull the serialized sketch out of the partial struct via the object inspector.
        final Object sketchField = intermediateObjectInspector.getStructFieldData(partial,
                intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD));
        final BytesWritable sketchBytes = (BytesWritable) sketchField;
        unionState.update(new NativeMemory(sketchBytes.getBytes()));
    }
}

From source file:com.yahoo.sketches.hive.theta.UnionSketchUDF.java

License:Apache License

/**
 * Main logic called by hive if sketchSize is also passed in. Union two
 * sketches of same or different column.
 * /*  w  w w. j  a  v a2  s .  c  o  m*/
 * @param firstSketch
 *          first sketch to be unioned.
 * @param secondSketch
 *          second sketch to be unioned.
 * @param sketchSize
 *          final output unioned sketch size.
 *          This must be a power of 2 and larger than 16.
 * @param seed using the seed is not recommended unless you really know why you need it.
 * @return resulting sketch of union.
 */
public BytesWritable evaluate(final BytesWritable firstSketch, final BytesWritable secondSketch,
        final int sketchSize, final long seed) {

    final Union union = SetOperation.builder().setSeed(seed).buildUnion(sketchSize);

    if ((firstSketch != null) && (firstSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) {
        union.update(new NativeMemory(firstSketch.getBytes()));
    }

    if ((secondSketch != null) && (secondSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) {
        union.update(new NativeMemory(secondSketch.getBytes()));
    }

    return new BytesWritable(union.getResult().toByteArray());
}

From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java

License:Apache License

/**
 * Passing {@code null} for both inputs must still return a serialized sketch,
 * and that sketch's estimate must be zero.
 */
@Test
public void evaluateNull() {
    final UnionSketchUDF udf = new UnionSketchUDF();
    final BytesWritable serialized = udf.evaluate(null, null);

    // Rebuild the sketch on-heap from the bytes returned by the UDF.
    final Sketch rebuilt = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()));

    Assert.assertEquals(rebuilt.getEstimate(), 0.0);
}

From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java

License:Apache License

/**
 * Unioning two freshly constructed (empty) {@code BytesWritable} inputs must
 * produce a sketch whose estimate is zero.
 */
@Test
public void testEvaluateEmpty() {
    final UnionSketchUDF udf = new UnionSketchUDF();
    final BytesWritable serialized = udf.evaluate(new BytesWritable(), new BytesWritable());

    // Rebuild the sketch on-heap from the bytes returned by the UDF.
    final Sketch rebuilt = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()));

    Assert.assertEquals(rebuilt.getEstimate(), 0.0);
}

From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java

License:Apache License

/**
 * Unions a sketch fed the values 0..127 with a sketch fed the values 100..255
 * and checks that the estimate of the result is 256.
 */
@Test
public void evaluateValidSketch() {
    final UnionSketchUDF udf = new UnionSketchUDF();

    // First operand: values 0..127.
    final UpdateSketch lowSketch = Sketches.updateSketchBuilder().build(1024);
    int value = 0;
    while (value < 128) {
        lowSketch.update(value);
        value++;
    }

    // Second operand: values 100..255, overlapping the first on 100..127.
    final UpdateSketch highSketch = Sketches.updateSketchBuilder().build(1024);
    value = 100;
    while (value < 256) {
        highSketch.update(value);
        value++;
    }

    final BytesWritable left = new BytesWritable(lowSketch.compact().toByteArray());
    final BytesWritable right = new BytesWritable(highSketch.compact().toByteArray());

    final BytesWritable unioned = udf.evaluate(left, right);
    final Memory wrapped = new NativeMemory(unioned.getBytes());
    final Sketch rebuilt = Sketches.heapifySketch(wrapped);

    Assert.assertEquals(256.0, rebuilt.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.UnionSketchUDFTest.java

License:Apache License

/**
 * Unions two seeded sketches (values 0..127 and 100..255) through the overload
 * that takes an explicit sketch size of 128 and seed of 1. The estimate must be
 * within 2% of 256 and the result must retain at most 128 entries.
 */
@Test
public void evaluateValidSketchExplicitSizeAndSeed() {
    final UnionSketchUDF udf = new UnionSketchUDF();
    final long seed = 1;

    // First operand: values 0..127.
    final UpdateSketch lowSketch = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    int value = 0;
    while (value < 128) {
        lowSketch.update(value);
        value++;
    }

    // Second operand: values 100..255, overlapping the first on 100..127.
    final UpdateSketch highSketch = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    value = 100;
    while (value < 256) {
        highSketch.update(value);
        value++;
    }

    final BytesWritable left = new BytesWritable(lowSketch.compact().toByteArray());
    final BytesWritable right = new BytesWritable(highSketch.compact().toByteArray());

    final BytesWritable unioned = udf.evaluate(left, right, 128, seed);
    final Memory wrapped = new NativeMemory(unioned.getBytes());
    final Sketch rebuilt = Sketches.heapifySketch(wrapped, seed);

    Assert.assertEquals(256.0, rebuilt.getEstimate(), 256 * 0.02);
    Assert.assertTrue(rebuilt.getRetainedEntries(true) <= 128.0);
}