Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usages of the getBytes() method of org.apache.hadoop.io.BytesWritable.

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Get the data backing the BytesWritable.

Usage

From source file:com.yahoo.sketches.hive.quantiles.UnionStringsSketchUDAFTest.java

License:Apache License

/**
 * Runs the evaluator in {@code Mode.COMPLETE}: two single-item sketches ("a" and "b") are fed in
 * as serialized bytes, the terminated result is deserialized and checked, and after a reset the
 * evaluator is expected to terminate with {@code null}.
 */
@Test
public void complete1ModeDefaultK() throws Exception {
    final ObjectInspector[] argInspectors = { binaryInspector };
    final GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false);
    final GenericUDAFEvaluator evaluator = new UnionStringsSketchUDAF().getEvaluator(paramInfo);
    DataToDoublesSketchUDAFTest.checkResultInspector(evaluator.init(Mode.COMPLETE, argInspectors));

    @SuppressWarnings("unchecked")
    final ItemsUnionState<String> buffer = (ItemsUnionState<String>) evaluator.getNewAggregationBuffer();

    // Each input is its own sketch holding exactly one item, passed to the evaluator as bytes.
    for (final String item : new String[] { "a", "b" }) {
        final ItemsSketch<String> input = ItemsSketch.getInstance(comparator);
        input.update(item);
        evaluator.iterate(buffer, new Object[] { new BytesWritable(input.toByteArray(serDe)) });
    }

    final BytesWritable serialized = (BytesWritable) evaluator.terminate(buffer);
    final ItemsSketch<String> merged = ItemsSketch.getInstance(new NativeMemory(serialized.getBytes()),
            comparator, serDe);
    Assert.assertEquals(merged.getK(), 128);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), "a");
    Assert.assertEquals(merged.getMaxValue(), "b");

    // After a reset the buffer holds nothing, so terminate is expected to yield null.
    evaluator.reset(buffer);
    Assert.assertNull(evaluator.terminate(buffer));

    evaluator.close();
}

From source file:com.yahoo.sketches.hive.theta.EstimateSketchUDF.java

License:Apache License

/**
 * Returns the estimate unique count of sketch.
 * /*from w  w  w  . ja va2s  .c o  m*/
 * @param binarySketch sketch to be estimated passed in as bytes writable.
 * @param seed value used to build the sketch if different from the default 
 * @return the estimate of unique count from given sketch.
 */
public Double evaluate(final BytesWritable binarySketch, final long seed) {
    if (binarySketch == null) {
        return 0.0;
    }

    final byte[] serializedSketch = binarySketch.getBytes();

    if (serializedSketch.length <= EMPTY_SKETCH_SIZE_BYTES) {
        return 0.0;
    }

    return Sketch.wrap(new NativeMemory(serializedSketch), seed).getEstimate();
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDF.java

License:Apache License

/**
 * Computes the set difference of two sketches (first A-not-B second) using the given hash seed.
 * The sketches may come from the same or from different columns. An input that is {@code null}
 * or has zero length is handed to the set operation as a {@code null} sketch.
 *
 * @param firstSketchBytes
 *          first sketch to be included.
 * @param secondSketchBytes
 *          second sketch to be excluded.
 * @param hashSeed
 *          Only required if input sketches were constructed using an update seed that was not the default.
 * @return resulting sketch of exclusion.
 */
public BytesWritable evaluate(final BytesWritable firstSketchBytes, final BytesWritable secondSketchBytes,
        final long hashSeed) {

    final boolean hasFirst = firstSketchBytes != null && firstSketchBytes.getLength() > 0;
    final Sketch included = hasFirst
            ? Sketch.wrap(new NativeMemory(firstSketchBytes.getBytes()), hashSeed)
            : null;

    final boolean hasSecond = secondSketchBytes != null && secondSketchBytes.getLength() > 0;
    final Sketch excluded = hasSecond
            ? Sketch.wrap(new NativeMemory(secondSketchBytes.getBytes()), hashSeed)
            : null;

    final AnotB difference = SetOperation.builder().setSeed(hashSeed).buildANotB();
    difference.update(included, excluded);

    // Copy the serialized result into a fresh writable.
    final byte[] serialized = difference.getResult().toByteArray();
    final BytesWritable output = new BytesWritable();
    output.set(serialized, 0, serialized.length);
    return output;
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDFTest.java

License:Apache License

/** Two {@code null} inputs must still produce a deserializable sketch whose estimate is zero. */
@Test
public void evaluateNull() {
    final ExcludeSketchUDF udf = new ExcludeSketchUDF();
    final BytesWritable serialized = udf.evaluate(null, null);

    final Memory resultMemory = new NativeMemory(serialized.getBytes());
    final Sketch decoded = Sketches.heapifySketch(resultMemory);

    assertEquals(0.0, decoded.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDFTest.java

License:Apache License

/** Two zero-length writables must still produce a deserializable sketch whose estimate is zero. */
@Test
public void evaluateEmpty() {
    final ExcludeSketchUDF udf = new ExcludeSketchUDF();
    final BytesWritable serialized = udf.evaluate(new BytesWritable(), new BytesWritable());

    final Memory resultMemory = new NativeMemory(serialized.getBytes());
    final Sketch decoded = Sketches.heapifySketch(resultMemory);

    assertEquals(0.0, decoded.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDFTest.java

License:Apache License

/**
 * Excludes a sketch of the values 100..127 from a sketch of the values 0..127 and expects an
 * estimate of 100 for the remainder.
 */
@Test
public void evaluateValidSketch() {
    final ExcludeSketchUDF udf = new ExcludeSketchUDF();

    final UpdateSketch minuend = Sketches.updateSketchBuilder().build(1024);
    final UpdateSketch subtrahend = Sketches.updateSketchBuilder().build(1024);
    for (int value = 0; value < 128; value++) {
        minuend.update(value);
        // The excluded sketch only covers the top 28 values.
        if (value >= 100) {
            subtrahend.update(value);
        }
    }

    final BytesWritable firstInput = new BytesWritable(minuend.compact(true, null).toByteArray());
    final BytesWritable secondInput = new BytesWritable(subtrahend.compact(true, null).toByteArray());

    final BytesWritable serialized = udf.evaluate(firstInput, secondInput);
    final Sketch decoded = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()));

    assertEquals(100.0, decoded.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDFTest.java

License:Apache License

/**
 * Same scenario as the seedless exclusion test (0..127 minus 100..127, expecting 100), but the
 * three-argument overload is called with {@code DEFAULT_UPDATE_SEED} passed explicitly.
 */
@Test
public void evaluateValidSketchWithDefaultSeed() {
    final ExcludeSketchUDF udf = new ExcludeSketchUDF();

    final UpdateSketch minuend = Sketches.updateSketchBuilder().build(1024);
    final UpdateSketch subtrahend = Sketches.updateSketchBuilder().build(1024);
    for (int value = 0; value < 128; value++) {
        minuend.update(value);
        // The excluded sketch only covers the top 28 values.
        if (value >= 100) {
            subtrahend.update(value);
        }
    }

    final BytesWritable firstInput = new BytesWritable(minuend.compact(true, null).toByteArray());
    final BytesWritable secondInput = new BytesWritable(subtrahend.compact(true, null).toByteArray());

    final BytesWritable serialized = udf.evaluate(firstInput, secondInput, DEFAULT_UPDATE_SEED);
    final Sketch decoded = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()));

    assertEquals(100.0, decoded.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDFTest.java

License:Apache License

/**
 * Builds both input sketches with a non-default seed, passes that seed to the UDF and to the
 * deserialization of the result, and expects an estimate of 100 (0..127 minus 100..127).
 */
@Test
public void evaluateValidSketchWithCustomSeed() {
    final ExcludeSketchUDF udf = new ExcludeSketchUDF();

    final long seed = 1;
    final UpdateSketch minuend = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    final UpdateSketch subtrahend = Sketches.updateSketchBuilder().setSeed(seed).build(1024);
    for (int value = 0; value < 128; value++) {
        minuend.update(value);
        // The excluded sketch only covers the top 28 values.
        if (value >= 100) {
            subtrahend.update(value);
        }
    }

    final BytesWritable firstInput = new BytesWritable(minuend.compact(true, null).toByteArray());
    final BytesWritable secondInput = new BytesWritable(subtrahend.compact(true, null).toByteArray());

    final BytesWritable serialized = udf.evaluate(firstInput, secondInput, seed);
    final Sketch decoded = Sketches.heapifySketch(new NativeMemory(serialized.getBytes()), seed);

    assertEquals(100.0, decoded.getEstimate());
}

From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDF.java

License:Apache License

/**
 * Main logic called by hive if sketchSize is also passed in. Computes the
 * intersection of two sketches of same or different column.
 * /*from  ww w  . ja v a  2  s . com*/
 * @param firstSketchBytes
 *          first sketch to be intersected.
 * @param secondSketchBytes
 *          second sketch to be intersected.
 * @param hashSeed
 *          Only required if input sketches were constructed using an update seed that was not the default.
 * @return resulting sketch of intersection.
 */
public BytesWritable evaluate(final BytesWritable firstSketchBytes, final BytesWritable secondSketchBytes,
        final long hashSeed) {
    Sketch firstSketch = null;
    if (firstSketchBytes != null && firstSketchBytes.getLength() > 0) {
        firstSketch = Sketch.wrap(new NativeMemory(firstSketchBytes.getBytes()), hashSeed);
    }

    Sketch secondSketch = null;
    if (secondSketchBytes != null && secondSketchBytes.getLength() > 0) {
        secondSketch = Sketch.wrap(new NativeMemory(secondSketchBytes.getBytes()), hashSeed);
    }

    final Intersection intersect = SetOperation.builder().setSeed(hashSeed).buildIntersection();
    intersect.update(firstSketch);
    intersect.update(secondSketch);
    return new BytesWritable(intersect.getResult().toByteArray());
}

From source file:com.yahoo.sketches.hive.theta.IntersectSketchUDFTest.java

License:Apache License

/** Two {@code null} inputs must still produce a deserializable sketch whose estimate is zero. */
@Test
public void evaluateNull() {
    final IntersectSketchUDF udf = new IntersectSketchUDF();
    final BytesWritable serialized = udf.evaluate(null, null);

    final Memory resultMemory = new NativeMemory(serialized.getBytes());
    final Sketch decoded = Sketches.heapifySketch(resultMemory);

    assertEquals(0.0, decoded.getEstimate());
}