Example usage of the org.apache.hadoop.util.bloom.DynamicBloomFilter constructor DynamicBloomFilter()

List of usage examples for the org.apache.hadoop.util.bloom.DynamicBloomFilter constructor DynamicBloomFilter()

Introduction

On this page you can find example usages of the no-argument constructor DynamicBloomFilter() of org.apache.hadoop.util.bloom.DynamicBloomFilter.

Prototype

public DynamicBloomFilter() 

Source Link

Document

Zero-argument constructor, used for serialization.

Usage

From source file:hivemall.sketch.bloom.BloomAndUDF.java

License:Apache License

/**
 * Combines two serialized bloom filters by invoking {@code and} on the first with the second.
 *
 * @param bloom1Str serialized form of the first filter, may be {@code null}
 * @param bloom2Str serialized form of the second filter, may be {@code null}
 * @return a new {@link Text} holding the serialized first filter after the {@code and} call,
 *         or {@code null} if either argument is {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing the
 *         inputs or serializing the result
 */
@Nullable
public Text evaluate(@Nullable Text bloom1Str, @Nullable Text bloom2Str) throws HiveException {
    if (bloom1Str == null) {
        return null;
    }
    if (bloom2Str == null) {
        return null;
    }

    try {
        final Filter left = BloomFilterUtils.deserialize(bloom1Str, new DynamicBloomFilter());
        final Filter right = BloomFilterUtils.deserialize(bloom2Str, new DynamicBloomFilter());

        // and() is invoked on the first filter, which is then the one written out.
        left.and(right);

        return BloomFilterUtils.serialize(left, new Text());
    } catch (IOException e) {
        throw new HiveException(e);
    }
}

From source file:hivemall.sketch.bloom.BloomAndUDFTest.java

License:Apache License

/**
 * Checks that {@code BloomAndUDF.evaluate} produces the same serialized filter as calling
 * {@code and} directly on the two filters, and runs the {@code assertNotContains} checks on
 * the deserialized result.
 */
@Test
public void test() throws IOException, HiveException {
    final DynamicBloomFilter first = createBloomFilter(1L, 10000);
    final DynamicBloomFilter second = createBloomFilter(2L, 10000);

    // Serialize both inputs before 'first' is modified by and() below.
    final Text firstSerialized = BloomFilterUtils.serialize(first, new Text());
    final Text secondSerialized = BloomFilterUtils.serialize(second, new Text());

    first.and(second);
    final Text expected = BloomFilterUtils.serialize(first, new Text());

    final BloomAndUDF udf = new BloomAndUDF();
    final Text actual = udf.evaluate(firstSerialized, secondSerialized);
    Assert.assertEquals(expected, actual);

    final DynamicBloomFilter roundTripped =
            BloomFilterUtils.deserialize(actual, new DynamicBloomFilter());
    assertNotContains(first, roundTripped, 1L, 10000);
    assertNotContains(first, roundTripped, 2L, 10000);
}

From source file:hivemall.sketch.bloom.BloomContainsUDF.java

License:Apache License

/**
 * Tests whether a key is a member of a serialized bloom filter.
 *
 * <p>The most recently deserialized filter is cached together with a copy of its serialized
 * form, so consecutive calls that pass an equal {@code bloomStr} skip deserialization.
 *
 * @param bloomStr serialized bloom filter, may be {@code null}
 * @param keyStr key to look up, may be {@code null}
 * @return the result of the membership test, or {@code null} if either argument is
 *         {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing the filter
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable Text keyStr) throws HiveException {
    // Guard on the keyStr argument; checking the 'key' field here would let a null keyStr
    // through to the dereference below.
    if (bloomStr == null || keyStr == null) {
        return null;
    }

    final Filter bloom;
    // The cache must be keyed on the serialized filter, not on the lookup key; otherwise
    // a repeated key with a different filter would be answered from a stale filter.
    // NOTE(review): the 'prevKey' field now remembers the previous serialized filter;
    // consider renaming it, and confirm no other method relies on its old meaning.
    if (prevFilter != null && prevKey != null && prevKey.equals(bloomStr)) {
        bloom = prevFilter;
    } else {
        try {
            bloom = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
        } catch (IOException e) {
            throw new HiveException(e);
        }
        // Keep a copy: the caller may reuse and overwrite the same Text instance between
        // calls, which would make a stored reference compare equal to itself forever.
        this.prevKey = new Text(bloomStr);
        this.prevFilter = bloom;
    }

    // copyBytes() yields exactly getLength() bytes, whereas getBytes() exposes the backing
    // array, whose tail may contain bytes that are not part of the current value.
    key.set(keyStr.copyBytes(), 1.0d);

    return Boolean.valueOf(bloom.membershipTest(key));
}

From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java

License:Apache License

/**
 * Round-trips a populated {@link DynamicBloomFilter} through {@code BloomFilterUtils}
 * serialization and checks that every key added beforehand passes the membership test on
 * the deserialized filter.
 */
@Test
public void testDynamicBloomFilterSerde() throws IOException {
    final Key probe = new Key();

    final DynamicBloomFilter original = BloomFilterUtils.newDynamicBloomFilter(300000);
    final Random writer = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(writer.nextGaussian());
        probe.set(hex.getBytes(), 1.0);
        original.add(probe);
    }

    final DynamicBloomFilter restored = BloomFilterUtils.deserialize(
        BloomFilterUtils.serialize(original), new DynamicBloomFilter());

    // Same seed as the writer, so the generator replays the identical key sequence.
    final Random reader = new Random(43L);
    for (int remaining = 1000000; remaining > 0; remaining--) {
        final String hex = Double.toHexString(reader.nextGaussian());
        probe.set(hex.getBytes(), 1.0);
        Assert.assertTrue(restored.membershipTest(probe));
    }
}

From source file:hivemall.sketch.bloom.BloomNotUDF.java

License:Apache License

/**
 * Deserializes a bloom filter, invokes {@code not} on it and serializes the result.
 *
 * @param bloomStr serialized bloom filter, may be {@code null}
 * @return a new {@link Text} holding the serialized filter after the {@code not} call, or
 *         {@code null} if {@code bloomStr} is {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing the
 *         input or serializing the result
 */
@Nullable
public Text evaluate(@Nullable Text bloomStr) throws HiveException {
    if (bloomStr != null) {
        try {
            final Filter filter =
                    BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
            filter.not();
            return BloomFilterUtils.serialize(filter, new Text());
        } catch (IOException e) {
            throw new HiveException(e);
        }
    }
    return null;
}

From source file:hivemall.sketch.bloom.BloomNotUDFTest.java

License:Apache License

/**
 * Checks that {@code BloomNotUDF.evaluate} produces a filter whose string form equals that
 * of the input filter after calling {@code not} on it directly.
 */
@Test
public void test() throws IOException, HiveException {
    final DynamicBloomFilter filter = createBloomFilter(1L, 10000);
    final Text serialized = BloomFilterUtils.serialize(filter, new Text());

    final BloomNotUDF udf = new BloomNotUDF();
    final Text udfOutput = udf.evaluate(serialized);
    final DynamicBloomFilter actual =
            BloomFilterUtils.deserialize(udfOutput, new DynamicBloomFilter());

    // Apply not() to the local filter only after the UDF has consumed the serialized input.
    filter.not();

    final String expectedText = filter.toString();
    final String actualText = actual.toString();
    Assert.assertEquals(expectedText, actualText);
}

From source file:hivemall.sketch.bloom.BloomOrUDF.java

License:Apache License

/**
 * Combines two serialized bloom filters by invoking {@code or} on the first with the second.
 *
 * @param bloom1Str serialized form of the first filter, may be {@code null}
 * @param bloom2Str serialized form of the second filter, may be {@code null}
 * @return a new {@link Text} holding the serialized first filter after the {@code or} call,
 *         or {@code null} if either argument is {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing the
 *         inputs or serializing the result
 */
@Nullable
public Text evaluate(@Nullable Text bloom1Str, @Nullable Text bloom2Str) throws HiveException {
    if (bloom1Str == null) {
        return null;
    }
    if (bloom2Str == null) {
        return null;
    }

    try {
        final Filter left = BloomFilterUtils.deserialize(bloom1Str, new DynamicBloomFilter());
        final Filter right = BloomFilterUtils.deserialize(bloom2Str, new DynamicBloomFilter());

        // or() is invoked on the first filter, which is then the one written out.
        left.or(right);

        return BloomFilterUtils.serialize(left, new Text());
    } catch (IOException e) {
        throw new HiveException(e);
    }
}

From source file:hivemall.sketch.bloom.BloomOrUDFTest.java

License:Apache License

/**
 * Checks that {@code BloomOrUDF.evaluate} produces the same serialized filter as calling
 * {@code or} directly on the two filters, and runs the four-argument {@code assertEquals}
 * helper checks on the deserialized result.
 */
@Test
public void test() throws IOException, HiveException {
    final DynamicBloomFilter first = createBloomFilter(1L, 10000);
    final DynamicBloomFilter second = createBloomFilter(2L, 10000);

    // Serialize both inputs before 'first' is modified by or() below.
    final Text firstSerialized = BloomFilterUtils.serialize(first, new Text());
    final Text secondSerialized = BloomFilterUtils.serialize(second, new Text());

    first.or(second);
    final Text expected = BloomFilterUtils.serialize(first, new Text());

    final BloomOrUDF udf = new BloomOrUDF();
    final Text actual = udf.evaluate(firstSerialized, secondSerialized);
    Assert.assertEquals(expected, actual);

    final DynamicBloomFilter roundTripped =
            BloomFilterUtils.deserialize(actual, new DynamicBloomFilter());
    assertEquals(first, roundTripped, 1L, 10000);
    assertEquals(first, roundTripped, 2L, 10000);
}