List of usage examples for the org.apache.hadoop.util.bloom.DynamicBloomFilter no-argument constructor DynamicBloomFilter()
public DynamicBloomFilter()
From source file:hivemall.sketch.bloom.BloomAndUDF.java
License:Apache License
/**
 * Intersects two serialized bloom filters and returns the serialized result.
 *
 * <p>Both arguments are deserialized into {@link DynamicBloomFilter} instances, the second is
 * {@code and}-ed into the first, and the first is serialized into a fresh {@link Text}.
 *
 * @param bloom1Str serialized form of the first filter; may be {@code null}
 * @param bloom2Str serialized form of the second filter; may be {@code null}
 * @return the serialized combined filter, or {@code null} when either argument is {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while (de)serializing
 */
@Nullable
public Text evaluate(@Nullable Text bloom1Str, @Nullable Text bloom2Str) throws HiveException {
    if (bloom1Str == null || bloom2Str == null) {
        return null;
    }

    try {
        final Filter left = BloomFilterUtils.deserialize(bloom1Str, new DynamicBloomFilter());
        final Filter right = BloomFilterUtils.deserialize(bloom2Str, new DynamicBloomFilter());
        // The combination mutates `left` in place; `right` is only read.
        left.and(right);
        return BloomFilterUtils.serialize(left, new Text());
    } catch (IOException ioe) {
        throw new HiveException(ioe);
    }
}
From source file:hivemall.sketch.bloom.BloomAndUDFTest.java
License:Apache License
@Test public void test() throws IOException, HiveException { BloomAndUDF udf = new BloomAndUDF(); DynamicBloomFilter bf1 = createBloomFilter(1L, 10000); DynamicBloomFilter bf2 = createBloomFilter(2L, 10000); Text bf1str = BloomFilterUtils.serialize(bf1, new Text()); Text bf2str = BloomFilterUtils.serialize(bf2, new Text()); bf1.and(bf2);//from ww w .j a va 2 s . c o m Text expected = BloomFilterUtils.serialize(bf1, new Text()); Text actual = udf.evaluate(bf1str, bf2str); Assert.assertEquals(expected, actual); DynamicBloomFilter deserialized = BloomFilterUtils.deserialize(actual, new DynamicBloomFilter()); assertNotContains(bf1, deserialized, 1L, 10000); assertNotContains(bf1, deserialized, 2L, 10000); }
From source file:hivemall.sketch.bloom.BloomContainsUDF.java
License:Apache License
/**
 * Tests whether {@code keyStr} is reported as a member of the bloom filter serialized in
 * {@code bloomStr}.
 *
 * <p>The most recently deserialized filter is cached and reused while the serialized filter
 * argument stays equal to the previous one, so repeated lookups against one filter do not pay
 * for deserialization again.
 *
 * @param bloomStr serialized bloom filter; may be {@code null}
 * @param keyStr key to look up; may be {@code null}
 * @return the result of the filter's membership test, or {@code null} when either argument is
 *         {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing the filter
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable Text keyStr) throws HiveException {
    // Guard the *argument*; the previous check tested the scratch field `key` and let a null
    // keyStr fall through to a NullPointerException.
    if (bloomStr == null || keyStr == null) {
        return null;
    }

    final Filter bloom;
    // The cache must be keyed on the serialized filter, not on the lookup key: a repeated key
    // with a different filter would otherwise be tested against the wrong filter.
    // NOTE(review): `prevKey` now holds the last serialized filter; the field is declared
    // outside this block and should be renamed (e.g. prevBloomStr) — confirm no other member
    // reads it as a lookup key.
    if (prevFilter != null && bloomStr.equals(prevKey)) {
        bloom = prevFilter;
    } else {
        try {
            bloom = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
        } catch (IOException e) {
            throw new HiveException(e);
        }
        // Store a copy: if the caller reuses one Text instance across calls, a stored
        // reference would always compare equal to the incoming argument.
        this.prevKey = new Text(bloomStr);
        this.prevFilter = bloom;
    }

    // Refresh the probe key on every call. copyBytes() returns exactly getLength() bytes,
    // whereas getBytes() exposes the backing array, which may carry stale trailing bytes.
    key.set(keyStr.copyBytes(), 1.0d);
    return Boolean.valueOf(bloom.membershipTest(key));
}
From source file:hivemall.sketch.bloom.BloomFilterUtilsTest.java
License:Apache License
@Test public void testDynamicBloomFilterSerde() throws IOException { final Key key = new Key(); DynamicBloomFilter dbf1 = BloomFilterUtils.newDynamicBloomFilter(300000); final Random rnd1 = new Random(43L); for (int i = 0; i < 1000000; i++) { double d = rnd1.nextGaussian(); String s = Double.toHexString(d); key.set(s.getBytes(), 1.0);// ww w . j a v a 2s . c om dbf1.add(key); } DynamicBloomFilter dbf2 = BloomFilterUtils.deserialize(BloomFilterUtils.serialize(dbf1), new DynamicBloomFilter()); final Random rnd2 = new Random(43L); for (int i = 0; i < 1000000; i++) { double d = rnd2.nextGaussian(); String s = Double.toHexString(d); key.set(s.getBytes(), 1.0); Assert.assertTrue(dbf2.membershipTest(key)); } }
From source file:hivemall.sketch.bloom.BloomNotUDF.java
License:Apache License
@Nullable public Text evaluate(@Nullable Text bloomStr) throws HiveException { if (bloomStr == null) { return null; }// w w w .j a v a 2s.com final Filter bloom; try { bloom = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter()); } catch (IOException e) { throw new HiveException(e); } bloom.not(); try { return BloomFilterUtils.serialize(bloom, new Text()); } catch (IOException e) { throw new HiveException(e); } }
From source file:hivemall.sketch.bloom.BloomNotUDFTest.java
License:Apache License
@Test public void test() throws IOException, HiveException { BloomNotUDF udf = new BloomNotUDF(); DynamicBloomFilter bf1 = createBloomFilter(1L, 10000); Text bf1str = BloomFilterUtils.serialize(bf1, new Text()); Text result = udf.evaluate(bf1str); DynamicBloomFilter actual = BloomFilterUtils.deserialize(result, new DynamicBloomFilter()); bf1.not();// www . j av a 2 s.c o m Assert.assertEquals(bf1.toString(), actual.toString()); }
From source file:hivemall.sketch.bloom.BloomOrUDF.java
License:Apache License
/**
 * Unions two serialized bloom filters and returns the serialized result.
 *
 * <p>Both arguments are deserialized into {@link DynamicBloomFilter} instances, the second is
 * {@code or}-ed into the first, and the first is serialized into a fresh {@link Text}.
 *
 * @param bloom1Str serialized form of the first filter; may be {@code null}
 * @param bloom2Str serialized form of the second filter; may be {@code null}
 * @return the serialized combined filter, or {@code null} when either argument is {@code null}
 * @throws HiveException wrapping any {@link IOException} raised while (de)serializing
 */
@Nullable
public Text evaluate(@Nullable Text bloom1Str, @Nullable Text bloom2Str) throws HiveException {
    if (bloom1Str == null || bloom2Str == null) {
        return null;
    }

    try {
        final Filter left = BloomFilterUtils.deserialize(bloom1Str, new DynamicBloomFilter());
        final Filter right = BloomFilterUtils.deserialize(bloom2Str, new DynamicBloomFilter());
        // The combination mutates `left` in place; `right` is only read.
        left.or(right);
        return BloomFilterUtils.serialize(left, new Text());
    } catch (IOException ioe) {
        throw new HiveException(ioe);
    }
}
From source file:hivemall.sketch.bloom.BloomOrUDFTest.java
License:Apache License
@Test public void test() throws IOException, HiveException { BloomOrUDF udf = new BloomOrUDF(); DynamicBloomFilter bf1 = createBloomFilter(1L, 10000); DynamicBloomFilter bf2 = createBloomFilter(2L, 10000); Text bf1str = BloomFilterUtils.serialize(bf1, new Text()); Text bf2str = BloomFilterUtils.serialize(bf2, new Text()); bf1.or(bf2);//from w ww . j a v a2s . c o m Text expected = BloomFilterUtils.serialize(bf1, new Text()); Text actual = udf.evaluate(bf1str, bf2str); Assert.assertEquals(expected, actual); DynamicBloomFilter deserialized = BloomFilterUtils.deserialize(actual, new DynamicBloomFilter()); assertEquals(bf1, deserialized, 1L, 10000); assertEquals(bf1, deserialized, 2L, 10000); }