Example usage for com.google.common.hash BloomFilter create

List of usage examples for com.google.common.hash BloomFilter create

Introduction

On this page you can find example usages of com.google.common.hash BloomFilter create.

Prototype

@CheckReturnValue
public static <T> BloomFilter<T> create(Funnel<? super T> funnel, long expectedInsertions) 

Source Link

Document

Creates a BloomFilter with the expected number of insertions and a default expected false positive probability of 3%.

Usage

From source file:org.bboxdb.storage.BloomFilterBuilder.java

/**
 * Create a bloom filter for a given number of keys
 * @param entries/*w ww . j  a v a 2s  .  co  m*/
 * @return
 */
public static BloomFilter<String> buildBloomFilter(final long entries) {
    return BloomFilter.create(new TupleKeyFunnel(), entries);
}

From source file:org.apache.jackrabbit.oak.spi.blob.split.BlobIdSet.java

/**
 * Sets up a blob id set whose backing store is the file {@code filename}
 * inside the directory {@code repositoryDir}.
 *
 * Initializes the bloom filter and the cache, then calls
 * {@code fillBloomFilter()}.
 *
 * @param repositoryDir path of the directory that holds the store file
 * @param filename      name of the store file inside {@code repositoryDir}
 */
BlobIdSet(String repositoryDir, String filename) {
    File parentDir = new File(repositoryDir);
    store = new File(parentDir, filename);

    // 9,000,000 expected insertions; about 8MB
    bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 9000000);

    // cache is capped at 1000 entries
    cache = CacheBuilder.newBuilder().maximumSize(1000).build();

    fillBloomFilter();
}

From source file:com.edduarte.argus.diff.DifferenceMatcher.java

/**
 * Matches every difference against the configured keywords and collects the
 * resulting {@code Result} objects.
 *
 * A bloom filter of occurrence texts is shared by consecutive differences
 * that have the same action and is replaced whenever the action changes.
 * Inserted differences are skipped when {@code ignoreAdded} is set, deleted
 * ones when {@code ignoreRemoved} is set.
 *
 * @return the set of matched differences
 */
@Override
public Set<DifferenceMatcher.Result> call() {
    Stopwatch sw = Stopwatch.createStarted();

    Set<Result> matchedDiffs = new ConcurrentHashSet<>();

    DifferenceAction previousAction = DifferenceAction.nothing;
    BloomFilter<String> currentFilter = null;
    for (Difference difference : differences) {
        // start a fresh bloom filter on the first difference and on every action change
        boolean startNewFilter = previousAction == DifferenceAction.nothing
                || difference.getAction() != previousAction;
        if (startNewFilter) {
            currentFilter = BloomFilter.create((from, into) -> into.putUnencodedChars(from), 10);
            previousAction = difference.getAction();
        }

        // effectively-final alias so that the lambdas below can capture the filter
        BloomFilter<String> seenTexts = currentFilter;
        seenTexts.put(difference.getOccurrenceText());

        keywords.parallelStream()
                .unordered()
                // keep keywords whose words might ALL be contained in the filter
                .filter(kw -> kw.textStream().allMatch(seenTexts::mightContain))
                .map(kw -> new Pair<>(difference, kw))
                // keep pairs where one keyword word equals this occurrence text exactly
                .filter(pair -> pair.b().textStream().anyMatch(pair.a().getOccurrenceText()::equals))
                .map(pair -> {
                    Difference diff = pair.a();
                    Keyword keyword = pair.b();
                    DifferenceAction action = diff.getAction();
                    boolean reportInsertion = action == DifferenceAction.inserted && !ignoreAdded;
                    boolean reportDeletion = action == DifferenceAction.deleted && !ignoreRemoved;
                    if (reportInsertion || reportDeletion) {
                        return new Result(diff.getAction(), keyword, diff.getSnippet());
                    }
                    // any other action, or an ignored one, yields no result
                    return null;
                })
                .filter(result -> result != null)
                .forEach(matchedDiffs::add);
    }

    sw.stop();
    logger.info("Completed difference matching for keywords '{}' in {}", keywords.toString(), sw.toString());
    return matchedDiffs;
}

From source file:com.edduarte.vokter.diff.DifferenceMatcher.java

/**
 * Matches every difference against the configured keywords and collects the
 * resulting {@code Result} objects.
 *
 * A bloom filter of occurrence texts is shared by consecutive differences
 * that have the same event and is replaced whenever the event changes.
 * Inserted differences are skipped when {@code ignoreAdded} is set, deleted
 * ones when {@code ignoreRemoved} is set.
 *
 * @return the set of matched differences
 */
@Override
public Set<DifferenceMatcher.Result> call() {
    Stopwatch sw = Stopwatch.createStarted();

    Set<Result> matchedDiffs = new ConcurrentHashSet<>();

    DifferenceEvent previousEvent = DifferenceEvent.nothing;
    BloomFilter<String> currentFilter = null;
    for (Difference difference : differences) {
        // start a fresh bloom filter on the first difference and on every event change
        boolean startNewFilter = previousEvent == DifferenceEvent.nothing
                || difference.getAction() != previousEvent;
        if (startNewFilter) {
            currentFilter = BloomFilter.create((from, into) -> into.putUnencodedChars(from), 10);
            previousEvent = difference.getAction();
        }

        // effectively-final alias so that the lambdas below can capture the filter
        BloomFilter<String> seenTexts = currentFilter;
        seenTexts.put(difference.getOccurrenceText());

        keywords.parallelStream()
                .unordered()
                // keep keywords whose words might ALL be contained in the filter
                .filter(kw -> kw.textStream().allMatch(seenTexts::mightContain))
                .map(kw -> new Pair<>(difference, kw))
                // keep pairs where one keyword word equals this occurrence text exactly
                .filter(pair -> pair.b().textStream().anyMatch(pair.a().getOccurrenceText()::equals))
                .map(pair -> {
                    Difference diff = pair.a();
                    Keyword keyword = pair.b();
                    DifferenceEvent event = diff.getAction();
                    boolean reportInsertion = event == DifferenceEvent.inserted && !ignoreAdded;
                    boolean reportDeletion = event == DifferenceEvent.deleted && !ignoreRemoved;
                    if (reportInsertion || reportDeletion) {
                        return new Result(diff.getAction(), keyword, diff.getSnippet());
                    }
                    // any other event, or an ignored one, yields no result
                    return null;
                })
                .filter(result -> result != null)
                .forEach(matchedDiffs::add);
    }

    sw.stop();
    logger.info("Completed difference matching for keywords '{}' in {}", keywords.toString(), sw.toString());
    return matchedDiffs;
}

From source file:org.apache.jackrabbit.oak.plugins.segment.file.CompactionGainEstimate.java

/**
 * Create a new instance of gain estimator. The estimation process can be stopped
 * by switching the supplier {@code stop} to {@code true}, in which case the returned
 * estimates are undefined.//w  ww.  j  a  va2s.c  o  m
 *
 * @param node  root node state
 * @param estimatedBulkCount
 * @param stop  stop signal
 */
CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount, Supplier<Boolean> stop) {
    uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount);
    collectReferencedSegments(node, new RecordIdSet(), stop);
}

From source file:org.apache.marmotta.kiwi.persistence.KiWiDialect.java

/**
 * Initializes the dialect: loads the statement definitions and creates the
 * bloom filter of supported functions.
 *
 * Every {@code statements.properties} resource found in this class's
 * package is loaded into {@code statements}. A failure while loading is
 * logged and does not abort construction.
 *
 * @throws DriverNotFoundException declared by the signature; nothing in this
 *         constructor body throws it directly
 */
protected KiWiDialect() throws DriverNotFoundException {
    statements = new Properties();

    // load all statements.properties files that can be located in the same package (from different modules in different jar files)
    try {
        Enumeration<URL> urls = this.getClass().getClassLoader().getResources(
                this.getClass().getPackage().getName().replace('.', '/') + "/statements.properties");
        while (urls.hasMoreElements()) {
            // Properties.load leaves the stream open, so close it explicitly
            // (fully-qualified type avoids needing a new import)
            try (java.io.InputStream in = urls.nextElement().openStream()) {
                statements.load(in);
            }
        }
    } catch (Exception e) {
        log.error("could not load statement definitions (statements.properties)", e);
    }

    // bloom filter sized for 1000 expected insertions
    supportedFunctions = BloomFilter.create(URIFunnel.getInstance(), 1000);
}

From source file:org.largecollections.FastIntIntCacheMap.java

/**
 * Installs the funnel for {@code Integer} keys and creates the bloom filter
 * with {@code bloomFilterSize} expected insertions.
 */
private void initializeBloomFilter() {
    this.myFunnel = new Funnel<Integer>() {
        @Override
        public void funnel(Integer value, PrimitiveSink sink) {
            // NOTE(review): Math.abs maps k and -k to the same int, so both keys feed
            // identical data into the filter. Confirm this is intended.
            int nonNegativeHash = Math.abs(value.hashCode());
            sink.putInt(nonNegativeHash);
        }
    };
    this.bloomFilter = BloomFilter.create(myFunnel, this.bloomFilterSize);
}

From source file:org.largecollections.CacheSetWithUnqToString.java

/**
 * Installs the funnel for {@code String} keys and creates the bloom filter
 * with {@code bloomFilterSize} expected insertions.
 */
private void initializeBloomFilter() {
    this.myFunnel = new Funnel<String>() {
        @Override
        public void funnel(String value, PrimitiveSink sink) {
            // the string itself is the only data fed into the sink
            sink.putString(value);
        }
    };
    this.bloomFilter = BloomFilter.create(myFunnel, this.bloomFilterSize);
}

From source file:org.largecollections.CacheSetWithUnqHashCode.java

/**
 * Installs the funnel for {@code Integer} keys and creates the bloom filter
 * with {@code bloomFilterSize} expected insertions.
 */
private void initializeBloomFilter() {
    this.myFunnel = new Funnel<Integer>() {
        @Override
        public void funnel(Integer value, PrimitiveSink sink) {
            // the integer is written as-is (auto-unboxed by putInt)
            sink.putInt(value);
        }
    };
    this.bloomFilter = BloomFilter.create(myFunnel, this.bloomFilterSize);
}

From source file:pt.haslab.dude.FileInfo.java

/**
 * Creates the info record for one file.
 *
 * Stores the given settings, creates one {@code AliasInfo} per entry of
 * {@code bigger}, and sets up a bloom filter of strings sized for 1000000
 * expected insertions.
 *
 * @param name       stored in the {@code name} field
 * @param bigger     files to create {@code AliasInfo} entries for; its size
 *                   plus one becomes {@code seq}
 * @param polynomial stored in the {@code polynomial} field
 * @param detail     stored in the {@code detail} field
 * @param minchunk   stored in the {@code minchunk} field
 * @param minshare   stored in the {@code minshare} field
 */
public FileInfo(String name, List<FileInfo> bigger, Polynomial polynomial, boolean detail, int minchunk,
        int minshare) {
    this.name = name;
    this.polynomial = polynomial;
    this.detail = detail;
    this.minchunk = minchunk;
    this.minshare = minshare;

    final int biggerCount = bigger.size();
    this.seq = biggerCount + 1;

    // one AliasInfo per entry of bigger, in the same order
    stats = new AliasInfo[biggerCount];
    for (int idx = 0; idx < stats.length; idx++) {
        stats[idx] = new AliasInfo(bigger.get(idx));
    }

    Funnel<String> stringFunnel = new Funnel<String>() {
        @Override
        public void funnel(String value, PrimitiveSink sink) {
            sink.putString(value);
        }
    };
    bloom = BloomFilter.create(stringFunnel, 1000000);
}