Java tutorial
/*******************************************************************************
 * @(#)WechatDBDuplicateRemover.java May 10, 2016
 *
 * Copyright 2016 Hengtian Soft Group Ltd. All rights reserved.
 * Hengtian Soft PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 *******************************************************************************/
package com.spider.webmagic.scheduler;

import java.nio.charset.Charset;
import java.util.concurrent.atomic.AtomicInteger;

import org.springframework.context.ApplicationContext;

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;

import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;

/**
 * {@link DuplicateRemover} that detects duplicate requests by their title using a
 * Guava {@link BloomFilter}.
 *
 * <p>On construction a fresh filter is handed to
 * {@code DBReadUrls.readHistoryDBTitle(...)} and the filter it returns is used for
 * all later checks (presumably seeded with titles already stored in the database;
 * confirm against {@code DBReadUrls}).
 *
 * <p>Because a Bloom filter is probabilistic, a title that was never seen may
 * occasionally be reported as a duplicate; the rate is governed by {@code fpp}.
 *
 * <p>All methods that read or replace the filter are {@code synchronized} on this
 * instance so that the check-then-put sequences cannot interleave.
 *
 * @author <a href="mailto:ruiyuanyu@hengtiansoft.com"> ruiyuanyu</a>
 * @version $Revision 1.1 $ May 10, 2016 3:21:56 PM
 */
public class WechatDBDuplicateRemover implements DuplicateRemover {

    /** Expected number of distinct titles; sizes every filter built by this class. */
    private final int expectedInsertions;

    /** Desired false-positive probability for every filter built by this class. */
    private final double fpp;

    /** Number of requests accepted as new by {@link #isDuplicate(Request, Task)}. */
    private final AtomicInteger counter = new AtomicInteger(0);

    /** Current filter of seen titles; guarded by {@code this}. */
    private BloomFilter<CharSequence> bloomFilter;

    /**
     * Creates the remover and loads its initial filter through {@code DBReadUrls}.
     *
     * @param expectedInsertions expected number of titles the filter will hold
     * @param fpp                desired false-positive probability of the filter
     * @param tablename          passed through to {@code DBReadUrls}
     * @param mybatisContext     passed through to {@code DBReadUrls}
     * @param type               passed through to {@code DBReadUrls}
     */
    public WechatDBDuplicateRemover(int expectedInsertions, double fpp, String tablename,
            ApplicationContext mybatisContext, String type) {
        this.expectedInsertions = expectedInsertions;
        this.fpp = fpp;
        // NOTE(review): rebuildBloomFilter() is overridable and is invoked from the
        // constructor; kept for compatibility with existing subclasses.
        this.bloomFilter = new DBReadUrls(tablename, mybatisContext, type)
                .readHistoryDBTitle(rebuildBloomFilter());
    }

    /**
     * Resets the accepted-request counter to zero and builds a new, empty filter.
     *
     * <p>The caller is responsible for installing the returned filter; this method
     * does not touch the {@code bloomFilter} field itself.
     *
     * @return a new empty filter sized by {@code expectedInsertions} and {@code fpp}
     */
    protected BloomFilter<CharSequence> rebuildBloomFilter() {
        counter.set(0);
        return BloomFilter.create(Funnels.stringFunnel(Charset.defaultCharset()), expectedInsertions, fpp);
    }

    /**
     * Checks whether the request's title has (probably) been seen before and, if
     * not, records it and increments the accepted-request counter.
     *
     * @param request the request whose title is checked
     * @param task    unused
     * @return {@code true} if the filter already might contain the title
     */
    @Override
    public synchronized boolean isDuplicate(Request request, Task task) {
        // NOTE(review): a null title is passed straight to the filter; confirm
        // that titles are always populated before requests reach this point.
        String title = getTitle(request);
        boolean duplicate = bloomFilter.mightContain(title);
        if (!duplicate) {
            bloomFilter.put(title);
            counter.incrementAndGet();
        }
        return duplicate;
    }

    /**
     * Same check-and-record as {@link #isDuplicate(Request, Task)} but without
     * incrementing the accepted-request counter.
     *
     * <p>Synchronized so that it cannot race with {@code isDuplicate} on the
     * shared filter.
     *
     * @param request the request whose title is checked
     * @param task    unused
     * @return {@code true} if the filter already might contain the title
     */
    public synchronized boolean isQueueDuplicate(Request request, Task task) {
        String title = getTitle(request);
        boolean duplicate = bloomFilter.mightContain(title);
        if (!duplicate) {
            bloomFilter.put(title);
        }
        return duplicate;
    }

    /**
     * Extracts the de-duplication key from a request.
     *
     * @param request the request to inspect
     * @return the value of {@code request.getTitle()}
     */
    protected String getTitle(Request request) {
        return request.getTitle();
    }

    /**
     * Discards all recorded titles and resets the accepted-request counter.
     *
     * <p>The new filter is empty: titles loaded through {@code DBReadUrls} at
     * construction time are not re-read. Use
     * {@link #setBloomFilter(BloomFilter)} afterwards to install a seeded filter.
     *
     * @param task unused
     */
    @Override
    public synchronized void resetDuplicateCheck(Task task) {
        // Previously the rebuilt filter was discarded, so only the counter was reset.
        this.bloomFilter = rebuildBloomFilter();
    }

    /**
     * @param task unused
     * @return number of requests accepted as new by {@code isDuplicate} since
     *         construction or the last reset
     */
    @Override
    public int getTotalRequestsCount(Task task) {
        return counter.get();
    }

    /**
     * @return the filter currently used for duplicate checks (the live instance,
     *         not a copy)
     */
    public synchronized BloomFilter<CharSequence> getBloomFilter() {
        return bloomFilter;
    }

    /**
     * Replaces the filter used for duplicate checks. The accepted-request counter
     * is left unchanged.
     *
     * @param bloomFilter the filter to use from now on
     */
    public synchronized void setBloomFilter(BloomFilter<CharSequence> bloomFilter) {
        this.bloomFilter = bloomFilter;
    }
}