View Javadoc
1   package us.codecraft.webmagic.scheduler.component;
2   
3   import us.codecraft.webmagic.Request;
4   import us.codecraft.webmagic.Task;
5   
6   import java.util.Collections;
7   import java.util.Set;
8   import java.util.concurrent.ConcurrentHashMap;
9   
10  /**
11   * @author code4crafer@gmail.com
12   */
13  public class HashSetDuplicateRemover implements DuplicateRemover {
14  
15      private Set<String> urls = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
16  
17      @Override
18      public boolean isDuplicate(Request request, Task task) {
19          return !urls.add(getUrl(request));
20      }
21  
22      protected String getUrl(Request request) {
23          return request.getUrl();
24      }
25  
26      @Override
27      public void resetDuplicateCheck(Task task) {
28          urls.clear();
29      }
30  
31      @Override
32      public int getTotalRequestsCount(Task task) {
33          return urls.size();
34      }
35  }