1 package us.codecraft.webmagic.scheduler.component;
2
3 import us.codecraft.webmagic.Request;
4 import us.codecraft.webmagic.Task;
5
6 import java.util.Collections;
7 import java.util.Set;
8 import java.util.concurrent.ConcurrentHashMap;
9
10
11
12
13 public class HashSetDuplicateRemover implements DuplicateRemover {
14
15 private Set<String> urls = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
16
17 @Override
18 public boolean isDuplicate(Request request, Task task) {
19 return !urls.add(getUrl(request));
20 }
21
22 protected String getUrl(Request request) {
23 return request.getUrl();
24 }
25
26 @Override
27 public void resetDuplicateCheck(Task task) {
28 urls.clear();
29 }
30
31 @Override
32 public int getTotalRequestsCount(Task task) {
33 return urls.size();
34 }
35 }