View Javadoc
1   package us.codecraft.webmagic.recover;
2   
3   
4   import us.codecraft.webmagic.Spider;
5   import us.codecraft.webmagic.samples.SinaBlogProcessor;
6   import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
7   
8   /**
9    * @author code4crafter@gmail.com <br>
10   */
11  public class RecoverSample {
12  
13      public static void main(String[] args) {
14          String storage = "queue";
15          String duplicate = "duplicate";
16          Spider spider = new Spider(new SinaBlogProcessor());
17          DuplicateRemover remover = new DuplicateStorageRemover(duplicate);
18          spider.setScheduler(new MmapQueueScheduler(remover, storage));
19          spider.addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html")
20                  .run();
21      }
22  }