RecoverSample.java

package us.codecraft.webmagic.recover;


import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.samples.SinaBlogProcessor;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;

/**
 * @author code4crafter@gmail.com <br>
 */
public class RecoverSample {

    public static void main(String[] args) {
        String storage = "queue";
        String duplicate = "duplicate";
        Spider spider = new Spider(new SinaBlogProcessor());
        DuplicateRemover remover = new DuplicateStorageRemover(duplicate);
        spider.setScheduler(new MmapQueueScheduler(remover, storage));
        spider.addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html")
                .run();
    }
}