View Javadoc
1   package us.codecraft.webmagic.samples;
2   
3   import us.codecraft.webmagic.Page;
4   import us.codecraft.webmagic.Site;
5   import us.codecraft.webmagic.Spider;
6   import us.codecraft.webmagic.processor.PageProcessor;
7   
8   /**
9    * @author code4crafter@gmail.com <br>
10   */
11  public class IteyeBlogProcessor implements PageProcessor {
12  
13      private Site site;
14  
15      @Override
16      public void process(Page page) {
17          page.addTargetRequests(page.getHtml().links().regex(".*yanghaoli\\.iteye\\.com/blog/\\d+").all());
18          page.putField("title",page.getHtml().xpath("//title").toString());
19          page.putField("content",page.getHtml().smartContent().toString());
20      }
21  
22      @Override
23      public Site getSite() {
24          if (site == null) {
25              site = Site.me().setDomain("yanghaoli.iteye.com");
26          }
27          return site;
28      }
29  
30      public static void main(String[] args) {
31          Spider.create(new IteyeBlogProcessor()).thread(5).addUrl("http://yanghaoli.iteye.com/").run();
32      }
33  }