View Javadoc
1   package us.codecraft.webmagic.model.samples;
2   
3   import us.codecraft.webmagic.Site;
4   import us.codecraft.webmagic.Spider;
5   import us.codecraft.webmagic.Task;
6   import us.codecraft.webmagic.model.OOSpider;
7   import us.codecraft.webmagic.monitor.SpiderMonitor;
8   import us.codecraft.webmagic.pipeline.PageModelPipeline;
9   import us.codecraft.webmagic.model.annotation.ExtractBy;
10  import us.codecraft.webmagic.model.annotation.ExtractByUrl;
11  import us.codecraft.webmagic.model.annotation.HelpUrl;
12  import us.codecraft.webmagic.model.annotation.TargetUrl;
13  
14  import javax.management.JMException;
15  import java.io.IOException;
16  
17  /**
18   * @author code4crafter@gmail.com <br>
19   */
20  @TargetUrl("http://www.36kr.com/p/\\d+.html")
21  @HelpUrl("http://www.36kr.com/#/page/\\d+")
22  public class Kr36NewsModel {
23  
24      @ExtractBy("//h1[@class='entry-title sep10']")
25      private String title;
26  
27      @ExtractBy("//div[@class='mainContent sep-10']/tidyText()")
28      private String content;
29  
30      @ExtractByUrl
31      private String url;
32  
33      public static void main(String[] args) throws IOException, JMException {
34          //Just for benchmark
35          Spider thread = OOSpider.create(Site.me().setSleepTime(0), new PageModelPipeline() {
36              @Override
37              public void process(Object o, Task task) {
38  
39              }
40          }, Kr36NewsModel.class).thread(20).addUrl("http://www.36kr.com/");
41          thread.start();
42          SpiderMonitor spiderMonitor = SpiderMonitor.instance();
43          spiderMonitor.register(thread);
44      }
45  
46      public String getTitle() {
47          return title;
48      }
49  
50      public String getContent() {
51          return content;
52      }
53  
54      public String getUrl() {
55          return url;
56      }
57  }