1 package us.codecraft.webmagic.model.samples;
2
3 import us.codecraft.webmagic.Site;
4 import us.codecraft.webmagic.Task;
5 import us.codecraft.webmagic.model.OOSpider;
6 import us.codecraft.webmagic.pipeline.PageModelPipeline;
7 import us.codecraft.webmagic.model.annotation.ExtractBy;
8 import us.codecraft.webmagic.model.annotation.TargetUrl;
9
10 import java.util.List;
11
12
13
14
15 @TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
16 public class OschinaBlog{
17
18 @ExtractBy("//title")
19 private String title;
20
21 @ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css)
22 private String content;
23
24 @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
25 private List<String> tags;
26
27 public static void main(String[] args) {
28 OOSpider.create(Site.me()
29 .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36")
30 .setSleepTime(0)
31 .setRetryTimes(3)
32 ,new PageModelPipeline() {
33 @Override
34 public void process(Object o, Task task) {
35
36 }
37 }, OschinaBlog.class).thread(10).addUrl("http://my.oschina.net/flashsword/blog").run();
38 }
39
40 public String getTitle() {
41 return title;
42 }
43
44 public String getContent() {
45 return content;
46 }
47
48 public List<String> getTags() {
49 return tags;
50 }
51 }