View Javadoc
1   package us.codecraft.webmagic.example;
2   
3   import us.codecraft.webmagic.Site;
4   import us.codecraft.webmagic.model.OOSpider;
5   import us.codecraft.webmagic.model.annotation.ExtractBy;
6   
7   import java.util.ArrayList;
8   import java.util.List;
9   
10  /**
11   * @since 0.4.0
12   * @author code4crafter@gmail.com
13   */
14  public class BaiduBaike{
15  
16      @ExtractBy("//h1[@class=title]/div[@class=lemmaTitleH1]/text()")
17      private String name;
18  
19      @ExtractBy("//div[@id='lemmaContent-0']//div[@class='para']/allText()")
20      private String description;
21  
22      @Override
23      public String toString() {
24          return "BaiduBaike{" +
25                  "name='" + name + '\'' +
26                  ", description='" + description + '\'' +
27                  '}';
28      }
29  
30      public static void main(String[] args) {
31          OOSpider ooSpider = OOSpider.create(Site.me().setSleepTime(0), BaiduBaike.class);
32          //single download
33          String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8";
34          BaiduBaike baike = ooSpider.<BaiduBaike>get("http://baike.baidu.com/search/word?word=httpclient&pic=1&sug=1&enc=utf8");
35          System.out.println(baike);
36  
37          //multidownload
38          List<String> list = new ArrayList<String>();
39          list.add(String.format(urlTemplate,"风力发电"));
40          list.add(String.format(urlTemplate,"太阳能"));
41          list.add(String.format(urlTemplate,"地热发电"));
42          list.add(String.format(urlTemplate,"地热发电"));
43          List<BaiduBaike> resultItemses = ooSpider.<BaiduBaike>getAll(list);
44          for (BaiduBaike resultItemse : resultItemses) {
45              System.out.println(resultItemse);
46          }
47          ooSpider.close();
48      }
49  
50      public String getName() {
51          return name;
52      }
53  
54      public String getDescription() {
55          return description;
56      }
57  }