View Javadoc
1   package us.codecraft.webmagic.example;
2   
3   import us.codecraft.webmagic.Site;
4   import us.codecraft.webmagic.model.ConsolePageModelPipeline;
5   import us.codecraft.webmagic.model.HasKey;
6   import us.codecraft.webmagic.model.OOSpider;
7   import us.codecraft.webmagic.model.annotation.ExtractBy;
8   import us.codecraft.webmagic.model.annotation.ExtractByUrl;
9   import us.codecraft.webmagic.model.annotation.HelpUrl;
10  import us.codecraft.webmagic.model.annotation.TargetUrl;
11  
12  import java.util.List;
13  
14  /**
15   * @author code4crafter@gmail.com <br>
16   * @since 0.3.2
17   */
18  @TargetUrl("https://github.com/\\w+/\\w+")
19  @HelpUrl({"https://github.com/\\w+\\?tab=repositories", "https://github.com/\\w+", "https://github.com/explore/*"})
20  public class GithubRepo implements HasKey {
21  
22      @ExtractBy(value = "//h1[@class='public']/strong/a/text()", notNull = true)
23      private String name;
24  
25      @ExtractByUrl("https://github\\.com/(\\w+)/.*")
26      private String author;
27  
28      @ExtractBy("//div[@id='readme']/tidyText()")
29      private String readme;
30  
31      @ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']/text()", multi = true)
32      private List<String> language;
33  
34      @ExtractBy("//ul[@class='pagehead-actions']/li[1]//a[@class='social-count js-social-count']/text()")
35      private int star;
36  
37      @ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count']/text()")
38      private int fork;
39  
40      @ExtractByUrl
41      private String url;
42  
43      public static void main(String[] args) {
44          OOSpider.create(Site.me().setSleepTime(100)
45                  , new ConsolePageModelPipeline(), GithubRepo.class)
46                  .addUrl("https://github.com/code4craft").thread(10).run();
47      }
48  
49      @Override
50      public String key() {
51          return author + ":" + name;
52      }
53  
54      public String getName() {
55          return name;
56      }
57  
58      public String getReadme() {
59          return readme;
60      }
61  
62      public String getAuthor() {
63          return author;
64      }
65  
66      public List<String> getLanguage() {
67          return language;
68      }
69  
70      public String getUrl() {
71          return url;
72      }
73  
74      public int getStar() {
75          return star;
76      }
77  
78      public int getFork() {
79          return fork;
80      }
81  
82      @Override
83      public String toString() {
84          return "GithubRepo{" +
85                  "name='" + name + '\'' +
86                  ", author='" + author + '\'' +
87                  ", readme='" + readme + '\'' +
88                  ", language=" + language +
89                  ", star=" + star +
90                  ", fork=" + fork +
91                  ", url='" + url + '\'' +
92                  '}';
93      }
94  }