1 package us.codecraft.webmagic.example;
2
3 import us.codecraft.webmagic.Site;
4 import us.codecraft.webmagic.model.ConsolePageModelPipeline;
5 import us.codecraft.webmagic.model.HasKey;
6 import us.codecraft.webmagic.model.OOSpider;
7 import us.codecraft.webmagic.model.annotation.ExtractBy;
8 import us.codecraft.webmagic.model.annotation.ExtractByUrl;
9 import us.codecraft.webmagic.model.annotation.HelpUrl;
10 import us.codecraft.webmagic.model.annotation.TargetUrl;
11
12 import java.util.List;
13
14
15
16
17
18 @TargetUrl("https://github.com/\\w+/\\w+")
19 @HelpUrl({"https://github.com/\\w+\\?tab=repositories", "https://github.com/\\w+", "https://github.com/explore/*"})
20 public class GithubRepo implements HasKey {
21
22 @ExtractBy(value = "//h1[@class='public']/strong/a/text()", notNull = true)
23 private String name;
24
25 @ExtractByUrl("https://github\\.com/(\\w+)/.*")
26 private String author;
27
28 @ExtractBy("//div[@id='readme']/tidyText()")
29 private String readme;
30
31 @ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']/text()", multi = true)
32 private List<String> language;
33
34 @ExtractBy("//ul[@class='pagehead-actions']/li[1]//a[@class='social-count js-social-count']/text()")
35 private int star;
36
37 @ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count']/text()")
38 private int fork;
39
40 @ExtractByUrl
41 private String url;
42
43 public static void main(String[] args) {
44 OOSpider.create(Site.me().setSleepTime(100)
45 , new ConsolePageModelPipeline(), GithubRepo.class)
46 .addUrl("https://github.com/code4craft").thread(10).run();
47 }
48
49 @Override
50 public String key() {
51 return author + ":" + name;
52 }
53
54 public String getName() {
55 return name;
56 }
57
58 public String getReadme() {
59 return readme;
60 }
61
62 public String getAuthor() {
63 return author;
64 }
65
66 public List<String> getLanguage() {
67 return language;
68 }
69
70 public String getUrl() {
71 return url;
72 }
73
74 public int getStar() {
75 return star;
76 }
77
78 public int getFork() {
79 return fork;
80 }
81
82 @Override
83 public String toString() {
84 return "GithubRepo{" +
85 "name='" + name + '\'' +
86 ", author='" + author + '\'' +
87 ", readme='" + readme + '\'' +
88 ", language=" + language +
89 ", star=" + star +
90 ", fork=" + fork +
91 ", url='" + url + '\'' +
92 '}';
93 }
94 }