1 package us.codecraft.webmagic.model.samples;
2
3 import us.codecraft.webmagic.Page;
4 import us.codecraft.webmagic.Site;
5 import us.codecraft.webmagic.model.AfterExtractor;
6 import us.codecraft.webmagic.model.OOSpider;
7 import us.codecraft.webmagic.model.annotation.ExtractBy;
8 import us.codecraft.webmagic.model.annotation.TargetUrl;
9
10 import java.util.List;
11
12
13
14
15
16
17 @TargetUrl("http://*.alpha.dp/*")
18 public class DianpingFtlDataScanner implements AfterExtractor {
19
20 @ExtractBy(value = "(DP\\.data\\(\\{.*\\}\\));", type = ExtractBy.Type.Regex, notNull = true, multi = true)
21 private List<String> data;
22
23 public static void main(String[] args) {
24 OOSpider.create(Site.me().setSleepTime(0), DianpingFtlDataScanner.class)
25 .thread(5).run();
26 }
27
28 @Override
29 public void afterProcess(Page page) {
30 if (data.size() > 1) {
31 System.err.println(page.getUrl());
32 }
33 if (data.size() > 0 && data.get(0).length() > 100) {
34 System.err.println(page.getUrl());
35 }
36 }
37 }