View Javadoc
1   package us.codecraft.webmagic.model.samples;
2   
3   import us.codecraft.webmagic.Page;
4   import us.codecraft.webmagic.Site;
5   import us.codecraft.webmagic.model.AfterExtractor;
6   import us.codecraft.webmagic.model.OOSpider;
7   import us.codecraft.webmagic.model.annotation.ExtractBy;
8   import us.codecraft.webmagic.model.annotation.TargetUrl;
9   
10  import java.util.List;
11  
12  /**
13   * @author yihua.huang@dianping.com <br>
14   *         Date: 13-8-13 <br>
15   *         Time: 上午10:13 <br>
16   */
17  @TargetUrl("http://*.alpha.dp/*")
18  public class DianpingFtlDataScanner implements AfterExtractor {
19  
20  	@ExtractBy(value = "(DP\\.data\\(\\{.*\\}\\));", type = ExtractBy.Type.Regex, notNull = true, multi = true)
21  	private List<String> data;
22  
23  	public static void main(String[] args) {
24  		OOSpider.create(Site.me().setSleepTime(0), DianpingFtlDataScanner.class)
25  				.thread(5).run();
26  	}
27  
28  	@Override
29  	public void afterProcess(Page page) {
30  		if (data.size() > 1) {
31  			System.err.println(page.getUrl());
32  		}
33  		if (data.size() > 0 && data.get(0).length() > 100) {
34  			System.err.println(page.getUrl());
35  		}
36  	}
37  }