DianpingFtlDataScanner.java
package us.codecraft.webmagic.model.samples;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import java.util.List;
/**
* @author yihua.huang@dianping.com <br>
* Date: 13-8-13 <br>
* Time: 上午10:13 <br>
*/
@TargetUrl("http://*.alpha.dp/*")
public class DianpingFtlDataScanner implements AfterExtractor {
@ExtractBy(value = "(DP\\.data\\(\\{.*\\}\\));", type = ExtractBy.Type.Regex, notNull = true, multi = true)
private List<String> data;
public static void main(String[] args) {
OOSpider.create(Site.me().setSleepTime(0), DianpingFtlDataScanner.class)
.thread(5).run();
}
@Override
public void afterProcess(Page page) {
if (data.size() > 1) {
System.err.println(page.getUrl());
}
if (data.size() > 0 && data.get(0).length() > 100) {
System.err.println(page.getUrl());
}
}
}