1 package us.codecraft.webmagic.processor; 2 3 import us.codecraft.webmagic.Page; 4 import us.codecraft.webmagic.Site; 5 6 /** 7 * Interface to be implemented to customize a crawler. 8 * 9 * <p> 10 * In PageProcessor, you can customize: 11 * </p> 12 * <ul> 13 * <li>start URLs and other settings in {@link Site}</li> 14 * <li>how the URLs to fetch are detected</li> 15 * <li>how the data are extracted and stored</li> 16 * </ul> 17 * 18 * @author code4crafter@gmail.com <br> 19 * @see Site 20 * @see Page 21 * @since 0.1.0 22 */ 23 public interface PageProcessor { 24 25 /** 26 * Processes the page, extract URLs to fetch, extract the data and store. 27 * 28 * @param page page 29 */ 30 void process(Page page); 31 32 /** 33 * Returns the site settings. 34 * 35 * @return site 36 * @see Site 37 */ 38 default Site getSite() { 39 return Site.me(); 40 } 41 42 }