// PageProcessor.java

package us.codecraft.webmagic.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;

/**
 * Extension point that a crawler author implements to define crawl behavior.
 *
 * <p>
 * An implementation of this interface is the place to decide:
 * </p>
 * <ul>
 * <li>the start URLs and the remaining settings carried by {@link Site}</li>
 * <li>how further URLs to fetch are discovered</li>
 * <li>how data is extracted from a page and stored</li>
 * </ul>
 *
 * @author code4crafter@gmail.com <br>
 * @see Site
 * @see Page
 * @since 0.1.0
 */
public interface PageProcessor {

    /**
     * Handles a single page: picks out the URLs that should be fetched next,
     * extracts the data of interest and stores it.
     *
     * @param page the page to process
     */
    void process(Page page);

    /**
     * Supplies the site settings for this processor.
     *
     * <p>
     * Unless overridden, this hands back whatever {@link Site#me()} returns.
     * </p>
     *
     * @return the site settings
     * @see Site
     */
    default Site getSite() {
        // Fallback for implementations that do not provide their own settings.
        final Site fallbackSite = Site.me();
        return fallbackSite;
    }

}