1 package us.codecraft.webmagic.downloader; 2 3 import us.codecraft.webmagic.Page; 4 import us.codecraft.webmagic.Request; 5 import us.codecraft.webmagic.Task; 6 7 /** 8 * Downloader is the part that downloads web pages and store in Page object. <br> 9 * Downloader has {@link #setThread(int)} method because downloader is always the bottleneck of a crawler, 10 * there are always some mechanisms such as pooling in downloader, and pool size is related to thread numbers. 11 * 12 * @author code4crafter@gmail.com <br> 13 * @since 0.1.0 14 */ 15 public interface Downloader { 16 17 /** 18 * Downloads web pages and store in Page object. 19 * 20 * @param request request 21 * @param task task 22 * @return page 23 */ 24 public Page download(Request request, Task task); 25 26 /** 27 * Tell the downloader how many threads the spider used. 28 * @param threadNum number of threads 29 */ 30 public void setThread(int threadNum); 31 }