View Javadoc
1   package us.codecraft.webmagic.downloader;
2   
3   import us.codecraft.webmagic.Page;
4   import us.codecraft.webmagic.Request;
5   import us.codecraft.webmagic.Task;
6   
7   /**
8    * Downloader is the part that downloads web pages and store in Page object. <br>
9    * Downloader has {@link #setThread(int)} method because downloader is always the bottleneck of a crawler,
10   * there are always some mechanisms such as pooling in downloader, and pool size is related to thread numbers.
11   *
12   * @author code4crafter@gmail.com <br>
13   * @since 0.1.0
14   */
15  public interface Downloader {
16  
17      /**
18       * Downloads web pages and store in Page object.
19       *
20       * @param request request
21       * @param task task
22       * @return page
23       */
24      public Page download(Request request, Task task);
25  
26      /**
27       * Tell the downloader how many threads the spider used.
28       * @param threadNum number of threads
29       */
30      public void setThread(int threadNum);
31  }