Spider |
Spider.addPipeline(Pipeline pipeline) |
add a pipeline for Spider
|
Spider |
Spider.addRequest(Request... requests) |
Add urls with information to crawl.
|
Spider |
Spider.addUrl(java.lang.String... urls) |
Add urls to crawl.
|
Spider |
Spider.clearPipeline() |
clear the pipelines set
|
static Spider |
Spider.create(PageProcessor pageProcessor) |
create a spider with pageProcessor.
|
Spider |
Spider.downloader(Downloader downloader) |
Deprecated.
|
Spider |
Spider.pipeline(Pipeline pipeline) |
Deprecated.
|
Spider |
Spider.scheduler(Scheduler scheduler) |
Deprecated.
|
Spider |
Spider.setDownloader(Downloader downloader) |
set the downloader of spider
|
Spider |
Spider.setEmptySleepTime(long emptySleepTime) |
Set wait time when no url is polled.
|
Spider |
Spider.setExecutorService(java.util.concurrent.ExecutorService executorService) |
|
Spider |
Spider.setExitWhenComplete(boolean exitWhenComplete) |
Exit when complete.
|
Spider |
Spider.setPipelines(java.util.List<Pipeline> pipelines) |
set pipelines for Spider
|
Spider |
Spider.setScheduler(Scheduler updateScheduler) |
set scheduler for Spider
|
Spider |
Spider.setSpawnUrl(boolean spawnUrl) |
Whether to add extracted urls to download.
Add urls to download when it is true, and just download seed urls when it is false.
|
Spider |
Spider.setSpiderListeners(java.util.List<SpiderListener> spiderListeners) |
|
Spider |
Spider.setUUID(java.lang.String uuid) |
Set a uuid for the spider.
Default uuid is domain of site.
|
Spider |
Spider.startRequest(java.util.List<Request> startRequests) |
Set startUrls of Spider.
Takes priority over startUrls of Site.
|
Spider |
Spider.startUrls(java.util.List<java.lang.String> startUrls) |
Set startUrls of Spider.
Takes priority over startUrls of Site.
|
Spider |
Spider.thread(int threadNum) |
start with more than one thread
|
Spider |
Spider.thread(java.util.concurrent.ExecutorService executorService,
int threadNum) |
start with more than one thread
|