1 package us.codecraft.webmagic.downloader.selenium;
2
3 import org.openqa.selenium.By;
4 import org.openqa.selenium.Cookie;
5 import org.openqa.selenium.WebDriver;
6 import org.openqa.selenium.WebElement;
7 import org.slf4j.Logger;
8 import org.slf4j.LoggerFactory;
9
10 import us.codecraft.webmagic.Page;
11 import us.codecraft.webmagic.Request;
12 import us.codecraft.webmagic.Site;
13 import us.codecraft.webmagic.Task;
14 import us.codecraft.webmagic.downloader.AbstractDownloader;
15 import us.codecraft.webmagic.selector.Html;
16 import us.codecraft.webmagic.selector.PlainText;
17
18 import java.io.Closeable;
19 import java.io.IOException;
20 import java.util.Map;
21
22
23
24
25
26
27
28
29
30 public class SeleniumDownloader extends AbstractDownloader implements Closeable {
31
32 private volatile WebDriverPool webDriverPool;
33
34 private Logger logger = LoggerFactory.getLogger(getClass());
35
36 private int sleepTime = 0;
37
38 private int poolSize = 1;
39
40 private static final String DRIVER_PHANTOMJS = "phantomjs";
41
42
43
44
45
46
47 public SeleniumDownloader(String chromeDriverPath) {
48 System.getProperties().setProperty("webdriver.chrome.driver",
49 chromeDriverPath);
50 }
51
52
53
54
55
56
57 public SeleniumDownloader() {
58
59
60 }
61
62
63
64
65
66
67
68 public SeleniumDownloader setSleepTime(int sleepTime) {
69 this.sleepTime = sleepTime;
70 return this;
71 }
72
73 @Override
74 public Page download(Request request, Task task) {
75 checkInit();
76 WebDriver webDriver = null;
77 Page page = Page.fail(request);
78 try {
79 webDriver = webDriverPool.get();
80
81 logger.info("downloading page " + request.getUrl());
82 webDriver.get(request.getUrl());
83 try {
84 if (sleepTime > 0) {
85 Thread.sleep(sleepTime);
86 }
87 } catch (InterruptedException e) {
88 e.printStackTrace();
89 }
90 WebDriver.Options manage = webDriver.manage();
91 Site site = task.getSite();
92 if (site.getCookies() != null) {
93 for (Map.Entry<String, String> cookieEntry : site.getCookies()
94 .entrySet()) {
95 Cookie cookie = new Cookie(cookieEntry.getKey(),
96 cookieEntry.getValue());
97 manage.addCookie(cookie);
98 }
99 }
100
101
102
103
104
105
106
107 WebElement webElement = webDriver.findElement(By.xpath("/html"));
108 String content = webElement.getAttribute("outerHTML");
109 page.setDownloadSuccess(true);
110 page.setRawText(content);
111 page.setHtml(new Html(content, request.getUrl()));
112 page.setUrl(new PlainText(request.getUrl()));
113 page.setRequest(request);
114 onSuccess(page, task);
115 } catch (Exception e) {
116 logger.warn("download page {} error", request.getUrl(), e);
117 onError(page, task, e);
118 } finally {
119 if (webDriver != null) {
120 webDriverPool.returnToPool(webDriver);
121 }
122 }
123 return page;
124 }
125
126 private void checkInit() {
127 if (webDriverPool == null) {
128 synchronized (this) {
129 webDriverPool = new WebDriverPool(poolSize);
130 }
131 }
132 }
133
134 @Override
135 public void setThread(int thread) {
136 this.poolSize = thread;
137 }
138
139 @Override
140 public void close() throws IOException {
141 webDriverPool.closeAll();
142 }
143 }