1 package us.codecraft.webmagic.downloader;
2
3 import org.apache.commons.lang3.JavaVersion;
4 import org.apache.commons.lang3.SystemUtils;
5 import org.apache.http.HttpException;
6 import org.apache.http.HttpRequest;
7 import org.apache.http.HttpRequestInterceptor;
8 import org.apache.http.client.CookieStore;
9 import org.apache.http.config.Registry;
10 import org.apache.http.config.RegistryBuilder;
11 import org.apache.http.config.SocketConfig;
12 import org.apache.http.conn.socket.ConnectionSocketFactory;
13 import org.apache.http.conn.socket.PlainConnectionSocketFactory;
14 import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
15 import org.apache.http.impl.client.*;
16 import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
17 import org.apache.http.impl.cookie.BasicClientCookie;
18 import org.apache.http.protocol.HttpContext;
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
21 import us.codecraft.webmagic.Site;
22
23 import javax.net.ssl.SSLContext;
24 import javax.net.ssl.TrustManager;
25 import javax.net.ssl.X509TrustManager;
26 import java.io.IOException;
27 import java.security.KeyManagementException;
28 import java.security.NoSuchAlgorithmException;
29 import java.security.cert.CertificateException;
30 import java.security.cert.X509Certificate;
31 import java.util.Map;
32
33
34
35
36
37 public class HttpClientGenerator {
38
39 private transient Logger logger = LoggerFactory.getLogger(getClass());
40
41 private PoolingHttpClientConnectionManager connectionManager;
42
43 public HttpClientGenerator() {
44 Registry<ConnectionSocketFactory> reg = RegistryBuilder.<ConnectionSocketFactory>create()
45 .register("http", PlainConnectionSocketFactory.INSTANCE)
46 .register("https", buildSSLConnectionSocketFactory())
47 .build();
48 connectionManager = new PoolingHttpClientConnectionManager(reg);
49 connectionManager.setDefaultMaxPerRoute(100);
50 }
51
52 private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
53 try {
54 SSLContext sslContext = createIgnoreVerifySSL();
55 String[] supportedProtocols;
56 if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_11)) {
57 supportedProtocols = new String[]{"SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3"};
58 } else {
59 supportedProtocols = new String[]{"SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2"};
60 }
61 logger.debug("supportedProtocols: {}", String.join(", ", supportedProtocols));
62 return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
63 null,
64
65 (host, sslSession) -> true);
66 } catch (KeyManagementException | NoSuchAlgorithmException e) {
67 logger.error("ssl connection fail", e);
68 }
69 return SSLConnectionSocketFactory.getSocketFactory();
70 }
71
72 private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
73
74 X509TrustManager trustManager = new X509TrustManager() {
75
76 @Override
77 public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
78 }
79
80 @Override
81 public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
82 }
83
84 @Override
85 public X509Certificate[] getAcceptedIssuers() {
86 return null;
87 }
88
89 };
90
91 SSLContext sc = SSLContext.getInstance("TLS");
92 sc.init(null, new TrustManager[]{trustManager}, null);
93 return sc;
94 }
95
96 public HttpClientGenerator setPoolSize(int poolSize) {
97 connectionManager.setMaxTotal(poolSize);
98 return this;
99 }
100
101 public CloseableHttpClient getClient(Site site) {
102 return generateClient(site);
103 }
104
105 private CloseableHttpClient generateClient(Site site) {
106 HttpClientBuilder httpClientBuilder = HttpClients.custom();
107
108 httpClientBuilder.setConnectionManager(connectionManager);
109 if (site.getUserAgent() != null) {
110 httpClientBuilder.setUserAgent(site.getUserAgent());
111 } else {
112 httpClientBuilder.setUserAgent("");
113 }
114 if (site.isUseGzip()) {
115 httpClientBuilder.addInterceptorFirst(new HttpRequestInterceptor() {
116
117 public void process(
118 final HttpRequest request,
119 final HttpContext context) throws HttpException, IOException {
120 if (!request.containsHeader("Accept-Encoding")) {
121 request.addHeader("Accept-Encoding", "gzip");
122 }
123 }
124 });
125 }
126
127 httpClientBuilder.setRedirectStrategy(new CustomRedirectStrategy());
128
129 SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
130 socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
131 socketConfigBuilder.setSoTimeout(site.getTimeOut());
132 SocketConfig socketConfig = socketConfigBuilder.build();
133 httpClientBuilder.setDefaultSocketConfig(socketConfig);
134 connectionManager.setDefaultSocketConfig(socketConfig);
135 httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
136 generateCookie(httpClientBuilder, site);
137 return httpClientBuilder.build();
138 }
139
140 private void generateCookie(HttpClientBuilder httpClientBuilder, Site site) {
141 if (site.isDisableCookieManagement()) {
142 httpClientBuilder.disableCookieManagement();
143 return;
144 }
145 CookieStore cookieStore = new BasicCookieStore();
146 for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
147 BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
148 cookie.setDomain(site.getDomain());
149 cookieStore.addCookie(cookie);
150 }
151 for (Map.Entry<String, Map<String, String>> domainEntry : site.getAllCookies().entrySet()) {
152 for (Map.Entry<String, String> cookieEntry : domainEntry.getValue().entrySet()) {
153 BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
154 cookie.setDomain(domainEntry.getKey());
155 cookieStore.addCookie(cookie);
156 }
157 }
158 httpClientBuilder.setDefaultCookieStore(cookieStore);
159 }
160
161 }