View Javadoc
1   package us.codecraft.webmagic.downloader;
2   
3   import org.apache.commons.lang3.JavaVersion;
4   import org.apache.commons.lang3.SystemUtils;
5   import org.apache.http.HttpException;
6   import org.apache.http.HttpRequest;
7   import org.apache.http.HttpRequestInterceptor;
8   import org.apache.http.client.CookieStore;
9   import org.apache.http.config.Registry;
10  import org.apache.http.config.RegistryBuilder;
11  import org.apache.http.config.SocketConfig;
12  import org.apache.http.conn.socket.ConnectionSocketFactory;
13  import org.apache.http.conn.socket.PlainConnectionSocketFactory;
14  import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
15  import org.apache.http.impl.client.*;
16  import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
17  import org.apache.http.impl.cookie.BasicClientCookie;
18  import org.apache.http.protocol.HttpContext;
19  import org.slf4j.Logger;
20  import org.slf4j.LoggerFactory;
21  import us.codecraft.webmagic.Site;
22  
23  import javax.net.ssl.SSLContext;
24  import javax.net.ssl.TrustManager;
25  import javax.net.ssl.X509TrustManager;
26  import java.io.IOException;
27  import java.security.KeyManagementException;
28  import java.security.NoSuchAlgorithmException;
29  import java.security.cert.CertificateException;
30  import java.security.cert.X509Certificate;
31  import java.util.Map;
32  
33  /**
34   * @author code4crafter@gmail.com <br>
35   * @since 0.4.0
36   */
37  public class HttpClientGenerator {
38  
39      private transient Logger logger = LoggerFactory.getLogger(getClass());
40  
41      private PoolingHttpClientConnectionManager connectionManager;
42  
43      public HttpClientGenerator() {
44          Registry<ConnectionSocketFactory> reg = RegistryBuilder.<ConnectionSocketFactory>create()
45                  .register("http", PlainConnectionSocketFactory.INSTANCE)
46                  .register("https", buildSSLConnectionSocketFactory())
47                  .build();
48          connectionManager = new PoolingHttpClientConnectionManager(reg);
49          connectionManager.setDefaultMaxPerRoute(100);
50      }
51  
52      private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
53          try {
54              SSLContext sslContext = createIgnoreVerifySSL();
55              String[] supportedProtocols;
56              if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_11)) {
57                  supportedProtocols = new String[]{"SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3"};
58              } else {
59                  supportedProtocols = new String[]{"SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2"};
60              }
61              logger.debug("supportedProtocols: {}", String.join(", ", supportedProtocols));
62              return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
63                      null,
64                      //不进行主机校验
65                      (host, sslSession) -> true); // 优先绕过安全证书
66          } catch (KeyManagementException | NoSuchAlgorithmException e) {
67              logger.error("ssl connection fail", e);
68          }
69          return SSLConnectionSocketFactory.getSocketFactory();
70      }
71  
72      private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
73          // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法
74          X509TrustManager trustManager = new X509TrustManager() {
75  
76              @Override
77              public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
78              }
79  
80              @Override
81              public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
82              }
83  
84              @Override
85              public X509Certificate[] getAcceptedIssuers() {
86                  return null;
87              }
88  
89          };
90  
91          SSLContext sc = SSLContext.getInstance("TLS");
92          sc.init(null, new TrustManager[]{trustManager}, null);
93          return sc;
94      }
95  
96      public HttpClientGenerator setPoolSize(int poolSize) {
97          connectionManager.setMaxTotal(poolSize);
98          return this;
99      }
100 
101     public CloseableHttpClient getClient(Site site) {
102         return generateClient(site);
103     }
104 
105     private CloseableHttpClient generateClient(Site site) {
106         HttpClientBuilder httpClientBuilder = HttpClients.custom();
107 
108         httpClientBuilder.setConnectionManager(connectionManager);
109         if (site.getUserAgent() != null) {
110             httpClientBuilder.setUserAgent(site.getUserAgent());
111         } else {
112             httpClientBuilder.setUserAgent("");
113         }
114         if (site.isUseGzip()) {
115             httpClientBuilder.addInterceptorFirst(new HttpRequestInterceptor() {
116 
117                 public void process(
118                         final HttpRequest request,
119                         final HttpContext context) throws HttpException, IOException {
120                     if (!request.containsHeader("Accept-Encoding")) {
121                         request.addHeader("Accept-Encoding", "gzip");
122                     }
123                 }
124             });
125         }
126         //解决post/redirect/post 302跳转问题
127         httpClientBuilder.setRedirectStrategy(new CustomRedirectStrategy());
128 
129         SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
130         socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true);
131         socketConfigBuilder.setSoTimeout(site.getTimeOut());
132         SocketConfig socketConfig = socketConfigBuilder.build();
133         httpClientBuilder.setDefaultSocketConfig(socketConfig);
134         connectionManager.setDefaultSocketConfig(socketConfig);
135         httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
136         generateCookie(httpClientBuilder, site);
137         return httpClientBuilder.build();
138     }
139 
140     private void generateCookie(HttpClientBuilder httpClientBuilder, Site site) {
141         if (site.isDisableCookieManagement()) {
142             httpClientBuilder.disableCookieManagement();
143             return;
144         }
145         CookieStore cookieStore = new BasicCookieStore();
146         for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
147             BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
148             cookie.setDomain(site.getDomain());
149             cookieStore.addCookie(cookie);
150         }
151         for (Map.Entry<String, Map<String, String>> domainEntry : site.getAllCookies().entrySet()) {
152             for (Map.Entry<String, String> cookieEntry : domainEntry.getValue().entrySet()) {
153                 BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
154                 cookie.setDomain(domainEntry.getKey());
155                 cookieStore.addCookie(cookie);
156             }
157         }
158         httpClientBuilder.setDefaultCookieStore(cookieStore);
159     }
160 
161 }