View Javadoc
1   package us.codecraft.webmagic.downloader;
2   
import java.net.URI;

import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestWrapper;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
16  
17  /**
18   *支持post 302跳转策略实现类
19   *HttpClient默认跳转:httpClientBuilder.setRedirectStrategy(new LaxRedirectStrategy());
20   *上述代码在post/redirect/post这种情况下不会传递原有请求的数据信息。所以参考了下SeimiCrawler这个项目的重定向策略。
21   *原代码地址:https://github.com/zhegexiaohuozi/SeimiCrawler/blob/master/project/src/main/java/cn/wanghaomiao/seimi/http/hc/SeimiRedirectStrategy.java
22   */
23  public class CustomRedirectStrategy extends LaxRedirectStrategy {
24      private Logger logger = LoggerFactory.getLogger(getClass());
25  
26      @Override
27      public HttpUriRequest getRedirect(HttpRequest request, HttpResponse response, HttpContext context) throws ProtocolException {
28          URI uri = getLocationURI(request, response, context);
29          String method = request.getRequestLine().getMethod();
30          if ("post".equalsIgnoreCase(method)) {
31              try {
32                  HttpRequestWrapper httpRequestWrapper = (HttpRequestWrapper) request;
33                  httpRequestWrapper.setURI(uri);
34                  httpRequestWrapper.removeHeaders("Content-Length");
35                  return httpRequestWrapper;
36              } catch (Exception e) {
37                  logger.error("强转为HttpRequestWrapper出错");
38              }
39              return new HttpPost(uri);
40          } else {
41              return new HttpGet(uri);
42          }
43      }
44  }