1 package us.codecraft.webmagic;
2
3 import org.apache.commons.lang3.StringUtils;
4 import us.codecraft.webmagic.selector.Html;
5 import us.codecraft.webmagic.selector.Json;
6 import us.codecraft.webmagic.selector.Selectable;
7 import us.codecraft.webmagic.utils.HttpConstant;
8 import us.codecraft.webmagic.utils.UrlUtils;
9
10 import java.util.ArrayList;
11 import java.util.Arrays;
12 import java.util.List;
13 import java.util.Map;
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 public class Page {
31
32 private Request request;
33
34 private ResultItems resultItems = new ResultItems();
35
36 private Html html;
37
38 private Json json;
39
40 private String rawText;
41
42 private Selectable url;
43
44 private Map<String,List<String>> headers;
45
46 private int statusCode = HttpConstant.StatusCode.CODE_200;
47
48 private boolean downloadSuccess = true;
49
50 private byte[] bytes;
51
52 private List<Request> targetRequests = new ArrayList<>();
53
54 private String charset;
55
56 public Page() {
57 }
58
59
60
61
62
63
64
65 @Deprecated
66 public static Page fail() {
67 return fail(null);
68 }
69
70
71
72
73
74
75
76
77
78 public static Page fail(Request request){
79 Page page = new Page();
80 page.setRequest(request);
81 page.setDownloadSuccess(false);
82 return page;
83 }
84
85 public Page setSkip(boolean skip) {
86 resultItems.setSkip(skip);
87 return this;
88
89 }
90
91
92
93
94
95
96
97 public void putField(String key, Object field) {
98 resultItems.put(key, field);
99 }
100
101
102
103
104
105
106 public Html getHtml() {
107 if (html == null) {
108 html = new Html(rawText, request.getUrl());
109 }
110 return html;
111 }
112
113
114
115
116
117
118
119 public Json getJson() {
120 if (json == null) {
121 json = new Json(rawText);
122 }
123 return json;
124 }
125
126
127
128
129
130
131 @Deprecated
132 public void setHtml(Html html) {
133 this.html = html;
134 }
135
136 public List<Request> getTargetRequests() {
137 return targetRequests;
138 }
139
140
141
142
143
144
145 public void addTargetRequests(Iterable<String> requests) {
146 addTargetRequests(requests, 0);
147 }
148
149
150
151
152
153
154
155 public void addTargetRequests(Iterable<String> requests, long priority) {
156 if(requests == null) {
157 return;
158 }
159
160 for (String req : requests) {
161 addRequestIfValid(req, priority);
162 }
163 }
164
165
166
167
168
169
170
171 private void addRequestIfValid(String url, long priority) {
172 if (StringUtils.isBlank(url) || url.equals("#") || url.startsWith("javascript:")) {
173 return;
174 }
175
176 String canonicalizedUrl = UrlUtils.canonicalizeUrl(url, this.url.toString());
177 Request req = new Request(canonicalizedUrl);
178 if(priority > 0) {
179 req.setPriority(priority);
180 }
181 targetRequests.add(req);
182 }
183
184
185
186
187
188
189 public void addTargetRequest(String requestString) {
190 if (StringUtils.isBlank(requestString) || requestString.equals("#")) {
191 return;
192 }
193 requestString = UrlUtils.canonicalizeUrl(requestString, url.toString());
194 targetRequests.add(new Request(requestString));
195 }
196
197
198
199
200
201
202 public void addTargetRequest(Request request) {
203 targetRequests.add(request);
204 }
205
206
207
208
209
210
211 public Selectable getUrl() {
212 return url;
213 }
214
215 public void setUrl(Selectable url) {
216 this.url = url;
217 }
218
219
220
221
222
223
224 public Request getRequest() {
225 return request;
226 }
227
228 public void setRequest(Request request) {
229 this.request = request;
230 this.resultItems.setRequest(request);
231 }
232
233 public ResultItems getResultItems() {
234 return resultItems;
235 }
236
237 public int getStatusCode() {
238 return statusCode;
239 }
240
241 public void setStatusCode(int statusCode) {
242 this.statusCode = statusCode;
243 }
244
245 public String getRawText() {
246 return rawText;
247 }
248
249 public Page setRawText(String rawText) {
250 this.rawText = rawText;
251 return this;
252 }
253
254 public Map<String, List<String>> getHeaders() {
255 return headers;
256 }
257
258 public void setHeaders(Map<String, List<String>> headers) {
259 this.headers = headers;
260 }
261
262 public boolean isDownloadSuccess() {
263 return downloadSuccess;
264 }
265
266 public void setDownloadSuccess(boolean downloadSuccess) {
267 this.downloadSuccess = downloadSuccess;
268 }
269
270 public byte[] getBytes() {
271 return bytes;
272 }
273
274 public void setBytes(byte[] bytes) {
275 this.bytes = bytes;
276 }
277
278 public String getCharset() {
279 return charset;
280 }
281
282 public void setCharset(String charset) {
283 this.charset = charset;
284 }
285
286 @Override
287 public String toString() {
288 return "Page{" +
289 "request=" + request +
290 ", resultItems=" + resultItems +
291 ", html=" + html +
292 ", json=" + json +
293 ", rawText='" + rawText + '\'' +
294 ", url=" + url +
295 ", headers=" + headers +
296 ", statusCode=" + statusCode +
297 ", downloadSuccess=" + downloadSuccess +
298 ", targetRequests=" + targetRequests +
299 ", charset='" + charset + '\'' +
300 ", bytes=" + Arrays.toString(bytes) +
301 '}';
302 }
303 }