1 package us.codecraft.webmagic;
2
3 import java.util.HashMap;
4 import java.util.HashSet;
5 import java.util.LinkedHashMap;
6 import java.util.Map;
7 import java.util.Set;
8 import java.util.UUID;
9
10 import us.codecraft.webmagic.utils.HttpConstant;
11
12
13
14
15
16
17
18
19 public class Site {
20
21 private String domain;
22
23 private String userAgent;
24
25 private Map<String, String> defaultCookies = new LinkedHashMap<String, String>();
26
27 private Map<String, Map<String, String>> cookies = new HashMap<String, Map<String, String>>();
28
29 private String charset;
30
31 private String defaultCharset;
32
33 private int sleepTime = 5000;
34
35 private int retryTimes = 0;
36
37 private int cycleRetryTimes = 0;
38
39 private int retrySleepTime = 1000;
40
41 private int timeOut = 5000;
42
43 private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
44
45 private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
46
47 private Map<String, String> headers = new HashMap<String, String>();
48
49 private boolean useGzip = true;
50
51 private boolean disableCookieManagement = false;
52
53 static {
54 DEFAULT_STATUS_CODE_SET.add(HttpConstant.StatusCode.CODE_200);
55 }
56
57
58
59
60
61
62 public static Site me() {
63 return new Site();
64 }
65
66
67
68
69
70
71
72
73 public Site addCookie(String name, String value) {
74 defaultCookies.put(name, value);
75 return this;
76 }
77
78
79
80
81
82
83
84
85
86 public Site addCookie(String domain, String name, String value) {
87 if (!cookies.containsKey(domain)){
88 cookies.put(domain,new HashMap<String, String>());
89 }
90 cookies.get(domain).put(name, value);
91 return this;
92 }
93
94
95
96
97
98
99
100 public Site setUserAgent(String userAgent) {
101 this.userAgent = userAgent;
102 return this;
103 }
104
105
106
107
108
109
110 public Map<String, String> getCookies() {
111 return defaultCookies;
112 }
113
114
115
116
117
118
119 public Map<String,Map<String, String>> getAllCookies() {
120 return cookies;
121 }
122
123
124
125
126
127
128 public String getUserAgent() {
129 return userAgent;
130 }
131
132
133
134
135
136
137 public String getDomain() {
138 return domain;
139 }
140
141
142
143
144
145
146
147 public Site setDomain(String domain) {
148 this.domain = domain;
149 return this;
150 }
151
152
153
154
155
156
157
158
159 public Site setCharset(String charset) {
160 this.charset = charset;
161 return this;
162 }
163
164
165
166
167
168
169 public String getCharset() {
170 return charset;
171 }
172
173
174
175
176
177
178
179
180
181
182 public Site setDefaultCharset(String defaultCharset) {
183 this.defaultCharset = defaultCharset;
184 return this;
185 }
186
187
188
189
190
191
192
193 public String getDefaultCharset() {
194 return defaultCharset;
195 }
196
197 public int getTimeOut() {
198 return timeOut;
199 }
200
201
202
203
204
205
206
207 public Site setTimeOut(int timeOut) {
208 this.timeOut = timeOut;
209 return this;
210 }
211
212
213
214
215
216
217
218
219
220
221 public Site setAcceptStatCode(Set<Integer> acceptStatCode) {
222 this.acceptStatCode = acceptStatCode;
223 return this;
224 }
225
226
227
228
229
230
231 public Set<Integer> getAcceptStatCode() {
232 return acceptStatCode;
233 }
234
235
236
237
238
239
240
241
242 public Site setSleepTime(int sleepTime) {
243 this.sleepTime = sleepTime;
244 return this;
245 }
246
247
248
249
250
251
252
253 public int getSleepTime() {
254 return sleepTime;
255 }
256
257
258
259
260
261
262 public int getRetryTimes() {
263 return retryTimes;
264 }
265
266 public Map<String, String> getHeaders() {
267 return headers;
268 }
269
270
271
272
273
274
275
276
277
278 public Site addHeader(String key, String value) {
279 headers.put(key, value);
280 return this;
281 }
282
283
284
285
286
287
288
289 public Site setRetryTimes(int retryTimes) {
290 this.retryTimes = retryTimes;
291 return this;
292 }
293
294
295
296
297
298
299 public int getCycleRetryTimes() {
300 return cycleRetryTimes;
301 }
302
303
304
305
306
307
308
309 public Site setCycleRetryTimes(int cycleRetryTimes) {
310 this.cycleRetryTimes = cycleRetryTimes;
311 return this;
312 }
313
314 public boolean isUseGzip() {
315 return useGzip;
316 }
317
318 public int getRetrySleepTime() {
319 return retrySleepTime;
320 }
321
322
323
324
325
326
327
328 public Site setRetrySleepTime(int retrySleepTime) {
329 this.retrySleepTime = retrySleepTime;
330 return this;
331 }
332
333
334
335
336
337
338
339
340 public Site setUseGzip(boolean useGzip) {
341 this.useGzip = useGzip;
342 return this;
343 }
344
345 public boolean isDisableCookieManagement() {
346 return disableCookieManagement;
347 }
348
349
350
351
352
353
354
355
356 public Site setDisableCookieManagement(boolean disableCookieManagement) {
357 this.disableCookieManagement = disableCookieManagement;
358 return this;
359 }
360
361 public Task toTask() {
362 return new Task() {
363 @Override
364 public String getUUID() {
365 String uuid = Site.this.getDomain();
366 if (uuid == null) {
367 uuid = UUID.randomUUID().toString();
368 }
369 return uuid;
370 }
371
372 @Override
373 public Site getSite() {
374 return Site.this;
375 }
376 };
377 }
378
379 @Override
380 public boolean equals(Object o) {
381 if (this == o) return true;
382 if (o == null || getClass() != o.getClass()) return false;
383
384 Site site = (Site) o;
385
386 if (cycleRetryTimes != site.cycleRetryTimes) return false;
387 if (retryTimes != site.retryTimes) return false;
388 if (sleepTime != site.sleepTime) return false;
389 if (timeOut != site.timeOut) return false;
390 if (acceptStatCode != null ? !acceptStatCode.equals(site.acceptStatCode) : site.acceptStatCode != null)
391 return false;
392 if (charset != null ? !charset.equals(site.charset) : site.charset != null) return false;
393 if (defaultCookies != null ? !defaultCookies.equals(site.defaultCookies) : site.defaultCookies != null)
394 return false;
395 if (domain != null ? !domain.equals(site.domain) : site.domain != null) return false;
396 if (headers != null ? !headers.equals(site.headers) : site.headers != null) return false;
397 if (userAgent != null ? !userAgent.equals(site.userAgent) : site.userAgent != null) return false;
398
399 return true;
400 }
401
402 @Override
403 public int hashCode() {
404 int result = domain != null ? domain.hashCode() : 0;
405 result = 31 * result + (userAgent != null ? userAgent.hashCode() : 0);
406 result = 31 * result + (defaultCookies != null ? defaultCookies.hashCode() : 0);
407 result = 31 * result + (charset != null ? charset.hashCode() : 0);
408 result = 31 * result + sleepTime;
409 result = 31 * result + retryTimes;
410 result = 31 * result + cycleRetryTimes;
411 result = 31 * result + timeOut;
412 result = 31 * result + (acceptStatCode != null ? acceptStatCode.hashCode() : 0);
413 result = 31 * result + (headers != null ? headers.hashCode() : 0);
414 return result;
415 }
416
417 @Override
418 public String toString() {
419 return "Site{" +
420 "domain='" + domain + '\'' +
421 ", userAgent='" + userAgent + '\'' +
422 ", cookies=" + defaultCookies +
423 ", charset='" + charset + '\'' +
424 ", sleepTime=" + sleepTime +
425 ", retryTimes=" + retryTimes +
426 ", cycleRetryTimes=" + cycleRetryTimes +
427 ", timeOut=" + timeOut +
428 ", acceptStatCode=" + acceptStatCode +
429 ", headers=" + headers +
430 '}';
431 }
432
433 }