1 package us.codecraft.webmagic.scripts;
2
3 import org.apache.commons.cli.*;
4
5 import us.codecraft.webmagic.ResultItems;
6 import us.codecraft.webmagic.Spider;
7 import us.codecraft.webmagic.Task;
8 import us.codecraft.webmagic.pipeline.Pipeline;
9 import us.codecraft.webmagic.scripts.config.CommandLineOption;
10 import us.codecraft.webmagic.utils.WMCollections;
11
12 import java.util.List;
13
14
15
16
17
18 public class ScriptConsole {
19 public static void main(String[] args) {
20 Params params = parseCommand(args);
21 startSpider(params);
22 }
23
24 private static void startSpider(Params params) {
25 ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
26 .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
27 pageProcessor.getSite().setSleepTime(params.getSleepTime());
28 pageProcessor.getSite().setRetryTimes(3);
29 pageProcessor.getSite().setAcceptStatCode(WMCollections.<Integer>newHashSet(200, 404,403, 500,502));
30 Spider spider = Spider.create(pageProcessor).thread(params.getThread());
31 spider.clearPipeline().addPipeline(new Pipeline() {
32 @Override
33 public void process(ResultItems resultItems, Task task) {
34
35 }
36 });
37 if (params.getUrls() == null || params.getUrls().size() == 0) {
38 System.err.println("Need at least one argument");
39 System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
40 System.exit(-1);
41 }
42 for (String url : params.getUrls()) {
43 spider.addUrl(url);
44 }
45 spider.run();
46 }
47
48
49 private static Params parseCommand(String[] args) {
50 try {
51 Options options = new Options();
52 options.addOption(new Option("l", "language", true, "language"));
53 options.addOption(new Option("t", "thread", true, "thread"));
54 options.addOption(new Option("f", "file", true, "script file"));
55 options.addOption(new Option("i", "input", true, "input file"));
56 options.addOption(new Option("s", "sleep", true, "sleep time"));
57 options.addOption(new Option("g", "logger", true, "sleep time"));
58 CommandLineParser commandLineParser = new PosixParser();
59 CommandLine commandLine = commandLineParser.parse(options, args);
60 return readOptions(commandLine);
61 } catch (Exception e) {
62 e.printStackTrace();
63 exit();
64 return null;
65 }
66 }
67
68 private static void exit() {
69 System.err.println("Format error");
70 System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
71 System.exit(-1);
72 }
73
74 private static Params readOptions(CommandLine commandLine) {
75 Params params = new Params();
76 List<CommandLineOption> options = CommandLineOption.getAllOptions();
77 for (CommandLineOption option : options)
78 option.addParamOptionIfInCommandLine(params, commandLine);
79 return params;
80 }
81 }