View Javadoc
1   package us.codecraft.webmagic.scripts;
2   
3   import org.apache.commons.cli.*;
4   
5   import us.codecraft.webmagic.ResultItems;
6   import us.codecraft.webmagic.Spider;
7   import us.codecraft.webmagic.Task;
8   import us.codecraft.webmagic.pipeline.Pipeline;
9   import us.codecraft.webmagic.scripts.config.CommandLineOption;
10  import us.codecraft.webmagic.utils.WMCollections;
11  
12  import java.util.List;
13  
14  /**
15   * @author code4crafter@gmail.com / FrancoisGib
16   * @since 0.4.1
17   */
18  public class ScriptConsole {
19      public static void main(String[] args) {
20          Params params = parseCommand(args);
21          startSpider(params);
22      }
23  
24      private static void startSpider(Params params) {
25          ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
26                  .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
27          pageProcessor.getSite().setSleepTime(params.getSleepTime());
28          pageProcessor.getSite().setRetryTimes(3);
29          pageProcessor.getSite().setAcceptStatCode(WMCollections.<Integer>newHashSet(200, 404,403, 500,502));
30          Spider spider = Spider.create(pageProcessor).thread(params.getThread());
31          spider.clearPipeline().addPipeline(new Pipeline() {
32              @Override
33              public void process(ResultItems resultItems, Task task) {
34  
35              }
36          });
37          if (params.getUrls() == null || params.getUrls().size() == 0) {
38              System.err.println("Need at least one argument");
39              System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
40              System.exit(-1);
41          }
42          for (String url : params.getUrls()) {
43              spider.addUrl(url);
44          }
45          spider.run();
46      }
47  
48  
49      private static Params parseCommand(String[] args) {
50          try {
51              Options options = new Options();
52              options.addOption(new Option("l", "language", true, "language"));
53              options.addOption(new Option("t", "thread", true, "thread"));
54              options.addOption(new Option("f", "file", true, "script file"));
55              options.addOption(new Option("i", "input", true, "input file"));
56              options.addOption(new Option("s", "sleep", true, "sleep time"));
57              options.addOption(new Option("g", "logger", true, "sleep time"));
58              CommandLineParser commandLineParser = new PosixParser();
59              CommandLine commandLine = commandLineParser.parse(options, args);
60              return readOptions(commandLine);
61          } catch (Exception e) {
62              e.printStackTrace();
63              exit();
64              return null;
65          }
66      }
67  
68      private static void exit() {
69          System.err.println("Format error");
70          System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
71          System.exit(-1);
72      }
73  
74      private static Params readOptions(CommandLine commandLine) {
75          Params params = new Params();
76          List<CommandLineOption> options = CommandLineOption.getAllOptions();
77          for (CommandLineOption option : options)
78              option.addParamOptionIfInCommandLine(params, commandLine);
79          return params;
80      }
81  }