// ScriptConsole.java
package us.codecraft.webmagic.scripts;
import org.apache.commons.cli.*;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.scripts.config.CommandLineOption;
import us.codecraft.webmagic.utils.WMCollections;
import java.util.List;
/**
 * Command-line entry point that runs a script-driven {@link Spider}.
 * <p>
 * The arguments are parsed with Commons CLI into a {@link Params} object, a
 * {@link ScriptProcessor} is built from the selected language and script file, and the
 * spider is then run against every URL returned by {@code Params#getUrls()}.
 *
 * @author code4crafter@gmail.com / FrancoisGib
 * @since 0.4.1
 */
public class ScriptConsole {

    /** Usage line printed to standard output whenever the command line is rejected. */
    private static final String USAGE =
            "Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]";

    /** Process exit status used for every command-line error. */
    private static final int ERROR_EXIT_CODE = -1;

    /**
     * Parses the command line and starts the spider.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        Params params = parseCommand(args);
        startSpider(params);
    }

    /**
     * Builds the script processor and the spider from {@code params} and runs the spider
     * on the calling thread until {@code Spider#run()} returns.
     * <p>
     * Terminates the JVM with the usage message when {@code params} holds no URL.
     *
     * @param params parsed command-line parameters
     */
    private static void startSpider(Params params) {
        // Validate before doing any work, so a missing URL is reported with the usage
        // message instead of surfacing only after the script processor has been built.
        if (params.getUrls() == null || params.getUrls().isEmpty()) {
            exitWithUsage("Need at least one argument");
            return;
        }

        ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
                .language(params.getLanguage())
                .scriptFromFile(params.getScriptFileName())
                .thread(params.getThread())
                .build();
        pageProcessor.getSite().setSleepTime(params.getSleepTime());
        pageProcessor.getSite().setRetryTimes(3);
        pageProcessor.getSite().setAcceptStatCode(WMCollections.<Integer>newHashSet(200, 404, 403, 500, 502));

        // The same thread count is handed to both the script processor and the spider.
        Spider spider = Spider.create(pageProcessor).thread(params.getThread());

        // Drop whatever pipelines are configured and install one that does nothing:
        // this console does not post-process ResultItems itself.
        spider.clearPipeline().addPipeline(new Pipeline() {
            @Override
            public void process(ResultItems resultItems, Task task) {
                // intentionally empty
            }
        });

        for (String url : params.getUrls()) {
            spider.addUrl(url);
        }
        spider.run();
    }

    /**
     * Parses {@code args} into a {@link Params} instance.
     * <p>
     * Any exception raised while parsing or while reading the parsed options is printed
     * and the JVM is terminated with the usage message.
     *
     * @param args raw command-line arguments
     * @return the populated parameters; {@code null} only if the JVM exit is prevented
     */
    private static Params parseCommand(String[] args) {
        try {
            // NOTE(review): PosixParser is deprecated as of Commons CLI 1.3 in favour of
            // DefaultParser - confirm the project's Commons CLI version before switching.
            CommandLineParser commandLineParser = new PosixParser();
            CommandLine commandLine = commandLineParser.parse(buildOptions(), args);
            return readOptions(commandLine);
        } catch (Exception e) {
            // Top-level boundary: show the cause, then the usage line, then exit.
            e.printStackTrace();
            exitWithUsage("Format error");
            return null;
        }
    }

    /**
     * Declares every option this console accepts; each one takes an argument.
     *
     * @return the option set handed to the command-line parser
     */
    private static Options buildOptions() {
        Options options = new Options();
        options.addOption(new Option("l", "language", true, "language"));
        options.addOption(new Option("t", "thread", true, "thread"));
        options.addOption(new Option("f", "file", true, "script file"));
        options.addOption(new Option("i", "input", true, "input file"));
        options.addOption(new Option("s", "sleep", true, "sleep time"));
        // Description used to be a copy-paste of the "sleep time" text above.
        options.addOption(new Option("g", "logger", true, "logger"));
        return options;
    }

    /**
     * Prints {@code error} to standard error and the usage line to standard output, then
     * terminates the JVM with {@link #ERROR_EXIT_CODE}.
     *
     * @param error short description of what was wrong with the command line
     */
    private static void exitWithUsage(String error) {
        System.err.println(error);
        System.out.println(USAGE);
        System.exit(ERROR_EXIT_CODE);
    }

    /**
     * Creates a fresh {@link Params} and lets every known {@link CommandLineOption}
     * copy its value into it when that option is present on the command line.
     *
     * @param commandLine the parsed command line
     * @return the populated parameters
     */
    private static Params readOptions(CommandLine commandLine) {
        Params params = new Params();
        List<CommandLineOption> options = CommandLineOption.getAllOptions();
        for (CommandLineOption option : options) {
            option.addParamOptionIfInCommandLine(params, commandLine);
        }
        return params;
    }
}