View Javadoc
1   package us.codecraft.webmagic.pipeline;
2   
3   import com.alibaba.fastjson.JSON;
4   import org.apache.commons.codec.digest.DigestUtils;
5   import org.apache.commons.lang3.builder.ToStringBuilder;
6   import org.slf4j.Logger;
7   import org.slf4j.LoggerFactory;
8   import us.codecraft.webmagic.Task;
9   import us.codecraft.webmagic.model.HasKey;
10  import us.codecraft.webmagic.utils.FilePersistentBase;
11  
12  import java.io.FileWriter;
13  import java.io.IOException;
14  import java.io.PrintWriter;
15  
16  /**
17   * Store results objects (page models) to files in JSON format.<br>
18   * Use model.getKey() as file name if the model implements HasKey.<br>
19   * Otherwise use SHA1 as file name.
20   *
21   * @author code4crafter@gmail.com <br>
22   * @since 0.2.0
23   */
24  public class JsonFilePageModelPipeline extends FilePersistentBase implements PageModelPipeline {
25  
26      private Logger logger = LoggerFactory.getLogger(getClass());
27  
28      /**
29       * new JsonFilePageModelPipeline with default path "/data/webmagic/"
30       */
31      public JsonFilePageModelPipeline() {
32          setPath("/data/webmagic/");
33      }
34  
35      public JsonFilePageModelPipeline(String path) {
36          setPath(path);
37      }
38  
39      @Override
40      public void process(Object o, Task task) {
41          String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
42          try {
43              String filename;
44              if (o instanceof HasKey) {
45                  filename = path + ((HasKey) o).key() + ".json";
46              } else {
47                  filename = path + DigestUtils.md5Hex(ToStringBuilder.reflectionToString(o)) + ".json";
48              }
49              PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(filename)));
50              printWriter.write(JSON.toJSONString(o));
51              printWriter.close();
52          } catch (IOException e) {
53              logger.warn("write file error", e);
54          }
55      }
56  }