View Javadoc
1   package us.codecraft.webmagic.pipeline;
2   
3   import org.apache.commons.codec.digest.DigestUtils;
4   import org.apache.commons.lang3.builder.ToStringBuilder;
5   import org.slf4j.Logger;
6   import org.slf4j.LoggerFactory;
7   import us.codecraft.webmagic.Task;
8   import us.codecraft.webmagic.model.HasKey;
9   import us.codecraft.webmagic.utils.FilePersistentBase;
10  
11  import java.io.FileWriter;
12  import java.io.IOException;
13  import java.io.PrintWriter;
14  
15  /**
16   * Store results objects (page models) to files in plain format.<br>
17   * Use model.getKey() as file name if the model implements HasKey.<br>
18   * Otherwise use SHA1 as file name.
19   *
20   * @author code4crafter@gmail.com <br>
21   * @since 0.3.0
22   */
23  public class FilePageModelPipeline extends FilePersistentBase implements PageModelPipeline {
24  
25      private Logger logger = LoggerFactory.getLogger(getClass());
26  
27      /**
28       * new JsonFilePageModelPipeline with default path "/data/webmagic/"
29       */
30      public FilePageModelPipeline() {
31          setPath("/data/webmagic/");
32      }
33  
34      public FilePageModelPipeline(String path) {
35          setPath(path);
36      }
37  
38      @Override
39      public void process(Object o, Task task) {
40          String path = this.path + PATH_SEPERATOR + task.getUUID() + PATH_SEPERATOR;
41          try {
42              String filename;
43              if (o instanceof HasKey) {
44                  filename = path + ((HasKey) o).key() + ".html";
45              } else {
46                  filename = path + DigestUtils.md5Hex(ToStringBuilder.reflectionToString(o)) + ".html";
47              }
48              PrintWriter printWriter = new PrintWriter(new FileWriter(getFile(filename)));
49              printWriter.write(ToStringBuilder.reflectionToString(o));
50              printWriter.close();
51          } catch (IOException e) {
52              logger.warn("write file error", e);
53          }
54      }
55  }