View Javadoc
1   package us.codecraft.webmagic.model.sources;
2   
3   import java.util.List;
4   
5   import us.codecraft.webmagic.Page;
6   import us.codecraft.webmagic.model.FieldExtractor;
7   
8   public interface Source {
9      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor);
10     public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor);
11  
12     public class RawHtml implements Source {
13        public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
14           return page.getHtml().selectDocument(fieldExtractor.getSelector());
15        }
16     
17        public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
18           return page.getHtml().selectDocumentForList(fieldExtractor.getSelector());
19        }
20     }
21     
22     public class SelectedHtml implements Source {
23        public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
24           if (isRaw)
25              return page.getHtml().selectDocument(fieldExtractor.getSelector());
26           else
27              return fieldExtractor.getSelector().select(html);
28        }
29     
30        public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
31           if (isRaw)
32              return page.getHtml().selectDocumentForList(fieldExtractor.getSelector());
33           else
34              return fieldExtractor.getSelector().selectList(html);
35        }
36     }
37     
38     public class Url implements Source {
39        public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
40           return fieldExtractor.getSelector().select(page.getUrl().toString());
41        }
42     
43        public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
44           return fieldExtractor.getSelector().selectList(page.getUrl().toString());
45        }
46     }
47     
48     public class RawText implements Source {
49        public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
50           return fieldExtractor.getSelector().select(page.getRawText());
51        }
52     
53        public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
54           return fieldExtractor.getSelector().selectList(page.getRawText());
55        }
56     }
57     
58     public class DefaultSource implements Source {
59        public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
60           return fieldExtractor.getSelector().select(html);
61        }
62     
63        public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
64           return fieldExtractor.getSelector().selectList(html);
65        }
66     }
67  }
68