1 package us.codecraft.webmagic.model.sources; 2 3 import java.util.List; 4 5 import us.codecraft.webmagic.Page; 6 import us.codecraft.webmagic.model.FieldExtractor; 7 8 public interface Source { 9 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor); 10 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor); 11 12 public class RawHtml implements Source { 13 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 14 return page.getHtml().selectDocument(fieldExtractor.getSelector()); 15 } 16 17 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 18 return page.getHtml().selectDocumentForList(fieldExtractor.getSelector()); 19 } 20 } 21 22 public class SelectedHtml implements Source { 23 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 24 if (isRaw) 25 return page.getHtml().selectDocument(fieldExtractor.getSelector()); 26 else 27 return fieldExtractor.getSelector().select(html); 28 } 29 30 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 31 if (isRaw) 32 return page.getHtml().selectDocumentForList(fieldExtractor.getSelector()); 33 else 34 return fieldExtractor.getSelector().selectList(html); 35 } 36 } 37 38 public class Url implements Source { 39 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 40 return fieldExtractor.getSelector().select(page.getUrl().toString()); 41 } 42 43 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 44 return fieldExtractor.getSelector().selectList(page.getUrl().toString()); 45 } 46 } 47 48 public class RawText implements Source { 49 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 50 return fieldExtractor.getSelector().select(page.getRawText()); 51 } 52 53 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 54 return fieldExtractor.getSelector().selectList(page.getRawText()); 55 } 56 } 57 58 public class DefaultSource implements Source { 59 public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 60 return fieldExtractor.getSelector().select(html); 61 } 62 63 public List<String> getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) { 64 return fieldExtractor.getSelector().selectList(html); 65 } 66 } 67 } 68