View Javadoc
1   package us.codecraft.webmagic.selector;
2   
3   
4   import java.util.ArrayList;
5   import java.util.List;
6   import org.apache.commons.collections4.CollectionUtils;
7   import org.jsoup.nodes.Element;
8   import org.jsoup.nodes.Node;
9   import org.jsoup.nodes.TextNode;
10  import org.jsoup.select.Elements;
11  
12  /**
13   * CSS selector. Based on Jsoup.
14   *
15   * @author code4crafter@gmail.com <br>
16   * @since 0.1.0
17   */
18  public class CssSelector extends BaseElementSelector {
19  
20      private String selectorText;
21  
22      private String attrName;
23  
24      public CssSelector(String selectorText) {
25          this.selectorText = selectorText;
26      }
27  
28      public CssSelector(String selectorText, String attrName) {
29          this.selectorText = selectorText;
30          this.attrName = attrName;
31      }
32  
33      private String getValue(Element element) {
34          if (attrName == null) {
35              return element.outerHtml();
36          } else if ("innerHtml".equalsIgnoreCase(attrName)) {
37              return element.html();
38          } else if ("text".equalsIgnoreCase(attrName)) {
39              return getText(element);
40          } else if ("allText".equalsIgnoreCase(attrName)) {
41              return element.text();
42          } else {
43              return element.attr(attrName);
44          }
45      }
46  
47      protected String getText(Element element) {
48          StringBuilder accum = new StringBuilder();
49          for (Node node : element.childNodes()) {
50              if (node instanceof TextNode) {
51                  TextNode textNode = (TextNode) node;
52                  accum.append(textNode.text());
53              }
54          }
55          return accum.toString();
56      }
57  
58      @Override
59      public String select(Element element) {
60          List<Element> elements = selectElements(element);
61          if (CollectionUtils.isEmpty(elements)) {
62              return null;
63          }
64          return getValue(elements.get(0));
65      }
66  
67      @Override
68      public List<String> selectList(Element doc) {
69          List<String> strings = new ArrayList<String>();
70          List<Element> elements = selectElements(doc);
71          if (CollectionUtils.isNotEmpty(elements)) {
72              for (Element element : elements) {
73                  String value = getValue(element);
74                  if (value != null) {
75                      strings.add(value);
76                  }
77              }
78          }
79          return strings;
80      }
81  
82      @Override
83      public Element selectElement(Element element) {
84          Elements elements = element.select(selectorText);
85          if (CollectionUtils.isNotEmpty(elements)) {
86              return elements.get(0);
87          }
88          return null;
89      }
90  
91      @Override
92      public List<Element> selectElements(Element element) {
93          return element.select(selectorText);
94      }
95  
96      @Override
97      public boolean hasAttribute() {
98          return attrName != null;
99      }
100 }