1 package us.codecraft.webmagic.selector;
2
3
4 import java.util.ArrayList;
5 import java.util.List;
6 import org.apache.commons.collections4.CollectionUtils;
7 import org.jsoup.nodes.Element;
8 import org.jsoup.nodes.Node;
9 import org.jsoup.nodes.TextNode;
10 import org.jsoup.select.Elements;
11
12
13
14
15
16
17
18 public class CssSelector extends BaseElementSelector {
19
20 private String selectorText;
21
22 private String attrName;
23
24 public CssSelector(String selectorText) {
25 this.selectorText = selectorText;
26 }
27
28 public CssSelector(String selectorText, String attrName) {
29 this.selectorText = selectorText;
30 this.attrName = attrName;
31 }
32
33 private String getValue(Element element) {
34 if (attrName == null) {
35 return element.outerHtml();
36 } else if ("innerHtml".equalsIgnoreCase(attrName)) {
37 return element.html();
38 } else if ("text".equalsIgnoreCase(attrName)) {
39 return getText(element);
40 } else if ("allText".equalsIgnoreCase(attrName)) {
41 return element.text();
42 } else {
43 return element.attr(attrName);
44 }
45 }
46
47 protected String getText(Element element) {
48 StringBuilder accum = new StringBuilder();
49 for (Node node : element.childNodes()) {
50 if (node instanceof TextNode) {
51 TextNode textNode = (TextNode) node;
52 accum.append(textNode.text());
53 }
54 }
55 return accum.toString();
56 }
57
58 @Override
59 public String select(Element element) {
60 List<Element> elements = selectElements(element);
61 if (CollectionUtils.isEmpty(elements)) {
62 return null;
63 }
64 return getValue(elements.get(0));
65 }
66
67 @Override
68 public List<String> selectList(Element doc) {
69 List<String> strings = new ArrayList<String>();
70 List<Element> elements = selectElements(doc);
71 if (CollectionUtils.isNotEmpty(elements)) {
72 for (Element element : elements) {
73 String value = getValue(element);
74 if (value != null) {
75 strings.add(value);
76 }
77 }
78 }
79 return strings;
80 }
81
82 @Override
83 public Element selectElement(Element element) {
84 Elements elements = element.select(selectorText);
85 if (CollectionUtils.isNotEmpty(elements)) {
86 return elements.get(0);
87 }
88 return null;
89 }
90
91 @Override
92 public List<Element> selectElements(Element element) {
93 return element.select(selectorText);
94 }
95
96 @Override
97 public boolean hasAttribute() {
98 return attrName != null;
99 }
100 }